Source code for pyknp_eventgraph.eventgraph

import json
import pickle
from logging import getLogger
from typing import List, Union, TextIO, BinaryIO, Optional

from pyknp import BList

from pyknp_eventgraph.builder import Builder
from pyknp_eventgraph.component import Component
from pyknp_eventgraph.document import Document, DocumentBuilder, JsonDocumentBuilder
from pyknp_eventgraph.event import Event
from pyknp_eventgraph.relation import Relation, RelationsBuilder, JsonRelationBuilder
from pyknp_eventgraph.sentence import Sentence
from pyknp_eventgraph.base_phrase import BasePhraseBuilder

logger = getLogger(__name__)


class EventGraph(Component):
    """EventGraph provides a high-level interface that facilitates NLP application development.

    The core concept of EventGraph is event, a language information unit that is closely related to
    predicate-argument structure but more application-oriented. Events are linked to each other based
    on their syntactic and semantic relations.

    Attributes:
        document (Document): A document on which this EventGraph is built.
    """

    def __init__(self):
        # None until a document is assigned to this EventGraph.
        self.document: Optional[Document] = None

    @classmethod
    def build(cls, blist: List[BList]) -> 'EventGraph':
        """Build an EventGraph from language analysis by KNP.

        Args:
            blist: A list of bunsetsu lists, each of which is a result of analysis performed by KNP
                on a sentence.

        Returns:
            The EventGraph produced by ``EventGraphBuilder``.

        Example::

            from pyknp import KNP
            from pyknp_eventgraph import EventGraph

            # Parse a document.
            document = ['彼女は海外勤務が長いので、英語がうまいに違いない。', '私はそう確信していた。']
            knp = KNP()
            blists = [knp.parse(sentence) for sentence in document]

            # Build an EventGraph.
            evg = EventGraph.build(blists)

        """
        return EventGraphBuilder()(blist)

    @classmethod
    def load(cls, f: Union[TextIO, BinaryIO], binary: bool = False) -> 'EventGraph':
        """Deserialize an EventGraph.

        Args:
            f: An open file object; opened in binary mode when ``binary`` is true, in text mode otherwise.
            binary: If true, deserialize an EventGraph using Python's pickle utility. Otherwise, deserialize
                an EventGraph using Python's json utility.

        Returns:
            The deserialized EventGraph.

        Example::

            from pyknp_eventgraph import EventGraph

            # Load an EventGraph serialized in a JSON format.
            with open('evg.json', 'r', encoding='utf-8') as f:
                evg = EventGraph.load(f, binary=False)

            # Load an EventGraph serialized by Python's pickle utility.
            with open('evg.pkl', 'rb') as f:
                evg = EventGraph.load(f, binary=True)

        Caution:
            EventGraph deserialized from a JSON file loses several functionality. To keep full functionality,
            use Python's pickle utility for serialization.

            Unpickling can execute arbitrary code. Only use ``binary=True`` with files from a trusted source.

        """
        if binary:
            # SECURITY: this path ends in pickle.load(); never feed it untrusted data.
            return PickleEventGraphBuilder()(f)
        return JsonEventGraphBuilder()(f)

    def save(self, path: str, binary: bool = False) -> None:
        """Save this EventGraph.

        Args:
            path: An output file path.
            binary: If true, serialize this EventGraph using Python's pickle utility. Otherwise, serialize
                this EventGraph using Python's json utility. JSON output is always written as UTF-8.

        Caution:
            EventGraph serialized as JSON loses several functionality when it is loaded back. To keep full
            functionality, use Python's pickle utility for serialization.

        """
        if binary:
            with open(path, 'wb') as f:
                pickle.dump(self, f)
        else:
            logger.info('EventGraph deserialized from a JSON file loses several functionality. '
                        'To keep full functionality, use Python\'s pickle utility for serialization. '
                        'For details, refer to https://pyknp-eventgraph.readthedocs.io/en/latest/reference/'
                        'eventgraph.html#pyknp_eventgraph.eventgraph.EventGraph.save.')
            # ensure_ascii=False writes non-ASCII characters verbatim, so the encoding must be pinned:
            # the locale default (e.g. cp1252) may be unable to encode them or would yield non-UTF-8 JSON.
            with open(path, 'w', encoding='utf-8') as f:
                json.dump(self.to_dict(), f, ensure_ascii=False, indent=8)

    @property
    def sentences(self) -> List[Sentence]:
        """A list of sentences."""
        return list(self.document.sentences)

    @property
    def events(self) -> List[Event]:
        """A list of events."""
        return [event for sentence in self.sentences for event in sentence.events]

    @property
    def relations(self) -> List[Relation]:
        """A list of relations."""
        return [relation for event in self.events for relation in event.outgoing_relations]

    def to_dict(self) -> dict:
        """Convert this object into a dictionary.

        Returns:
            A dictionary with the keys ``sentences`` and ``events``, each holding the ``to_dict()``
            output of the corresponding objects.
        """
        return dict(
            sentences=[sentence.to_dict() for sentence in self.sentences],
            events=[event.to_dict() for event in self.events]
        )

    def to_string(self) -> str:
        """Convert this object into a string.

        Returns:
            A one-line summary with the numbers of sentences, events, and relations.
        """
        return f'<EventGraph, ' \
               f'#sentences: {len(self.sentences)}, ' \
               f'#events: {len(self.events)}, ' \
               f'#relations: {len(self.relations)}>'
class EventGraphBuilder(Builder):
    """Builds an EventGraph from KNP analysis results."""

    def __call__(self, blists: List[BList]) -> EventGraph:
        """Construct an EventGraph from a list of bunsetsu lists.

        Args:
            blists: A list of bunsetsu lists, one per sentence.

        Returns:
            The constructed EventGraph.
        """
        logger.debug('Create an EventGraph.')

        Builder.reset()
        graph = EventGraph()

        # Step 1: attach a document to the graph.
        # (A document is a collection of sentences; a sentence is a collection of events.)
        DocumentBuilder()(graph, blists)

        # Step 2: attach basic phrases to every event.
        # This has to wait until the document exists, because inter-sentential cataphora
        # resolution may give an event a basic phrase.
        for target in graph.events:
            BasePhraseBuilder()(target)

        # Step 3: attach event-to-event relations; this also requires the finished document.
        for target in graph.events:
            RelationsBuilder()(target)

        logger.debug('Successfully created an EventGraph.')
        logger.debug(graph)
        return graph


class PickleEventGraphBuilder(Builder):
    """Restores an EventGraph from a pickled file."""

    def __call__(self, f: BinaryIO) -> EventGraph:
        """Unpickle an EventGraph from a binary file object.

        Args:
            f: A file object opened in binary mode.

        Returns:
            The unpickled EventGraph.
        """
        logger.debug('Create an EventGraph by loading a pickled file.')

        # SECURITY: pickle.load can execute arbitrary code; the input must be trusted.
        restored = pickle.load(f)
        # NOTE(review): this type check is an assert, so it is skipped when Python runs with -O.
        assert isinstance(restored, EventGraph)

        logger.debug('Successfully created an EventGraph.')
        logger.debug(restored)
        return restored


class JsonEventGraphBuilder(Builder):
    """Restores an EventGraph from a JSON dump."""

    def __call__(self, f: TextIO) -> EventGraph:
        """Rebuild an EventGraph from a JSON file object.

        Args:
            f: A file object opened in text mode.

        Returns:
            The rebuilt EventGraph.
        """
        logger.debug('Create an EventGraph by loading a JSON file.')
        caution = (
            'EventGraph deserialized from a JSON file loses several functionality. '
            "To keep full functionality, use Python's pickle utility for serialization. "
            'For details, refer to https://pyknp-eventgraph.readthedocs.io/en/latest/reference/eventgraph.html'
            '#pyknp_eventgraph.eventgraph.EventGraph.load.'
        )
        logger.info(caution)

        Builder.reset()
        payload = json.load(f)
        graph = EventGraph()

        # Attach a document to the graph.
        # (A document is a collection of sentences; a sentence is a collection of events.)
        JsonDocumentBuilder()(graph, payload)

        # Attach event-to-event relations: the event owning a "rel" entry is the modifier,
        # and the event the entry points at is the head.
        for event_entry in payload['events']:
            for relation_entry in event_entry['rel']:
                source_id = event_entry['event_id']
                target_id = relation_entry['event_id']
                JsonRelationBuilder()(source_id, target_id, relation_entry)

        logger.debug('Successfully created an EventGraph.')
        logger.debug(graph)
        return graph