Source code for pyknp_eventgraph.sentence

from logging import getLogger
from typing import List, Optional, TYPE_CHECKING

from pyknp import BList, Tag

from pyknp_eventgraph.builder import Builder
from pyknp_eventgraph.component import Component
from pyknp_eventgraph.event import Event, EventBuilder
from pyknp_eventgraph.helper import convert_mrphs_to_surf

if TYPE_CHECKING:
    from pyknp_eventgraph.document import Document

logger = getLogger(__name__)


[docs]class Sentence(Component): """A sentence is a collection of events. Attributes: document (Document): A document that includes this sentence. sid (str): An original sentence ID. ssid (int): A serial sentence ID. blist (:class:`pyknp.knp.blist.BList`, optional): A list of bunsetsu-s. events (List[Event]): A list of events in this sentence. """ def __init__(self, document: 'Document', sid: str, ssid: int, blist: Optional[BList] = None): self.document: Document = document self.sid: str = sid self.ssid: int = ssid self.blist: BList = blist self.events: List[Event] = [] self._mrphs = None self._reps = None @property def surf(self) -> str: """A surface string.""" return convert_mrphs_to_surf(self.mrphs) @property def mrphs(self) -> str: """A tokenized surface string.""" if self._mrphs is None: self._mrphs = ' '.join(m.midasi for m in self.blist.mrph_list()) return self._mrphs @property def reps(self) -> str: """A representative string.""" if self._reps is None: self._reps = ' '.join(m.repname or f'{m.midasi}/{m.midasi}' for m in self.blist.mrph_list()) return self._reps
[docs] def to_dict(self) -> dict: """Convert this object into a dictionary.""" return dict(sid=self.sid, ssid=self.ssid, surf=self.surf, mrphs=self.mrphs, reps=self.reps)
[docs] def to_string(self) -> str: """Convert this object into a string.""" return f'<Sentence, sid: {self.sid}, ssid: {self.ssid}, surf: {self.surf}>'
class SentenceBuilder(Builder): def __call__(self, document: 'Document', blist: BList) -> Sentence: logger.debug('Create a sentence.') sentence = Sentence(document, blist.sid, Builder.ssid, blist) start: Optional[Tag] = None end: Optional[Tag] = None head: Optional[Tag] = None for tag in blist.tag_list(): if not start: start = tag if not head and '節-主辞' in tag.features: head = tag if not end and '節-区切' in tag.features: end = tag if head: EventBuilder()(sentence, start, head, end) start, end, head = None, None, None document.sentences.append(sentence) Builder.ssid += 1 # Make this sentence and its components accessible from builders. for bid, bnst in enumerate(blist.bnst_list()): for tag in bnst.tag_list(): Builder.stid_bid_map[(sentence.ssid, tag.tag_id)] = bid Builder.stid_tag_map[(sentence.ssid, tag.tag_id)] = tag logger.debug('Successfully created a sentence.') return sentence class JsonSentenceBuilder(Builder): def __call__(self, document: 'Document', dump: dict) -> Sentence: logger.debug('Create a sentence.') sentence = Sentence(document, dump['sid'], dump['ssid']) sentence._mrphs = dump['mrphs'] sentence._reps = dump['reps'] document.sentences.append(sentence) Builder.ssid += 1 logger.debug('Successfully created a sentence.') return sentence