# Source code for galahad.server.contrib.pos.spacy_pos

from typing import Optional

try:
    import spacy as spacy
    from spacy.tokens import Doc
except ImportError as error:
    print("Could not import 'spacy', please install it manually via 'pip install spacy'")

from galahad.formats import build_token_labeling_response
from galahad.server.annotations import Annotations
from galahad.server.classifier import (AnnotationFeatures, AnnotationTypes,
                                       Classifier)
from galahad.server.dataclasses import Document


class SpacyPosTagger(Classifier):
    """Classifier that labels the tokens of a document with spaCy tags.

    The tokens already annotated on the incoming document are reused as-is;
    spaCy's own tokenizer is never invoked, so the predicted tags line up
    one-to-one with the document's token annotations.
    """

    def __init__(self, model_name: str):
        """Load the spaCy pipeline used for tagging.

        Args:
            model_name: Passed straight to ``spacy.load`` to select the
                pipeline to load.
        """
        super().__init__()

        self._token_type = AnnotationTypes.TOKEN.value
        self._target_feature = AnnotationFeatures.VALUE.value

        # The parser component is disabled when loading the pipeline.
        self._model = spacy.load(model_name, disable=["parser"])

    def predict(self, model_id: str, document: Document) -> Optional[Document]:
        """Tag every token annotation of ``document``.

        Args:
            model_id: Not used by this implementation.
            document: Document whose text and annotations provide the tokens
                to tag.

        Returns:
            The result of ``build_token_labeling_response`` called with the
            document and one tag string per token.
        """
        # Extract the tokens from the document and create a spacy doc from it
        annotations = Annotations.from_dict(document.text, document.annotations)
        words = [
            annotations.get_covered_text(token)
            for token in annotations.select(self._token_type)
        ]
        spacy_doc = Doc(self._model.vocab, words=words)

        # Apply only the two components needed here instead of the whole
        # pipeline. NOTE(review): presumably the tagger consumes the output
        # of "tok2vec", which is why it runs first; a pipeline without a
        # component of that name would fail in get_pipe - confirm against
        # the models in use.
        self._model.get_pipe("tok2vec")(spacy_doc)
        self._model.get_pipe("tagger")(spacy_doc)

        # Iterating a Doc yields its tokens in order; ``tag_`` is the string
        # form of the tag assigned by the tagger.
        list_of_pos_tags = [token.tag_ for token in spacy_doc]

        return build_token_labeling_response(document, list_of_pos_tags)