# Source code for galahad.server.contrib.ner.spacy_ner

from typing import Optional

try:
    import spacy as spacy
    from spacy.tokens import Doc
except ImportError as error:
    print("Could not import 'spacy', please install it manually via 'pip install spacy'")

from galahad.formats import Span, build_span_classification_response
from galahad.server.annotations import Annotations
from galahad.server.classifier import (AnnotationFeatures, AnnotationTypes,
                                       Classifier)
from galahad.server.dataclasses import Document


class SpacyNerTagger(Classifier):
    """Classifier that tags named entities in a document using a spaCy model."""

    def __init__(self, model_name: str):
        """Load the spaCy pipeline used for tagging.

        Args:
            model_name: Identifier passed to ``spacy.load`` to select the model.
        """
        super().__init__()

        self._token_type = AnnotationTypes.TOKEN.value
        self._target_feature = AnnotationFeatures.VALUE.value

        # Only the "ner" pipe is invoked in `predict`, so the parser is left disabled.
        self._model = spacy.load(model_name, disable=["parser"])

    def predict(self, model_id: str, document: Document) -> Optional[Document]:
        """Run named entity recognition over the tokens of ``document``.

        The document's existing token annotations are used as the
        tokenization; spaCy's own tokenizer is not run.

        Args:
            model_id: Not used by this implementation.
            document: Document whose text and annotations are tagged.

        Returns:
            The result of ``build_span_classification_response`` for
            ``document`` and the spans of the recognized entities.
        """
        # Extract the tokens from the document and create a spacy doc from it
        annotations = Annotations.from_dict(document.text, document.annotations)
        words = [
            annotations.get_covered_text(token)
            for token in annotations.select(self._token_type)
        ]
        doc = Doc(self._model.vocab, words=words)

        # Find the named entities; the pipe sets `doc.ents` in place
        self._model.get_pipe("ner")(doc)

        # For every entity returned by spacy, create an annotation in the
        # resulting doc. NOTE(review): spaCy's `start`/`end` are token indices
        # into `words`, not character offsets - confirm this is what `Span`
        # expects.
        spans = [
            Span(named_entity.start, named_entity.end, named_entity.label_)
            for named_entity in doc.ents
        ]

        return build_span_classification_response(document, spans)