Source code for galahad.server.annotations

from collections import defaultdict
from typing import Any, Dict, List, Tuple

from sortedcontainers import SortedKeyList

from galahad.server.dataclasses import Annotation, Document


[docs]class Annotations: def __init__(self, text: str): self._text = text self._index: Dict[str, SortedKeyList] = defaultdict(lambda: SortedKeyList(key=_sort_func))
[docs] @staticmethod def from_dict(text: str, annotations: Dict[str, List[Annotation]]) -> "Annotations": result = Annotations(text) for type_name, annotations_for_type in annotations.items(): result._index[type_name].update(annotations_for_type) return result
[docs] @staticmethod def from_document(document: Document) -> "Annotations": result = Annotations(document.text) for type_name, annotations in document.annotations.items(): for annotation in annotations: result.create_annotation(type_name, annotation.begin, annotation.end, annotation.features) return result
[docs] def to_dict(self) -> Dict[str, List[Dict[str, Any]]]: result = defaultdict(list) for type_name, annotations in self._index.items(): for annotation in annotations: result[type_name].append( {"begin": annotation.begin, "end": annotation.end, "features": annotation.features} ) return result
[docs] def get_annotations(self) -> Dict[str, List[Annotation]]: return {type_name: list(annos) for type_name, annos in self._index.items()}
[docs] def create_annotation(self, type_name, begin: int, end: int, features: Dict[str, Any] = None) -> Annotation: if features is None: features = {} annotation = Annotation(begin=begin, end=end, features=features) self._index[type_name].add(annotation) return annotation
[docs] def get_covered_text(self, annotation: Annotation) -> str: return self._text[annotation.begin : annotation.end]
[docs] def select(self, type_name: str) -> List[Annotation]: return list(self._index[type_name])
[docs] def select_covered(self, type_name: str, covering_annotation: Annotation) -> List[Annotation]: """Returns a list of covered annotations. Return all annotations that are covered Only returns annotations that are fully covered, overlapping annotations are ignored. Args: type_name: The type name of the annotations to be returned. covering_annotation: The name of the annotation which covers. Returns: A list of covered annotations """ c_begin = covering_annotation.begin c_end = covering_annotation.end result = [] for annotation in self._get_feature_structures_in_range(type_name, c_begin, c_end): if annotation.begin >= c_begin and annotation.end <= c_end: result.append(annotation) return result
def _get_feature_structures_in_range(self, type_name: str, begin: int, end: int) -> List[Annotation]: """Returns a list of all feature structures of type `type_name`. Only features are returned that are in [begin, end] or close to it. If you use this function, you should always check bound in the calling method. """ annotations = self._index[type_name] # We use binary search to find indices for the first and last annotations that are inside # the window of [begin, end]. idx_begin = annotations.bisect_key_left((begin, begin)) idx_end = annotations.bisect_key_right((end, end)) return annotations[idx_begin:idx_end] @property def text(self) -> str: return self._text
def _sort_func(a: Annotation) -> Tuple[int, int]: return a.begin, a.end