# Source code for galahad.server.dataclasses

from typing import Any, Dict, List

from pydantic import BaseModel, Field


# A single annotation: a span described by two integer offsets plus a bag of
# free-form features.  In this module's example payloads the offsets index
# into the document text and `end` is exclusive ("Joe" at the very start of
# the text is annotated as 0-3).
# NOTE: documented with comments rather than a docstring, because pydantic
# copies a model's docstring into the generated schema as its description.
class Annotation(BaseModel):
    begin: int  # offset at which the span starts
    end: int  # offset at which the span stops
    # Arbitrary key/value features; each instance gets its own fresh empty
    # dict when none are supplied.
    features: Dict[str, Any] = Field(default_factory=dict)
# A layer is a list of annotations.
Layer = List[Annotation]
# Several layers addressed by a string key; the example payloads in this
# module use annotation type names such as "t.token" as keys.
Layers = Dict[str, Layer]


# Datasets
# Model carrying a list of dataset names.
class DatasetList(BaseModel):
    names: List[str]  # one entry per dataset

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "names": ["dataset1", "dataset2", "dataset3"],
            },
        }
# A document: its raw text, the annotation layers over that text, and a
# version counter.
class Document(BaseModel):
    # Document text
    text: str
    # The annotations in the document: one layer (list of annotations) per
    # annotation type; begin and end offsets index into `text`.
    annotations: Dict[str, Layer]
    # Version of the document, needs to be monotonically increasing.
    # Defaults to 0 when not given.
    version: int = Field(default=0)

    class Config:
        # Sample payload attached to the generated schema: a tokenised
        # two-sentence text with one named entity.
        schema_extra = {
            "example": {
                "text": "Joe waited for the train . The train was late .",
                "version": 23,
                "annotations": {
                    "t.token": [
                        {"begin": 0, "end": 3},
                        {"begin": 4, "end": 10},
                        {"begin": 11, "end": 14},
                        {"begin": 15, "end": 18},
                        {"begin": 19, "end": 24},
                        {"begin": 25, "end": 26},
                        {"begin": 27, "end": 30},
                        {"begin": 31, "end": 36},
                        {"begin": 37, "end": 40},
                        {"begin": 41, "end": 45},
                        {"begin": 46, "end": 47},
                    ],
                    "t.sentence": [
                        {"begin": 0, "end": 26},
                        {"begin": 27, "end": 47},
                    ],
                    "t.named_entity": [
                        {"begin": 0, "end": 3, "features": {"f.value": "PER"}},
                    ],
                },
            },
        }
# Model carrying document names together with version numbers.
# NOTE(review): the example payload suggests `versions[i]` belongs to
# `names[i]`; the model itself does not enforce equal lengths - confirm
# against the callers.
class DocumentList(BaseModel):
    names: List[str]  # document names
    versions: List[int]  # document versions

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "names": ["document1.xmi", "document2.txt", "document3.pdf"],
                "versions": [7, 6, 7],
            },
        }
# Classifier
# Descriptive information about a classifier; currently only its name.
class ClassifierInfo(BaseModel):
    name: str  # name of the classifier

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "name": "ExampleClassifier",
            },
        }
# Training
# Payload of a training request.  It carries nothing but a free-form
# metadata mapping; no keys are prescribed by the model.
class TrainingRequest(BaseModel):
    metadata: Dict[str, Any]  # arbitrary string-keyed settings
# Predicting
# Payload of a prediction request: free-form metadata, the text to predict
# on, and the annotation layers that already exist for that text.
class PredictionRequest(BaseModel):
    # Arbitrary string-keyed settings; the example payload uses
    # "token_type", "target_type" and "target_feature".
    metadata: Dict[str, Any]
    # Text the annotations in `data` refer to.
    text: str
    # Annotation layers, keyed by a string (type names in the example).
    data: Dict[str, Layer]

    class Config:
        # Sample payload attached to the generated schema: a tokenised
        # two-sentence text together with its sentence layer.
        schema_extra = {
            "example": {
                "metadata": {
                    "token_type": "t.token",
                    "target_type": "t.ner",
                    "target_feature": "label",
                },
                "text": "Joe waited for the train . The train was late .",
                "data": {
                    "t.token": [
                        {"begin": 0, "end": 3},
                        {"begin": 4, "end": 10},
                        {"begin": 11, "end": 14},
                        {"begin": 15, "end": 18},
                        {"begin": 19, "end": 24},
                        {"begin": 25, "end": 26},
                        {"begin": 27, "end": 30},
                        {"begin": 31, "end": 36},
                        {"begin": 37, "end": 40},
                        {"begin": 41, "end": 45},
                        {"begin": 46, "end": 47},
                    ],
                    "t.sentence": [
                        {"begin": 0, "end": 26},
                        {"begin": 27, "end": 47},
                    ],
                },
            },
        }
# Payload of a prediction response: annotation layers keyed by a string
# (a type name such as "t.ner" in the example).
class PredictionResponse(BaseModel):
    data: Dict[str, Layer]  # annotation layers

    class Config:
        # Sample payload attached to the generated schema.
        schema_extra = {
            "example": {
                "data": {
                    "t.ner": [
                        {"begin": 0, "end": 3},
                    ],
                },
            },
        }