# Source code for galahad.server.dataclasses

from typing import Any, Dict, List

from pydantic import BaseModel, Field


# A single annotation: a span plus an optional bag of feature values.
class Annotation(BaseModel):
    # Span boundaries. The example payloads in this module use them as offsets
    # into the document text with an exclusive end ("Joe" is begin=0, end=3).
    begin: int
    end: int
    # Feature name -> value; every instance gets its own fresh empty dict by default.
    features: Dict[str, Any] = Field(default_factory=dict)


# A layer is the list of annotations belonging to one annotation type.
Layer = List[Annotation]
# Mapping from a type name (e.g. "t.token" in the examples below) to its layer.
Layers = Dict[str, Layer]


# Datasets


# Model holding a list of dataset names.
class DatasetList(BaseModel):
    names: List[str]  # Dataset names

    class Config:
        # Example payload attached to the generated JSON schema.
        schema_extra = {"example": {"names": ["dataset1", "dataset2", "dataset3"]}}


# A text document together with its annotation layers and a version counter.
class Document(BaseModel):
    text: str  # Document text
    # The annotations in the document, one layer per type; start and end
    # offsets index into `text`. `Layers` is the module-level alias for
    # Dict[str, Layer].
    annotations: Layers
    version: int = Field(default=0)  # Version of the document, needs to be monotonically increasing

    class Config:
        # Example payload attached to the generated JSON schema.
        schema_extra = {
            "example": {
                "text": "Joe waited for the train . The train was late .",
                "version": 23,
                "annotations": {
                    "t.token": [
                        {"begin": 0, "end": 3},
                        {"begin": 4, "end": 10},
                        {"begin": 11, "end": 14},
                        {"begin": 15, "end": 18},
                        {"begin": 19, "end": 24},
                        {"begin": 25, "end": 26},
                        {"begin": 27, "end": 30},
                        {"begin": 31, "end": 36},
                        {"begin": 37, "end": 40},
                        {"begin": 41, "end": 45},
                        {"begin": 46, "end": 47},
                    ],
                    "t.sentence": [
                        {"begin": 0, "end": 26},
                        {"begin": 27, "end": 47},
                    ],
                    "t.named_entity": [
                        {"begin": 0, "end": 3, "features": {"f.value": "PER"}},
                    ],
                },
            }
        }


# Model listing document names alongside document versions.
class DocumentList(BaseModel):
    names: List[str]  # Document names
    # Document versions; presumably versions[i] belongs to names[i] (the
    # example has equal-length lists) -- TODO confirm against the producer.
    versions: List[int]

    class Config:
        # Example payload attached to the generated JSON schema.
        schema_extra = {
            "example": {
                "names": ["document1.xmi", "document2.txt", "document3.pdf"],
                "versions": [7, 6, 7],
            }
        }


# Classifier


# Descriptive information about a classifier; currently only its name.
class ClassifierInfo(BaseModel):
    name: str  # Classifier name

    class Config:
        # Example payload attached to the generated JSON schema.
        schema_extra = {"example": {"name": "ExampleClassifier"}}


# Training


# Request body for training; carries only free-form metadata.
class TrainingRequest(BaseModel):
    # Arbitrary key -> value settings. Presumably the same kind of keys as the
    # PredictionRequest example (token_type, target_type, ...) -- TODO confirm.
    metadata: Dict[str, Any]


# Predicting


# Request body for prediction: metadata, the text, and its existing annotation layers.
class PredictionRequest(BaseModel):
    # Arbitrary key -> value settings; the example passes token_type,
    # target_type and target_feature.
    metadata: Dict[str, Any]
    text: str  # Text to run the prediction on
    # Annotation layers keyed by type name. `Layers` is the module-level alias
    # for Dict[str, Layer].
    data: Layers

    class Config:
        # Example payload attached to the generated JSON schema.
        schema_extra = {
            "example": {
                "metadata": {
                    "token_type": "t.token",
                    "target_type": "t.ner",
                    "target_feature": "label",
                },
                "text": "Joe waited for the train . The train was late .",
                "data": {
                    "t.token": [
                        {"begin": 0, "end": 3},
                        {"begin": 4, "end": 10},
                        {"begin": 11, "end": 14},
                        {"begin": 15, "end": 18},
                        {"begin": 19, "end": 24},
                        {"begin": 25, "end": 26},
                        {"begin": 27, "end": 30},
                        {"begin": 31, "end": 36},
                        {"begin": 37, "end": 40},
                        {"begin": 41, "end": 45},
                        {"begin": 46, "end": 47},
                    ],
                    "t.sentence": [
                        {"begin": 0, "end": 26},
                        {"begin": 27, "end": 47},
                    ],
                },
            }
        }


# Response body for prediction: the annotation layers that were produced.
class PredictionResponse(BaseModel):
    # Annotation layers keyed by type name. `Layers` is the module-level alias
    # for Dict[str, Layer].
    data: Layers

    class Config:
        # Example payload attached to the generated JSON schema.
        schema_extra = {
            "example": {
                "data": {
                    "t.ner": [
                        {"begin": 0, "end": 3},
                    ]
                }
            }
        }