# Source code for dataio.validate.contracts.models

from __future__ import annotations

from datetime import UTC, datetime
from enum import StrEnum
from typing import Any, Literal

from pydantic import BaseModel, ConfigDict, Field, model_validator


[docs] class DatasetKind(StrEnum): TABULAR = "tabular" GEOJSON = "geojson"
class ManifestField(BaseModel):
    """One field entry of a manifest data dictionary.

    Unknown keys are accepted (``extra="allow"``). Cross-field rules are
    enforced after normal field validation by :meth:`validate_contract`.
    """

    model_config = ConfigDict(extra="allow")

    # Declared type name; "enum", "date" and "dateTime" trigger extra checks.
    type: str
    description: str | None = None
    nullable: bool = True
    # strftime pattern; mandatory when ``type`` is "date" or "dateTime".
    format: str | None = None
    # An "enum" field must supply at least one of the next two.
    allowedValues: list[Any] | None = None
    enumRef: str | None = None
    # ``range`` may not be set together with ``min`` or ``max``.
    range: list[float] | None = None
    min: float | None = None
    max: float | None = None
    comments: str | list[str] | None = None

    @model_validator(mode="after")
    def validate_contract(self) -> ManifestField:
        """Check the cross-field rules of a field definition.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If ``range`` is combined with ``min``/``max``; if an
                "enum" field has neither ``allowedValues`` nor ``enumRef``;
                if a "date"/"dateTime" field has no ``format``, a ``format``
                without any ``%`` directive, or one that fails a
                strftime/strptime round trip; or if a "dateTime" ``format``
                lacks ``%z``.
        """
        has_bounds = not (self.min is None and self.max is None)
        if self.range is not None and has_bounds:
            raise ValueError("range cannot be combined with min or max")

        lacks_enum_source = self.allowedValues is None and self.enumRef is None
        if self.type == "enum" and lacks_enum_source:
            raise ValueError("enum fields must define allowedValues or enumRef")

        # Everything below only concerns temporal field types.
        if self.type not in {"date", "dateTime"}:
            return self

        fmt = self.format
        if not fmt:
            raise ValueError(f"{self.type} fields must define format")
        if "%" not in fmt:
            raise ValueError(
                f"{self.type} format must use strftime directives like %Y or %Y-%m-%d"
            )

        # Round-trip a fixed timezone-aware timestamp through the pattern; a
        # ValueError from either call means the pattern is not usable.
        probe = datetime(2024, 3, 13, 12, 30, 45, tzinfo=UTC)
        try:
            datetime.strptime(probe.strftime(fmt), fmt)
        except ValueError as exc:
            raise ValueError(
                f"{self.type} format must be a valid strftime format"
            ) from exc

        if self.type == "dateTime" and "%z" not in fmt:
            raise ValueError("dateTime formats must include timezone information via %z")
        return self
class ManifestTable(BaseModel):
    """A table entry of a dataset manifest.

    Unknown keys are accepted (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    description: str | None = None
    path: str | None = None
    required: bool = True
    # Field definitions keyed by name; defaults to a fresh empty dict.
    dataDictionary: dict[str, ManifestField] = Field(default_factory=dict)
class EnumValueDefinition(BaseModel):
    """A single value inside an :class:`EnumDefinition`.

    Only an optional description is declared; unknown keys are accepted
    (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    description: str | None = None
class EnumDefinition(BaseModel):
    """A named enumeration declared in a manifest.

    Unknown keys are accepted (``extra="allow"``).
    """

    model_config = ConfigDict(extra="allow")

    description: str | None = None
    # Value definitions keyed by string; defaults to a fresh empty dict.
    values: dict[str, EnumValueDefinition] = Field(default_factory=dict)
class DatasetManifest(BaseModel):
    """Top-level dataset manifest document.

    Unknown keys are accepted (``extra="allow"``). Structural rules that
    span several fields are enforced by :meth:`validate_shape`.
    """

    model_config = ConfigDict(extra="allow")

    # Required descriptive fields.
    metadataSpecVersion: str
    datasetTitle: str
    datasetSlug: str
    datasetDescription: str
    source: Any
    # Both mappings must contain the keys "ID" and "name".
    category: dict[str, Any]
    collection: dict[str, Any]

    # Optional fields.
    datasetID: str | None = None
    datasetKind: DatasetKind = DatasetKind.TABULAR
    # Must be non-empty when ``datasetKind`` is tabular.
    datasetTables: dict[str, ManifestTable] = Field(default_factory=dict)
    enumDefinitions: dict[str, EnumDefinition] = Field(default_factory=dict)
    specs: list[str] = Field(default_factory=list)

    @model_validator(mode="after")
    def validate_shape(self) -> DatasetManifest:
        """Check the structural rules of the manifest.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: If ``category`` or ``collection`` lacks an "ID" or
                "name" key, or if a tabular manifest defines no
                ``datasetTables``.
        """
        identity_keys = {"ID", "name"}

        if not identity_keys.issubset(self.category):
            raise ValueError("category must define ID and name")
        if not identity_keys.issubset(self.collection):
            raise ValueError("collection must define ID and name")

        is_tabular = self.datasetKind == DatasetKind.TABULAR
        if is_tabular and not self.datasetTables:
            raise ValueError("tabular manifests must define datasetTables")
        return self
class ValidationRequest(BaseModel):
    """Parameters of a single validation run.

    The model is configured with ``arbitrary_types_allowed=True``.
    """

    model_config = ConfigDict(arbitrary_types_allowed=True)

    # What to validate.
    dataset_kind: DatasetKind
    # Accepted as text, bytes, a plain mapping, or a DatasetManifest instance.
    manifest_source: str | bytes | dict[str, Any] | DatasetManifest
    data: str | bytes | dict[str, Any] | None = None
    data_files: dict[str, str] = Field(default_factory=dict)
    enabled_specs: list[str] = Field(default_factory=list)

    # Option flags and limits.
    strict: bool = False
    deep_check: bool = False
    validate_data: bool = True
    full_scan: bool = True
    max_rows: int | None = None
    # One of "warn", "error" or "ignore"; defaults to "warn".
    extra_column_policy: Literal["warn", "error", "ignore"] = "warn"