Module dynamicio.config.pydantic.table_schema
This module defines Config schema for data source (pandas dataframe)
Expand source code
# pylint: disable=no-member, no-self-argument, unused-argument
"""This module defines Config schema for data source (pandas dataframe)"""
import enum
from typing import Mapping, Sequence
import pydantic
from pandas.core.dtypes.common import pandas_dtype
@enum.unique
class MetricsName(str, enum.Enum):
    """The list of valid metrics names.

    Each member maps a lowercase, config-facing identifier to the
    display name used for that metric (e.g. ``stddev`` -> ``"Std"``).
    Inherits from ``str`` so members compare equal to their values.
    """

    # pylint: disable=invalid-name
    # Lowercase member names intentionally shadow builtins (min/max) so the
    # YAML config can use natural keys; hence the pylint disable above.
    min = "Min"
    max = "Max"
    mean = "Mean"
    stddev = "Std"
    variance = "Variance"
    counts = "Counts"
    counts_per_label = "CountsPerLabel"
    unique_counts = "UniqueCounts"
class ColumnValidationBase(pydantic.BaseModel):
    """A single column validator.

    Instances are built from the YAML config after
    ``SchemaColumn.remap_validations`` flattens the
    ``{validation_name: params}`` mapping into a list, injecting each
    mapping key as ``name``.
    """

    # The validation's identifier (the key it was listed under in the config).
    name: str
    # Whether the validation is enabled — presumably toggles execution; confirm against caller.
    apply: bool
    # Free-form options for the validation; values are left untyped (object).
    options: Mapping[str, object]
class SchemaColumn(pydantic.BaseModel):
    """Definition of a single data source column.

    The column knows its own ``name``, its pandas ``data_type`` (parsed from
    the ``type`` key in YAML), the validations to run against it, and the
    metrics to compute for it.
    """

    name: str
    data_type: str = pydantic.Field(alias="type")
    validations: Sequence[ColumnValidationBase] = pydantic.Field(default_factory=list)
    metrics: Sequence[MetricsName] = ()

    @pydantic.validator("data_type")
    def is_valid_pandas_type(cls, value):
        """Checks that the data_type is understood by pandas."""
        try:
            pandas_dtype(value)
        except TypeError:
            # Suppress the original TypeError: pydantic reports ValueError cleanly.
            raise ValueError(f"Unexpected data type {value}") from None
        return value

    @pydantic.validator("validations", pre=True)
    def remap_validations(cls, value):
        """Remap the yaml structure of {validation_type: <params>} to a list with validation_type as a key"""
        if not isinstance(value, dict):
            raise ValueError(f"{value!r} should be a dict")
        remapped = []
        for validation_name, validation_params in value.items():
            # Inject the mapping key as the validation's "name" field.
            entry = dict(validation_params)
            entry["name"] = validation_name
            remapped.append(entry)
        return remapped

    @pydantic.validator("metrics", pre=True, always=True)
    def validate_metrics(cls, value):
        """Remap any false-ish `metrics` value to an empty list."""
        return value if value else []
class DataframeSchema(pydantic.BaseModel):
    """Pydantic model describing the tabular data provided by the data source.

    Maps each column name to its :class:`SchemaColumn` definition and exposes
    convenience views over the per-column validations and metrics.
    """

    name: str
    columns: Mapping[str, SchemaColumn]

    @pydantic.validator("columns", pre=True)
    def supply_column_names(cls, info):
        """Tell each column its name (the key it is listed under).

        Raises:
            ValueError: if the raw ``columns`` value is not a mapping.
        """
        if not isinstance(info, Mapping):
            # Fixed typo in the error message ("shoudl" -> "should").
            raise ValueError(f"{info!r} should be a dict.")
        # Keys are coerced to str; an explicit "name" inside col_data still wins
        # because it is spread after the injected default.
        return {str(col_name): {"name": str(col_name), **col_data} for col_name, col_data in info.items()}

    @property
    def validations(self) -> Mapping[str, Sequence[ColumnValidationBase]]:
        """A short-hand property to access the validators for each column."""
        return {col_name: col.validations for col_name, col in self.columns.items()}

    @property
    def metrics(self) -> Mapping[str, Sequence[MetricsName]]:
        """A short-hand property to access the metrics for each column."""
        return {col_name: col.metrics for col_name, col in self.columns.items()}

    @property
    def column_names(self) -> Sequence[str]:
        """Property providing the list of all column names (as a tuple)."""
        return tuple(self.columns.keys())
Classes
class ColumnValidationBase (**data: Any)
-
A single column validator.
Create a new model by parsing and validating input data from keyword arguments.
Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Expand source code
class ColumnValidationBase(pydantic.BaseModel): """A single column validator.""" name: str apply: bool options: Mapping[str, object]
Ancestors
- pydantic.main.BaseModel
Class variables
var apply : bool
var model_computed_fields
var model_config
var model_fields
var name : str
var options : Mapping[str, object]
class DataframeSchema (**data: Any)
-
Pydantic model describing the tabular data provided by the data source.
Create a new model by parsing and validating input data from keyword arguments.
Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Expand source code
class DataframeSchema(pydantic.BaseModel): """Pydantic model describing the tabular data provided by the data source.""" name: str columns: Mapping[str, SchemaColumn] @pydantic.validator("columns", pre=True) def supply_column_names(cls, info): """Tell each column its name (the key it is listed under)""" if not isinstance(info, Mapping): raise ValueError(f"{info!r} shoudl be a dict.") return {str(col_name): {**{"name": str(col_name)}, **col_data} for (col_name, col_data) in info.items()} @property def validations(self) -> Mapping[str, Sequence[ColumnValidationBase]]: """A short-hand property to access the validators for each column.""" return {col_name: col.validations for (col_name, col) in self.columns.items()} @property def metrics(self) -> Mapping[str, Sequence[MetricsName]]: """A short-hand property to access the metrics for each column.""" return {col_name: col.metrics for (col_name, col) in self.columns.items()} @property def column_names(self) -> Sequence[str]: """Property providing the list of all column names.""" return tuple(self.columns.keys())
Ancestors
- pydantic.main.BaseModel
Class variables
var columns : Mapping[str, SchemaColumn]
var model_computed_fields
var model_config
var model_fields
var name : str
Static methods
def supply_column_names(info)
-
Tell each column its name (the key it is listed under)
Expand source code
@pydantic.validator("columns", pre=True) def supply_column_names(cls, info): """Tell each column its name (the key it is listed under)""" if not isinstance(info, Mapping): raise ValueError(f"{info!r} shoudl be a dict.") return {str(col_name): {**{"name": str(col_name)}, **col_data} for (col_name, col_data) in info.items()}
Instance variables
var column_names : Sequence[str]
-
Property providing the list of all column names.
Expand source code
@property def column_names(self) -> Sequence[str]: """Property providing the list of all column names.""" return tuple(self.columns.keys())
var metrics : Mapping[str, Sequence[MetricsName]]
-
A short-hand property to access the metrics for each column.
Expand source code
@property def metrics(self) -> Mapping[str, Sequence[MetricsName]]: """A short-hand property to access the metrics for each column.""" return {col_name: col.metrics for (col_name, col) in self.columns.items()}
var validations : Mapping[str, Sequence[ColumnValidationBase]]
-
A short-hand property to access the validators for each column.
Expand source code
@property def validations(self) -> Mapping[str, Sequence[ColumnValidationBase]]: """A short-hand property to access the validators for each column.""" return {col_name: col.validations for (col_name, col) in self.columns.items()}
class MetricsName (value, names=None, *, module=None, qualname=None, type=None, start=1)
-
The list of valid metrics names.
Expand source code
class MetricsName(str, enum.Enum): """The list of valid metrics names.""" # pylint: disable=invalid-name min = "Min" max = "Max" mean = "Mean" stddev = "Std" variance = "Variance" counts = "Counts" counts_per_label = "CountsPerLabel" unique_counts = "UniqueCounts"
Ancestors
- builtins.str
- enum.Enum
Class variables
var counts
var counts_per_label
var max
var mean
var min
var stddev
var unique_counts
var variance
class SchemaColumn (**data: Any)
-
Definition of a single data source column.
Create a new model by parsing and validating input data from keyword arguments.
Raises [ValidationError][pydantic_core.ValidationError] if the input data cannot be validated to form a valid model.
`self` is explicitly positional-only to allow `self` as a field name.
Expand source code
class SchemaColumn(pydantic.BaseModel): """Definition os a single data source column.""" name: str data_type: str = pydantic.Field(alias="type") validations: Sequence[ColumnValidationBase] = pydantic.Field(default_factory=list) metrics: Sequence[MetricsName] = () @pydantic.validator("data_type") def is_valid_pandas_type(cls, info): """Checks that the data_type is understood by pandas.""" try: pandas_dtype(info) except TypeError: raise ValueError(f"Unexpected data type {info}") from None return info @pydantic.validator("validations", pre=True) def remap_validations(cls, info): """Remap the yaml structure of {validation_type: <params>} to a list with validation_type as a key""" if not isinstance(info, dict): raise ValueError(f"{info!r} should be a dict") out = [] for (key, params) in info.items(): new_el = params.copy() new_el.update({"name": key}) out.append(new_el) return out @pydantic.validator("metrics", pre=True, always=True) def validate_metrics(cls, info): """Remap any false-ish `metrics` value to an empty list.""" if info: out = info else: out = [] return out
Ancestors
- pydantic.main.BaseModel
Class variables
var data_type : str
var metrics : Sequence[MetricsName]
var model_computed_fields
var model_config
var model_fields
var name : str
var validations : Sequence[ColumnValidationBase]
Static methods
def is_valid_pandas_type(info)
-
Checks that the data_type is understood by pandas.
Expand source code
@pydantic.validator("data_type") def is_valid_pandas_type(cls, info): """Checks that the data_type is understood by pandas.""" try: pandas_dtype(info) except TypeError: raise ValueError(f"Unexpected data type {info}") from None return info
def remap_validations(info)
-
Remap the yaml structure of {validation_type:
} to a list with validation_type as a key Expand source code
@pydantic.validator("validations", pre=True) def remap_validations(cls, info): """Remap the yaml structure of {validation_type: <params>} to a list with validation_type as a key""" if not isinstance(info, dict): raise ValueError(f"{info!r} should be a dict") out = [] for (key, params) in info.items(): new_el = params.copy() new_el.update({"name": key}) out.append(new_el) return out
def validate_metrics(info)
-
Remap any false-ish
metrics
value to an empty list.Expand source code
@pydantic.validator("metrics", pre=True, always=True) def validate_metrics(cls, info): """Remap any false-ish `metrics` value to an empty list.""" if info: out = info else: out = [] return out