Module dynamicio.config.pydantic.table_schema

This module defines Config schema for data source (pandas dataframe)

Expand source code
# pylint: disable=no-member, no-self-argument, unused-argument

"""This module defines Config schema for data source (pandas dataframe)"""

import enum
from typing import Mapping, Sequence

import pydantic
from pandas.core.dtypes.common import pandas_dtype


@enum.unique
class MetricsName(str, enum.Enum):
    """The list of valid metrics names.

    Members mix in ``str``, so each member compares equal to its string value
    (e.g. ``MetricsName.min == "Min"``) and can be looked up from that value
    with ``MetricsName("Min")``. The ``enum.unique`` decorator rejects
    duplicate values when the class is created.
    """

    # Member names are lower-case, and `min`/`max` reuse builtin names inside
    # the class namespace, hence the pylint override below.
    # pylint: disable=invalid-name
    min = "Min"
    max = "Max"
    mean = "Mean"
    stddev = "Std"
    variance = "Variance"
    counts = "Counts"
    counts_per_label = "CountsPerLabel"
    unique_counts = "UniqueCounts"


class ColumnValidationBase(pydantic.BaseModel):
    """A single column validator.

    All three fields are required: none of them declares a default value.
    """

    # Identifier of the validation. `SchemaColumn.remap_validations` fills it
    # in from the key the validation is listed under.
    name: str
    # NOTE(review): presumably toggles whether the validation is executed --
    # confirm against the code that consumes this model.
    apply: bool
    # NOTE(review): presumably the validation's parameters keyed by parameter
    # name; values are not constrained beyond `object`.
    options: Mapping[str, object]


class SchemaColumn(pydantic.BaseModel):
    """Definition of a single data source column.

    Fields:
        name: The column name.
        data_type: The column dtype; read from the ``type`` key of the input
            (pydantic alias) and checked with ``pandas_dtype``.
        validations: The validators attached to the column. The input is a
            ``{validation_type: <params>}`` dict, remapped to a list.
        metrics: The metrics attached to the column; falsy input becomes ``[]``.
    """

    name: str
    data_type: str = pydantic.Field(alias="type")
    validations: Sequence[ColumnValidationBase] = pydantic.Field(default_factory=list)
    metrics: Sequence[MetricsName] = ()

    @pydantic.validator("data_type")
    def is_valid_pandas_type(cls, info):
        """Checks that the data_type is understood by pandas.

        Returns the value unchanged. Raises ValueError when ``pandas_dtype``
        rejects the value with a TypeError.
        """
        try:
            pandas_dtype(info)
        except TypeError:
            # `from None` hides the pandas traceback; the message names the offending value.
            raise ValueError(f"Unexpected data type {info}") from None
        return info

    @pydantic.validator("validations", pre=True)
    def remap_validations(cls, info):
        """Remap the yaml structure of {validation_type: <params>} to a list of dicts.

        Each resulting element is a copy of ``<params>`` with the validation
        type stored under the ``"name"`` key (overriding any ``"name"`` entry
        already present in ``<params>``).

        Raises ValueError if the input is not a dict, or if the parameters of
        any validation are not a mapping.
        """
        if not isinstance(info, dict):
            raise ValueError(f"{info!r} should be a dict")
        out = []
        for key, params in info.items():
            # A bare `validation_type:` in yaml yields params=None. Reject it with a
            # ValueError so that pydantic reports a validation error, instead of
            # letting an AttributeError escape from a `.copy()`/`.update()` call.
            if not isinstance(params, Mapping):
                raise ValueError(f"{params!r} (parameters of validation {key!r}) should be a dict")
            out.append({**params, "name": key})
        return out

    @pydantic.validator("metrics", pre=True, always=True)
    def validate_metrics(cls, info):
        """Remap any false-ish `metrics` value to an empty list."""
        return info if info else []


class DataframeSchema(pydantic.BaseModel):
    """Pydantic model describing the tabular data provided by the data source.

    Fields:
        name: The schema name.
        columns: Column definitions keyed by column name.
    """

    name: str
    columns: Mapping[str, SchemaColumn]

    @pydantic.validator("columns", pre=True)
    def supply_column_names(cls, info):
        """Tell each column its name (the key it is listed under).

        Keys are converted to ``str``. Returns a new dict in which every
        column definition carries a ``"name"`` entry.

        Raises ValueError if the input, or any single column definition, is
        not a mapping.
        """
        if not isinstance(info, Mapping):
            raise ValueError(f"{info!r} should be a dict.")

        columns = {}
        for col_name, col_data in info.items():
            # Fail with a ValueError (reported by pydantic as a validation error)
            # rather than the raw TypeError that `**col_data` would raise.
            if not isinstance(col_data, Mapping):
                raise ValueError(f"{col_data!r} (definition of column {col_name!r}) should be a dict.")
            key = str(col_name)
            # The key-derived name goes first, so an explicit "name" entry in
            # the column definition still takes precedence.
            columns[key] = {"name": key, **col_data}
        return columns

    @property
    def validations(self) -> Mapping[str, Sequence[ColumnValidationBase]]:
        """A short-hand property to access the validators for each column."""
        return {col_name: col.validations for (col_name, col) in self.columns.items()}

    @property
    def metrics(self) -> Mapping[str, Sequence[MetricsName]]:
        """A short-hand property to access the metrics for each column."""
        return {col_name: col.metrics for (col_name, col) in self.columns.items()}

    @property
    def column_names(self) -> Sequence[str]:
        """Property providing the tuple of all column names."""
        return tuple(self.columns.keys())

Classes

class ColumnValidationBase (**data: Any)

A single column validator.

Create a new model by parsing and validating input data from keyword arguments.

Raises pydantic_core.ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class ColumnValidationBase(pydantic.BaseModel):
    """A single column validator.

    All three fields are required: none of them declares a default value.
    """

    # Identifier of the validation. `SchemaColumn.remap_validations` fills it
    # in from the key the validation is listed under.
    name: str
    # NOTE(review): presumably toggles whether the validation is executed --
    # confirm against the code that consumes this model.
    apply: bool
    # NOTE(review): presumably the validation's parameters keyed by parameter
    # name; values are not constrained beyond `object`.
    options: Mapping[str, object]

Ancestors

  • pydantic.main.BaseModel

Class variables

var apply : bool
var model_computed_fields
var model_config
var model_fields
var name : str
var options : Mapping[str, object]
class DataframeSchema (**data: Any)

Pydantic model describing the tabular data provided by the data source.

Create a new model by parsing and validating input data from keyword arguments.

Raises pydantic_core.ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class DataframeSchema(pydantic.BaseModel):
    """Pydantic model describing the tabular data provided by the data source.

    Fields:
        name: The schema name.
        columns: Column definitions keyed by column name.
    """

    name: str
    columns: Mapping[str, SchemaColumn]

    @pydantic.validator("columns", pre=True)
    def supply_column_names(cls, info):
        """Tell each column its name (the key it is listed under).

        Keys are converted to ``str``. Returns a new dict in which every
        column definition carries a ``"name"`` entry.

        Raises ValueError if the input, or any single column definition, is
        not a mapping.
        """
        if not isinstance(info, Mapping):
            raise ValueError(f"{info!r} should be a dict.")

        columns = {}
        for col_name, col_data in info.items():
            # Fail with a ValueError (reported by pydantic as a validation error)
            # rather than the raw TypeError that `**col_data` would raise.
            if not isinstance(col_data, Mapping):
                raise ValueError(f"{col_data!r} (definition of column {col_name!r}) should be a dict.")
            key = str(col_name)
            # The key-derived name goes first, so an explicit "name" entry in
            # the column definition still takes precedence.
            columns[key] = {"name": key, **col_data}
        return columns

    @property
    def validations(self) -> Mapping[str, Sequence[ColumnValidationBase]]:
        """A short-hand property to access the validators for each column."""
        return {col_name: col.validations for (col_name, col) in self.columns.items()}

    @property
    def metrics(self) -> Mapping[str, Sequence[MetricsName]]:
        """A short-hand property to access the metrics for each column."""
        return {col_name: col.metrics for (col_name, col) in self.columns.items()}

    @property
    def column_names(self) -> Sequence[str]:
        """Property providing the tuple of all column names."""
        return tuple(self.columns.keys())

Ancestors

  • pydantic.main.BaseModel

Class variables

var columns : Mapping[str, SchemaColumn]
var model_computed_fields
var model_config
var model_fields
var name : str

Static methods

def supply_column_names(info)

Tell each column its name (the key it is listed under)

Expand source code
@pydantic.validator("columns", pre=True)
def supply_column_names(cls, info):
    """Tell each column its name (the key it is listed under).

    Keys are converted to ``str``. Returns a new dict in which every column
    definition carries a ``"name"`` entry.

    Raises ValueError if the input, or any single column definition, is not a
    mapping.
    """
    if not isinstance(info, Mapping):
        raise ValueError(f"{info!r} should be a dict.")

    columns = {}
    for col_name, col_data in info.items():
        # Fail with a ValueError (reported by pydantic as a validation error)
        # rather than the raw TypeError that `**col_data` would raise.
        if not isinstance(col_data, Mapping):
            raise ValueError(f"{col_data!r} (definition of column {col_name!r}) should be a dict.")
        key = str(col_name)
        # The key-derived name goes first, so an explicit "name" entry in the
        # column definition still takes precedence.
        columns[key] = {"name": key, **col_data}
    return columns

Instance variables

var column_names : Sequence[str]

Property providing the list of all column names.

Expand source code
@property
def column_names(self) -> Sequence[str]:
    """Return the keys of ``self.columns`` as a tuple, in mapping order."""
    # Iterating a mapping yields its keys, so no explicit `.keys()` is needed.
    return tuple(self.columns)
var metrics : Mapping[str, Sequence[MetricsName]]

A short-hand property to access the metrics for each column.

Expand source code
@property
def metrics(self) -> Mapping[str, Sequence[MetricsName]]:
    """Map every column name to the `metrics` attribute of that column."""
    per_column = {}
    for name, column in self.columns.items():
        per_column[name] = column.metrics
    return per_column
var validations : Mapping[str, Sequence[ColumnValidationBase]]

A short-hand property to access the validators for each column.

Expand source code
@property
def validations(self) -> Mapping[str, Sequence[ColumnValidationBase]]:
    """Map every column name to the `validations` attribute of that column."""
    per_column = {}
    for name, column in self.columns.items():
        per_column[name] = column.validations
    return per_column
class MetricsName (value, names=None, *, module=None, qualname=None, type=None, start=1)

The list of valid metrics names.

Expand source code
class MetricsName(str, enum.Enum):
    """The list of valid metrics names.

    Members mix in ``str``, so each member compares equal to its string value
    (e.g. ``MetricsName.min == "Min"``) and can be looked up from that value
    with ``MetricsName("Min")``.
    """

    # Member names are lower-case, and `min`/`max` reuse builtin names inside
    # the class namespace, hence the pylint override below.
    # pylint: disable=invalid-name
    min = "Min"
    max = "Max"
    mean = "Mean"
    stddev = "Std"
    variance = "Variance"
    counts = "Counts"
    counts_per_label = "CountsPerLabel"
    unique_counts = "UniqueCounts"

Ancestors

  • builtins.str
  • enum.Enum

Class variables

var counts
var counts_per_label
var max
var mean
var min
var stddev
var unique_counts
var variance
class SchemaColumn (**data: Any)

Definition of a single data source column.

Create a new model by parsing and validating input data from keyword arguments.

Raises pydantic_core.ValidationError if the input data cannot be validated to form a valid model.

self is explicitly positional-only to allow self as a field name.

Expand source code
class SchemaColumn(pydantic.BaseModel):
    """Definition of a single data source column.

    Fields:
        name: The column name.
        data_type: The column dtype; read from the ``type`` key of the input
            (pydantic alias) and checked with ``pandas_dtype``.
        validations: The validators attached to the column. The input is a
            ``{validation_type: <params>}`` dict, remapped to a list.
        metrics: The metrics attached to the column; falsy input becomes ``[]``.
    """

    name: str
    data_type: str = pydantic.Field(alias="type")
    validations: Sequence[ColumnValidationBase] = pydantic.Field(default_factory=list)
    metrics: Sequence[MetricsName] = ()

    @pydantic.validator("data_type")
    def is_valid_pandas_type(cls, info):
        """Checks that the data_type is understood by pandas.

        Returns the value unchanged. Raises ValueError when ``pandas_dtype``
        rejects the value with a TypeError.
        """
        try:
            pandas_dtype(info)
        except TypeError:
            # `from None` hides the pandas traceback; the message names the offending value.
            raise ValueError(f"Unexpected data type {info}") from None
        return info

    @pydantic.validator("validations", pre=True)
    def remap_validations(cls, info):
        """Remap the yaml structure of {validation_type: <params>} to a list of dicts.

        Each resulting element is a copy of ``<params>`` with the validation
        type stored under the ``"name"`` key (overriding any ``"name"`` entry
        already present in ``<params>``).

        Raises ValueError if the input is not a dict, or if the parameters of
        any validation are not a mapping.
        """
        if not isinstance(info, dict):
            raise ValueError(f"{info!r} should be a dict")
        out = []
        for key, params in info.items():
            # A bare `validation_type:` in yaml yields params=None. Reject it with a
            # ValueError so that pydantic reports a validation error, instead of
            # letting an AttributeError escape from a `.copy()`/`.update()` call.
            if not isinstance(params, Mapping):
                raise ValueError(f"{params!r} (parameters of validation {key!r}) should be a dict")
            out.append({**params, "name": key})
        return out

    @pydantic.validator("metrics", pre=True, always=True)
    def validate_metrics(cls, info):
        """Remap any false-ish `metrics` value to an empty list."""
        return info if info else []

Ancestors

  • pydantic.main.BaseModel

Class variables

var data_type : str
var metrics : Sequence[MetricsName]
var model_computed_fields
var model_config
var model_fields
var name : str
var validations : Sequence[ColumnValidationBase]

Static methods

def is_valid_pandas_type(info)

Checks that the data_type is understood by pandas.

Expand source code
@pydantic.validator("data_type")
def is_valid_pandas_type(cls, info):
    """Accept the data_type only if ``pandas_dtype`` can interpret it.

    Returns the value unchanged; a TypeError from pandas is re-raised as a
    ValueError naming the offending value.
    """
    try:
        pandas_dtype(info)
    except TypeError:
        raise ValueError(f"Unexpected data type {info}") from None
    else:
        return info
def remap_validations(info)

Remap the yaml structure of {validation_type: <params>} to a list with validation_type as a key

Expand source code
@pydantic.validator("validations", pre=True)
def remap_validations(cls, info):
    """Remap the yaml structure of {validation_type: <params>} to a list of dicts.

    Each resulting element is a copy of ``<params>`` with the validation type
    stored under the ``"name"`` key (overriding any ``"name"`` entry already
    present in ``<params>``).

    Raises ValueError if the input is not a dict, or if the parameters of any
    validation are not a mapping.
    """
    if not isinstance(info, dict):
        raise ValueError(f"{info!r} should be a dict")
    out = []
    for key, params in info.items():
        # A bare `validation_type:` in yaml yields params=None. Reject it with a
        # ValueError so that pydantic reports a validation error, instead of
        # letting an AttributeError escape from a `.copy()`/`.update()` call.
        if not isinstance(params, Mapping):
            raise ValueError(f"{params!r} (parameters of validation {key!r}) should be a dict")
        out.append({**params, "name": key})
    return out
def validate_metrics(info)

Remap any false-ish metrics value to an empty list.

Expand source code
@pydantic.validator("metrics", pre=True, always=True)
def validate_metrics(cls, info):
    """Pass a truthy `metrics` value through; replace a false-ish one with an empty list."""
    return info if info else []