Module dynamicio.metrics

A module responsible for metrics generation and logging.

Expand source code
"""A module responsible for metrics generation and logging."""
# pylint: disable=missing-function-docstring,missing-class-docstring
import json
import logging
import sys
from datetime import date, timedelta
from numbers import Number
from typing import Any, Dict, Mapping, Type, Union

import pandas as pd
from magic_logger import logger
from numpy import datetime64, timedelta64
from pythonjsonlogger import jsonlogger  # type: ignore

# Attach a stdout stream handler with a JSON formatter to the imported `logger`.
logHandler = logging.StreamHandler(sys.stdout)
formatter = jsonlogger.JsonFormatter()
logHandler.setFormatter(formatter)
logger.addHandler(logHandler)

# Registry of metric classes keyed by class name; populated by Metric.__init_subclass__
# and read by get_metric.
__metrics__: Dict[str, Type["Metric"]] = {}


def get_metric(name: str) -> Type["Metric"]:
    """Look up a registered metric class by its class name.

    Args:
        name: Class name of the metric, e.g. "Min".

    Returns:
        The metric class stored under ``name`` in the ``__metrics__`` registry.

    Raises:
        KeyError: If nothing is registered under ``name``.
    """
    metric_class = __metrics__[name]
    return metric_class


def log_metric(dataset: str, column: str, metric: str, value: float):
    """Emit one structured "METRIC" log record for a dataset column.

    The record is serialised with ``json.dumps`` and logged at INFO level.

    Args:
        dataset: Name of the dataset the metric belongs to.
        column: Name of the column the metric was computed for.
        metric: Name of the metric, e.g. "unique_vals".
        value: The metric's value, e.g. 10000; it is converted with ``float()`` before logging.
    """
    record = {
        "message": "METRIC",
        "dataset": dataset,
        "column": column,
        "metric": metric,
        "value": float(value),
    }
    logger.info(json.dumps(record))


class Metric:
    """Base class for column-level metrics computed on a pandas DataFrame.

    Each valid subclass is registered under its class name in the module-level
    ``__metrics__`` registry. Calling an instance computes the metric via
    ``calculate_metric`` and logs the result through ``log_metric``.
    """

    def __init__(self, dataset_name: str, df: pd.DataFrame, column: str):  # noqa
        """Store the inputs the metric is computed from.

        Args:
            dataset_name: Name of the dataset, used when logging the metric.
            df: The DataFrame holding the data.
            column: Name of the column in ``df`` the metric is computed for.
        """
        self.dataset_name = dataset_name
        self.df = df
        self.column = column

    def __init_subclass__(cls, **kwargs):  # noqa
        """Validate a newly defined subclass and add it to the registry.

        Raises:
            AssertionError: If the subclass body does not define ``calculate_metric`` itself.
        """
        super().__init_subclass__(**kwargs)
        # Validate before registering so a rejected class never lingers in the registry, and raise
        # explicitly (rather than via `assert`) so the check still runs under `python -O`.
        if "calculate_metric" not in cls.__dict__:
            raise AssertionError(f"{cls.__name__} must define calculate_metric")
        __metrics__[cls.__name__] = cls

    def __call__(self) -> Any:  # noqa
        """Compute the metric, log it and return it.

        Returns:
            Whatever ``calculate_metric`` returned.
        """
        metric_value = self.calculate_metric()

        if isinstance(metric_value, Mapping):
            # One log record per entry, in sorted key order; the key is logged as the "column".
            for label in sorted(metric_value):
                label_value = metric_value[label]  # pylint: disable=unsubscriptable-object
                log_metric(self.dataset_name, label, self.metric_name, label_value)
        else:
            log_metric(dataset=self.dataset_name, column=self.column, metric=self.metric_name, value=metric_value)
        return metric_value

    @property
    def metric_name(self) -> str:
        """Retrieves the name of the metric from the class name.

        Returns:
            The name of the metric, e.g. "Min" or "Mean".
        """
        return self.__class__.__name__

    def calculate_metric(self) -> Any:
        """Placeholder that subclasses are required to override.

        Returns:
            NotImplemented; this base implementation is only reached when no subclass
            implementation is in play.
        """
        return NotImplemented


class Min(Metric):
    """Metric reporting the smallest value found in a column."""

    def calculate_metric(self) -> Number:
        """Compute the minimum of the configured column.

        Returns:
            The result of ``min()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.min()


class Max(Metric):
    """Metric reporting the largest value found in a column."""

    def calculate_metric(self) -> Number:
        """Compute the maximum of the configured column.

        Returns:
            The result of ``max()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.max()


class Mean(Metric):
    """Metric reporting the mean value of a column."""

    def calculate_metric(self) -> float:
        """Compute the mean of the configured column.

        Returns:
            The result of ``mean()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.mean()


class Std(Metric):
    """Metric reporting the standard deviation of a column."""

    def calculate_metric(self) -> float:
        """Compute the standard deviation of the configured column.

        Returns:
            The result of ``std()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.std()


class Variance(Metric):
    """Metric reporting the variance of a column."""

    def calculate_metric(self) -> Union[str, bytes, date, timedelta, datetime64, timedelta64, int, float, complex]:
        """Compute the variance of the configured column.

        Returns:
            The result of ``var()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.var()


class Counts(Metric):
    """Metric reporting the length of a column."""

    def calculate_metric(self) -> int:
        """Compute the length of the configured column.

        Returns:
            ``len()`` of the selected column.
        """
        column_values = self.df[self.column]
        return len(column_values)


class UniqueCounts(Metric):
    """Metric reporting how many unique values a column contains."""

    def calculate_metric(self) -> int:
        """Count the unique values of the configured column.

        Returns:
            The length of the array returned by ``unique()`` on the selected column.
        """
        distinct_values = self.df[self.column].unique()
        return len(distinct_values)


class CountsPerLabel(Metric):
    """Metric reporting how often each label occurs in a categorical column."""

    def calculate_metric(self) -> Mapping:
        """Count the occurrences of every label in the configured column.

        Returns:
            A dict mapping ``"<column>-<label>"`` to the count that ``value_counts()`` reports
            for that label. Labels whose string forms coincide end up under the same key.
        """
        label_counts = self.df[self.column].value_counts().to_dict()
        # Format the label instead of concatenating it, so non-string labels (ints, bools, ...)
        # no longer raise a TypeError while string labels yield exactly the same key as before.
        return {f"{self.column}-{label}": count for label, count in label_counts.items()}

Functions

def get_metric(name: str) ‑> Type[Metric]
Expand source code
def get_metric(name: str) -> Type["Metric"]:
    """Look up a registered metric class by its class name.

    Args:
        name: Class name of the metric, e.g. "Min".

    Returns:
        The metric class stored under ``name`` in the ``__metrics__`` registry.

    Raises:
        KeyError: If nothing is registered under ``name``.
    """
    metric_class = __metrics__[name]
    return metric_class
def log_metric(dataset: str, column: str, metric: str, value: float)

Logs a metric in a structured way for a given dataset column.

Args

dataset
The dataset for which the metric is logged
column
Column for which the metric is logged
metric
name of the metric, e.g. "unique_vals"
value
The metric's value, e.g. "10000"
Expand source code
def log_metric(dataset: str, column: str, metric: str, value: float):
    """Emit one structured "METRIC" log record for a dataset column.

    The record is serialised with ``json.dumps`` and logged at INFO level.

    Args:
        dataset: Name of the dataset the metric belongs to.
        column: Name of the column the metric was computed for.
        metric: Name of the metric, e.g. "unique_vals".
        value: The metric's value, e.g. 10000; it is converted with ``float()`` before logging.
    """
    record = {
        "message": "METRIC",
        "dataset": dataset,
        "column": column,
        "metric": metric,
        "value": float(value),
    }
    logger.info(json.dumps(record))

Classes

class Counts (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the length of a column.

Expand source code
class Counts(Metric):
    """Metric reporting the length of a column."""

    def calculate_metric(self) -> int:
        """Compute the length of the configured column.

        Returns:
            ``len()`` of the selected column.
        """
        column_values = self.df[self.column]
        return len(column_values)

Ancestors

Methods

def calculate_metric(self) ‑> int

Generate and return the length of a column.

Returns

The length of a column.

Expand source code
def calculate_metric(self) -> int:
    """Compute the length of the configured column.

    Returns:
        ``len()`` of the selected column.
    """
    column_values = self.df[self.column]
    return len(column_values)

Inherited members

class CountsPerLabel (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the counts per label in a categorical column.

Expand source code
class CountsPerLabel(Metric):
    """Metric reporting how often each label occurs in a categorical column."""

    def calculate_metric(self) -> Mapping:
        """Count the occurrences of every label in the configured column.

        Returns:
            A dict mapping ``"<column>-<label>"`` to the count that ``value_counts()`` reports
            for that label. Labels whose string forms coincide end up under the same key.
        """
        label_counts = self.df[self.column].value_counts().to_dict()
        # Format the label instead of concatenating it, so non-string labels (ints, bools, ...)
        # no longer raise a TypeError while string labels yield exactly the same key as before.
        return {f"{self.column}-{label}": count for label, count in label_counts.items()}

Ancestors

Methods

def calculate_metric(self) ‑> Mapping[~KT, +VT_co]

Generate and return the counts per label in a categorical column.

Returns

The counts per label in a categorical column

Expand source code
def calculate_metric(self) -> Mapping:
    """Count the occurrences of every label in the configured column.

    Returns:
        A dict mapping ``"<column>-<label>"`` to the count that ``value_counts()`` reports
        for that label. Labels whose string forms coincide end up under the same key.
    """
    label_counts = self.df[self.column].value_counts().to_dict()
    # Format the label instead of concatenating it, so non-string labels (ints, bools, ...)
    # no longer raise a TypeError while string labels yield exactly the same key as before.
    return {f"{self.column}-{label}": count for label, count in label_counts.items()}

Inherited members

class Max (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the maximum value of a column.

Expand source code
class Max(Metric):
    """Metric reporting the largest value found in a column."""

    def calculate_metric(self) -> Number:
        """Compute the maximum of the configured column.

        Returns:
            The result of ``max()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.max()

Ancestors

Methods

def calculate_metric(self) ‑> numbers.Number

Generate and return the maximum value of a column.

Returns

The maximum value of a column.

Expand source code
def calculate_metric(self) -> Number:
    """Compute the maximum of the configured column.

    Returns:
        The result of ``max()`` on the selected column.
    """
    column_values = self.df[self.column]
    return column_values.max()

Inherited members

class Mean (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the mean value of a column.

Expand source code
class Mean(Metric):
    """Metric reporting the mean value of a column."""

    def calculate_metric(self) -> float:
        """Compute the mean of the configured column.

        Returns:
            The result of ``mean()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.mean()

Ancestors

Methods

def calculate_metric(self) ‑> float

Generate and return the mean value of a column.

Returns

The mean value of a column.

Expand source code
def calculate_metric(self) -> float:
    """Compute the mean of the configured column.

    Returns:
        The result of ``mean()`` on the selected column.
    """
    column_values = self.df[self.column]
    return column_values.mean()

Inherited members

class Metric (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A base class for implementing metrics classes.

Expand source code
class Metric:
    """Base class for column-level metrics computed on a pandas DataFrame.

    Each valid subclass is registered under its class name in the module-level
    ``__metrics__`` registry. Calling an instance computes the metric via
    ``calculate_metric`` and logs the result through ``log_metric``.
    """

    def __init__(self, dataset_name: str, df: pd.DataFrame, column: str):  # noqa
        """Store the inputs the metric is computed from.

        Args:
            dataset_name: Name of the dataset, used when logging the metric.
            df: The DataFrame holding the data.
            column: Name of the column in ``df`` the metric is computed for.
        """
        self.dataset_name = dataset_name
        self.df = df
        self.column = column

    def __init_subclass__(cls, **kwargs):  # noqa
        """Validate a newly defined subclass and add it to the registry.

        Raises:
            AssertionError: If the subclass body does not define ``calculate_metric`` itself.
        """
        super().__init_subclass__(**kwargs)
        # Validate before registering so a rejected class never lingers in the registry, and raise
        # explicitly (rather than via `assert`) so the check still runs under `python -O`.
        if "calculate_metric" not in cls.__dict__:
            raise AssertionError(f"{cls.__name__} must define calculate_metric")
        __metrics__[cls.__name__] = cls

    def __call__(self) -> Any:  # noqa
        """Compute the metric, log it and return it.

        Returns:
            Whatever ``calculate_metric`` returned.
        """
        metric_value = self.calculate_metric()

        if isinstance(metric_value, Mapping):
            # One log record per entry, in sorted key order; the key is logged as the "column".
            for label in sorted(metric_value):
                label_value = metric_value[label]  # pylint: disable=unsubscriptable-object
                log_metric(self.dataset_name, label, self.metric_name, label_value)
        else:
            log_metric(dataset=self.dataset_name, column=self.column, metric=self.metric_name, value=metric_value)
        return metric_value

    @property
    def metric_name(self) -> str:
        """Retrieves the name of the metric from the class name.

        Returns:
            The name of the metric, e.g. "Min" or "Mean".
        """
        return self.__class__.__name__

    def calculate_metric(self) -> Any:
        """Placeholder that subclasses are required to override.

        Returns:
            NotImplemented; this base implementation is only reached when no subclass
            implementation is in play.
        """
        return NotImplemented

Subclasses

Instance variables

var metric_name : str

Retrieves the name of the metric from the class name.

Returns

The name of the metric, e.g. "Min" or "Mean".

Expand source code
@property
def metric_name(self) -> str:
    """Derive the metric's name from the name of the instance's class.

    Returns:
        The class name, e.g. "Min" or "Mean".
    """
    owner = self.__class__
    return owner.__name__

Methods

def calculate_metric(self) ‑> Any

Dictates that subclasses need to implement this method.

Returns

NotImplemented is returned when a subclass does not implement the method, because the call then falls back to this parent implementation.

Expand source code
def calculate_metric(self) -> Any:
    """Placeholder that subclasses are required to override.

    Returns:
        NotImplemented; this parent implementation is only reached when a subclass
        does not supply its own.
    """
    return NotImplemented
class Min (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the minimum value of a column.

Expand source code
class Min(Metric):
    """Metric reporting the smallest value found in a column."""

    def calculate_metric(self) -> Number:
        """Compute the minimum of the configured column.

        Returns:
            The result of ``min()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.min()

Ancestors

Methods

def calculate_metric(self) ‑> numbers.Number

Generate and return the minimum value of a column.

Returns

The minimum value of a column.

Expand source code
def calculate_metric(self) -> Number:
    """Compute the minimum of the configured column.

    Returns:
        The result of ``min()`` on the selected column.
    """
    column_values = self.df[self.column]
    return column_values.min()

Inherited members

class Std (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the standard deviation of a column.

Expand source code
class Std(Metric):
    """Metric reporting the standard deviation of a column."""

    def calculate_metric(self) -> float:
        """Compute the standard deviation of the configured column.

        Returns:
            The result of ``std()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.std()

Ancestors

Methods

def calculate_metric(self) ‑> float

Generate and return the standard deviation of a column.

Returns

The standard deviation of a column.

Expand source code
def calculate_metric(self) -> float:
    """Compute the standard deviation of the configured column.

    Returns:
        The result of ``std()`` on the selected column.
    """
    column_values = self.df[self.column]
    return column_values.std()

Inherited members

class UniqueCounts (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that enables generating and returning the number of unique values in a column.

Expand source code
class UniqueCounts(Metric):
    """Metric reporting how many unique values a column contains."""

    def calculate_metric(self) -> int:
        """Count the unique values of the configured column.

        Returns:
            The length of the array returned by ``unique()`` on the selected column.
        """
        distinct_values = self.df[self.column].unique()
        return len(distinct_values)

Ancestors

Methods

def calculate_metric(self) ‑> int

Generate and return the number of unique values in a column.

Returns

The number of unique values in a column.

Expand source code
def calculate_metric(self) -> int:
    """Count the unique values of the configured column.

    Returns:
        The length of the array returned by ``unique()`` on the selected column.
    """
    distinct_values = self.df[self.column].unique()
    return len(distinct_values)

Inherited members

class Variance (dataset_name: str, df: pandas.core.frame.DataFrame, column: str)

A metric instance that generates and returns the variance of a column.

Expand source code
class Variance(Metric):
    """Metric reporting the variance of a column."""

    def calculate_metric(self) -> Union[str, bytes, date, timedelta, datetime64, timedelta64, int, float, complex]:
        """Compute the variance of the configured column.

        Returns:
            The result of ``var()`` on the selected column.
        """
        column_values = self.df[self.column]
        return column_values.var()

Ancestors

Methods

def calculate_metric(self) ‑> Union[str, bytes, datetime.date, datetime.timedelta, numpy.datetime64, numpy.timedelta64, int, float, complex]

Generate and return the variance of a column.

Returns

The variance of a column.

Expand source code
def calculate_metric(self) -> Union[str, bytes, date, timedelta, datetime64, timedelta64, int, float, complex]:
    """Compute the variance of the configured column.

    Returns:
        The result of ``var()`` on the selected column.
    """
    column_values = self.df[self.column]
    return column_values.var()

Inherited members