Source code for mizani.scale

"""
According to *On the theory of scales of measurement* by **S.S. Stevens**,
scales can be classified in four ways -- *nominal*, *ordinal*,
*interval* and *ratio*. Using current (2016) terminology, *nominal* data
is made up of unordered categories, *ordinal* data is made up of ordered
categories and the two can be classified as *discrete*. On the other hand
both *interval* and *ratio* data are *continuous*.

The scale classes below show how the rest of the Mizani package can be
used to implement the two categories of scales. The key tasks are
*training* and *mapping* and these correspond to the **train** and
**map** methods.

To train a scale on data means to make the scale learn the limits of
the data. This is elaborate (or worthy of a dedicated method) for two
reasons:

    - *Practical* -- data may be split up across more than one object,
      yet all will be represented by a single scale.
    - *Conceptual* -- training is a key action that may need to be inserted
      into multiple locations of the data processing pipeline before a
      graphic can be created.

To map data onto a scale means to associate data values with
values (potential readings) on a scale. This is perhaps the most important
concept underpinning a scale.

The **apply** methods are simple examples of how to put it all together.
"""

from __future__ import annotations

from typing import TYPE_CHECKING, cast

import numpy as np
import pandas as pd

from .bounds import censor, rescale
from .utils import (
    CONTINUOUS_KINDS,
    DISCRETE_KINDS,
    get_categories,
    has_dtype,
    match,
    min_max,
)

if TYPE_CHECKING:
    from typing import Any, Optional, Sequence, TypeVar

    from mizani.typing import (
        AnyArrayLike,
        Callable,
        ContinuousPalette,
        DiscretePalette,
        FloatArrayLike,
        NDArrayFloat,
        Trans,
        TupleFloat2,
    )

    TVector = TypeVar("TVector", NDArrayFloat, pd.Series[float])


__all__ = ["scale_continuous", "scale_discrete"]


class scale_continuous:
    """
    Continuous scale

    Groups the class-level operations for a continuous scale:
    learning the limits of the data (``train``), mapping data onto
    a palette (``map``) and doing both in a single step (``apply``).
    """

    @classmethod
    def apply(
        cls,
        x: FloatArrayLike,
        palette: ContinuousPalette,
        na_value: Any = None,
        trans: Optional[Trans] = None,
    ) -> NDArrayFloat:
        """
        Scale data continuously

        Optionally transforms the data, trains the limits on the
        result and then maps that result onto the palette.

        Parameters
        ----------
        x : array_like
            Continuous values to scale
        palette : callable ``f(x)``
            Palette to use
        na_value : object
            Value to use for missing values.
        trans : trans
            How to transform the data before scaling. If
            ``None``, no transformation is done.

        Returns
        -------
        out : array_like
            Scaled values
        """
        values = x if trans is None else trans.transform(x)
        # The limits are learned from the (possibly transformed) values
        return cls.map(values, palette, cls.train(values), na_value)

    @classmethod
    def train(
        cls, new_data: FloatArrayLike, old: Optional[TupleFloat2] = None
    ) -> TupleFloat2:
        """
        Train a continuous scale

        Parameters
        ----------
        new_data : array_like
            New values
        old : array_like
            Old range. When ``None``, ``(-inf, inf)`` is used.

        Returns
        -------
        out : tuple
            Limits(range) of the scale. When ``new_data`` is empty
            the old range is returned as is.

        Raises
        ------
        TypeError
            If the dtype kind of ``new_data`` is not one of the
            continuous kinds.
        """
        limits = (-np.inf, np.inf) if old is None else old

        # Nothing to learn from empty data
        if len(new_data) == 0:
            return limits

        values = np.asarray(new_data)
        if values.dtype.kind not in CONTINUOUS_KINDS:
            raise TypeError("Discrete value supplied to continuous scale")

        # The new range is computed over the new values together
        # with the previous limits
        combined = np.hstack([values, limits])
        return min_max(combined, na_rm=True, finite=True)

    @classmethod
    def map(
        cls,
        x: FloatArrayLike,
        palette: ContinuousPalette,
        limits: TupleFloat2,
        na_value: Any = None,
        oob: Callable[[TVector], TVector] = censor,
    ) -> NDArrayFloat:
        """
        Map values to a continuous palette

        Parameters
        ----------
        x : array_like
            Continuous values to scale
        palette : callable ``f(x)``
            palette to use
        limits : tuple
            Limits(range) of the scale; ``x`` is rescaled from
            this range before the palette is applied.
        na_value : object
            Value to use for missing values.
        oob : callable ``f(x)``
            Function to deal with values that are
            beyond the limits

        Returns
        -------
        out : array_like
            Values mapped onto a palette
        """
        scaled = oob(rescale(x, _from=limits))  # pyright: ignore
        mapped = np.asarray(palette(scaled))
        # Positions that are missing after rescaling and the
        # out-of-bounds handling receive na_value
        missing = pd.isna(scaled)
        mapped[missing] = na_value
        return mapped
class scale_discrete:
    """
    Discrete scale

    Groups the class-level operations for a discrete scale:
    learning the values covered by the scale (``train``), mapping
    data onto a palette (``map``) and doing both in a single step
    (``apply``).
    """

    @classmethod
    def apply(
        cls,
        x: AnyArrayLike,
        palette: DiscretePalette,
        na_value: Any = None,
    ) -> AnyArrayLike:
        """
        Scale data discretely

        Trains the limits on ``x`` and then maps ``x`` onto the
        palette using those limits.

        Parameters
        ----------
        x : array_like
            Discrete values to scale
        palette : callable ``f(x)``
            Palette to use
        na_value : object
            Value to use for missing values.

        Returns
        -------
        out : array_like
            Scaled values
        """
        limits = cls.train(x)
        return cls.map(x, palette, limits, na_value)

    @classmethod
    def train(
        cls,
        new_data: AnyArrayLike,
        old: Optional[Sequence[Any]] = None,
        drop: bool = False,
        na_rm: bool = False,
    ) -> Sequence[Any]:
        """
        Train a discrete scale

        Parameters
        ----------
        new_data : array_like
            New values
        old : array_like
            Old range. List of values known to the scale.
        drop : bool
            Whether to drop(not include) unused categories.
            Only consulted when ``new_data`` has a categorical
            dtype.
        na_rm : bool
            If ``True``, remove missing values. Missing values
            are either ``NaN`` or ``None``.

        Returns
        -------
        out : list
            Values covered by the scale. When ``new_data`` is
            empty, a list copy of ``old`` is returned.

        Raises
        ------
        TypeError
            If the dtype kind of ``new_data`` is not one of the
            discrete kinds.
        """
        old = [] if old is None else list(old)

        # Nothing to learn from empty data
        if not len(new_data):
            return old

        old_set = set(old)

        # Get the missing values (NaN & Nones) locations and remove them
        nan_bool_idx = pd.isna(new_data)  # type: ignore
        has_na = np.any(nan_bool_idx)

        # Objects without a dtype (e.g. plain lists) become arrays so
        # that the dtype check and boolean indexing below work
        if not has_dtype(new_data):
            new_data = np.asarray(new_data)

        new_data = cast(np.ndarray, new_data)

        if new_data.dtype.kind not in DISCRETE_KINDS:
            raise TypeError("Continuous value supplied to discrete scale")

        new_data = new_data[~nan_bool_idx]

        # 1. Train i.e. get the new values
        # 2. Update old
        if isinstance(new_data.dtype, pd.CategoricalDtype):
            categories = get_categories(new_data)
            if drop:
                # Keep only the categories that occur in the data
                present = set(new_data)
                new = [i for i in categories if i in present]
            else:
                new = list(categories)

            # Combine the categories with the old values without
            # sorting, then keep only those that are old or new
            all_set = old_set | set(new)
            ordered_cats = categories.union(old, sort=False)
            limits = [c for c in ordered_cats if c in all_set]
        else:
            # np.unique returns the unique values already sorted, so
            # no further sorting is required. Old values keep their
            # positions and unseen values are appended after them.
            new = np.unique(new_data)
            limits = old + [i for i in new if i not in old_set]

        # Add nan if required
        has_na_limits = any(pd.isna(limits))
        if not has_na_limits and not na_rm and has_na:
            limits.append(np.nan)
        return limits

    @classmethod
    def map(
        cls,
        x: AnyArrayLike,
        palette: DiscretePalette,
        limits: Sequence[Any],
        na_value: Any = None,
    ) -> AnyArrayLike:
        """
        Map values to a discrete palette

        Parameters
        ----------
        x : array_like
            Discrete values to scale
        palette : callable ``f(x)``
            palette to use. It is called with the number of
            limits.
        limits : array_like
            Values covered by the scale. They are used to look up
            the palette entry for each value in ``x``.
        na_value : object
            Value to use for missing values.

        Returns
        -------
        out : array_like
            Values mapped onto a palette. This is a list instead
            of an array when ``na_value`` cannot be assigned into
            the palette array.
        """
        n = len(limits)
        # Index the palette values with the result of matching x
        # against the limits
        pal = np.asarray(palette(n))[match(x, limits)]
        nas = pd.isna(x)  # type: ignore
        try:
            pal[nas] = na_value
        except TypeError:
            # The array cannot hold na_value; fall back to a list
            pal = [na_value if isna else v for v, isna in zip(pal, nas)]
        return pal