Source code for mizani.scale

"""
According to *On the theory of scales of measurement* by **S.S. Stevens**,
scales can be classified in four ways -- *norminal*, *ordinal*,
*interval* and *ratio*. Using current(2016) terminology, *norminal* data
is made up of unordered categories, *ordinal* data is made up of ordered
categories and the two can be classified as *discrete*. On the other hand
both *interval* and *ratio* data are *continuous*.

The scale classes below show how the rest of the Mizani package can be
used to implement the two categories of scales. The key tasks are
*training* and *mapping* and these correspond to the **train** and
**map** methods.

To train a scale on data means, to make the scale learn the limits of
the data. This is elaborate (or worthy of a dedicated method) for two
reasons:

    - *Practical* -- data may be split up across more than one object,
      yet all will be represented by a single scale.
    - *Conceptual* -- training is a key action that may need to be inserted
      into multiple locations of the data processing pipeline before a
      graphic can be created.

To map data onto a scale means, to associate data values with
values(potential readings) on a scale. This is perhaps the most important
concept unpinning a scale.

The **apply** methods are simple examples of how to put it all together.
"""
import numpy as np
import pandas as pd
import pandas.api.types as pdtypes

from .bounds import censor, rescale
from .utils import CONTINUOUS_KINDS, DISCRETE_KINDS, min_max, match
from .utils import multitype_sort


__all__ = ['scale_continuous', 'scale_discrete']


[docs]class scale_continuous: """ Continuous scale """
[docs] @classmethod def apply(cls, x, palette, na_value=None, trans=None): """ Scale data continuously Parameters ---------- x : array_like Continuous values to scale palette : callable ``f(x)`` Palette to use na_value : object Value to use for missing values. trans : trans How to transform the data before scaling. If ``None``, no transformation is done. Returns ------- out : array_like Scaled values """ if trans is not None: x = trans.transform(x) limits = cls.train(x) return cls.map(x, palette, limits, na_value)
[docs] @classmethod def train(cls, new_data, old=None): """ Train a continuous scale Parameters ---------- new_data : array_like New values old : array_like Old range. Most likely a tuple of length 2. Returns ------- out : tuple Limits(range) of the scale """ if not len(new_data): return old if not hasattr(new_data, 'dtype'): new_data = np.asarray(new_data) if new_data.dtype.kind not in CONTINUOUS_KINDS: raise TypeError( "Discrete value supplied to continuous scale") if old is not None: new_data = np.hstack([new_data, old]) return min_max(new_data, na_rm=True, finite=True)
[docs] @classmethod def map(cls, x, palette, limits, na_value=None, oob=censor): """ Map values to a continuous palette Parameters ---------- x : array_like Continuous values to scale palette : callable ``f(x)`` palette to use na_value : object Value to use for missing values. oob : callable ``f(x)`` Function to deal with values that are beyond the limits Returns ------- out : array_like Values mapped onto a palette """ x = oob(rescale(x, _from=limits)) pal = palette(x) try: pal[pd.isnull(x)] = na_value except TypeError: pal = [v if not pd.isnull(v) else na_value for v in pal] return pal
[docs]class scale_discrete: """ Discrete scale """
[docs] @classmethod def apply(cls, x, palette, na_value=None): """ Scale data discretely Parameters ---------- x : array_like Discrete values to scale palette : callable ``f(x)`` Palette to use na_value : object Value to use for missing values. Returns ------- out : array_like Scaled values """ limits = cls.train(x) return cls.map(x, palette, limits, na_value)
[docs] @classmethod def train(cls, new_data, old=None, drop=False, na_rm=False): """ Train a continuous scale Parameters ---------- new_data : array_like New values old : array_like Old range. List of values known to the scale. drop : bool Whether to drop(not include) unused categories na_rm : bool If ``True``, remove missing values. Missing values are either ``NaN`` or ``None``. Returns ------- out : list Values covered by the scale """ if not len(new_data): return old if old is None: old = [] # Get the missing values (NaN & Nones) locations and remove them nan_bool_idx = pd.isnull(new_data) has_na = np.any(nan_bool_idx) if not hasattr(new_data, 'dtype'): new_data = np.asarray(new_data) new_data = new_data[~nan_bool_idx] if new_data.dtype.kind not in DISCRETE_KINDS: raise TypeError( "Continuous value supplied to discrete scale") # Train i.e. get the new values if pdtypes.is_categorical_dtype(new_data): try: new = list(new_data.cat.categories) # series except AttributeError: new = list(new_data.categories) # plain categorical if drop: present = set(new_data.drop_duplicates()) new = [i for i in new if i in present] else: try: new = np.unique(new_data) new.sort() except TypeError: # new_data probably has nans and other types new = list(set(new_data)) new = multitype_sort(new) # Add nan if required if has_na and not na_rm: new = np.hstack([new, np.nan]) # update old old_set = set(old) return list(old) + [i for i in new if (i not in old_set)]
[docs] @classmethod def map(cls, x, palette, limits, na_value=None): """ Map values to a discrete palette Parameters ---------- palette : callable ``f(x)`` palette to use x : array_like Continuous values to scale na_value : object Value to use for missing values. Returns ------- out : array_like Values mapped onto a palette """ n = len(limits) pal = palette(n)[match(x, limits)] try: pal[pd.isnull(x)] = na_value except TypeError: pal = [v if not pd.isnull(v) else na_value for v in pal] return pal