Source code for mizani.bounds

"""
Continuous variables have values anywhere in the range minus
infinite to plus infinite. However, when creating a visual
representation of these values what usually matters is the
relative difference between the values. This is where rescaling
comes into play.

The values are mapped onto a range that a scale can deal with. For
graphical representation that range tends to be :math:`[0, 1]` or
:math:`[0, n]`, where :math:`n` is some number that makes the
plotted object overflow the plotting area.

Although a scale may be able handle the :math:`[0, n]` range, it
may be desirable to have a lower bound greater than zero. For
example, if data values get mapped to zero on a scale whose
graphical representation is the size/area/radius/length some data
will be invisible. The solution is to restrict the lower bound
e.g. :math:`[0.1, 1]`. Similarly you can restrict the upper bound
-- using these functions.
"""

import datetime

import numpy as np
import pandas as pd
import pandas.api.types as pdtypes

from .utils import first_element

__all__ = [
    "censor",
    "expand_range",
    "rescale",
    "rescale_max",
    "rescale_mid",
    "squish_infinite",
    "zero_range",
    "expand_range_distinct",
    "squish",
]


[docs]def rescale(x, to=(0, 1), _from=None): """ Rescale numeric vector to have specified minimum and maximum. Parameters ---------- x : array_like | numeric 1D vector of values to manipulate. to : tuple output range (numeric vector of length two) _from : tuple input range (numeric vector of length two). If not given, is calculated from the range of x Returns ------- out : array_like Rescaled values Examples -------- >>> x = [0, 2, 4, 6, 8, 10] >>> rescale(x) array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]) >>> rescale(x, to=(0, 2)) array([0. , 0.4, 0.8, 1.2, 1.6, 2. ]) >>> rescale(x, to=(0, 2), _from=(0, 20)) array([0. , 0.2, 0.4, 0.6, 0.8, 1. ]) """ if _from is None: _from = np.min(x), np.max(x) return np.interp(x, _from, to)
[docs]def rescale_mid(x, to=(0, 1), _from=None, mid=0): """ Rescale numeric vector to have specified minimum, midpoint, and maximum. Parameters ---------- x : array_like | numeric 1D vector of values to manipulate. to : tuple output range (numeric vector of length two) _from : tuple input range (numeric vector of length two). If not given, is calculated from the range of x mid : numeric mid-point of input range Returns ------- out : array_like Rescaled values Examples -------- >>> rescale_mid([1, 2, 3], mid=1) array([0.5 , 0.75, 1. ]) >>> rescale_mid([1, 2, 3], mid=2) array([0. , 0.5, 1. ]) """ array_like = True try: len(x) except TypeError: array_like = False x = [x] if not hasattr(x, "dtype"): x = np.asarray(x) if _from is None: _from = np.array([np.min(x), np.max(x)]) else: _from = np.asarray(_from) if zero_range(_from) or zero_range(to): out = np.repeat(np.mean(to), len(x)) else: extent = 2 * np.max(np.abs(_from - mid)) out = (x - mid) / extent * np.diff(to) + np.mean(to) if not array_like: out = out[0] return out
[docs]def rescale_max(x, to=(0, 1), _from=None): """ Rescale numeric vector to have specified maximum. Parameters ---------- x : array_like | numeric 1D vector of values to manipulate. to : tuple output range (numeric vector of length two) _from : tuple input range (numeric vector of length two). If not given, is calculated from the range of x. Only the 2nd (max) element is essential to the output. Returns ------- out : array_like Rescaled values Examples -------- >>> x = [0, 2, 4, 6, 8, 10] >>> rescale_max(x, (0, 3)) array([0. , 0.6, 1.2, 1.8, 2.4, 3. ]) Only the 2nd (max) element of the parameters ``to`` and ``_from`` are essential to the output. >>> rescale_max(x, (1, 3)) array([0. , 0.6, 1.2, 1.8, 2.4, 3. ]) >>> rescale_max(x, (0, 20)) array([ 0., 4., 8., 12., 16., 20.]) If :python:`max(x) < _from[1]` then values will be scaled beyond the requested maximum (:python:`to[1]`). >>> rescale_max(x, to=(1, 3), _from=(-1, 6)) array([0., 1., 2., 3., 4., 5.]) If the values are the same, they taken on the requested maximum. This includes an array of all zeros. >>> rescale_max([5, 5, 5]) array([1., 1., 1.]) >>> rescale_max([0, 0, 0]) array([1, 1, 1]) """ array_like = True try: len(x) except TypeError: array_like = False x = [x] if not hasattr(x, "dtype"): x = np.asarray(x) if _from is None: _from = (np.min(x), np.max(x)) if np.any(x < 0): out = rescale(x, (0, to[1]), _from) elif np.all(x == 0) and _from[1] == 0: out = np.repeat(to[1], len(x)) else: out = x / _from[1] * to[1] if not array_like: out = out[0] return out
[docs]def squish_infinite(x, range=(0, 1)): """ Truncate infinite values to a range. Parameters ---------- x : array_like Values that should have infinities squished. range : tuple The range onto which to squish the infinites. Must be of size 2. Returns ------- out : array_like Values with infinites squished. Examples -------- >>> squish_infinite([0, .5, .25, np.inf, .44]) [0.0, 0.5, 0.25, 1.0, 0.44] >>> squish_infinite([0, -np.inf, .5, .25, np.inf], (-10, 9)) [0.0, -10.0, 0.5, 0.25, 9.0] """ xtype = type(x) if not hasattr(x, "dtype"): x = np.asarray(x) x[x == -np.inf] = range[0] x[x == np.inf] = range[1] if not isinstance(x, xtype): x = xtype(x) return x
[docs]def squish(x, range=(0, 1), only_finite=True): """ Squish values into range. Parameters ---------- x : array_like Values that should have out of range values squished. range : tuple The range onto which to squish the values. only_finite: boolean When true, only squishes finite values. Returns ------- out : array_like Values with out of range values squished. Examples -------- >>> squish([-1.5, 0.2, 0.5, 0.8, 1.0, 1.2]) [0.0, 0.2, 0.5, 0.8, 1.0, 1.0] >>> squish([-np.inf, -1.5, 0.2, 0.5, 0.8, 1.0, np.inf], only_finite=False) [0.0, 0.0, 0.2, 0.5, 0.8, 1.0, 1.0] """ xtype = type(x) if not hasattr(x, "dtype"): x = np.asarray(x) finite = np.isfinite(x) if only_finite else True x[np.logical_and(x < range[0], finite)] = range[0] x[np.logical_and(x > range[1], finite)] = range[1] if not isinstance(x, xtype): x = xtype(x) return x
[docs]def censor(x, range=(0, 1), only_finite=True): """ Convert any values outside of range to a **NULL** type object. Parameters ---------- x : array_like Values to manipulate range : tuple (min, max) giving desired output range only_finite : bool If True (the default), will only modify finite values. Returns ------- x : array_like Censored array Examples -------- >>> a = [1, 2, np.inf, 3, 4, -np.inf, 5] >>> censor(a, (0, 10)) [1, 2, inf, 3, 4, -inf, 5] >>> censor(a, (0, 10), False) [1, 2, nan, 3, 4, nan, 5] >>> censor(a, (2, 4)) [nan, 2, inf, 3, 4, -inf, nan] Notes ----- All values in ``x`` should be of the same type. ``only_finite`` parameter is not considered for Datetime and Timedelta types. The **NULL** type object depends on the type of values in **x**. - :class:`float` - :py:`float('nan')` - :class:`int` - :py:`float('nan')` - :class:`datetime.datetime` : :py:`np.datetime64(NaT)` - :class:`datetime.timedelta` : :py:`np.timedelta64(NaT)` """ if not len(x): return x py_time_types = (datetime.datetime, datetime.timedelta) np_pd_time_types = ( pd.Timestamp, pd.Timedelta, np.datetime64, np.timedelta64, ) x0 = first_element(x) # Yes, we want type not isinstance if type(x0) in py_time_types: return _censor_with(x, range, "NaT") if not hasattr(x, "dtype") and isinstance(x0, np_pd_time_types): return _censor_with(x, range, type(x0)("NaT")) x_array = np.asarray(x) if pdtypes.is_number(x0) and not isinstance(x0, np.timedelta64): null = float("nan") elif isinstance(x0, pd.Timestamp): null = pd.Timestamp("NaT") elif pdtypes.is_datetime64_dtype(x_array): null = np.datetime64("NaT") elif isinstance(x0, pd.Timedelta): null = pd.Timedelta("NaT") elif pdtypes.is_timedelta64_dtype(x_array): null = np.timedelta64("NaT") else: raise ValueError( "Do not know how to censor values of type " "{}".format(type(x0)) ) if only_finite: try: finite = np.isfinite(x) except TypeError: finite = np.repeat(True, len(x)) else: finite = np.repeat(True, len(x)) if hasattr(x, "dtype"): # Ignore RuntimeWarning when x contains nans with np.errstate(invalid="ignore"): outside = (x < range[0]) | (x > range[1]) bool_idx = finite & outside x = x.copy() x[bool_idx] = null else: x = [ null if not range[0] <= val <= range[1] and f else val for val, f in zip(x, finite) ] return x
def _censor_with(x, range, value=None): """ Censor any values outside of range with ``None`` """ return [val if range[0] <= val <= range[1] else value for val in x]
[docs]def zero_range(x, tol=np.finfo(float).eps * 100): """ Determine if range of vector is close to zero. Parameters ---------- x : array_like | numeric Value(s) to check. If it is an array_like, it should be of length 2. tol : float Tolerance. Default tolerance is the `machine epsilon`_ times :math:`10^2`. Returns ------- out : bool Whether ``x`` has zero range. Examples -------- >>> zero_range([1, 1]) True >>> zero_range([1, 2]) False >>> zero_range([1, 2], tol=2) True .. _machine epsilon: https://en.wikipedia.org/wiki/Machine_epsilon """ try: if len(x) == 1: return True except TypeError: return True if len(x) != 2: raise ValueError("x must be length 1 or 2") # Also deals with array_likes that have non-standard indices x = sorted(x) low, high = x # datetime - pandas, cpython if isinstance(low, (pd.Timestamp, datetime.datetime)): from matplotlib.dates import date2num # date2num include timezone info, .toordinal() does not low, high = date2num(x) # datetime - numpy elif isinstance(low, np.datetime64): return low == high # timedelta - pandas, cpython elif isinstance(low, (pd.Timedelta, datetime.timedelta)): low, high = low.total_seconds(), high.total_seconds() # timedelta - numpy elif isinstance(low, np.timedelta64): return low == high elif not isinstance(low, (float, int, np.number)): raise TypeError( "zero_range objects cannot work with objects " "of type '{}'".format(type(low)) ) if any(np.isnan((low, high))): return np.nan if low == high: return True if any(np.isinf((low, high))): return False low_abs = np.abs(low) if low_abs == 0: return False return ((high - low) / low_abs) < tol
[docs]def expand_range(range, mul=0, add=0, zero_width=1): """ Expand a range with a multiplicative or additive constant Parameters ---------- range : tuple Range of data. Size 2. mul : int | float Multiplicative constant add : int | float | timedelta Additive constant zero_width : int | float | timedelta Distance to use if range has zero width Returns ------- out : tuple Expanded range Examples -------- >>> expand_range((3, 8)) (3, 8) >>> expand_range((0, 10), mul=0.1) (-1.0, 11.0) >>> expand_range((0, 10), add=2) (-2, 12) >>> expand_range((0, 10), mul=.1, add=2) (-3.0, 13.0) >>> expand_range((0, 1)) (0, 1) When the range has zero width >>> expand_range((5, 5)) (4.5, 5.5) Notes ----- If expanding *datetime* or *timedelta* types, **add** and **zero_width** must be suitable *timedeltas* i.e. You should not mix types between **Numpy**, **Pandas** and the :mod:`datetime` module. """ x = range # Enforce tuple try: x[0] except TypeError: x = (x, x) # The expansion cases if zero_range(x): new = x[0] - zero_width / 2, x[0] + zero_width / 2 else: dx = (x[1] - x[0]) * mul + add new = x[0] - dx, x[1] + dx return new
[docs]def expand_range_distinct(range, expand=(0, 0, 0, 0), zero_width=1): """ Expand a range with a multiplicative or additive constants Similar to :func:`expand_range` but both sides of the range expanded using different constants Parameters ---------- range : tuple Range of data. Size 2 expand : tuple Length 2 or 4. If length is 2, then the same constants are used for both sides. If length is 4 then the first two are are the Multiplicative (*mul*) and Additive (*add*) constants for the lower limit, and the second two are the constants for the upper limit. zero_width : int | float | timedelta Distance to use if range has zero width Returns ------- out : tuple Expanded range Examples -------- >>> expand_range_distinct((3, 8)) (3, 8) >>> expand_range_distinct((0, 10), (0.1, 0)) (-1.0, 11.0) >>> expand_range_distinct((0, 10), (0.1, 0, 0.1, 0)) (-1.0, 11.0) >>> expand_range_distinct((0, 10), (0.1, 0, 0, 0)) (-1.0, 10) >>> expand_range_distinct((0, 10), (0, 2)) (-2, 12) >>> expand_range_distinct((0, 10), (0, 2, 0, 2)) (-2, 12) >>> expand_range_distinct((0, 10), (0, 0, 0, 2)) (0, 12) >>> expand_range_distinct((0, 10), (.1, 2)) (-3.0, 13.0) >>> expand_range_distinct((0, 10), (.1, 2, .1, 2)) (-3.0, 13.0) >>> expand_range_distinct((0, 10), (0, 0, .1, 2)) (0, 13.0) """ if len(expand) == 2: expand = tuple(expand) * 2 lower = expand_range(range, expand[0], expand[1], zero_width)[0] upper = expand_range(range, expand[2], expand[3], zero_width)[1] return (lower, upper)