Source code for mizani.bounds

"""
Continuous variables have values anywhere in the range minus
infinity to plus infinity. However, when creating a visual
representation of these values what usually matters is the
relative difference between the values. This is where rescaling
comes into play.

The values are mapped onto a range that a scale can deal with. For
graphical representation that range tends to be :math:`[0, 1]` or
:math:`[0, n]`, where :math:`n` is some number that makes the
plotted object overflow the plotting area.

Although a scale may be able to handle the :math:`[0, n]` range, it
may be desirable to have a lower bound greater than zero. For
example, if data values get mapped to zero on a scale whose
graphical representation is the size/area/radius/length some data
will be invisible. The solution is to restrict the lower bound
e.g. :math:`[0.1, 1]`. Similarly you can restrict the upper bound
-- using these functions.
"""

from __future__ import annotations

import datetime
import sys
import typing
from copy import copy
from typing import overload

import numpy as np
import pandas as pd

from .utils import get_null_value, is_vector

if typing.TYPE_CHECKING:
    from typing import Any, Optional, Sequence

    from mizani.typing import (
        FloatArrayLike,
        FloatSeries,
        NDArrayFloat,
        TupleFloat2,
        TupleFloat4,
    )


# Names exported by ``from mizani.bounds import *``.
__all__ = [
    "censor",
    "expand_range",
    "rescale",
    "rescale_max",
    "rescale_mid",
    "squish_infinite",
    "zero_range",
    "expand_range_distinct",
    "squish",
]

# Machine epsilon of Python floats. ``zero_range`` uses ``EPSILON * 100``
# as its default tolerance.
EPSILON = sys.float_info.epsilon


def rescale(
    x: FloatArrayLike,
    to: TupleFloat2 = (0, 1),
    _from: Optional[TupleFloat2] = None,
) -> NDArrayFloat:
    """
    Linearly map values from an input range onto an output range.

    Parameters
    ----------
    x : array_like | numeric
        1D vector of values to manipulate.
    to : tuple
        Output range (numeric vector of length two).
    _from : tuple
        Input range (numeric vector of length two). When omitted,
        the minimum and maximum of ``x`` are used.

    Returns
    -------
    out : array_like
        Rescaled values.

    Notes
    -----
    The mapping is done with :func:`numpy.interp`, so values of ``x``
    that fall outside the input range are clamped to the ends of
    ``to``.

    Examples
    --------
    >>> x = [0, 2, 4, 6, 8, 10]
    >>> rescale(x)
    array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])
    >>> rescale(x, to=(0, 2))
    array([0. , 0.4, 0.8, 1.2, 1.6, 2. ])
    >>> rescale(x, to=(0, 2), _from=(0, 20))
    array([0. , 0.2, 0.4, 0.6, 0.8, 1. ])
    """
    if _from is None:
        # No explicit input range: derive it from the data
        domain = (np.min(x), np.max(x))
    else:
        domain = _from
    return np.interp(x, domain, to)
def rescale_mid(
    x: FloatArrayLike,
    to: TupleFloat2 = (0, 1),
    _from: Optional[TupleFloat2] = None,
    mid: float = 0,
) -> NDArrayFloat:
    """
    Rescale values so that a chosen midpoint lands on the centre of ``to``.

    Parameters
    ----------
    x : array_like
        1D vector of values to manipulate.
    to : tuple
        Output range (numeric vector of length two).
    _from : tuple
        Input range (numeric vector of length two). When omitted,
        the minimum and maximum of ``x`` are used.
    mid : numeric
        Mid-point of the input range. It is mapped onto the mean
        of ``to``.

    Returns
    -------
    out : array_like
        Rescaled values. If either the input range or ``to`` has
        zero range, every value is the mean of ``to``.

    Examples
    --------
    >>> rescale_mid([1, 2, 3], mid=1)
    array([0.5 , 0.75, 1.  ])
    >>> rescale_mid([1, 2, 3], mid=2)
    array([0. , 0.5, 1. ])
    """
    if _from is None:
        domain: NDArrayFloat = np.array((np.min(x), np.max(x)))
    else:
        domain = np.array(_from)

    # Degenerate ranges cannot be scaled; put everything at the centre
    if zero_range(domain) or zero_range(to):  # type: ignore
        return np.repeat(np.mean(to), len(x))

    # Twice the largest distance between the midpoint and either end
    # of the input range, so the scaling is symmetric about ``mid``
    extent = 2 * np.max(np.abs(domain - mid))
    return (np.asarray(x) - mid) / extent * np.diff(to) + np.mean(to)
def rescale_max(
    x: FloatArrayLike,
    to: TupleFloat2 = (0, 1),
    _from: Optional[TupleFloat2] = None,
) -> NDArrayFloat:
    """
    Rescale numeric vector to have specified maximum.

    Parameters
    ----------
    x : array_like
        1D vector of values to manipulate.
    to : tuple
        Output range (numeric vector of length two).
    _from : tuple
        Input range (numeric vector of length two). When omitted,
        the minimum and maximum of ``x`` are used. Only the 2nd
        (max) element is essential to the output.

    Returns
    -------
    out : array_like
        Rescaled values.

    Notes
    -----
    When ``x`` contains negative values the work is delegated to
    :func:`rescale` with an output range of :python:`(0, to[1])`.

    Examples
    --------
    >>> x = np.array([0, 2, 4, 6, 8, 10])
    >>> rescale_max(x, (0, 3))
    array([0. , 0.6, 1.2, 1.8, 2.4, 3. ])

    Only the 2nd (max) element of the parameters ``to``
    and ``_from`` are essential to the output.

    >>> rescale_max(x, (1, 3))
    array([0. , 0.6, 1.2, 1.8, 2.4, 3. ])
    >>> rescale_max(x, (0, 20))
    array([ 0.,  4.,  8., 12., 16., 20.])

    If :python:`max(x) > _from[1]` then values will be
    scaled beyond the requested maximum (:python:`to[1]`).

    >>> rescale_max(x, to=(1, 3), _from=(-1, 6))
    array([0., 1., 2., 3., 4., 5.])

    If the values are the same, they take on the requested maximum.
    This includes an array of all zeros.

    >>> rescale_max(np.array([5, 5, 5]))
    array([1., 1., 1.])
    >>> rescale_max(np.array([0, 0, 0]))
    array([1, 1, 1])
    """
    values = np.asarray(x)
    if _from is None:
        domain = (np.min(values), np.max(values))
    else:
        domain = _from

    if np.any(values < 0):
        return rescale(values, (0, to[1]), domain)

    # All zeros with a zero maximum: dividing would give 0/0, so every
    # value is placed at the requested maximum instead
    if np.all(values == 0) and domain[1] == 0:
        return np.repeat(to[1], len(values))

    return values / domain[1] * to[1]
def squish_infinite(
    x: FloatArrayLike, range: TupleFloat2 = (0, 1)
) -> NDArrayFloat:
    """
    Truncate infinite values to a range.

    Parameters
    ----------
    x : array_like
        Values that should have infinities squished.
    range : tuple
        The range onto which to squish the infinites.
        Must be of size 2.

    Returns
    -------
    out : array_like
        A new array with negative infinities replaced by ``range[0]``
        and positive infinities replaced by ``range[1]``. The input
        ``x`` is left unmodified.

    Examples
    --------
    >>> arr1 = np.array([0, .5, .25, np.inf, .44])
    >>> arr2 = np.array([0, -np.inf, .5, .25, np.inf])
    >>> list(squish_infinite(arr1))
    [0.0, 0.5, 0.25, 1.0, 0.44]
    >>> list(squish_infinite(arr2, (-10, 9)))
    [0.0, -10.0, 0.5, 0.25, 9.0]
    """
    # Copy explicitly: np.asarray would hand back the caller's own
    # array when x is already an ndarray, and the assignments below
    # would then overwrite the caller's data.
    _x = np.array(x, copy=True)
    _x[np.isneginf(_x)] = range[0]
    _x[np.isposinf(_x)] = range[1]
    return _x
def squish(
    x: FloatArrayLike, range: TupleFloat2 = (0, 1), only_finite: bool = True
) -> NDArrayFloat:
    """
    Squish values into range.

    Parameters
    ----------
    x : array_like
        Values that should have out of range values squished.
    range : tuple
        The range onto which to squish the values.
    only_finite : boolean
        When true, only squishes finite values.

    Returns
    -------
    out : array_like
        A new array with out of range values squished. The input
        ``x`` is left unmodified.

    Examples
    --------
    >>> list(squish([-1.5, 0.2, 0.8, 1.0, 1.2]))
    [0.0, 0.2, 0.8, 1.0, 1.0]

    >>> list(squish([-np.inf, -1.5, 0.2, 0.8, 1.0, np.inf], only_finite=False))
    [0.0, 0.0, 0.2, 0.8, 1.0, 1.0]
    """
    # Copy explicitly: np.asarray would hand back the caller's own
    # array when x is already an ndarray, and the assignments below
    # would then overwrite the caller's data.
    _x = np.array(x, copy=True)
    # A scalar True broadcasts against the comparison masks, so every
    # element is eligible when only_finite is False
    finite = np.isfinite(_x) if only_finite else True
    _x[np.logical_and(_x < range[0], finite)] = range[0]
    _x[np.logical_and(_x > range[1], finite)] = range[1]
    return _x
@overload
def censor(
    x: NDArrayFloat | Sequence[float],
    range: TupleFloat2 = (0, 1),
    only_finite: bool = True,
) -> NDArrayFloat: ...


@overload
def censor(
    x: FloatSeries, range: TupleFloat2 = (0, 1), only_finite: bool = True
) -> FloatSeries: ...


def censor(
    x: NDArrayFloat | Sequence[float] | FloatSeries,
    range: TupleFloat2 = (0, 1),
    only_finite: bool = True,
) -> NDArrayFloat | FloatSeries:
    """
    Replace values that fall outside a range with a **NULL** type object.

    Parameters
    ----------
    x : array_like
        Values to manipulate
    range : tuple
        (min, max) giving desired output range
    only_finite : bool
        If True (the default), will only modify
        finite values.

    Returns
    -------
    x : array_like
        Censored copy of the input. An empty input gives an
        empty array.

    Examples
    --------
    >>> a = np.array([1, 2, np.inf, 3, 4, -np.inf, 5])
    >>> list(censor(a, (0, 10)))
    [1.0, 2.0, inf, 3.0, 4.0, -inf, 5.0]
    >>> list(censor(a, (0, 10), False))
    [1.0, 2.0, nan, 3.0, 4.0, nan, 5.0]
    >>> list(censor(a, (2, 4)))
    [nan, 2.0, inf, 3.0, 4.0, -inf, nan]

    Notes
    -----
    All values in ``x`` should be of the same type. ``only_finite``
    parameter is not considered for Datetime and Timedelta types.

    The **NULL** type object depends on the type of values in **x**.

    - :class:`float` - :py:`float('nan')`
    - :class:`int` - :py:`float('nan')`
    - :class:`datetime.datetime` : :py:`np.datetime64(NaT)`
    - :class:`datetime.timedelta` : :py:`np.timedelta64(NaT)`
    """
    if len(x) == 0:
        return np.array([])

    if not is_vector(x):
        x = np.asarray(x)

    null = get_null_value(x)

    all_true = np.repeat(True, len(x))
    if only_finite:
        try:
            finite = np.isfinite(x)
        except TypeError:
            # np.isfinite rejected the values; treat every element as
            # eligible for censoring
            finite = all_true
    else:
        finite = all_true

    # Ignore RuntimeWarning when x contains nans
    with np.errstate(invalid="ignore"):
        outside = (x < range[0]) | (x > range[1])

    bool_idx = finite & outside
    res = copy(x)
    if not bool_idx.any():
        return res

    # Integer arrays are converted to float before the null value
    # is written into them
    if res.dtype.kind == "i":
        res = np.asarray(res, dtype=float)
    res[bool_idx] = null
    return res
def zero_range(x: tuple[Any, Any], tol: float = EPSILON * 100) -> bool:
    """
    Determine if range of vector is close to zero.

    Parameters
    ----------
    x : array_like
        Value(s) to check. If it is an array_like, it
        should be of length 2.
    tol : float
        Tolerance. Default tolerance is the `machine epsilon`_
        times :math:`10^2`. It is only used for numeric values;
        datetime and timedelta values are compared for equality.

    Returns
    -------
    out : bool
        Whether ``x`` has zero range. Always a Python :class:`bool`.

    Raises
    ------
    TypeError
        If the values are not numbers, datetimes or timedeltas.

    Examples
    --------
    >>> zero_range([1, 1])
    True
    >>> zero_range([1, 2])
    False
    >>> zero_range([1, 2], tol=2)
    True

    .. _machine epsilon: https://en.wikipedia.org/wiki/Machine_epsilon
    """
    # Order the pair so that (high - low) below is never negative
    if x[0] > x[1]:
        x = x[1], x[0]

    # Comparisons on numpy scalars yield numpy.bool_; every result is
    # passed through bool() so the annotated return type holds.

    # datetime - pandas, cpython
    if isinstance(x[0], (pd.Timestamp, datetime.datetime)):
        from mizani._core.dates import datetime_to_num

        low_num, high_num = datetime_to_num(x)
        return bool(low_num == high_num)
    # datetime - numpy
    elif isinstance(x[0], np.datetime64):
        return bool(x[0] == x[1])
    # timedelta - pandas, cpython
    elif isinstance(x[0], (pd.Timedelta, datetime.timedelta)):
        return bool(x[0].total_seconds() == x[1].total_seconds())
    # timedelta - numpy
    elif isinstance(x[0], np.timedelta64):
        return bool(x[0] == x[1])
    elif not isinstance(x[0], (float, int, np.number)):
        raise TypeError(
            "zero_range objects cannot work with objects "
            "of type '{}'".format(type(x[0]))
        )

    low, high = x

    # A range with a missing end is treated as having zero width
    if any(np.isnan((low, high))):
        return True

    if low == high:
        return True

    # Distinct ends, at least one of them infinite
    if any(np.isinf((low, high))):
        return False

    # The width is measured relative to the magnitude of the lower
    # end; a lower end of zero would mean dividing by zero
    low_abs = np.abs(low)
    if low_abs == 0:
        return False

    return bool(((high - low) / low_abs) < tol)
def expand_range(
    range: TupleFloat2, mul: float = 0, add: float = 0, zero_width: float = 1
) -> TupleFloat2:
    """
    Widen a range by a multiplicative and/or an additive constant.

    Parameters
    ----------
    range : tuple
        Range of data. Size 2.
    mul : int | float
        Multiplicative constant
    add : int | float | timedelta
        Additive constant
    zero_width : int | float | timedelta
        Distance to use if range has zero width

    Returns
    -------
    out : tuple
        Expanded range

    Examples
    --------
    >>> expand_range((3, 8))
    (3, 8)
    >>> expand_range((0, 10), mul=0.1)
    (-1.0, 11.0)
    >>> expand_range((0, 10), add=2)
    (-2, 12)
    >>> expand_range((0, 10), mul=.1, add=2)
    (-3.0, 13.0)
    >>> expand_range((0, 1))
    (0, 1)

    When the range has zero width

    >>> expand_range((5, 5))
    (4.5, 5.5)

    Notes
    -----
    If expanding *datetime* or *timedelta* types, **add** and
    **zero_width** must be suitable *timedeltas* i.e. You should
    not mix types between **Numpy**, **Pandas** and the
    :mod:`datetime` module.
    """
    low, high = range

    if zero_range(range):
        # Nothing to scale: centre a span of zero_width on the value
        half_width = zero_width / 2
        return low - half_width, low + half_width

    # The same margin is applied to both ends
    margin = (high - low) * mul + add
    return low - margin, high + margin
def expand_range_distinct(
    range: TupleFloat2,
    expand: TupleFloat2 | TupleFloat4 = (0, 0, 0, 0),
    zero_width: float = 1,
) -> TupleFloat2:
    """
    Expand each side of a range with its own pair of constants.

    Similar to :func:`expand_range` but both sides of the range
    expanded using different constants

    Parameters
    ----------
    range : tuple
        Range of data. Size 2
    expand : tuple
        Length 2 or 4. If length is 2, then the same constants
        are used for both sides. If length is 4 then the first
        two are the Multiplicative (*mul*) and Additive (*add*)
        constants for the lower limit, and the second two are
        the constants for the upper limit.
    zero_width : int | float | timedelta
        Distance to use if range has zero width

    Returns
    -------
    out : tuple
        Expanded range

    Examples
    --------
    >>> expand_range_distinct((3, 8))
    (3, 8)
    >>> expand_range_distinct((0, 10), (0.1, 0))
    (-1.0, 11.0)
    >>> expand_range_distinct((0, 10), (0.1, 0, 0.1, 0))
    (-1.0, 11.0)
    >>> expand_range_distinct((0, 10), (0.1, 0, 0, 0))
    (-1.0, 10)
    >>> expand_range_distinct((0, 10), (0, 2))
    (-2, 12)
    >>> expand_range_distinct((0, 10), (0, 2, 0, 2))
    (-2, 12)
    >>> expand_range_distinct((0, 10), (0, 0, 0, 2))
    (0, 12)
    >>> expand_range_distinct((0, 10), (.1, 2))
    (-3.0, 13.0)
    >>> expand_range_distinct((0, 10), (.1, 2, .1, 2))
    (-3.0, 13.0)
    >>> expand_range_distinct((0, 10), (0, 0, .1, 2))
    (0, 13.0)
    """
    # A (mul, add) pair applies to both sides; spell it out as four
    # constants so a single unpacking handles both accepted lengths
    if len(expand) == 2:
        constants = (expand[0], expand[1], expand[0], expand[1])
    else:
        constants = expand
    low_mul, low_add, high_mul, high_add = constants

    # Expand the whole range once per side and keep only that side
    by_low = expand_range(range, low_mul, low_add, zero_width)
    by_high = expand_range(range, high_mul, high_add, zero_width)
    return (by_low[0], by_high[1])