Source code for mizani.formatters

"""
Scales have guides and these are what help users make sense of
the data mapped onto the scale. Common examples of guides include
the x-axis, the y-axis, the keyed legend and a colorbar legend.
The guides have demarcations (breaks), some of which must be labelled.

The `*_format` functions below create functions that convert data
values as understood by a specific scale and return string
representations of those values. Manipulating the string
representation of a value helps improve readability of the guide.
"""
import re
from bisect import bisect_right

try:
    from zoneinfo import ZoneInfo
except ImportError:
    # python < 3.9
    from backports.zoneinfo import ZoneInfo

import numpy as np

from .breaks import timedelta_helper
from .utils import (
    get_timezone,
    match,
    precision,
    round_any,
    same_log10_order_of_magnitude,
)

__all__ = [
    "comma_format",
    "custom_format",
    "currency_format",
    "dollar_format",
    "percent_format",
    "scientific_format",
    "date_format",
    "mpl_format",
    "log_format",
    "timedelta_format",
    "pvalue_format",
    "ordinal_format",
    "number_bytes_format",
]


class custom_format:
    """
    Format values with a user supplied format string

    Parameters
    ----------
    fmt : str, optional
        Format string. Default is the generic new style
        format braces, ``{}``.
    style : 'new' | 'old'
        Whether to use new style or old style formatting.
        New style uses the :meth:`str.format` while old
        style uses ``%``. The format string must be written
        accordingly.

    Examples
    --------
    >>> formatter = custom_format('{:.2f} USD')
    >>> formatter([3.987, 2, 42.42])
    ['3.99 USD', '2.00 USD', '42.42 USD']
    """

    def __init__(self, fmt="{}", style="new"):
        self.fmt, self.style = fmt, style

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.

        Raises
        ------
        ValueError
            If the style is neither ``'new'`` nor ``'old'``.
        """
        style = self.style
        # The style is only checked when the formatter is used
        if style not in ("new", "old"):
            raise ValueError("style should be either 'new' or 'old'")

        if style == "old":
            return [self.fmt % item for item in x]
        return [self.fmt.format(item) for item in x]
# formatting functions
class currency_format:
    """
    Currency formatter

    Parameters
    ----------
    prefix : str
        What to put before the value.
    suffix : str
        What to put after the value.
    digits : int
        Number of digits after the decimal point.
    big_mark : str
        The thousands separator. This is usually
        a comma or a dot. It is applied to the number
        only; commas in the prefix or the suffix are
        left as they are.

    Examples
    --------
    >>> x = [1.232, 99.2334, 4.6, 9, 4500]
    >>> currency_format()(x)
    ['$1.23', '$99.23', '$4.60', '$9.00', '$4500.00']
    >>> currency_format('C$', digits=0, big_mark=',')(x)
    ['C$1', 'C$99', 'C$5', 'C$9', 'C$4,500']
    """

    def __init__(self, prefix="$", suffix="", digits=2, big_mark=""):
        self.prefix = prefix
        self.suffix = suffix
        self.digits = digits
        self.big_mark = big_mark

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        big_mark = self.big_mark

        # The number is formatted on its own, with {:.2f} or {:,.2f},
        # so that swapping the thousands separator cannot touch any
        # comma that is part of the prefix or the suffix.
        comma = "," if big_mark else ""
        number_tpl = f"{{:{comma}.{self.digits}f}}"
        numbers = [number_tpl.format(val) for val in x]

        if big_mark and big_mark != ",":
            numbers = [num.replace(",", big_mark) for num in numbers]

        # The prefix and suffix remain part of a format template,
        # as before, so escaped braces in them keep their meaning.
        tpl = "".join((self.prefix, "{}", self.suffix))
        return [tpl.format(num) for num in numbers]
# ``dollar_format`` is another name for ``currency_format``
dollar_format = currency_format
# Ready to use formatter instance created with the default arguments
dollar = dollar_format()
class comma_format:
    """
    Format number with commas separating thousands

    Parameters
    ----------
    digits : int
        Number of digits after the decimal point.

    Examples
    --------
    >>> comma_format()([1000, 2, 33000, 400])
    ['1,000', '2', '33,000', '400']
    """

    def __init__(self, digits=0):
        # A currency formatter without a prefix does all the work
        self.formatter = currency_format(
            big_mark=",", digits=digits, prefix=""
        )

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        labels = self.formatter(x)
        return labels
class percent_format:
    """
    Percent formatter

    Multiply by one hundred and display percent sign

    Parameters
    ----------
    use_comma : bool
        If True, use a comma to separate the thousands.
        Default is False.

    Examples
    --------
    >>> formatter = percent_format()
    >>> formatter([.45, 9.515, .01])
    ['45%', '952%', '1%']
    >>> formatter([.654, .8963, .1])
    ['65.4%', '89.6%', '10.0%']
    """

    def __init__(self, use_comma=False):
        if use_comma:
            self.big_mark = ","
        else:
            self.big_mark = ""

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        if len(x) == 0:
            return []

        _precision = precision(x)
        x = round_any(x, _precision / 100) * 100

        # Decimal places are only shown when the precision
        # is not greater than 1
        digits = 0 if _precision > 1 else abs(int(np.log10(_precision)))

        to_percent = currency_format(
            prefix="", suffix="%", digits=digits, big_mark=self.big_mark
        )
        labels = to_percent(x)

        # The zeros after the decimal are dropped only if that
        # can be done for every one of the labels
        zeros_re = re.compile(r"\.0+%$")
        if not all(zeros_re.search(label) for label in labels):
            return labels
        return [zeros_re.sub("%", label) for label in labels]
# Ready to use formatter instance created with the default arguments
percent = percent_format()
class scientific_format:
    """
    Scientific formatter

    Parameters
    ----------
    digits : int
        Maximum number of digits after the decimal point of
        the mantissa. Trailing zeros that are common to all
        the labels are removed.

    Examples
    --------
    >>> x = [.12, .23, .34, 45]
    >>> scientific_format()(x)
    ['1.2e-01', '2.3e-01', '3.4e-01', '4.5e+01']

    Notes
    -----
    Be careful when using many digits (15+ on a 64
    bit computer). Be mindful of the `machine epsilon`_.

    .. _machine epsilon: https://en.wikipedia.org/wiki/Machine_epsilon
    """

    def __init__(self, digits=3):
        tpl = "".join(["{:.", str(digits), "e}"])
        self.formatter = custom_format(tpl)

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        if len(x) == 0:
            return []

        # Only zeros that trail the fractional part of the mantissa
        # count. Without the decimal point in the pattern, a mantissa
        # of "0" (as in "0e+00") would itself be seen as superfluous.
        zeros_re = re.compile(r"\.\d*?(0+)e")

        def count_zeros(s):
            found = zeros_re.search(s)
            return len(found.group(1)) if found else 0

        # format and then remove superfluous zeros
        labels = self.formatter(x)
        n = min(count_zeros(val) for val in labels)
        if n:
            zeros = "0" * n + "e"
            # When all the decimals are removed the decimal point
            # is left dangling ("1.e+00"), so it is dropped as well.
            labels = [
                val.replace(zeros, "e").replace(".e", "e") for val in labels
            ]
        return labels
scientific = scientific_format()


def _format(formatter, x):
    """
    Format values with a matplotlib formatter and tidy up the labels

    Parameters
    ----------
    formatter : object
        Formatter that is called on every value. It must
        have the methods ``create_dummy_axis`` and ``set_locs``.
    x : array
        Values to format.

    Returns
    -------
    out : list
        List of strings.
    """
    # For MPL to play nice
    formatter.create_dummy_axis()

    # For sensible decimal places
    locs = [val for val in x if ~np.isnan(val)]
    formatter.set_locs(locs)

    # Not all formatters have an order of magnitude
    oom = int(getattr(formatter, "orderOfMagnitude", 0))

    # Remove unnecessary decimals
    decimals_re = re.compile(r"\.0+$")
    labels = [formatter(tick) for tick in x]
    labels = [decimals_re.sub("", label) for label in labels]

    if not oom:
        return labels

    # MPL does not add the exponential component
    return [s if s == "0" else "{}e{}".format(s, oom) for s in labels]
class mpl_format:
    """
    Format using MPL formatter for scalars

    The labels are created by matplotlib's ``ScalarFormatter``
    with the offset turned off.

    Examples
    --------
    >>> mpl_format()([.654, .8963, .1])
    ['0.6540', '0.8963', '0.1000']
    """

    def __init__(self):
        # Matplotlib is only imported when a formatter is created
        from matplotlib import ticker

        self.formatter = ticker.ScalarFormatter(useOffset=False)

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        labels = _format(self.formatter, x)
        return labels
class log_format:
    """
    Log Formatter

    Parameters
    ----------
    base : int
        Base of the logarithm. Default is 10.
    exponent_limits : tuple
        limits (int, int) where if any of the powers of the
        numbers falls outside, then the labels will be in
        exponent form. This only applies for base 10.
    mathtex : bool
        If True, return the labels in mathtex format as understood
        by Matplotlib.

    Examples
    --------
    >>> log_format()([0.001, 0.1, 100])
    ['0.001', '0.1', '100']

    >>> log_format()([0.0001, 0.1, 10000])
    ['1e-4', '1e-1', '1e4']

    >>> log_format(mathtex=True)([0.0001, 0.1, 10000])
    ['$10^{-4}$', '$10^{-1}$', '$10^{4}$']
    """

    def __init__(self, base=10, exponent_limits=(-4, 4), mathtex=False):
        self.base = base
        self.exponent_limits = exponent_limits
        self.mathtex = mathtex

    def _tidyup_labels(self, labels):
        """
        Make all labels uniform in format

        Remove redundant zeros for labels in exponential format.
        When mathtex is on and at least one label is in
        exponential format, all the labels are converted to
        mathtex.

        Parameters
        ----------
        labels : list-like
            Labels to be tidied.

        Returns
        -------
        out : list-like
            Labels
        """

        def remove_zeroes(s):
            """
            Remove unnecessary zeros for float string s
            """
            tup = s.split("e")
            if len(tup) == 2:
                # Trailing zeros of the mantissa, then a dangling point
                mantissa = tup[0].rstrip("0").rstrip(".")
                # int() drops the plus sign and the leading zeros
                exponent = int(tup[1])
                if exponent:
                    s = "%se%d" % (mantissa, exponent)
                else:
                    # An exponent of zero is not shown
                    s = mantissa
            return s

        def as_exp(s):
            """
            Float string s as in exponential format
            """
            return s if "e" in s else "{:1.0e}".format(float(s))

        def as_mathtex(s):
            """
            Mathtex for matplotlib
            """
            if "e" not in s:
                # remove_zeroes leaves only the mantissa when the
                # exponent is zero; that label is taken to be "1"
                assert s == "1", f"Unexpected value {s = }, instead of '1'"
                return f"${self.base}^{{0}}$"

            exp = s.split("e")[1]
            return f"${self.base}^{{{exp}}}$"

        # If any are in exponential format, make all of
        # them exponential
        has_e = ["e" in x for x in labels]
        if not all(has_e) and sum(has_e):
            labels = [as_exp(x) for x in labels]

        labels = [remove_zeroes(x) for x in labels]

        # Recomputed because removing the zeros may have
        # stripped the exponent off some labels
        has_e = ["e" in x for x in labels]
        if self.mathtex and any(has_e):
            labels = [as_mathtex(x) for x in labels]

        return labels

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        if len(x) == 0:
            return []

        # Decide on using exponents
        if self.base == 10:
            # Powers of 10 that bound the values
            xmin = int(np.floor(np.log10(np.min(x))))
            xmax = int(np.ceil(np.log10(np.max(x))))
            emin, emax = self.exponent_limits
            # True if every value is an integer power of 10
            all_multiples = np.all([np.log10(num).is_integer() for num in x])
            # Order of magnitude of the minimum and maximum
            if same_log10_order_of_magnitude(x):
                # Leave it to the matplotlib formatter, which is
                # given the exponent limits as its power limits
                f = mpl_format()
                f.formatter.set_powerlimits((emin, emax))
                return f(x)
            elif all_multiples and (xmin <= emin or xmax >= emax):
                fmt = "{:1.0e}"
            else:
                fmt = "{:g}"
            labels = [fmt.format(num) for num in x]
            return self._tidyup_labels(labels)
        else:

            def _exp(num, base):
                # Exponent of num in the given base; an int if it is
                # close to a whole number, else rounded to 3 decimals
                e = np.log(num) / np.log(base)
                e_round = np.round(e)
                if np.isclose(e, e_round):
                    e = int(e_round)
                else:
                    e = np.round(e, 3)
                return e

            base_txt = f"{self.base}"
            # The natural base is shown as the letter e
            if self.base == np.e:
                base_txt = "e"

            # Labels are of the form base^exponent
            if self.mathtex:
                fmt_parts = (f"${base_txt}^", "{{{e}}}$")
            else:
                fmt_parts = (f"{base_txt}^", "{e}")

            fmt = "".join(fmt_parts)
            exps = [_exp(num, self.base) for num in x]
            labels = [fmt.format(e=e) for e in exps]
            return labels
class date_format:
    """
    Datetime formatter

    Parameters
    ----------
    fmt : str
        Format string. See
        :ref:`strftime <strftime-strptime-behavior>`.
    tz : datetime.tzinfo | str, optional
        Time zone information. A string is taken to be the
        name of a time zone. If none is specified, the time
        zone is worked out from the dates that are being
        formatted.

    Examples
    --------
    >>> from datetime import datetime
    >>> x = [datetime(x, 1, 1) for x in [2010, 2014, 2018, 2022]]
    >>> date_format()(x)
    ['2010-01-01', '2014-01-01', '2018-01-01', '2022-01-01']
    >>> date_format('%Y')(x)
    ['2010', '2014', '2018', '2022']

    Can format time

    >>> x = [datetime(2017, 12, 1, 16, 5, 7)]
    >>> date_format("%Y-%m-%d %H:%M:%S")(x)
    ['2017-12-01 16:05:07']

    Time zones are respected

    >>> UTC = ZoneInfo('UTC')
    >>> UG = ZoneInfo('Africa/Kampala')
    >>> x = [datetime(2010, 1, 1, i) for i in [8, 15]]
    >>> x_tz = [datetime(2010, 1, 1, i, tzinfo=UG) for i in [8, 15]]
    >>> date_format('%Y-%m-%d %H:%M')(x)
    ['2010-01-01 08:00', '2010-01-01 15:00']
    >>> date_format('%Y-%m-%d %H:%M')(x_tz)
    ['2010-01-01 08:00', '2010-01-01 15:00']

    Format with a specific time zone

    >>> date_format('%Y-%m-%d %H:%M', tz=UTC)(x_tz)
    ['2010-01-01 05:00', '2010-01-01 12:00']
    >>> date_format('%Y-%m-%d %H:%M', tz='EST')(x_tz)
    ['2010-01-01 00:00', '2010-01-01 07:00']
    """

    def __init__(self, fmt="%Y-%m-%d", tz=None):
        # Time zone names are turned into time zone objects
        timezone = ZoneInfo(tz) if isinstance(tz, str) else tz

        from matplotlib.dates import DateFormatter

        self.formatter = DateFormatter(fmt, tz=timezone)
        self.tz = timezone

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        from matplotlib.dates import date2num

        # Without a time zone from the user, the formatter
        # gets the one that is looked up from the input
        if self.tz is None and len(x):
            self.formatter.tz = get_timezone(x)

        # The formatter is tied to axes and takes
        # breaks in ordinal format.
        ordinals = list(map(date2num, x))
        return _format(self.formatter, ordinals)
class timedelta_format:
    """
    Timedelta formatter

    Parameters
    ----------
    units : str, optional
        The units in which the breaks will be computed.
        If None, they are decided automatically. Otherwise,
        the value should be one of::

            'ns'    # nanoseconds
            'us'    # microseconds
            'ms'    # milliseconds
            's'     # seconds
            'm'     # minute
            'h'     # hour
            'd'     # day
            'w'     # week
            'M'     # month
            'y'     # year

    add_units : bool
        Whether to append the units identifier string
        to the values.
    usetex : bool
        If True, the microseconds identifier string is
        rendered with greek letter *mu*. Default is False.

    Examples
    --------
    >>> from datetime import timedelta
    >>> x = [timedelta(days=31*i) for i in range(5)]
    >>> timedelta_format()(x)
    ['0', '1 month', '2 months', '3 months', '4 months']
    >>> timedelta_format(units='d')(x)
    ['0', '31 days', '62 days', '93 days', '124 days']
    >>> timedelta_format(units='d', add_units=False)(x)
    ['0', '31', '62', '93', '124']
    """

    abbreviations = {
        "ns": "ns",
        "us": "us",
        "ms": "ms",
        "s": "s",
        "m": " minute",
        "h": " hour",
        "d": " day",
        "w": " week",
        "M": " month",
        "y": " year",
    }

    def __init__(self, units=None, add_units=True, usetex=False):
        self.units = units
        self.add_units = add_units
        self.usetex = usetex
        self._mpl_format = mpl_format()

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        if len(x) == 0:
            return []

        values, _units = timedelta_helper.format_info(x, self.units)
        unit_label = self.abbreviations[_units]
        if self.usetex and unit_label == "us":
            unit_label = r"$\mu s$"

        number_labels = self._mpl_format(values)
        if not self.add_units:
            return number_labels

        # Units that end with an "s" have no distinct plural form
        plural = "" if _units.endswith("s") else "s"

        def with_units(num, num_label):
            # 0 has no units
            if num == 0:
                return "".join([num_label, ""])
            # Exactly one unit is in singular form
            ending = "" if num == 1 else plural
            return "".join([num_label, unit_label + ending])

        return [with_units(num, lbl) for num, lbl in zip(values, number_labels)]
class pvalue_format:
    """
    p-values Formatter

    Parameters
    ----------
    accuracy : float
        Number to round to
    add_p : bool
        Whether to prepend "p=" or "p<" to the output

    Examples
    --------
    >>> x = [.90, .15, .015, .009, 0.0005]
    >>> pvalue_format()(x)
    ['0.9', '0.15', '0.015', '0.009', '<0.001']
    >>> pvalue_format(0.1)(x)
    ['0.9', '0.1', '<0.1', '<0.1', '<0.1']
    >>> pvalue_format(0.1, True)(x)
    ['p=0.9', 'p=0.1', 'p<0.1', 'p<0.1', 'p<0.1']
    """

    def __init__(self, accuracy=0.001, add_p=False):
        self.accuracy = accuracy
        self.add_p = add_p

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        accuracy = self.accuracy
        rounded = round_any(x, accuracy)

        if self.add_p:
            eq_tpl, below_tpl = "p={:g}", "p<{:g}"
        else:
            eq_tpl, below_tpl = "{:g}", "<{:g}"

        # All values below the accuracy share a single label
        below_label = below_tpl.format(accuracy)

        labels = []
        for num in rounded:
            if num < accuracy:
                labels.append(below_label)
            else:
                labels.append(eq_tpl.format(num))
        return labels
def ordinal(n, prefix="", suffix="", big_mark=""):
    """
    Convert a number to its ordinal representation

    Parameters
    ----------
    n : number
        Value to convert. It is truncated to an integer.
    prefix : str
        What to put before the value.
    suffix : str
        What to put after the ordinal indicator.
    big_mark : str
        The thousands separator. If empty, the thousands
        are not separated.

    Returns
    -------
    out : str
        The integer followed by one of "st", "nd", "rd" or "th",
        enclosed by the prefix and the suffix.

    Examples
    --------
    >>> ordinal(1)
    '1st'
    >>> ordinal(12)
    '12th'
    >>> ordinal(1003, big_mark=',')
    '1,003rd'
    >>> ordinal(-1)
    '-1st'
    """
    # General Case: 0th, 1st, 2nd, 3rd, 4th, 5th, 6th, 7th, 8th, 9th
    # Special Case: 10th, 11th, 12th, 13th
    n = int(n)

    # The indicator is chosen from the digits of the number. In
    # python the modulo of a negative number is not negative
    # (-1 % 10 == 9), so the sign has to be taken out of the way.
    magnitude = abs(n)
    if 11 <= (magnitude % 100) <= 13:
        _suffix = "th"
    else:
        _suffix = ("th", "st", "nd", "rd", "th")[min(magnitude % 10, 4)]

    if big_mark:
        s = "{:,}".format(n)
        if big_mark != ",":
            s = s.replace(",", big_mark)
    else:
        s = "{}".format(n)

    return "{}{}{}{}".format(prefix, s, _suffix, suffix)
class ordinal_format:
    """
    Ordinal Formatter

    Parameters
    ----------
    prefix : str
        What to put before the value.
    suffix : str
        What to put after the value.
    big_mark : str
        The thousands separator. This is usually
        a comma or a dot.

    Examples
    --------
    >>> ordinal_format()(range(8))
    ['0th', '1st', '2nd', '3rd', '4th', '5th', '6th', '7th']
    >>> ordinal_format(suffix=' Number')(range(11, 15))
    ['11th Number', '12th Number', '13th Number', '14th Number']
    """

    def __init__(self, prefix="", suffix="", big_mark=""):
        self.prefix, self.suffix = prefix, suffix
        self.big_mark = big_mark

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        prefix, suffix, big_mark = self.prefix, self.suffix, self.big_mark
        return [ordinal(value, prefix, suffix, big_mark) for value in x]
class number_bytes_format:
    """
    Bytes Formatter

    Parameters
    ----------
    symbol : str
        Valid symbols are "B", "kB", "MB", "GB", "TB", "PB", "EB",
        "ZB", and "YB" for SI units, and the "iB" variants for
        binary units. Default is "auto" where the symbol to be
        used is determined separately for each value of x.
    units : "binary" | "si"
        Which unit base to use, 1024 for "binary" or 1000 for "si".
    fmt : str, optional
        Format string for the number, including whatever separates
        it from the symbol. Default is ``"{:.0f} "``.

    Examples
    --------
    >>> x = [1000, 1000000, 4e5]
    >>> number_bytes_format()(x)
    ['1000 B', '977 KiB', '391 KiB']
    >>> number_bytes_format(units='si')(x)
    ['1 kB', '1 MB', '400 kB']
    """

    def __init__(self, symbol="auto", units="binary", fmt="{:.0f} "):
        self.symbol = symbol
        self.units = units
        self.fmt = fmt

        if units == "si":
            self.base = 1000
            self._all_symbols = [
                "B",
                "kB",
                "MB",
                "GB",
                "TB",
                "PB",
                "EB",
                "ZB",
                "YB",
            ]
        else:
            self.base = 1024
            self._all_symbols = [
                "B",
                "KiB",
                "MiB",
                "GiB",
                "TiB",
                "PiB",
                "EiB",
                "ZiB",
                "YiB",
            ]

        # possible exponents of base: eg 1000^1, 1000^2, 1000^3, ...
        exponents = np.arange(1, len(self._all_symbols) + 1, dtype=float)
        self._powers = self.base**exponents
        self._validate_symbol(symbol, ["auto"] + self._all_symbols)

    def __call__(self, x):
        """
        Format a sequence of inputs

        Parameters
        ----------
        x : array
            Input

        Returns
        -------
        out : list
            List of strings.
        """
        _all_symbols = self._all_symbols
        symbol = self.symbol
        if symbol == "auto":
            # There are as many powers as there are symbols, so a
            # value that is not less than the largest power would
            # be placed one past the last symbol. Such values are
            # expressed in the largest unit.
            largest = len(_all_symbols) - 1
            power = [
                min(bisect_right(self._powers, val), largest) for val in x
            ]
            symbols = [_all_symbols[p] for p in power]
        else:
            # The position of the symbol is the exponent of the base
            power = _all_symbols.index(symbol)
            symbols = [symbol] * len(x)

        x = np.asarray(x)
        power = np.asarray(power, dtype=float)
        values = x / self.base**power
        fmt = (self.fmt + "{}").format
        labels = [fmt(v, s) for v, s in zip(values, symbols)]
        return labels

    def _validate_symbol(self, symbol, allowed_symbols):
        """
        Raise ValueError if the symbol is not among the allowed ones
        """
        if symbol not in allowed_symbols:
            raise ValueError(
                "Symbol must be one of {}".format(allowed_symbols)
            )