AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/seaborn/categorical.py

3457 lines
119 KiB
Python
Raw Normal View History

2024-10-02 22:15:59 +04:00
from collections import namedtuple
from textwrap import dedent
import warnings
from colorsys import rgb_to_hls
from functools import partial
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib.cbook import normalize_kwargs
from matplotlib.collections import PatchCollection
from matplotlib.markers import MarkerStyle
from matplotlib.patches import Rectangle
import matplotlib.pyplot as plt
from seaborn._core.typing import default, deprecated
from seaborn._base import VectorPlotter, infer_orient, categorical_order
from seaborn._stats.density import KDE
from seaborn import utils
from seaborn.utils import (
desaturate,
_check_argument,
_draw_figure,
_default_color,
_get_patch_legend_artist,
_get_transform_functions,
_scatter_legend_artist,
_version_predates,
)
from seaborn._compat import groupby_apply_include_groups
from seaborn._statistics import (
EstimateAggregator,
LetterValues,
WeightedAggregator,
)
from seaborn.palettes import light_palette
from seaborn.axisgrid import FacetGrid, _facet_docs
__all__ = [
"catplot",
"stripplot", "swarmplot",
"boxplot", "violinplot", "boxenplot",
"pointplot", "barplot", "countplot",
]
class _CategoricalPlotter(VectorPlotter):
wide_structure = {"x": "@columns", "y": "@values", "hue": "@columns"}
flat_structure = {"y": "@values"}
_legend_attributes = ["color"]
def __init__(
self,
data=None,
variables={},
order=None,
orient=None,
require_numeric=False,
color=None,
legend="auto",
):
super().__init__(data=data, variables=variables)
# This method takes care of some bookkeeping that is necessary because the
# original categorical plots (prior to the 2021 refactor) had some rules that
# don't fit exactly into VectorPlotter logic. It may be wise to have a second
# round of refactoring that moves the logic deeper, but this will keep things
# relatively sensible for now.
# For wide data, orient determines assignment to x/y differently from the
# default VectorPlotter rules. If we do decide to make orient part of the
# _base variable assignment, we'll want to figure out how to express that.
if self.input_format == "wide" and orient in ["h", "y"]:
self.plot_data = self.plot_data.rename(columns={"x": "y", "y": "x"})
orig_variables = set(self.variables)
orig_x = self.variables.pop("x", None)
orig_y = self.variables.pop("y", None)
orig_x_type = self.var_types.pop("x", None)
orig_y_type = self.var_types.pop("y", None)
if "x" in orig_variables:
self.variables["y"] = orig_x
self.var_types["y"] = orig_x_type
if "y" in orig_variables:
self.variables["x"] = orig_y
self.var_types["x"] = orig_y_type
# Initially there was more special code for wide-form data where plots were
# multi-colored by default and then either palette or color could be used.
# We want to provide backwards compatibility for this behavior in a relatively
# simply way, so we delete the hue information when color is specified.
if (
self.input_format == "wide"
and "hue" in self.variables
and color is not None
):
self.plot_data.drop("hue", axis=1)
self.variables.pop("hue")
# The concept of an "orientation" is important to the original categorical
# plots, but there's no provision for it in VectorPlotter, so we need it here.
# Note that it could be useful for the other functions in at least two ways
# (orienting a univariate distribution plot from long-form data and selecting
# the aggregation axis in lineplot), so we may want to eventually refactor it.
self.orient = infer_orient(
x=self.plot_data.get("x", None),
y=self.plot_data.get("y", None),
orient=orient,
require_numeric=False,
)
self.legend = legend
# Short-circuit in the case of an empty plot
if not self.has_xy_data:
return
# Categorical plots can be "univariate" in which case they get an anonymous
# category label on the opposite axis. Note: this duplicates code in the core
# scale_categorical function. We need to do it here because of the next line.
if self.orient not in self.variables:
self.variables[self.orient] = None
self.var_types[self.orient] = "categorical"
self.plot_data[self.orient] = ""
# Categorical variables have discrete levels that we need to track
cat_levels = categorical_order(self.plot_data[self.orient], order)
self.var_levels[self.orient] = cat_levels
def _hue_backcompat(self, color, palette, hue_order, force_hue=False):
"""Implement backwards compatibility for hue parametrization.
Note: the force_hue parameter is used so that functions can be shown to
pass existing tests during refactoring and then tested for new behavior.
It can be removed after completion of the work.
"""
# The original categorical functions applied a palette to the categorical axis
# by default. We want to require an explicit hue mapping, to be more consistent
# with how things work elsewhere now. I don't think there's any good way to
# do this gently -- because it's triggered by the default value of hue=None,
# users would always get a warning, unless we introduce some sentinel "default"
# argument for this change. That's possible, but asking users to set `hue=None`
# on every call is annoying.
# We are keeping the logic for implementing the old behavior in with the current
# system so that (a) we can punt on that decision and (b) we can ensure that
# refactored code passes old tests.
default_behavior = color is None or palette is not None
if force_hue and "hue" not in self.variables and default_behavior:
self._redundant_hue = True
self.plot_data["hue"] = self.plot_data[self.orient]
self.variables["hue"] = self.variables[self.orient]
self.var_types["hue"] = "categorical"
hue_order = self.var_levels[self.orient]
# Because we convert the categorical axis variable to string,
# we need to update a dictionary palette too
if isinstance(palette, dict):
palette = {str(k): v for k, v in palette.items()}
else:
if "hue" in self.variables:
redundant = (self.plot_data["hue"] == self.plot_data[self.orient]).all()
else:
redundant = False
self._redundant_hue = redundant
# Previously, categorical plots had a trick where color= could seed the palette.
# Because that's an explicit parameterization, we are going to give it one
# release cycle with a warning before removing.
if "hue" in self.variables and palette is None and color is not None:
if not isinstance(color, str):
color = mpl.colors.to_hex(color)
palette = f"dark:{color}"
msg = (
"\n\nSetting a gradient palette using color= is deprecated and will be "
f"removed in v0.14.0. Set `palette='{palette}'` for the same effect.\n"
)
warnings.warn(msg, FutureWarning, stacklevel=3)
return palette, hue_order
def _palette_without_hue_backcompat(self, palette, hue_order):
"""Provide one cycle where palette= implies hue= when not provided"""
if "hue" not in self.variables and palette is not None:
msg = (
"\n\nPassing `palette` without assigning `hue` is deprecated "
f"and will be removed in v0.14.0. Assign the `{self.orient}` variable "
"to `hue` and set `legend=False` for the same effect.\n"
)
warnings.warn(msg, FutureWarning, stacklevel=3)
self.legend = False
self.plot_data["hue"] = self.plot_data[self.orient]
self.variables["hue"] = self.variables.get(self.orient)
self.var_types["hue"] = self.var_types.get(self.orient)
hue_order = self.var_levels.get(self.orient)
self._var_levels.pop("hue", None)
return hue_order
def _point_kwargs_backcompat(self, scale, join, kwargs):
"""Provide two cycles where scale= and join= work, but redirect to kwargs."""
if scale is not deprecated:
lw = mpl.rcParams["lines.linewidth"] * 1.8 * scale
mew = lw * .75
ms = lw * 2
msg = (
"\n\n"
"The `scale` parameter is deprecated and will be removed in v0.15.0. "
"You can now control the size of each plot element using matplotlib "
"`Line2D` parameters (e.g., `linewidth`, `markersize`, etc.)."
"\n"
)
warnings.warn(msg, stacklevel=3)
kwargs.update(linewidth=lw, markeredgewidth=mew, markersize=ms)
if join is not deprecated:
msg = (
"\n\n"
"The `join` parameter is deprecated and will be removed in v0.15.0."
)
if not join:
msg += (
" You can remove the line between points with `linestyle='none'`."
)
kwargs.update(linestyle="")
msg += "\n"
warnings.warn(msg, stacklevel=3)
def _err_kws_backcompat(self, err_kws, errcolor, errwidth, capsize):
"""Provide two cycles where existing signature-level err_kws are handled."""
def deprecate_err_param(name, key, val):
if val is deprecated:
return
suggest = f"err_kws={{'{key}': {val!r}}}"
msg = (
f"\n\nThe `{name}` parameter is deprecated. And will be removed "
f"in v0.15.0. Pass `{suggest}` instead.\n"
)
warnings.warn(msg, FutureWarning, stacklevel=4)
err_kws[key] = val
if errcolor is not None:
deprecate_err_param("errcolor", "color", errcolor)
deprecate_err_param("errwidth", "linewidth", errwidth)
if capsize is None:
capsize = 0
msg = (
"\n\nPassing `capsize=None` is deprecated and will be removed "
"in v0.15.0. Pass `capsize=0` to disable caps.\n"
)
warnings.warn(msg, FutureWarning, stacklevel=3)
return err_kws, capsize
def _violin_scale_backcompat(self, scale, scale_hue, density_norm, common_norm):
"""Provide two cycles of backcompat for scale kwargs"""
if scale is not deprecated:
density_norm = scale
msg = (
"\n\nThe `scale` parameter has been renamed and will be removed "
f"in v0.15.0. Pass `density_norm={scale!r}` for the same effect."
)
warnings.warn(msg, FutureWarning, stacklevel=3)
if scale_hue is not deprecated:
common_norm = scale_hue
msg = (
"\n\nThe `scale_hue` parameter has been replaced and will be removed "
f"in v0.15.0. Pass `common_norm={not scale_hue}` for the same effect."
)
warnings.warn(msg, FutureWarning, stacklevel=3)
return density_norm, common_norm
def _violin_bw_backcompat(self, bw, bw_method):
"""Provide two cycles of backcompat for violin bandwidth parameterization."""
if bw is not deprecated:
bw_method = bw
msg = dedent(f"""\n
The `bw` parameter is deprecated in favor of `bw_method`/`bw_adjust`.
Setting `bw_method={bw!r}`, but please see docs for the new parameters
and update your code. This will become an error in seaborn v0.15.0.
""")
warnings.warn(msg, FutureWarning, stacklevel=3)
return bw_method
def _boxen_scale_backcompat(self, scale, width_method):
"""Provide two cycles of backcompat for scale kwargs"""
if scale is not deprecated:
width_method = scale
msg = (
"\n\nThe `scale` parameter has been renamed to `width_method` and "
f"will be removed in v0.15. Pass `width_method={scale!r}"
)
if scale == "area":
msg += ", but note that the result for 'area' will appear different."
else:
msg += " for the same effect."
warnings.warn(msg, FutureWarning, stacklevel=3)
return width_method
def _complement_color(self, color, base_color, hue_map):
"""Allow a color to be set automatically using a basis of comparison."""
if color == "gray":
msg = (
'Use "auto" to set automatic grayscale colors. From v0.14.0, '
'"gray" will default to matplotlib\'s definition.'
)
warnings.warn(msg, FutureWarning, stacklevel=3)
color = "auto"
elif color is None or color is default:
color = "auto"
if color != "auto":
return color
if hue_map.lookup_table is None:
if base_color is None:
return None
basis = [mpl.colors.to_rgb(base_color)]
else:
basis = [mpl.colors.to_rgb(c) for c in hue_map.lookup_table.values()]
unique_colors = np.unique(basis, axis=0)
light_vals = [rgb_to_hls(*rgb[:3])[1] for rgb in unique_colors]
lum = min(light_vals) * .6
return (lum, lum, lum)
def _map_prop_with_hue(self, name, value, fallback, plot_kws):
"""Support pointplot behavior of modifying the marker/linestyle with hue."""
if value is default:
value = plot_kws.pop(name, fallback)
if "hue" in self.variables:
levels = self._hue_map.levels
if isinstance(value, list):
mapping = {k: v for k, v in zip(levels, value)}
else:
mapping = {k: value for k in levels}
else:
mapping = {None: value}
return mapping
def _adjust_cat_axis(self, ax, axis):
"""Set ticks and limits for a categorical variable."""
# Note: in theory, this could happen in _attach for all categorical axes
# But two reasons not to do that:
# - If it happens before plotting, autoscaling messes up the plot limits
# - It would change existing plots from other seaborn functions
if self.var_types[axis] != "categorical":
return
# If both x/y data are empty, the correct way to set up the plot is
# somewhat undefined; because we don't add null category data to the plot in
# this case we don't *have* a categorical axis (yet), so best to just bail.
if self.plot_data[axis].empty:
return
# We can infer the total number of categories (including those from previous
# plots that are not part of the plot we are currently making) from the number
# of ticks, which matplotlib sets up while doing unit conversion. This feels
# slightly risky, as if we are relying on something that may be a matplotlib
# implementation detail. But I cannot think of a better way to keep track of
# the state from previous categorical calls (see GH2516 for context)
n = len(getattr(ax, f"get_{axis}ticks")())
if axis == "x":
ax.xaxis.grid(False)
ax.set_xlim(-.5, n - .5, auto=None)
else:
ax.yaxis.grid(False)
# Note limits that correspond to previously-inverted y axis
ax.set_ylim(n - .5, -.5, auto=None)
def _dodge_needed(self):
"""Return True when use of `hue` would cause overlaps."""
groupers = list({self.orient, "col", "row"} & set(self.variables))
if "hue" in self.variables:
orient = self.plot_data[groupers].value_counts()
paired = self.plot_data[[*groupers, "hue"]].value_counts()
return orient.size != paired.size
return False
def _dodge(self, keys, data):
"""Apply a dodge transform to coordinates in place."""
if "hue" not in self.variables:
# Short-circuit if hue variable was not assigned
# We could potentially warn when hue=None, dodge=True, user may be confused
# But I think it's fine to just treat it as a no-op.
return
hue_idx = self._hue_map.levels.index(keys["hue"])
n = len(self._hue_map.levels)
data["width"] /= n
full_width = data["width"] * n
offset = data["width"] * hue_idx + data["width"] / 2 - full_width / 2
data[self.orient] += offset
def _invert_scale(self, ax, data, vars=("x", "y")):
"""Undo scaling after computation so data are plotted correctly."""
for var in vars:
_, inv = _get_transform_functions(ax, var[0])
if var == self.orient and "width" in data:
hw = data["width"] / 2
data["edge"] = inv(data[var] - hw)
data["width"] = inv(data[var] + hw) - data["edge"].to_numpy()
for suf in ["", "min", "max"]:
if (col := f"{var}{suf}") in data:
data[col] = inv(data[col])
def _configure_legend(self, ax, func, common_kws=None, semantic_kws=None):
if self.legend == "auto":
show_legend = not self._redundant_hue and self.input_format != "wide"
else:
show_legend = bool(self.legend)
if show_legend:
self.add_legend_data(ax, func, common_kws, semantic_kws=semantic_kws)
handles, _ = ax.get_legend_handles_labels()
if handles:
ax.legend(title=self.legend_title)
@property
def _native_width(self):
"""Return unit of width separating categories on native numeric scale."""
# Categorical data always have a unit width
if self.var_types[self.orient] == "categorical":
return 1
# Otherwise, define the width as the smallest space between observations
unique_values = np.unique(self.comp_data[self.orient])
if len(unique_values) > 1:
native_width = np.nanmin(np.diff(unique_values))
else:
native_width = 1
return native_width
def _nested_offsets(self, width, dodge):
"""Return offsets for each hue level for dodged plots."""
offsets = None
if "hue" in self.variables and self._hue_map.levels is not None:
n_levels = len(self._hue_map.levels)
if dodge:
each_width = width / n_levels
offsets = np.linspace(0, width - each_width, n_levels)
offsets -= offsets.mean()
else:
offsets = np.zeros(n_levels)
return offsets
# Note that the plotting methods here aim (in most cases) to produce the
# exact same artists as the original (pre 0.12) version of the code, so
# there is some weirdness that might not otherwise be clean or make sense in
# this context, such as adding empty artists for combinations of variables
# with no observations
def plot_strips(
self,
jitter,
dodge,
color,
plot_kws,
):
width = .8 * self._native_width
offsets = self._nested_offsets(width, dodge)
if jitter is True:
jlim = 0.1
else:
jlim = float(jitter)
if "hue" in self.variables and dodge and self._hue_map.levels is not None:
jlim /= len(self._hue_map.levels)
jlim *= self._native_width
jitterer = partial(np.random.uniform, low=-jlim, high=+jlim)
iter_vars = [self.orient]
if dodge:
iter_vars.append("hue")
ax = self.ax
dodge_move = jitter_move = 0
if "marker" in plot_kws and not MarkerStyle(plot_kws["marker"]).is_filled():
plot_kws.pop("edgecolor", None)
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=True):
ax = self._get_axes(sub_vars)
if offsets is not None and (offsets != 0).any():
dodge_move = offsets[sub_data["hue"].map(self._hue_map.levels.index)]
jitter_move = jitterer(size=len(sub_data)) if len(sub_data) > 1 else 0
adjusted_data = sub_data[self.orient] + dodge_move + jitter_move
sub_data[self.orient] = adjusted_data
self._invert_scale(ax, sub_data)
points = ax.scatter(sub_data["x"], sub_data["y"], color=color, **plot_kws)
if "hue" in self.variables:
points.set_facecolors(self._hue_map(sub_data["hue"]))
self._configure_legend(ax, _scatter_legend_artist, common_kws=plot_kws)
def plot_swarms(
self,
dodge,
color,
warn_thresh,
plot_kws,
):
width = .8 * self._native_width
offsets = self._nested_offsets(width, dodge)
iter_vars = [self.orient]
if dodge:
iter_vars.append("hue")
ax = self.ax
point_collections = {}
dodge_move = 0
if "marker" in plot_kws and not MarkerStyle(plot_kws["marker"]).is_filled():
plot_kws.pop("edgecolor", None)
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=True):
ax = self._get_axes(sub_vars)
if offsets is not None:
dodge_move = offsets[sub_data["hue"].map(self._hue_map.levels.index)]
if not sub_data.empty:
sub_data[self.orient] = sub_data[self.orient] + dodge_move
self._invert_scale(ax, sub_data)
points = ax.scatter(sub_data["x"], sub_data["y"], color=color, **plot_kws)
if "hue" in self.variables:
points.set_facecolors(self._hue_map(sub_data["hue"]))
if not sub_data.empty:
point_collections[(ax, sub_data[self.orient].iloc[0])] = points
beeswarm = Beeswarm(width=width, orient=self.orient, warn_thresh=warn_thresh)
for (ax, center), points in point_collections.items():
if points.get_offsets().shape[0] > 1:
def draw(points, renderer, *, center=center):
beeswarm(points, center)
if self.orient == "y":
scalex = False
scaley = ax.get_autoscaley_on()
else:
scalex = ax.get_autoscalex_on()
scaley = False
# This prevents us from undoing the nice categorical axis limits
# set in _adjust_cat_axis, because that method currently leave
# the autoscale flag in its original setting. It may be better
# to disable autoscaling there to avoid needing to do this.
fixed_scale = self.var_types[self.orient] == "categorical"
ax.update_datalim(points.get_datalim(ax.transData))
if not fixed_scale and (scalex or scaley):
ax.autoscale_view(scalex=scalex, scaley=scaley)
super(points.__class__, points).draw(renderer)
points.draw = draw.__get__(points)
_draw_figure(ax.figure)
self._configure_legend(ax, _scatter_legend_artist, plot_kws)
def plot_boxes(
self,
width,
dodge,
gap,
fill,
whis,
color,
linecolor,
linewidth,
fliersize,
plot_kws, # TODO rename user_kws?
):
iter_vars = ["hue"]
value_var = {"x": "y", "y": "x"}[self.orient]
def get_props(element, artist=mpl.lines.Line2D):
return normalize_kwargs(plot_kws.pop(f"{element}props", {}), artist)
if not fill and linewidth is None:
linewidth = mpl.rcParams["lines.linewidth"]
bootstrap = plot_kws.pop("bootstrap", mpl.rcParams["boxplot.bootstrap"])
plot_kws.setdefault("shownotches", plot_kws.pop("notch", False))
box_artist = mpl.patches.Rectangle if fill else mpl.lines.Line2D
props = {
"box": get_props("box", box_artist),
"median": get_props("median"),
"whisker": get_props("whisker"),
"flier": get_props("flier"),
"cap": get_props("cap"),
}
props["median"].setdefault("solid_capstyle", "butt")
props["whisker"].setdefault("solid_capstyle", "butt")
props["flier"].setdefault("markersize", fliersize)
ax = self.ax
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=False):
ax = self._get_axes(sub_vars)
grouped = sub_data.groupby(self.orient)[value_var]
positions = sorted(sub_data[self.orient].unique().astype(float))
value_data = [x.to_numpy() for _, x in grouped]
stats = pd.DataFrame(mpl.cbook.boxplot_stats(value_data, whis=whis,
bootstrap=bootstrap))
orig_width = width * self._native_width
data = pd.DataFrame({self.orient: positions, "width": orig_width})
if dodge:
self._dodge(sub_vars, data)
if gap:
data["width"] *= 1 - gap
capwidth = plot_kws.get("capwidths", 0.5 * data["width"])
self._invert_scale(ax, data)
_, inv = _get_transform_functions(ax, value_var)
for stat in ["mean", "med", "q1", "q3", "cilo", "cihi", "whislo", "whishi"]:
stats[stat] = inv(stats[stat])
stats["fliers"] = stats["fliers"].map(inv)
linear_orient_scale = getattr(ax, f"get_{self.orient}scale")() == "linear"
maincolor = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
if fill:
boxprops = {
"facecolor": maincolor, "edgecolor": linecolor, **props["box"]
}
medianprops = {"color": linecolor, **props["median"]}
whiskerprops = {"color": linecolor, **props["whisker"]}
flierprops = {"markeredgecolor": linecolor, **props["flier"]}
capprops = {"color": linecolor, **props["cap"]}
else:
boxprops = {"color": maincolor, **props["box"]}
medianprops = {"color": maincolor, **props["median"]}
whiskerprops = {"color": maincolor, **props["whisker"]}
flierprops = {"markeredgecolor": maincolor, **props["flier"]}
capprops = {"color": maincolor, **props["cap"]}
if linewidth is not None:
for prop_dict in [boxprops, medianprops, whiskerprops, capprops]:
prop_dict.setdefault("linewidth", linewidth)
default_kws = dict(
bxpstats=stats.to_dict("records"),
positions=data[self.orient],
# Set width to 0 to avoid going out of domain
widths=data["width"] if linear_orient_scale else 0,
patch_artist=fill,
vert=self.orient == "x",
manage_ticks=False,
boxprops=boxprops,
medianprops=medianprops,
whiskerprops=whiskerprops,
flierprops=flierprops,
capprops=capprops,
# Added in matplotlib 3.6.0; see below
# capwidths=capwidth,
**(
{} if _version_predates(mpl, "3.6.0")
else {"capwidths": capwidth}
)
)
boxplot_kws = {**default_kws, **plot_kws}
artists = ax.bxp(**boxplot_kws)
# Reset artist widths after adding so everything stays positive
ori_idx = ["x", "y"].index(self.orient)
if not linear_orient_scale:
for i, box in enumerate(data.to_dict("records")):
p0 = box["edge"]
p1 = box["edge"] + box["width"]
if artists["boxes"]:
box_artist = artists["boxes"][i]
if fill:
box_verts = box_artist.get_path().vertices.T
else:
box_verts = box_artist.get_data()
box_verts[ori_idx][0] = p0
box_verts[ori_idx][3:] = p0
box_verts[ori_idx][1:3] = p1
if not fill:
# When fill is True, the data get changed in place
box_artist.set_data(box_verts)
ax.update_datalim(
np.transpose(box_verts),
updatex=self.orient == "x",
updatey=self.orient == "y",
)
if artists["medians"]:
verts = artists["medians"][i].get_xydata().T
verts[ori_idx][:] = p0, p1
artists["medians"][i].set_data(verts)
if artists["caps"]:
f_fwd, f_inv = _get_transform_functions(ax, self.orient)
for line in artists["caps"][2 * i:2 * i + 2]:
p0 = f_inv(f_fwd(box[self.orient]) - capwidth[i] / 2)
p1 = f_inv(f_fwd(box[self.orient]) + capwidth[i] / 2)
verts = line.get_xydata().T
verts[ori_idx][:] = p0, p1
line.set_data(verts)
ax.add_container(BoxPlotContainer(artists))
legend_artist = _get_patch_legend_artist(fill)
self._configure_legend(ax, legend_artist, boxprops)
def plot_boxens(
self,
width,
dodge,
gap,
fill,
color,
linecolor,
linewidth,
width_method,
k_depth,
outlier_prop,
trust_alpha,
showfliers,
box_kws,
flier_kws,
line_kws,
plot_kws,
):
iter_vars = [self.orient, "hue"]
value_var = {"x": "y", "y": "x"}[self.orient]
estimator = LetterValues(k_depth, outlier_prop, trust_alpha)
width_method_options = ["exponential", "linear", "area"]
_check_argument("width_method", width_method_options, width_method)
box_kws = plot_kws if box_kws is None else {**plot_kws, **box_kws}
flier_kws = {} if flier_kws is None else flier_kws.copy()
line_kws = {} if line_kws is None else line_kws.copy()
if linewidth is None:
if fill:
linewidth = 0.5 * mpl.rcParams["lines.linewidth"]
else:
linewidth = mpl.rcParams["lines.linewidth"]
ax = self.ax
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=False):
ax = self._get_axes(sub_vars)
_, inv_ori = _get_transform_functions(ax, self.orient)
_, inv_val = _get_transform_functions(ax, value_var)
# Statistics
lv_data = estimator(sub_data[value_var])
n = lv_data["k"] * 2 - 1
vals = lv_data["values"]
pos_data = pd.DataFrame({
self.orient: [sub_vars[self.orient]],
"width": [width * self._native_width],
})
if dodge:
self._dodge(sub_vars, pos_data)
if gap:
pos_data["width"] *= 1 - gap
# Letter-value boxes
levels = lv_data["levels"]
exponent = (levels - 1 - lv_data["k"]).astype(float)
if width_method == "linear":
rel_widths = levels + 1
elif width_method == "exponential":
rel_widths = 2 ** exponent
elif width_method == "area":
tails = levels < (lv_data["k"] - 1)
rel_widths = 2 ** (exponent - tails) / np.diff(lv_data["values"])
center = pos_data[self.orient].item()
widths = rel_widths / rel_widths.max() * pos_data["width"].item()
box_vals = inv_val(vals)
box_pos = inv_ori(center - widths / 2)
box_heights = inv_val(vals[1:]) - inv_val(vals[:-1])
box_widths = inv_ori(center + widths / 2) - inv_ori(center - widths / 2)
maincolor = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
flier_colors = {
"facecolor": "none", "edgecolor": ".45" if fill else maincolor
}
if fill:
cmap = light_palette(maincolor, as_cmap=True)
boxcolors = cmap(2 ** ((exponent + 2) / 3))
else:
boxcolors = maincolor
boxen = []
for i in range(n):
if self.orient == "x":
xy = (box_pos[i], box_vals[i])
w, h = (box_widths[i], box_heights[i])
else:
xy = (box_vals[i], box_pos[i])
w, h = (box_heights[i], box_widths[i])
boxen.append(Rectangle(xy, w, h))
if fill:
box_colors = {"facecolors": boxcolors, "edgecolors": linecolor}
else:
box_colors = {"facecolors": "none", "edgecolors": boxcolors}
collection_kws = {**box_colors, "linewidth": linewidth, **box_kws}
ax.add_collection(PatchCollection(boxen, **collection_kws), autolim=False)
ax.update_datalim(
np.column_stack([box_vals, box_vals]),
updatex=self.orient == "y",
updatey=self.orient == "x",
)
# Median line
med = lv_data["median"]
hw = pos_data["width"].item() / 2
if self.orient == "x":
x, y = inv_ori([center - hw, center + hw]), inv_val([med, med])
else:
x, y = inv_val([med, med]), inv_ori([center - hw, center + hw])
default_kws = {
"color": linecolor if fill else maincolor,
"solid_capstyle": "butt",
"linewidth": 1.25 * linewidth,
}
ax.plot(x, y, **{**default_kws, **line_kws})
# Outliers ("fliers")
if showfliers:
vals = inv_val(lv_data["fliers"])
pos = np.full(len(vals), inv_ori(pos_data[self.orient].item()))
x, y = (pos, vals) if self.orient == "x" else (vals, pos)
ax.scatter(x, y, **{**flier_colors, "s": 25, **flier_kws})
ax.autoscale_view(scalex=self.orient == "y", scaley=self.orient == "x")
legend_artist = _get_patch_legend_artist(fill)
common_kws = {**box_kws, "linewidth": linewidth, "edgecolor": linecolor}
self._configure_legend(ax, legend_artist, common_kws)
def plot_violins(
self,
width,
dodge,
gap,
split,
color,
fill,
linecolor,
linewidth,
inner,
density_norm,
common_norm,
kde_kws,
inner_kws,
plot_kws,
):
iter_vars = [self.orient, "hue"]
value_var = {"x": "y", "y": "x"}[self.orient]
inner_options = ["box", "quart", "stick", "point", None]
_check_argument("inner", inner_options, inner, prefix=True)
_check_argument("density_norm", ["area", "count", "width"], density_norm)
if linewidth is None:
if fill:
linewidth = 1.25 * mpl.rcParams["patch.linewidth"]
else:
linewidth = mpl.rcParams["lines.linewidth"]
if inner is not None and inner.startswith("box"):
box_width = inner_kws.pop("box_width", linewidth * 4.5)
whis_width = inner_kws.pop("whis_width", box_width / 3)
marker = inner_kws.pop("marker", "_" if self.orient == "x" else "|")
kde = KDE(**kde_kws)
ax = self.ax
violin_data = []
# Iterate through all the data splits once to compute the KDEs
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=False):
sub_data["weight"] = sub_data.get("weights", 1)
stat_data = kde._transform(sub_data, value_var, [])
maincolor = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
if not fill:
linecolor = maincolor
maincolor = "none"
default_kws = dict(
facecolor=maincolor,
edgecolor=linecolor,
linewidth=linewidth,
)
violin_data.append({
"position": sub_vars[self.orient],
"observations": sub_data[value_var],
"density": stat_data["density"],
"support": stat_data[value_var],
"kwargs": {**default_kws, **plot_kws},
"sub_vars": sub_vars,
"ax": self._get_axes(sub_vars),
})
# Once we've computed all the KDEs, get statistics for normalization
def vars_to_key(sub_vars):
return tuple((k, v) for k, v in sub_vars.items() if k != self.orient)
norm_keys = [vars_to_key(violin["sub_vars"]) for violin in violin_data]
if common_norm:
common_max_density = np.nanmax([v["density"].max() for v in violin_data])
common_max_count = np.nanmax([len(v["observations"]) for v in violin_data])
max_density = {key: common_max_density for key in norm_keys}
max_count = {key: common_max_count for key in norm_keys}
else:
with warnings.catch_warnings():
# Ignore warning when all violins are singular; it's not important
warnings.filterwarnings('ignore', "All-NaN (slice|axis) encountered")
max_density = {
key: np.nanmax([
v["density"].max() for v in violin_data
if vars_to_key(v["sub_vars"]) == key
]) for key in norm_keys
}
max_count = {
key: np.nanmax([
len(v["observations"]) for v in violin_data
if vars_to_key(v["sub_vars"]) == key
]) for key in norm_keys
}
real_width = width * self._native_width
# Now iterate through the violins again to apply the normalization and plot
for violin in violin_data:
index = pd.RangeIndex(0, max(len(violin["support"]), 1))
data = pd.DataFrame({
self.orient: violin["position"],
value_var: violin["support"],
"density": violin["density"],
"width": real_width,
}, index=index)
if dodge:
self._dodge(violin["sub_vars"], data)
if gap:
data["width"] *= 1 - gap
# Normalize the density across the distribution(s) and relative to the width
norm_key = vars_to_key(violin["sub_vars"])
hw = data["width"] / 2
peak_density = violin["density"].max()
if np.isnan(peak_density):
span = 1
elif density_norm == "area":
span = data["density"] / max_density[norm_key]
elif density_norm == "count":
count = len(violin["observations"])
span = data["density"] / peak_density * (count / max_count[norm_key])
elif density_norm == "width":
span = data["density"] / peak_density
span = span * hw * (2 if split else 1)
# Handle split violins (i.e. asymmetric spans)
right_side = (
0 if "hue" not in self.variables
else self._hue_map.levels.index(violin["sub_vars"]["hue"]) % 2
)
if split:
offsets = (hw, span - hw) if right_side else (span - hw, hw)
else:
offsets = span, span
ax = violin["ax"]
_, invx = _get_transform_functions(ax, "x")
_, invy = _get_transform_functions(ax, "y")
inv_pos = {"x": invx, "y": invy}[self.orient]
inv_val = {"x": invx, "y": invy}[value_var]
linecolor = violin["kwargs"]["edgecolor"]
# Handle singular datasets (one or more observations with no variance
if np.isnan(peak_density):
pos = data[self.orient].iloc[0]
val = violin["observations"].mean()
if self.orient == "x":
x, y = [pos - offsets[0], pos + offsets[1]], [val, val]
else:
x, y = [val, val], [pos - offsets[0], pos + offsets[1]]
ax.plot(invx(x), invy(y), color=linecolor, linewidth=linewidth)
continue
# Plot the main violin body
plot_func = {"x": ax.fill_betweenx, "y": ax.fill_between}[self.orient]
plot_func(
inv_val(data[value_var]),
inv_pos(data[self.orient] - offsets[0]),
inv_pos(data[self.orient] + offsets[1]),
**violin["kwargs"]
)
# Adjust the observation data
obs = violin["observations"]
pos_dict = {self.orient: violin["position"], "width": real_width}
if dodge:
self._dodge(violin["sub_vars"], pos_dict)
if gap:
pos_dict["width"] *= (1 - gap)
# --- Plot the inner components
if inner is None:
continue
elif inner.startswith("point"):
pos = np.array([pos_dict[self.orient]] * len(obs))
if split:
pos += (-1 if right_side else 1) * pos_dict["width"] / 2
x, y = (pos, obs) if self.orient == "x" else (obs, pos)
kws = {
"color": linecolor,
"edgecolor": linecolor,
"s": (linewidth * 2) ** 2,
"zorder": violin["kwargs"].get("zorder", 2) + 1,
**inner_kws,
}
ax.scatter(invx(x), invy(y), **kws)
elif inner.startswith("stick"):
pos0 = np.interp(obs, data[value_var], data[self.orient] - offsets[0])
pos1 = np.interp(obs, data[value_var], data[self.orient] + offsets[1])
pos_pts = np.stack([inv_pos(pos0), inv_pos(pos1)])
val_pts = np.stack([inv_val(obs), inv_val(obs)])
segments = np.stack([pos_pts, val_pts]).transpose(2, 1, 0)
if self.orient == "y":
segments = segments[:, :, ::-1]
kws = {
"color": linecolor,
"linewidth": linewidth / 2,
**inner_kws,
}
lines = mpl.collections.LineCollection(segments, **kws)
ax.add_collection(lines, autolim=False)
elif inner.startswith("quart"):
stats = np.percentile(obs, [25, 50, 75])
pos0 = np.interp(stats, data[value_var], data[self.orient] - offsets[0])
pos1 = np.interp(stats, data[value_var], data[self.orient] + offsets[1])
pos_pts = np.stack([inv_pos(pos0), inv_pos(pos1)])
val_pts = np.stack([inv_val(stats), inv_val(stats)])
segments = np.stack([pos_pts, val_pts]).transpose(2, 0, 1)
if self.orient == "y":
segments = segments[:, ::-1, :]
dashes = [(1.25, .75), (2.5, 1), (1.25, .75)]
for i, segment in enumerate(segments):
kws = {
"color": linecolor,
"linewidth": linewidth,
"dashes": dashes[i],
**inner_kws,
}
ax.plot(*segment, **kws)
elif inner.startswith("box"):
stats = mpl.cbook.boxplot_stats(obs)[0]
pos = np.array(pos_dict[self.orient])
if split:
pos += (-1 if right_side else 1) * pos_dict["width"] / 2
pos = [pos, pos], [pos, pos], [pos]
val = (
[stats["whislo"], stats["whishi"]],
[stats["q1"], stats["q3"]],
[stats["med"]]
)
if self.orient == "x":
(x0, x1, x2), (y0, y1, y2) = pos, val
else:
(x0, x1, x2), (y0, y1, y2) = val, pos
if split:
offset = (1 if right_side else -1) * box_width / 72 / 2
dx, dy = (offset, 0) if self.orient == "x" else (0, -offset)
trans = ax.transData + mpl.transforms.ScaledTranslation(
dx, dy, ax.figure.dpi_scale_trans,
)
else:
trans = ax.transData
line_kws = {
"color": linecolor,
"transform": trans,
**inner_kws,
"linewidth": whis_width,
}
ax.plot(invx(x0), invy(y0), **line_kws)
line_kws["linewidth"] = box_width
ax.plot(invx(x1), invy(y1), **line_kws)
dot_kws = {
"marker": marker,
"markersize": box_width / 1.2,
"markeredgewidth": box_width / 5,
"transform": trans,
**inner_kws,
"markeredgecolor": "w",
"markerfacecolor": "w",
"color": linecolor, # simplify tests
}
ax.plot(invx(x2), invy(y2), **dot_kws)
legend_artist = _get_patch_legend_artist(fill)
common_kws = {**plot_kws, "linewidth": linewidth, "edgecolor": linecolor}
self._configure_legend(ax, legend_artist, common_kws)
def plot_points(
self,
aggregator,
markers,
linestyles,
dodge,
color,
capsize,
err_kws,
plot_kws,
):
agg_var = {"x": "y", "y": "x"}[self.orient]
iter_vars = ["hue"]
plot_kws = normalize_kwargs(plot_kws, mpl.lines.Line2D)
plot_kws.setdefault("linewidth", mpl.rcParams["lines.linewidth"] * 1.8)
plot_kws.setdefault("markeredgewidth", plot_kws["linewidth"] * 0.75)
plot_kws.setdefault("markersize", plot_kws["linewidth"] * np.sqrt(2 * np.pi))
markers = self._map_prop_with_hue("marker", markers, "o", plot_kws)
linestyles = self._map_prop_with_hue("linestyle", linestyles, "-", plot_kws)
base_positions = self.var_levels[self.orient]
if self.var_types[self.orient] == "categorical":
min_cat_val = int(self.comp_data[self.orient].min())
max_cat_val = int(self.comp_data[self.orient].max())
base_positions = [i for i in range(min_cat_val, max_cat_val + 1)]
n_hue_levels = 0 if self._hue_map.levels is None else len(self._hue_map.levels)
if dodge is True:
dodge = .025 * n_hue_levels
ax = self.ax
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=True):
ax = self._get_axes(sub_vars)
ori_axis = getattr(ax, f"{self.orient}axis")
transform, _ = _get_transform_functions(ax, self.orient)
positions = transform(ori_axis.convert_units(base_positions))
agg_data = sub_data if sub_data.empty else (
sub_data
.groupby(self.orient)
.apply(aggregator, agg_var, **groupby_apply_include_groups(False))
.reindex(pd.Index(positions, name=self.orient))
.reset_index()
)
if dodge:
hue_idx = self._hue_map.levels.index(sub_vars["hue"])
step_size = dodge / (n_hue_levels - 1)
offset = -dodge / 2 + step_size * hue_idx
agg_data[self.orient] += offset * self._native_width
self._invert_scale(ax, agg_data)
sub_kws = plot_kws.copy()
sub_kws.update(
marker=markers[sub_vars.get("hue")],
linestyle=linestyles[sub_vars.get("hue")],
color=self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color,
)
line, = ax.plot(agg_data["x"], agg_data["y"], **sub_kws)
sub_err_kws = err_kws.copy()
line_props = line.properties()
for prop in ["color", "linewidth", "alpha", "zorder"]:
sub_err_kws.setdefault(prop, line_props[prop])
if aggregator.error_method is not None:
self.plot_errorbars(ax, agg_data, capsize, sub_err_kws)
legend_artist = partial(mpl.lines.Line2D, [], [])
semantic_kws = {"hue": {"marker": markers, "linestyle": linestyles}}
self._configure_legend(ax, legend_artist, sub_kws, semantic_kws)
def plot_bars(
self,
aggregator,
dodge,
gap,
width,
fill,
color,
capsize,
err_kws,
plot_kws,
):
agg_var = {"x": "y", "y": "x"}[self.orient]
iter_vars = ["hue"]
ax = self.ax
if self._hue_map.levels is None:
dodge = False
if dodge and capsize is not None:
capsize = capsize / len(self._hue_map.levels)
if not fill:
plot_kws.setdefault("linewidth", 1.5 * mpl.rcParams["lines.linewidth"])
err_kws.setdefault("linewidth", 1.5 * mpl.rcParams["lines.linewidth"])
for sub_vars, sub_data in self.iter_data(iter_vars,
from_comp_data=True,
allow_empty=True):
ax = self._get_axes(sub_vars)
agg_data = sub_data if sub_data.empty else (
sub_data
.groupby(self.orient)
.apply(aggregator, agg_var, **groupby_apply_include_groups(False))
.reset_index()
)
agg_data["width"] = width * self._native_width
if dodge:
self._dodge(sub_vars, agg_data)
if gap:
agg_data["width"] *= 1 - gap
agg_data["edge"] = agg_data[self.orient] - agg_data["width"] / 2
self._invert_scale(ax, agg_data)
if self.orient == "x":
bar_func = ax.bar
kws = dict(
x=agg_data["edge"], height=agg_data["y"], width=agg_data["width"]
)
else:
bar_func = ax.barh
kws = dict(
y=agg_data["edge"], width=agg_data["x"], height=agg_data["width"]
)
main_color = self._hue_map(sub_vars["hue"]) if "hue" in sub_vars else color
# Set both color and facecolor for property cycle logic
kws["align"] = "edge"
if fill:
kws.update(color=main_color, facecolor=main_color)
else:
kws.update(color=main_color, edgecolor=main_color, facecolor="none")
bar_func(**{**kws, **plot_kws})
if aggregator.error_method is not None:
self.plot_errorbars(
ax, agg_data, capsize,
{"color": ".26" if fill else main_color, **err_kws}
)
legend_artist = _get_patch_legend_artist(fill)
self._configure_legend(ax, legend_artist, plot_kws)
def plot_errorbars(self, ax, data, capsize, err_kws):
var = {"x": "y", "y": "x"}[self.orient]
for row in data.to_dict("records"):
row = dict(row)
pos = np.array([row[self.orient], row[self.orient]])
val = np.array([row[f"{var}min"], row[f"{var}max"]])
if capsize:
cw = capsize * self._native_width / 2
scl, inv = _get_transform_functions(ax, self.orient)
cap = inv(scl(pos[0]) - cw), inv(scl(pos[1]) + cw)
pos = np.concatenate([
[*cap, np.nan], pos, [np.nan, *cap]
])
val = np.concatenate([
[val[0], val[0], np.nan], val, [np.nan, val[-1], val[-1]],
])
if self.orient == "x":
args = pos, val
else:
args = val, pos
ax.plot(*args, **err_kws)
class _CategoricalAggPlotter(_CategoricalPlotter):
flat_structure = {"x": "@index", "y": "@values"}
_categorical_docs = dict(
# Shared narrative docs
categorical_narrative=dedent("""\
See the :ref:`tutorial <categorical_tutorial>` for more information.
.. note::
By default, this function treats one of the variables as categorical
and draws data at ordinal positions (0, 1, ... n) on the relevant axis.
As of version 0.13.0, this can be disabled by setting `native_scale=True`.
"""),
# Shared function parameters
input_params=dedent("""\
x, y, hue : names of variables in `data` or vector data
Inputs for plotting long-form data. See examples for interpretation.\
"""),
categorical_data=dedent("""\
data : DataFrame, Series, dict, array, or list of arrays
Dataset for plotting. If `x` and `y` are absent, this is
interpreted as wide-form. Otherwise it is expected to be long-form.\
"""),
order_vars=dedent("""\
order, hue_order : lists of strings
Order to plot the categorical levels in; otherwise the levels are
inferred from the data objects.\
"""),
stat_api_params=dedent("""\
estimator : string or callable that maps vector -> scalar
Statistical function to estimate within each categorical bin.
errorbar : string, (string, number) tuple, callable or None
Name of errorbar method (either "ci", "pi", "se", or "sd"), or a tuple
with a method name and a level parameter, or a function that maps from a
vector to a (min, max) interval, or None to hide errorbar. See the
:doc:`errorbar tutorial </tutorial/error_bars>` for more information.
.. versionadded:: v0.12.0
n_boot : int
Number of bootstrap samples used to compute confidence intervals.
seed : int, `numpy.random.Generator`, or `numpy.random.RandomState`
Seed or random number generator for reproducible bootstrapping.
units : name of variable in `data` or vector data
Identifier of sampling units; used by the errorbar function to
perform a multilevel bootstrap and account for repeated measures
weights : name of variable in `data` or vector data
Data values or column used to compute weighted statistics.
Note that the use of weights may limit other statistical options.
.. versionadded:: v0.13.1\
"""),
ci=dedent("""\
ci : float
Level of the confidence interval to show, in [0, 100].
.. deprecated:: v0.12.0
Use `errorbar=("ci", ...)`.\
"""),
orient=dedent("""\
orient : "v" | "h" | "x" | "y"
Orientation of the plot (vertical or horizontal). This is usually
inferred based on the type of the input variables, but it can be used
to resolve ambiguity when both `x` and `y` are numeric or when
plotting wide-form data.
.. versionchanged:: v0.13.0
Added 'x'/'y' as options, equivalent to 'v'/'h'.\
"""),
color=dedent("""\
color : matplotlib color
Single color for the elements in the plot.\
"""),
palette=dedent("""\
palette : palette name, list, dict, or :class:`matplotlib.colors.Colormap`
Color palette that maps the hue variable. If the palette is a dictionary,
keys should be names of levels and values should be matplotlib colors.
The type/value will sometimes force a qualitative/quantitative mapping.\
"""),
hue_norm=dedent("""\
hue_norm : tuple or :class:`matplotlib.colors.Normalize` object
Normalization in data units for colormap applied to the `hue`
variable when it is numeric. Not relevant if `hue` is categorical.
.. versionadded:: v0.12.0\
"""),
saturation=dedent("""\
saturation : float
Proportion of the original saturation to draw fill colors in. Large
patches often look better with desaturated colors, but set this to
`1` if you want the colors to perfectly match the input values.\
"""),
capsize=dedent("""\
capsize : float
Width of the "caps" on error bars, relative to bar spacing.\
"""),
errcolor=dedent("""\
errcolor : matplotlib color
Color used for the error bar lines.
.. deprecated:: 0.13.0
Use `err_kws={'color': ...}`.\
"""),
errwidth=dedent("""\
errwidth : float
Thickness of error bar lines (and caps), in points.
.. deprecated:: 0.13.0
Use `err_kws={'linewidth': ...}`.\
"""),
fill=dedent("""\
fill : bool
If True, use a solid patch. Otherwise, draw as line art.
.. versionadded:: v0.13.0\
"""),
gap=dedent("""\
gap : float
Shrink on the orient axis by this factor to add a gap between dodged elements.
.. versionadded:: 0.13.0\
"""),
width=dedent("""\
width : float
Width allotted to each element on the orient axis. When `native_scale=True`,
it is relative to the minimum distance between two values in the native scale.\
"""),
dodge=dedent("""\
dodge : "auto" or bool
When hue mapping is used, whether elements should be narrowed and shifted along
the orient axis to eliminate overlap. If `"auto"`, set to `True` when the
orient variable is crossed with the categorical variable or `False` otherwise.
.. versionchanged:: 0.13.0
Added `"auto"` mode as a new default.\
"""),
linewidth=dedent("""\
linewidth : float
Width of the lines that frame the plot elements.\
"""),
linecolor=dedent("""\
linecolor : color
Color to use for line elements, when `fill` is True.
.. versionadded:: v0.13.0\
"""),
log_scale=dedent("""\
log_scale : bool or number, or pair of bools or numbers
Set axis scale(s) to log. A single value sets the data axis for any numeric
axes in the plot. A pair of values sets each axis independently.
Numeric values are interpreted as the desired base (default 10).
When `None` or `False`, seaborn defers to the existing Axes scale.
.. versionadded:: v0.13.0\
"""),
native_scale=dedent("""\
native_scale : bool
When True, numeric or datetime values on the categorical axis will maintain
their original scaling rather than being converted to fixed indices.
.. versionadded:: v0.13.0\
"""),
formatter=dedent("""\
formatter : callable
Function for converting categorical data into strings. Affects both grouping
and tick labels.
.. versionadded:: v0.13.0\
"""),
legend=dedent("""\
legend : "auto", "brief", "full", or False
How to draw the legend. If "brief", numeric `hue` and `size`
variables will be represented with a sample of evenly spaced values.
If "full", every group will get an entry in the legend. If "auto",
choose between brief or full representation based on number of levels.
If `False`, no legend data is added and no legend is drawn.
.. versionadded:: v0.13.0\
"""),
err_kws=dedent("""\
err_kws : dict
Parameters of :class:`matplotlib.lines.Line2D`, for the error bar artists.
.. versionadded:: v0.13.0\
"""),
ax_in=dedent("""\
ax : matplotlib Axes
Axes object to draw the plot onto, otherwise uses the current Axes.\
"""),
ax_out=dedent("""\
ax : matplotlib Axes
Returns the Axes object with the plot drawn onto it.\
"""),
# Shared see also
boxplot=dedent("""\
boxplot : A traditional box-and-whisker plot with a similar API.\
"""),
violinplot=dedent("""\
violinplot : A combination of boxplot and kernel density estimation.\
"""),
stripplot=dedent("""\
stripplot : A scatterplot where one variable is categorical. Can be used
in conjunction with other plots to show each observation.\
"""),
swarmplot=dedent("""\
swarmplot : A categorical scatterplot where the points do not overlap. Can
be used with other plots to show each observation.\
"""),
barplot=dedent("""\
barplot : Show point estimates and confidence intervals using bars.\
"""),
countplot=dedent("""\
countplot : Show the counts of observations in each categorical bin.\
"""),
pointplot=dedent("""\
pointplot : Show point estimates and confidence intervals using dots.\
"""),
catplot=dedent("""\
catplot : Combine a categorical plot with a :class:`FacetGrid`.\
"""),
boxenplot=dedent("""\
boxenplot : An enhanced boxplot for larger datasets.\
"""),
)
_categorical_docs.update(_facet_docs)
def boxplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
orient=None, color=None, palette=None, saturation=.75, fill=True,
dodge="auto", width=.8, gap=0, whis=1.5, linecolor="auto", linewidth=None,
fliersize=None, hue_norm=None, native_scale=False, log_scale=None, formatter=None,
legend="auto", ax=None, **kwargs
):
p = _CategoricalPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if dodge == "auto":
# Needs to be before scale_categorical changes the coordinate series dtype
dodge = p._dodge_needed()
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
saturation = saturation if fill else 1
p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
color = _default_color(
ax.fill_between, hue, color,
{k: v for k, v in kwargs.items() if k in ["c", "color", "fc", "facecolor"]},
saturation=saturation,
)
linecolor = p._complement_color(linecolor, color, p._hue_map)
p.plot_boxes(
width=width,
dodge=dodge,
gap=gap,
fill=fill,
whis=whis,
color=color,
linecolor=linecolor,
linewidth=linewidth,
fliersize=fliersize,
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
boxplot.__doc__ = dedent("""\
Draw a box plot to show distributions with respect to categories.
A box plot (or box-and-whisker plot) shows the distribution of quantitative
data in a way that facilitates comparisons between variables or across
levels of a categorical variable. The box shows the quartiles of the
dataset while the whiskers extend to show the rest of the distribution,
except for points that are determined to be "outliers" using a method
that is a function of the inter-quartile range.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
{orient}
{color}
{palette}
{saturation}
{fill}
{dodge}
{width}
{gap}
whis : float or pair of floats
Paramater that controls whisker length. If scalar, whiskers are drawn
to the farthest datapoint within *whis * IQR* from the nearest hinge.
If a tuple, it is interpreted as percentiles that whiskers represent.
{linecolor}
{linewidth}
fliersize : float
Size of the markers used to indicate outlier observations.
{hue_norm}
{log_scale}
{native_scale}
{formatter}
{legend}
{ax_in}
kwargs : key, value mappings
Other keyword arguments are passed through to
:meth:`matplotlib.axes.Axes.boxplot`.
Returns
-------
{ax_out}
See Also
--------
{violinplot}
{stripplot}
{swarmplot}
{catplot}
Examples
--------
.. include:: ../docstrings/boxplot.rst
""").format(**_categorical_docs)
def violinplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
orient=None, color=None, palette=None, saturation=.75, fill=True,
inner="box", split=False, width=.8, dodge="auto", gap=0,
linewidth=None, linecolor="auto", cut=2, gridsize=100,
bw_method="scott", bw_adjust=1, density_norm="area", common_norm=False,
hue_norm=None, formatter=None, log_scale=None, native_scale=False,
legend="auto", scale=deprecated, scale_hue=deprecated, bw=deprecated,
inner_kws=None, ax=None, **kwargs,
):
p = _CategoricalPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if dodge == "auto":
# Needs to be before scale_categorical changes the coordinate series dtype
dodge = p._dodge_needed()
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
saturation = saturation if fill else 1
p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
color = _default_color(
ax.fill_between, hue, color,
{k: v for k, v in kwargs.items() if k in ["c", "color", "fc", "facecolor"]},
saturation=saturation,
)
linecolor = p._complement_color(linecolor, color, p._hue_map)
density_norm, common_norm = p._violin_scale_backcompat(
scale, scale_hue, density_norm, common_norm,
)
bw_method = p._violin_bw_backcompat(bw, bw_method)
kde_kws = dict(cut=cut, gridsize=gridsize, bw_method=bw_method, bw_adjust=bw_adjust)
inner_kws = {} if inner_kws is None else inner_kws.copy()
p.plot_violins(
width=width,
dodge=dodge,
gap=gap,
split=split,
color=color,
fill=fill,
linecolor=linecolor,
linewidth=linewidth,
inner=inner,
density_norm=density_norm,
common_norm=common_norm,
kde_kws=kde_kws,
inner_kws=inner_kws,
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
violinplot.__doc__ = dedent("""\
Draw a patch representing a KDE and add observations or box plot statistics.
A violin plot plays a similar role as a box-and-whisker plot. It shows the
distribution of data points after grouping by one (or more) variables.
Unlike a box plot, each violin is drawn using a kernel density estimate
of the underlying distribution.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
{orient}
{color}
{palette}
{saturation}
{fill}
inner : {{"box", "quart", "point", "stick", None}}
Representation of the data in the violin interior. One of the following:
- `"box"`: draw a miniature box-and-whisker plot
- `"quart"`: show the quartiles of the data
- `"point"` or `"stick"`: show each observation
split : bool
Show an un-mirrored distribution, alternating sides when using `hue`.
.. versionchanged:: v0.13.0
Previously, this option required a `hue` variable with exactly two levels.
{width}
{dodge}
{gap}
{linewidth}
{linecolor}
cut : float
Distance, in units of bandwidth, to extend the density past extreme
datapoints. Set to 0 to limit the violin within the data range.
gridsize : int
Number of points in the discrete grid used to evaluate the KDE.
bw_method : {{"scott", "silverman", float}}
Either the name of a reference rule or the scale factor to use when
computing the kernel bandwidth. The actual kernel size will be
determined by multiplying the scale factor by the standard deviation of
the data within each group.
.. versionadded:: v0.13.0
bw_adjust: float
Factor that scales the bandwidth to use more or less smoothing.
.. versionadded:: v0.13.0
density_norm : {{"area", "count", "width"}}
Method that normalizes each density to determine the violin's width.
If `area`, each violin will have the same area. If `count`, the width
will be proportional to the number of observations. If `width`, each
violin will have the same width.
.. versionadded:: v0.13.0
common_norm : bool
When `True`, normalize the density across all violins.
.. versionadded:: v0.13.0
{hue_norm}
{formatter}
{log_scale}
{native_scale}
{legend}
scale : {{"area", "count", "width"}}
.. deprecated:: v0.13.0
See `density_norm`.
scale_hue : bool
.. deprecated:: v0.13.0
See `common_norm`.
bw : {{'scott', 'silverman', float}}
.. deprecated:: v0.13.0
See `bw_method` and `bw_adjust`.
inner_kws : dict of key, value mappings
Keyword arguments for the "inner" plot, passed to one of:
- :class:`matplotlib.collections.LineCollection` (with `inner="stick"`)
- :meth:`matplotlib.axes.Axes.scatter` (with `inner="point"`)
- :meth:`matplotlib.axes.Axes.plot` (with `inner="quart"` or `inner="box"`)
Additionally, with `inner="box"`, the keywords `box_width`, `whis_width`,
and `marker` receive special handling for the components of the "box" plot.
.. versionadded:: v0.13.0
{ax_in}
kwargs : key, value mappings
Keyword arguments for the violin patches, passsed through to
:meth:`matplotlib.axes.Axes.fill_between`.
Returns
-------
{ax_out}
See Also
--------
{boxplot}
{stripplot}
{swarmplot}
{catplot}
Examples
--------
.. include:: ../docstrings/violinplot.rst
""").format(**_categorical_docs)
def boxenplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
orient=None, color=None, palette=None, saturation=.75, fill=True,
dodge="auto", width=.8, gap=0, linewidth=None, linecolor=None,
width_method="exponential", k_depth="tukey", outlier_prop=0.007, trust_alpha=0.05,
showfliers=True, hue_norm=None, log_scale=None, native_scale=False, formatter=None,
legend="auto", scale=deprecated, box_kws=None, flier_kws=None, line_kws=None,
ax=None, **kwargs,
):
p = _CategoricalPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if dodge == "auto":
# Needs to be before scale_categorical changes the coordinate series dtype
dodge = p._dodge_needed()
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
# Longer-term deprecations
width_method = p._boxen_scale_backcompat(scale, width_method)
saturation = saturation if fill else 1
p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
color = _default_color(
ax.fill_between, hue, color,
{}, # TODO how to get default color?
# {k: v for k, v in kwargs.items() if k in ["c", "color", "fc", "facecolor"]},
saturation=saturation,
)
linecolor = p._complement_color(linecolor, color, p._hue_map)
p.plot_boxens(
width=width,
dodge=dodge,
gap=gap,
fill=fill,
color=color,
linecolor=linecolor,
linewidth=linewidth,
width_method=width_method,
k_depth=k_depth,
outlier_prop=outlier_prop,
trust_alpha=trust_alpha,
showfliers=showfliers,
box_kws=box_kws,
flier_kws=flier_kws,
line_kws=line_kws,
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
boxenplot.__doc__ = dedent("""\
Draw an enhanced box plot for larger datasets.
This style of plot was originally named a "letter value" plot because it
shows a large number of quantiles that are defined as "letter values". It
is similar to a box plot in plotting a nonparametric representation of a
distribution in which all features correspond to actual observations. By
plotting more quantiles, it provides more information about the shape of
the distribution, particularly in the tails.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
{orient}
{color}
{palette}
{saturation}
{fill}
{dodge}
{width}
{gap}
{linewidth}
{linecolor}
width_method : {{"exponential", "linear", "area"}}
Method to use for the width of the letter value boxes:
- `"exponential"`: Represent the corresponding percentile
- `"linear"`: Decrease by a constant amount for each box
- `"area"`: Represent the density of data points in that box
k_depth : {{"tukey", "proportion", "trustworthy", "full"}} or int
The number of levels to compute and draw in each tail:
- `"tukey"`: Use log2(n) - 3 levels, covering similar range as boxplot whiskers
- `"proportion"`: Leave approximately `outlier_prop` fliers
- `"trusthworthy"`: Extend to level with confidence of at least `trust_alpha`
- `"full"`: Use log2(n) + 1 levels and extend to most extreme points
outlier_prop : float
Proportion of data expected to be outliers; used when `k_depth="proportion"`.
trust_alpha : float
Confidence threshold for most extreme level; used when `k_depth="trustworthy"`.
showfliers : bool
If False, suppress the plotting of outliers.
{hue_norm}
{log_scale}
{native_scale}
{formatter}
{legend}
box_kws: dict
Keyword arguments for the box artists; passed to
:class:`matplotlib.patches.Rectangle`.
.. versionadded:: v0.12.0
line_kws: dict
Keyword arguments for the line denoting the median; passed to
:meth:`matplotlib.axes.Axes.plot`.
.. versionadded:: v0.12.0
flier_kws: dict
Keyword arguments for the scatter denoting the outlier observations;
passed to :meth:`matplotlib.axes.Axes.scatter`.
.. versionadded:: v0.12.0
{ax_in}
kwargs : key, value mappings
Other keyword arguments are passed to :class:`matplotlib.patches.Rectangle`,
superceded by those in `box_kws`.
Returns
-------
{ax_out}
See Also
--------
{violinplot}
{boxplot}
{catplot}
Notes
-----
For a more extensive explanation, you can read the paper that introduced the plot:
https://vita.had.co.nz/papers/letter-value-plot.html
Examples
--------
.. include:: ../docstrings/boxenplot.rst
""").format(**_categorical_docs)
def stripplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
jitter=True, dodge=False, orient=None, color=None, palette=None,
size=5, edgecolor=default, linewidth=0,
hue_norm=None, log_scale=None, native_scale=False, formatter=None, legend="auto",
ax=None, **kwargs
):
p = _CategoricalPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
color = _default_color(ax.scatter, hue, color, kwargs)
edgecolor = p._complement_color(edgecolor, color, p._hue_map)
kwargs.setdefault("zorder", 3)
size = kwargs.get("s", size)
kwargs.update(
s=size ** 2,
edgecolor=edgecolor,
linewidth=linewidth,
)
p.plot_strips(
jitter=jitter,
dodge=dodge,
color=color,
plot_kws=kwargs,
)
# XXX this happens inside a plotting method in the distribution plots
# but maybe it's better out here? Alternatively, we have an open issue
# suggesting that _attach could add default axes labels, which seems smart.
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
stripplot.__doc__ = dedent("""\
Draw a categorical scatterplot using jitter to reduce overplotting.
A strip plot can be drawn on its own, but it is also a good complement
to a box or violin plot in cases where you want to show all observations
along with some representation of the underlying distribution.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
jitter : float, `True`/`1` is special-cased
Amount of jitter (only along the categorical axis) to apply. This
can be useful when you have many points and they overlap, so that
it is easier to see the distribution. You can specify the amount
of jitter (half the width of the uniform random variable support),
or use `True` for a good default.
dodge : bool
When a `hue` variable is assigned, setting this to `True` will
separate the strips for different hue levels along the categorical
axis and narrow the amount of space allotedto each strip. Otherwise,
the points for each level will be plotted in the same strip.
{orient}
{color}
{palette}
size : float
Radius of the markers, in points.
edgecolor : matplotlib color, "gray" is special-cased
Color of the lines around each point. If you pass `"gray"`, the
brightness is determined by the color palette used for the body
of the points. Note that `stripplot` has `linewidth=0` by default,
so edge colors are only visible with nonzero line width.
{linewidth}
{hue_norm}
{log_scale}
{native_scale}
{formatter}
{legend}
{ax_in}
kwargs : key, value mappings
Other keyword arguments are passed through to
:meth:`matplotlib.axes.Axes.scatter`.
Returns
-------
{ax_out}
See Also
--------
{swarmplot}
{boxplot}
{violinplot}
{catplot}
Examples
--------
.. include:: ../docstrings/stripplot.rst
""").format(**_categorical_docs)
def swarmplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
dodge=False, orient=None, color=None, palette=None,
size=5, edgecolor=None, linewidth=0, hue_norm=None, log_scale=None,
native_scale=False, formatter=None, legend="auto", warn_thresh=.05,
ax=None, **kwargs
):
p = _CategoricalPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
if not p.has_xy_data:
return ax
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
color = _default_color(ax.scatter, hue, color, kwargs)
edgecolor = p._complement_color(edgecolor, color, p._hue_map)
kwargs.setdefault("zorder", 3)
size = kwargs.get("s", size)
if linewidth is None:
linewidth = size / 10
kwargs.update(dict(
s=size ** 2,
edgecolor=edgecolor,
linewidth=linewidth,
))
p.plot_swarms(
dodge=dodge,
color=color,
warn_thresh=warn_thresh,
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
swarmplot.__doc__ = dedent("""\
Draw a categorical scatterplot with points adjusted to be non-overlapping.
This function is similar to :func:`stripplot`, but the points are adjusted
(only along the categorical axis) so that they don't overlap. This gives a
better representation of the distribution of values, but it does not scale
well to large numbers of observations. This style of plot is sometimes
called a "beeswarm".
A swarm plot can be drawn on its own, but it is also a good complement
to a box or violin plot in cases where you want to show all observations
along with some representation of the underlying distribution.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
dodge : bool
When a `hue` variable is assigned, setting this to `True` will
separate the swarms for different hue levels along the categorical
axis and narrow the amount of space allotedto each strip. Otherwise,
the points for each level will be plotted in the same swarm.
{orient}
{color}
{palette}
size : float
Radius of the markers, in points.
edgecolor : matplotlib color, "gray" is special-cased
Color of the lines around each point. If you pass `"gray"`, the
brightness is determined by the color palette used for the body
of the points.
{linewidth}
{log_scale}
{native_scale}
{formatter}
{legend}
{ax_in}
kwargs : key, value mappings
Other keyword arguments are passed through to
:meth:`matplotlib.axes.Axes.scatter`.
Returns
-------
{ax_out}
See Also
--------
{boxplot}
{violinplot}
{stripplot}
{catplot}
Examples
--------
.. include:: ../docstrings/swarmplot.rst
""").format(**_categorical_docs)
def barplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
estimator="mean", errorbar=("ci", 95), n_boot=1000, seed=None, units=None,
weights=None, orient=None, color=None, palette=None, saturation=.75,
fill=True, hue_norm=None, width=.8, dodge="auto", gap=0, log_scale=None,
native_scale=False, formatter=None, legend="auto", capsize=0, err_kws=None,
ci=deprecated, errcolor=deprecated, errwidth=deprecated, ax=None, **kwargs,
):
errorbar = utils._deprecate_ci(errorbar, ci)
# Be backwards compatible with len passed directly, which
# does not work in Series.agg (maybe a pandas bug?)
if estimator is len:
estimator = "size"
p = _CategoricalAggPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue, units=units, weight=weights),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if dodge == "auto":
# Needs to be before scale_categorical changes the coordinate series dtype
dodge = p._dodge_needed()
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
saturation = saturation if fill else 1
p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
color = _default_color(ax.bar, hue, color, kwargs, saturation=saturation)
agg_cls = WeightedAggregator if "weight" in p.plot_data else EstimateAggregator
aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed)
err_kws = {} if err_kws is None else normalize_kwargs(err_kws, mpl.lines.Line2D)
# Deprecations to remove in v0.15.0.
err_kws, capsize = p._err_kws_backcompat(err_kws, errcolor, errwidth, capsize)
p.plot_bars(
aggregator=aggregator,
dodge=dodge,
width=width,
gap=gap,
color=color,
fill=fill,
capsize=capsize,
err_kws=err_kws,
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
barplot.__doc__ = dedent("""\
Show point estimates and errors as rectangular bars.
A bar plot represents an aggregate or statistical estimate for a numeric
variable with the height of each rectangle and indicates the uncertainty
around that estimate using an error bar. Bar plots include 0 in the
axis range, and they are a good choice when 0 is a meaningful value
for the variable to take.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
{stat_api_params}
{orient}
{color}
{palette}
{saturation}
{fill}
{hue_norm}
{width}
{dodge}
{gap}
{log_scale}
{native_scale}
{formatter}
{legend}
{capsize}
{err_kws}
{ci}
{errcolor}
{errwidth}
{ax_in}
kwargs : key, value mappings
Other parameters are passed through to :class:`matplotlib.patches.Rectangle`.
Returns
-------
{ax_out}
See Also
--------
{countplot}
{pointplot}
{catplot}
Notes
-----
For datasets where 0 is not a meaningful value, a :func:`pointplot` will
allow you to focus on differences between levels of one or more categorical
variables.
It is also important to keep in mind that a bar plot shows only the mean (or
other aggregate) value, but it is often more informative to show the
distribution of values at each level of the categorical variables. In those
cases, approaches such as a :func:`boxplot` or :func:`violinplot` may be
more appropriate.
Examples
--------
.. include:: ../docstrings/barplot.rst
""").format(**_categorical_docs)
def pointplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
estimator="mean", errorbar=("ci", 95), n_boot=1000, seed=None, units=None,
weights=None, color=None, palette=None, hue_norm=None, markers=default,
linestyles=default, dodge=False, log_scale=None, native_scale=False,
orient=None, capsize=0, formatter=None, legend="auto", err_kws=None,
ci=deprecated, errwidth=deprecated, join=deprecated, scale=deprecated,
ax=None, **kwargs,
):
errorbar = utils._deprecate_ci(errorbar, ci)
p = _CategoricalAggPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue, units=units, weight=weights),
order=order,
orient=orient,
# Handle special backwards compatibility where pointplot originally
# did *not* default to multi-colored unless a palette was specified.
color="C0" if (color is None and palette is None) else color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
p.map_hue(palette=palette, order=hue_order, norm=hue_norm)
color = _default_color(ax.plot, hue, color, kwargs)
agg_cls = WeightedAggregator if "weight" in p.plot_data else EstimateAggregator
aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed)
err_kws = {} if err_kws is None else normalize_kwargs(err_kws, mpl.lines.Line2D)
# Deprecations to remove in v0.15.0.
p._point_kwargs_backcompat(scale, join, kwargs)
err_kws, capsize = p._err_kws_backcompat(err_kws, None, errwidth, capsize)
p.plot_points(
aggregator=aggregator,
markers=markers,
linestyles=linestyles,
dodge=dodge,
color=color,
capsize=capsize,
err_kws=err_kws,
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
pointplot.__doc__ = dedent("""\
Show point estimates and errors using lines with markers.
A point plot represents an estimate of central tendency for a numeric
variable by the position of the dot and provides some indication of the
uncertainty around that estimate using error bars.
Point plots can be more useful than bar plots for focusing comparisons
between different levels of one or more categorical variables. They are
particularly adept at showing interactions: how the relationship between
levels of one categorical variable changes across levels of a second
categorical variable. The lines that join each point from the same `hue`
level allow interactions to be judged by differences in slope, which is
easier for the eyes than comparing the heights of several groups of points
or bars.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
{stat_api_params}
{color}
{palette}
markers : string or list of strings
Markers to use for each of the `hue` levels.
linestyles : string or list of strings
Line styles to use for each of the `hue` levels.
dodge : bool or float
Amount to separate the points for each level of the `hue` variable along
the categorical axis. Setting to `True` will apply a small default.
{log_scale}
{native_scale}
{orient}
{capsize}
{formatter}
{legend}
{err_kws}
{ci}
{errwidth}
join : bool
If `True`, connect point estimates with a line.
.. deprecated:: v0.13.0
Set `linestyle="none"` to remove the lines between the points.
scale : float
Scale factor for the plot elements.
.. deprecated:: v0.13.0
Control element sizes with :class:`matplotlib.lines.Line2D` parameters.
{ax_in}
kwargs : key, value mappings
Other parameters are passed through to :class:`matplotlib.lines.Line2D`.
.. versionadded:: v0.13.0
Returns
-------
{ax_out}
See Also
--------
{barplot}
{catplot}
Notes
-----
It is important to keep in mind that a point plot shows only the mean (or
other estimator) value, but in many cases it may be more informative to
show the distribution of values at each level of the categorical variables.
In that case, other approaches such as a box or violin plot may be more
appropriate.
Examples
--------
.. include:: ../docstrings/pointplot.rst
""").format(**_categorical_docs)
def countplot(
data=None, *, x=None, y=None, hue=None, order=None, hue_order=None,
orient=None, color=None, palette=None, saturation=.75, fill=True, hue_norm=None,
stat="count", width=.8, dodge="auto", gap=0, log_scale=None, native_scale=False,
formatter=None, legend="auto", ax=None, **kwargs
):
if x is None and y is not None:
orient = "y"
x = 1 if list(y) else None
elif x is not None and y is None:
orient = "x"
y = 1 if list(x) else None
elif x is not None and y is not None:
raise TypeError("Cannot pass values for both `x` and `y`.")
p = _CategoricalAggPlotter(
data=data,
variables=dict(x=x, y=y, hue=hue),
order=order,
orient=orient,
color=color,
legend=legend,
)
if ax is None:
ax = plt.gca()
if p.plot_data.empty:
return ax
if dodge == "auto":
# Needs to be before scale_categorical changes the coordinate series dtype
dodge = p._dodge_needed()
if p.var_types.get(p.orient) == "categorical" or not native_scale:
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(ax, log_scale=log_scale)
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
saturation = saturation if fill else 1
p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
color = _default_color(ax.bar, hue, color, kwargs, saturation)
count_axis = {"x": "y", "y": "x"}[p.orient]
if p.input_format == "wide":
p.plot_data[count_axis] = 1
_check_argument("stat", ["count", "percent", "probability", "proportion"], stat)
p.variables[count_axis] = stat
if stat != "count":
denom = 100 if stat == "percent" else 1
p.plot_data[count_axis] /= len(p.plot_data) / denom
aggregator = EstimateAggregator("sum", errorbar=None)
p.plot_bars(
aggregator=aggregator,
dodge=dodge,
width=width,
gap=gap,
color=color,
fill=fill,
capsize=0,
err_kws={},
plot_kws=kwargs,
)
p._add_axis_labels(ax)
p._adjust_cat_axis(ax, axis=p.orient)
return ax
countplot.__doc__ = dedent("""\
Show the counts of observations in each categorical bin using bars.
A count plot can be thought of as a histogram across a categorical, instead
of quantitative, variable. The basic API and options are identical to those
for :func:`barplot`, so you can compare counts across nested variables.
Note that :func:`histplot` function offers similar functionality with additional
features (e.g. bar stacking), although its default behavior is somewhat different.
{categorical_narrative}
Parameters
----------
{categorical_data}
{input_params}
{order_vars}
{orient}
{color}
{palette}
{saturation}
{hue_norm}
stat : {{'count', 'percent', 'proportion', 'probability'}}
Statistic to compute; when not `'count'`, bar heights will be normalized so that
they sum to 100 (for `'percent'`) or 1 (otherwise) across the plot.
.. versionadded:: v0.13.0
{width}
{dodge}
{log_scale}
{native_scale}
{formatter}
{legend}
{ax_in}
kwargs : key, value mappings
Other parameters are passed through to :class:`matplotlib.patches.Rectangle`.
Returns
-------
{ax_out}
See Also
--------
histplot : Bin and count observations with additional options.
{barplot}
{catplot}
Examples
--------
.. include:: ../docstrings/countplot.rst
""").format(**_categorical_docs)
def catplot(
data=None, *, x=None, y=None, hue=None, row=None, col=None, kind="strip",
estimator="mean", errorbar=("ci", 95), n_boot=1000, seed=None, units=None,
weights=None, order=None, hue_order=None, row_order=None, col_order=None,
col_wrap=None, height=5, aspect=1, log_scale=None, native_scale=False,
formatter=None, orient=None, color=None, palette=None, hue_norm=None,
legend="auto", legend_out=True, sharex=True, sharey=True,
margin_titles=False, facet_kws=None, ci=deprecated, **kwargs
):
# Check for attempt to plot onto specific axes and warn
if "ax" in kwargs:
msg = ("catplot is a figure-level function and does not accept "
f"target axes. You may wish to try {kind}plot")
warnings.warn(msg, UserWarning)
kwargs.pop("ax")
desaturated_kinds = ["bar", "count", "box", "violin", "boxen"]
undodged_kinds = ["strip", "swarm", "point"]
if kind in ["bar", "point", "count"]:
Plotter = _CategoricalAggPlotter
else:
Plotter = _CategoricalPlotter
if kind == "count":
if x is None and y is not None:
orient = "y"
x = 1
elif x is not None and y is None:
orient = "x"
y = 1
elif x is not None and y is not None:
raise ValueError("Cannot pass values for both `x` and `y`.")
p = Plotter(
data=data,
variables=dict(
x=x, y=y, hue=hue, row=row, col=col, units=units, weight=weights
),
order=order,
orient=orient,
# Handle special backwards compatibility where pointplot originally
# did *not* default to multi-colored unless a palette was specified.
color="C0" if kind == "point" and palette is None and color is None else color,
legend=legend,
)
for var in ["row", "col"]:
# Handle faceting variables that lack name information
if var in p.variables and p.variables[var] is None:
p.variables[var] = f"_{var}_"
# Adapt the plot_data dataframe for use with FacetGrid
facet_data = p.plot_data.rename(columns=p.variables)
facet_data = facet_data.loc[:, ~facet_data.columns.duplicated()]
col_name = p.variables.get("col", None)
row_name = p.variables.get("row", None)
if facet_kws is None:
facet_kws = {}
g = FacetGrid(
data=facet_data, row=row_name, col=col_name, col_wrap=col_wrap,
row_order=row_order, col_order=col_order, sharex=sharex, sharey=sharey,
legend_out=legend_out, margin_titles=margin_titles,
height=height, aspect=aspect,
**facet_kws,
)
# Capture this here because scale_categorical is going to insert a (null)
# x variable even if it is empty. It's not clear whether that needs to
# happen or if disabling that is the cleaner solution.
has_xy_data = p.has_xy_data
if not native_scale or p.var_types[p.orient] == "categorical":
p.scale_categorical(p.orient, order=order, formatter=formatter)
p._attach(g, log_scale=log_scale)
if not has_xy_data:
return g
# Deprecations to remove in v0.14.0.
hue_order = p._palette_without_hue_backcompat(palette, hue_order)
palette, hue_order = p._hue_backcompat(color, palette, hue_order)
# Othe deprecations
errorbar = utils._deprecate_ci(errorbar, ci)
saturation = kwargs.pop(
"saturation",
0.75 if kind in desaturated_kinds and kwargs.get("fill", True) else 1
)
p.map_hue(palette=palette, order=hue_order, norm=hue_norm, saturation=saturation)
# Set a default color
# Otherwise each artist will be plotted separately and trip the color cycle
if hue is None:
color = "C0" if color is None else color
if saturation < 1:
color = desaturate(color, saturation)
if kind in ["strip", "swarm"]:
kwargs = normalize_kwargs(kwargs, mpl.collections.PathCollection)
kwargs["edgecolor"] = p._complement_color(
kwargs.pop("edgecolor", default), color, p._hue_map
)
width = kwargs.pop("width", 0.8)
dodge = kwargs.pop("dodge", False if kind in undodged_kinds else "auto")
if dodge == "auto":
dodge = p._dodge_needed()
if "weight" in p.plot_data:
if kind not in ["bar", "point"]:
msg = f"The `weights` parameter has no effect with kind={kind!r}."
warnings.warn(msg, stacklevel=2)
agg_cls = WeightedAggregator
else:
agg_cls = EstimateAggregator
if kind == "strip":
jitter = kwargs.pop("jitter", True)
plot_kws = kwargs.copy()
plot_kws.setdefault("zorder", 3)
plot_kws.setdefault("linewidth", 0)
if "s" not in plot_kws:
plot_kws["s"] = plot_kws.pop("size", 5) ** 2
p.plot_strips(
jitter=jitter,
dodge=dodge,
color=color,
plot_kws=plot_kws,
)
elif kind == "swarm":
warn_thresh = kwargs.pop("warn_thresh", .05)
plot_kws = kwargs.copy()
plot_kws.setdefault("zorder", 3)
if "s" not in plot_kws:
plot_kws["s"] = plot_kws.pop("size", 5) ** 2
if plot_kws.setdefault("linewidth", 0) is None:
plot_kws["linewidth"] = np.sqrt(plot_kws["s"]) / 10
p.plot_swarms(
dodge=dodge,
color=color,
warn_thresh=warn_thresh,
plot_kws=plot_kws,
)
elif kind == "box":
plot_kws = kwargs.copy()
gap = plot_kws.pop("gap", 0)
fill = plot_kws.pop("fill", True)
whis = plot_kws.pop("whis", 1.5)
linewidth = plot_kws.pop("linewidth", None)
fliersize = plot_kws.pop("fliersize", 5)
linecolor = p._complement_color(
plot_kws.pop("linecolor", "auto"), color, p._hue_map
)
p.plot_boxes(
width=width,
dodge=dodge,
gap=gap,
fill=fill,
whis=whis,
color=color,
linecolor=linecolor,
linewidth=linewidth,
fliersize=fliersize,
plot_kws=plot_kws,
)
elif kind == "violin":
plot_kws = kwargs.copy()
gap = plot_kws.pop("gap", 0)
fill = plot_kws.pop("fill", True)
split = plot_kws.pop("split", False)
inner = plot_kws.pop("inner", "box")
density_norm = plot_kws.pop("density_norm", "area")
common_norm = plot_kws.pop("common_norm", False)
scale = plot_kws.pop("scale", deprecated)
scale_hue = plot_kws.pop("scale_hue", deprecated)
density_norm, common_norm = p._violin_scale_backcompat(
scale, scale_hue, density_norm, common_norm,
)
bw_method = p._violin_bw_backcompat(
plot_kws.pop("bw", deprecated), plot_kws.pop("bw_method", "scott")
)
kde_kws = dict(
cut=plot_kws.pop("cut", 2),
gridsize=plot_kws.pop("gridsize", 100),
bw_adjust=plot_kws.pop("bw_adjust", 1),
bw_method=bw_method,
)
inner_kws = plot_kws.pop("inner_kws", {}).copy()
linewidth = plot_kws.pop("linewidth", None)
linecolor = plot_kws.pop("linecolor", "auto")
linecolor = p._complement_color(linecolor, color, p._hue_map)
p.plot_violins(
width=width,
dodge=dodge,
gap=gap,
split=split,
color=color,
fill=fill,
linecolor=linecolor,
linewidth=linewidth,
inner=inner,
density_norm=density_norm,
common_norm=common_norm,
kde_kws=kde_kws,
inner_kws=inner_kws,
plot_kws=plot_kws,
)
elif kind == "boxen":
plot_kws = kwargs.copy()
gap = plot_kws.pop("gap", 0)
fill = plot_kws.pop("fill", True)
linecolor = plot_kws.pop("linecolor", "auto")
linewidth = plot_kws.pop("linewidth", None)
k_depth = plot_kws.pop("k_depth", "tukey")
width_method = plot_kws.pop("width_method", "exponential")
outlier_prop = plot_kws.pop("outlier_prop", 0.007)
trust_alpha = plot_kws.pop("trust_alpha", 0.05)
showfliers = plot_kws.pop("showfliers", True)
box_kws = plot_kws.pop("box_kws", {})
flier_kws = plot_kws.pop("flier_kws", {})
line_kws = plot_kws.pop("line_kws", {})
if "scale" in plot_kws:
width_method = p._boxen_scale_backcompat(
plot_kws["scale"], width_method
)
linecolor = p._complement_color(linecolor, color, p._hue_map)
p.plot_boxens(
width=width,
dodge=dodge,
gap=gap,
fill=fill,
color=color,
linecolor=linecolor,
linewidth=linewidth,
width_method=width_method,
k_depth=k_depth,
outlier_prop=outlier_prop,
trust_alpha=trust_alpha,
showfliers=showfliers,
box_kws=box_kws,
flier_kws=flier_kws,
line_kws=line_kws,
plot_kws=plot_kws,
)
elif kind == "point":
aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed)
markers = kwargs.pop("markers", default)
linestyles = kwargs.pop("linestyles", default)
# Deprecations to remove in v0.15.0.
# TODO Uncomment when removing deprecation backcompat
# capsize = kwargs.pop("capsize", 0)
# err_kws = normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D)
p._point_kwargs_backcompat(
kwargs.pop("scale", deprecated),
kwargs.pop("join", deprecated),
kwargs
)
err_kws, capsize = p._err_kws_backcompat(
normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D),
None,
errwidth=kwargs.pop("errwidth", deprecated),
capsize=kwargs.pop("capsize", 0),
)
p.plot_points(
aggregator=aggregator,
markers=markers,
linestyles=linestyles,
dodge=dodge,
color=color,
capsize=capsize,
err_kws=err_kws,
plot_kws=kwargs,
)
elif kind == "bar":
aggregator = agg_cls(estimator, errorbar, n_boot=n_boot, seed=seed)
err_kws, capsize = p._err_kws_backcompat(
normalize_kwargs(kwargs.pop("err_kws", {}), mpl.lines.Line2D),
errcolor=kwargs.pop("errcolor", deprecated),
errwidth=kwargs.pop("errwidth", deprecated),
capsize=kwargs.pop("capsize", 0),
)
gap = kwargs.pop("gap", 0)
fill = kwargs.pop("fill", True)
p.plot_bars(
aggregator=aggregator,
dodge=dodge,
width=width,
gap=gap,
color=color,
fill=fill,
capsize=capsize,
err_kws=err_kws,
plot_kws=kwargs,
)
elif kind == "count":
aggregator = EstimateAggregator("sum", errorbar=None)
count_axis = {"x": "y", "y": "x"}[p.orient]
p.plot_data[count_axis] = 1
stat_options = ["count", "percent", "probability", "proportion"]
stat = _check_argument("stat", stat_options, kwargs.pop("stat", "count"))
p.variables[count_axis] = stat
if stat != "count":
denom = 100 if stat == "percent" else 1
p.plot_data[count_axis] /= len(p.plot_data) / denom
gap = kwargs.pop("gap", 0)
fill = kwargs.pop("fill", True)
p.plot_bars(
aggregator=aggregator,
dodge=dodge,
width=width,
gap=gap,
color=color,
fill=fill,
capsize=0,
err_kws={},
plot_kws=kwargs,
)
else:
msg = (
f"Invalid `kind`: {kind!r}. Options are 'strip', 'swarm', "
"'box', 'boxen', 'violin', 'bar', 'count', and 'point'."
)
raise ValueError(msg)
for ax in g.axes.flat:
p._adjust_cat_axis(ax, axis=p.orient)
g.set_axis_labels(p.variables.get("x"), p.variables.get("y"))
g.set_titles()
g.tight_layout()
for ax in g.axes.flat:
g._update_legend_data(ax)
ax.legend_ = None
if legend == "auto":
show_legend = not p._redundant_hue and p.input_format != "wide"
else:
show_legend = bool(legend)
if show_legend:
g.add_legend(title=p.variables.get("hue"), label_order=hue_order)
if data is not None:
# Replace the dataframe on the FacetGrid for any subsequent maps
g.data = data
return g
catplot.__doc__ = dedent("""\
Figure-level interface for drawing categorical plots onto a FacetGrid.
This function provides access to several axes-level functions that
show the relationship between a numerical and one or more categorical
variables using one of several visual representations. The `kind`
parameter selects the underlying axes-level function to use.
Categorical scatterplots:
- :func:`stripplot` (with `kind="strip"`; the default)
- :func:`swarmplot` (with `kind="swarm"`)
Categorical distribution plots:
- :func:`boxplot` (with `kind="box"`)
- :func:`violinplot` (with `kind="violin"`)
- :func:`boxenplot` (with `kind="boxen"`)
Categorical estimate plots:
- :func:`pointplot` (with `kind="point"`)
- :func:`barplot` (with `kind="bar"`)
- :func:`countplot` (with `kind="count"`)
Extra keyword arguments are passed to the underlying function, so you
should refer to the documentation for each to see kind-specific options.
{categorical_narrative}
After plotting, the :class:`FacetGrid` with the plot is returned and can
be used directly to tweak supporting plot details or add other layers.
Parameters
----------
{categorical_data}
{input_params}
row, col : names of variables in `data` or vector data
Categorical variables that will determine the faceting of the grid.
kind : str
The kind of plot to draw, corresponds to the name of a categorical
axes-level plotting function. Options are: "strip", "swarm", "box", "violin",
"boxen", "point", "bar", or "count".
{stat_api_params}
{order_vars}
row_order, col_order : lists of strings
Order to organize the rows and/or columns of the grid in; otherwise the
orders are inferred from the data objects.
{col_wrap}
{height}
{aspect}
{native_scale}
{formatter}
{orient}
{color}
{palette}
{hue_norm}
{legend}
{legend_out}
{share_xy}
{margin_titles}
facet_kws : dict
Dictionary of other keyword arguments to pass to :class:`FacetGrid`.
kwargs : key, value pairings
Other keyword arguments are passed through to the underlying plotting
function.
Returns
-------
:class:`FacetGrid`
Returns the :class:`FacetGrid` object with the plot on it for further
tweaking.
Examples
--------
.. include:: ../docstrings/catplot.rst
""").format(**_categorical_docs)
class Beeswarm:
"""Modifies a scatterplot artist to show a beeswarm plot."""
def __init__(self, orient="x", width=0.8, warn_thresh=.05):
self.orient = orient
self.width = width
self.warn_thresh = warn_thresh
def __call__(self, points, center):
"""Swarm `points`, a PathCollection, around the `center` position."""
# Convert from point size (area) to diameter
ax = points.axes
dpi = ax.figure.dpi
# Get the original positions of the points
orig_xy_data = points.get_offsets()
# Reset the categorical positions to the center line
cat_idx = 1 if self.orient == "y" else 0
orig_xy_data[:, cat_idx] = center
# Transform the data coordinates to point coordinates.
# We'll figure out the swarm positions in the latter
# and then convert back to data coordinates and replot
orig_x_data, orig_y_data = orig_xy_data.T
orig_xy = ax.transData.transform(orig_xy_data)
# Order the variables so that x is the categorical axis
if self.orient == "y":
orig_xy = orig_xy[:, [1, 0]]
# Add a column with each point's radius
sizes = points.get_sizes()
if sizes.size == 1:
sizes = np.repeat(sizes, orig_xy.shape[0])
edge = points.get_linewidth().item()
radii = (np.sqrt(sizes) + edge) / 2 * (dpi / 72)
orig_xy = np.c_[orig_xy, radii]
# Sort along the value axis to facilitate the beeswarm
sorter = np.argsort(orig_xy[:, 1])
orig_xyr = orig_xy[sorter]
# Adjust points along the categorical axis to prevent overlaps
new_xyr = np.empty_like(orig_xyr)
new_xyr[sorter] = self.beeswarm(orig_xyr)
# Transform the point coordinates back to data coordinates
if self.orient == "y":
new_xy = new_xyr[:, [1, 0]]
else:
new_xy = new_xyr[:, :2]
new_x_data, new_y_data = ax.transData.inverted().transform(new_xy).T
# Add gutters
t_fwd, t_inv = _get_transform_functions(ax, self.orient)
if self.orient == "y":
self.add_gutters(new_y_data, center, t_fwd, t_inv)
else:
self.add_gutters(new_x_data, center, t_fwd, t_inv)
# Reposition the points so they do not overlap
if self.orient == "y":
points.set_offsets(np.c_[orig_x_data, new_y_data])
else:
points.set_offsets(np.c_[new_x_data, orig_y_data])
def beeswarm(self, orig_xyr):
"""Adjust x position of points to avoid overlaps."""
# In this method, `x` is always the categorical axis
# Center of the swarm, in point coordinates
midline = orig_xyr[0, 0]
# Start the swarm with the first point
swarm = np.atleast_2d(orig_xyr[0])
# Loop over the remaining points
for xyr_i in orig_xyr[1:]:
# Find the points in the swarm that could possibly
# overlap with the point we are currently placing
neighbors = self.could_overlap(xyr_i, swarm)
# Find positions that would be valid individually
# with respect to each of the swarm neighbors
candidates = self.position_candidates(xyr_i, neighbors)
# Sort candidates by their centrality
offsets = np.abs(candidates[:, 0] - midline)
candidates = candidates[np.argsort(offsets)]
# Find the first candidate that does not overlap any neighbors
new_xyr_i = self.first_non_overlapping_candidate(candidates, neighbors)
# Place it into the swarm
swarm = np.vstack([swarm, new_xyr_i])
return swarm
def could_overlap(self, xyr_i, swarm):
"""Return a list of all swarm points that could overlap with target."""
# Because we work backwards through the swarm and can short-circuit,
# the for-loop is faster than vectorization
_, y_i, r_i = xyr_i
neighbors = []
for xyr_j in reversed(swarm):
_, y_j, r_j = xyr_j
if (y_i - y_j) < (r_i + r_j):
neighbors.append(xyr_j)
else:
break
return np.array(neighbors)[::-1]
def position_candidates(self, xyr_i, neighbors):
"""Return a list of coordinates that might be valid by adjusting x."""
candidates = [xyr_i]
x_i, y_i, r_i = xyr_i
left_first = True
for x_j, y_j, r_j in neighbors:
dy = y_i - y_j
dx = np.sqrt(max((r_i + r_j) ** 2 - dy ** 2, 0)) * 1.05
cl, cr = (x_j - dx, y_i, r_i), (x_j + dx, y_i, r_i)
if left_first:
new_candidates = [cl, cr]
else:
new_candidates = [cr, cl]
candidates.extend(new_candidates)
left_first = not left_first
return np.array(candidates)
def first_non_overlapping_candidate(self, candidates, neighbors):
"""Find the first candidate that does not overlap with the swarm."""
# If we have no neighbors, all candidates are good.
if len(neighbors) == 0:
return candidates[0]
neighbors_x = neighbors[:, 0]
neighbors_y = neighbors[:, 1]
neighbors_r = neighbors[:, 2]
for xyr_i in candidates:
x_i, y_i, r_i = xyr_i
dx = neighbors_x - x_i
dy = neighbors_y - y_i
sq_distances = np.square(dx) + np.square(dy)
sep_needed = np.square(neighbors_r + r_i)
# Good candidate does not overlap any of neighbors which means that
# squared distance between candidate and any of the neighbors has
# to be at least square of the summed radii
good_candidate = np.all(sq_distances >= sep_needed)
if good_candidate:
return xyr_i
raise RuntimeError(
"No non-overlapping candidates found. This should not happen."
)
def add_gutters(self, points, center, trans_fwd, trans_inv):
"""Stop points from extending beyond their territory."""
half_width = self.width / 2
low_gutter = trans_inv(trans_fwd(center) - half_width)
off_low = points < low_gutter
if off_low.any():
points[off_low] = low_gutter
high_gutter = trans_inv(trans_fwd(center) + half_width)
off_high = points > high_gutter
if off_high.any():
points[off_high] = high_gutter
gutter_prop = (off_high + off_low).sum() / len(points)
if gutter_prop > self.warn_thresh:
msg = (
"{:.1%} of the points cannot be placed; you may want "
"to decrease the size of the markers or use stripplot."
).format(gutter_prop)
warnings.warn(msg, UserWarning)
return points
BoxPlotArtists = namedtuple("BoxPlotArtists", "box median whiskers caps fliers mean")
class BoxPlotContainer:
def __init__(self, artist_dict):
self.boxes = artist_dict["boxes"]
self.medians = artist_dict["medians"]
self.whiskers = artist_dict["whiskers"]
self.caps = artist_dict["caps"]
self.fliers = artist_dict["fliers"]
self.means = artist_dict["means"]
self._label = None
self._children = [
*self.boxes,
*self.medians,
*self.whiskers,
*self.caps,
*self.fliers,
*self.means,
]
def __repr__(self):
return f"<BoxPlotContainer object with {len(self.boxes)} boxes>"
def __getitem__(self, idx):
pair_slice = slice(2 * idx, 2 * idx + 2)
return BoxPlotArtists(
self.boxes[idx] if self.boxes else [],
self.medians[idx] if self.medians else [],
self.whiskers[pair_slice] if self.whiskers else [],
self.caps[pair_slice] if self.caps else [],
self.fliers[idx] if self.fliers else [],
self.means[idx]if self.means else [],
)
def __iter__(self):
yield from (self[i] for i in range(len(self.boxes)))
def get_label(self):
return self._label
def set_label(self, value):
self._label = value
def get_children(self):
return self._children
def remove(self):
for child in self._children:
child.remove()