# Only tests that raise an error and have no better location should go here.
# Tests for specific groupby methods should go in their respective
# test file.
|
||
|
|
||
|
import datetime
|
||
|
import re
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
from pandas import (
|
||
|
Categorical,
|
||
|
DataFrame,
|
||
|
Grouper,
|
||
|
Series,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
from pandas.tests.groupby import get_groupby_method_args
|
||
|
|
||
|
|
||
|
@pytest.fixture(
    params=[
        # column labels: a single label, then lists of one and two labels
        "a",
        ["a"],
        ["a", "b"],
        # Grouper object keyed on column "a"
        Grouper(key="a"),
        # callable
        lambda x: x % 2,
        # explicit group keys: list, ndarray, dict and Series
        [0, 0, 0, 1, 2, 2, 2, 3, 3],
        np.array([0, 0, 0, 1, 2, 2, 2, 3, 3]),
        dict(zip(range(9), [0, 0, 0, 1, 2, 2, 2, 3, 3])),
        Series([1, 1, 1, 1, 1, 2, 2, 2, 2]),
        # list of two Series
        [
            Series([1, 1, 1, 1, 1, 2, 2, 2, 2]),
            Series([3, 3, 4, 4, 4, 4, 4, 3, 3]),
        ],
    ]
)
def by(request):
    """Parametrized fixture yielding each value the tests pass as ``by=``."""
    return request.param
|
||
|
|
||
|
|
||
|
@pytest.fixture(params=[True, False])
def groupby_series(request):
    """Boolean flag; the tests select column ``"d"`` from the groupby when True."""
    return request.param
|
||
|
|
||
|
|
||
|
@pytest.fixture
def df_with_string_col():
    """Nine-row frame with columns a, b, c and a column ``d`` of single letters."""
    columns = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": list("xyzwtyuio"),
    }
    return DataFrame(columns)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def df_with_datetime_col():
    """Nine-row frame with columns a, b, c and ``d`` set to one datetime scalar."""
    columns = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        # a scalar: the same timestamp is used for the whole column
        "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
    }
    return DataFrame(columns)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def df_with_timedelta_col():
    """Nine-row frame with columns a, b, c and ``d`` set to one timedelta scalar."""
    columns = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        # a scalar: the same one-day timedelta is used for the whole column
        "d": datetime.timedelta(days=1),
    }
    return DataFrame(columns)
|
||
|
|
||
|
|
||
|
@pytest.fixture
def df_with_cat_col():
    """Nine-row frame with columns a, b, c and an ordered categorical ``d``.

    Category ``"d"`` is declared but never appears in the values.
    """
    categorical = Categorical(
        ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
        categories=["a", "b", "c", "d"],
        ordered=True,
    )
    columns = {
        "a": [1, 1, 1, 1, 1, 2, 2, 2, 2],
        "b": [3, 3, 4, 4, 4, 4, 4, 3, 3],
        "c": range(9),
        "d": categorical,
    }
    return DataFrame(columns)
|
||
|
|
||
|
|
||
|
def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
    """Invoke ``groupby_func`` on ``gb`` and check the warning/exception outcome.

    Parameters
    ----------
    klass : exception type, tuple of exception types, or None
        Passed to ``pytest.raises``; ``None`` means the call is made without
        expecting an exception.
    msg : str
        Pattern passed as ``match`` to ``pytest.raises`` (unused when
        ``klass`` is None).
    how : str
        ``"method"`` calls ``getattr(gb, groupby_func)(*args)``, ``"agg"``
        calls ``gb.agg(groupby_func, *args)``; any other value calls
        ``gb.transform(groupby_func, *args)``.
    gb : groupby object the operation is invoked on
    groupby_func : operation name (or callable, for agg/transform)
    args : sequence of positional arguments forwarded to the operation
    warn_msg : str, default ""
        Pattern passed as ``match`` to ``tm.assert_produces_warning``. An
        empty string means ``None`` is passed as the expected warning class;
        otherwise ``FutureWarning`` is expected.
    """

    def _invoke():
        # Single place for the three ways of running the operation.
        if how == "method":
            getattr(gb, groupby_func)(*args)
        elif how == "agg":
            gb.agg(groupby_func, *args)
        else:
            gb.transform(groupby_func, *args)

    expected_warning = FutureWarning if warn_msg != "" else None
    with tm.assert_produces_warning(expected_warning, match=warn_msg):
        if klass is None:
            _invoke()
            return
        with pytest.raises(klass, match=msg):
            _invoke()
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_string(
    how, by, groupby_series, groupby_func, df_with_string_col
):
    """Check every groupby op against the frame whose ``d`` column holds strings.

    The table below maps each op name to ``(exception, message pattern)``;
    ``(None, "")`` means the op is called without expecting an exception.
    """
    frame = df_with_string_col
    args = get_groupby_method_args(groupby_func, frame)
    gb = frame.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

        if groupby_func == "corrwith":
            # The column-selected groupby has no corrwith, so there is
            # nothing further to call.
            assert not hasattr(gb, "corrwith")
            return

    ok = (None, "")
    cum_errors = (NotImplementedError, TypeError)
    expectations = {
        "all": ok,
        "any": ok,
        "bfill": ok,
        "corrwith": (TypeError, "Could not convert"),
        "count": ok,
        "cumcount": ok,
        "cummax": (
            cum_errors,
            "(function|cummax) is not (implemented|supported) for (this|object) dtype",
        ),
        "cummin": (
            cum_errors,
            "(function|cummin) is not (implemented|supported) for (this|object) dtype",
        ),
        "cumprod": (
            cum_errors,
            "(function|cumprod) is not (implemented|supported) for (this|object) dtype",
        ),
        "cumsum": (
            cum_errors,
            "(function|cumsum) is not (implemented|supported) for (this|object) dtype",
        ),
        "diff": (TypeError, "unsupported operand type"),
        "ffill": ok,
        "fillna": ok,
        "first": ok,
        "idxmax": ok,
        "idxmin": ok,
        "last": ok,
        "max": ok,
        "mean": (
            TypeError,
            re.escape("agg function failed [how->mean,dtype->object]"),
        ),
        "median": (
            TypeError,
            re.escape("agg function failed [how->median,dtype->object]"),
        ),
        "min": ok,
        "ngroup": ok,
        "nunique": ok,
        "pct_change": (TypeError, "unsupported operand type"),
        "prod": (
            TypeError,
            re.escape("agg function failed [how->prod,dtype->object]"),
        ),
        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
        "rank": ok,
        "sem": (ValueError, "could not convert string to float"),
        "shift": ok,
        "size": ok,
        "skew": (ValueError, "could not convert string to float"),
        "std": (ValueError, "could not convert string to float"),
        "sum": ok,
        "var": (
            TypeError,
            re.escape("agg function failed [how->var,dtype->"),
        ),
    }
    klass, msg = expectations[groupby_func]

    # Only fillna is expected to emit a (deprecation) warning here.
    warn_msg = ""
    if groupby_func == "fillna":
        kind = "Series" if groupby_series else "DataFrame"
        warn_msg = f"{kind}GroupBy.fillna is deprecated"

    _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_string_udf(how, by, groupby_series, df_with_string_col):
    """A TypeError raised inside a user function passed to agg/transform is
    raised to the caller with its message intact (string-column frame)."""
    gb = df_with_string_col.groupby(by=by)
    if groupby_series:
        gb = gb["d"]

    def func(x):
        raise TypeError("Test error message")

    apply_udf = getattr(gb, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_string_np(
    how, by, groupby_series, groupby_func_np, df_with_string_col
):
    """Check ``np.sum`` / ``np.mean`` passed to agg/transform on string data.

    ``np.sum`` is expected to run without an exception, ``np.mean`` to raise
    ``TypeError``. In both cases a ``FutureWarning`` naming the groupby
    method being used is expected.
    """
    # GH#50749
    df = df_with_string_col
    gb = df.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    klass, msg = {
        np.sum: (None, ""),
        np.mean: (
            TypeError,
            re.escape("agg function failed [how->mean,dtype->object]"),
        ),
    }[groupby_func_np]

    # Use a real alternation group: the previous "[sum|mean]" was a character
    # class matching one character, so it did not check the method name.
    # The dot is escaped so it only matches a literal ".".
    kind = "Series" if groupby_series else "DataFrame"
    warn_msg = rf"using {kind}GroupBy\.(sum|mean)"
    _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_datetime(
    how, by, groupby_series, groupby_func, df_with_datetime_col
):
    """Check every groupby op against the frame whose ``d`` column is datetime.

    The table below maps each op name to ``(exception, message pattern)``;
    ``(None, "")`` means the op is called without expecting an exception.
    """
    frame = df_with_datetime_col
    args = get_groupby_method_args(groupby_func, frame)
    gb = frame.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

        if groupby_func == "corrwith":
            # The column-selected groupby has no corrwith, so there is
            # nothing further to call.
            assert not hasattr(gb, "corrwith")
            return

    ok = (None, "")
    expectations = {
        "all": ok,
        "any": ok,
        "bfill": ok,
        "corrwith": (TypeError, "cannot perform __mul__ with this index type"),
        "count": ok,
        "cumcount": ok,
        "cummax": ok,
        "cummin": ok,
        "cumprod": (TypeError, "datetime64 type does not support cumprod operations"),
        "cumsum": (TypeError, "datetime64 type does not support cumsum operations"),
        "diff": ok,
        "ffill": ok,
        "fillna": ok,
        "first": ok,
        "idxmax": ok,
        "idxmin": ok,
        "last": ok,
        "max": ok,
        "mean": ok,
        "median": ok,
        "min": ok,
        "ngroup": ok,
        "nunique": ok,
        "pct_change": (TypeError, "cannot perform __truediv__ with this index type"),
        "prod": (TypeError, "datetime64 type does not support prod"),
        "quantile": ok,
        "rank": ok,
        "sem": ok,
        "shift": ok,
        "size": ok,
        "skew": (
            TypeError,
            "|".join(
                [
                    r"dtype datetime64\[ns\] does not support reduction",
                    "datetime64 type does not support skew operations",
                ]
            ),
        ),
        "std": ok,
        "sum": (TypeError, "datetime64 type does not support sum operations"),
        "var": (TypeError, "datetime64 type does not support var operations"),
    }
    klass, msg = expectations[groupby_func]

    # any/all and fillna are the ops expected to emit a deprecation warning.
    warn_msg = ""
    if groupby_func in ["any", "all"]:
        warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated"
    elif groupby_func == "fillna":
        kind = "Series" if groupby_series else "DataFrame"
        warn_msg = f"{kind}GroupBy.fillna is deprecated"

    _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=warn_msg)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_datetime_udf(how, by, groupby_series, df_with_datetime_col):
    """A TypeError raised inside a user function passed to agg/transform is
    raised to the caller with its message intact (datetime-column frame)."""
    gb = df_with_datetime_col.groupby(by=by)
    if groupby_series:
        gb = gb["d"]

    def func(x):
        raise TypeError("Test error message")

    apply_udf = getattr(gb, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_datetime_np(
    how, by, groupby_series, groupby_func_np, df_with_datetime_col
):
    """Check ``np.sum`` / ``np.mean`` passed to agg/transform on datetime data.

    ``np.sum`` is expected to raise ``TypeError``, ``np.mean`` to run without
    an exception. In both cases a ``FutureWarning`` naming the groupby
    method being used is expected.
    """
    # GH#50749
    df = df_with_datetime_col
    gb = df.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    klass, msg = {
        np.sum: (TypeError, "datetime64 type does not support sum operations"),
        np.mean: (None, ""),
    }[groupby_func_np]

    # Use a real alternation group: the previous "[sum|mean]" was a character
    # class matching one character, so it did not check the method name.
    # The dot is escaped so it only matches a literal ".".
    kind = "Series" if groupby_series else "DataFrame"
    warn_msg = rf"using {kind}GroupBy\.(sum|mean)"
    _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"])
def test_groupby_raises_timedelta(func, df_with_timedelta_col):
    """Each listed op, called as a method on a groupby over column ``a`` of
    the timedelta frame, is expected to raise ``TypeError``."""
    gb = df_with_timedelta_col.groupby(by="a")

    _call_and_check(
        klass=TypeError,
        msg="timedelta64 type does not support .* operations",
        how="method",
        gb=gb,
        groupby_func=func,
        args=[],
    )
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category(
    how, by, groupby_series, groupby_func, using_copy_on_write, df_with_cat_col
):
    """Check every groupby op against the frame whose ``d`` column is categorical.

    The table below maps each op name to ``(exception, message pattern)``;
    ``(None, "")`` means the op is called without expecting an exception.
    """
    # GH#50749
    frame = df_with_cat_col
    args = get_groupby_method_args(groupby_func, frame)
    gb = frame.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

        if groupby_func == "corrwith":
            # The column-selected groupby has no corrwith, so there is
            # nothing further to call.
            assert not hasattr(gb, "corrwith")
            return

    ok = (None, "")
    cum_errors = (NotImplementedError, TypeError)

    if using_copy_on_write:
        fillna_expected = ok  # no-op with CoW
    else:
        fillna_expected = (
            TypeError,
            r"Cannot setitem on a Categorical with a new category \(0\), "
            "set the categories first",
        )

    expectations = {
        "all": ok,
        "any": ok,
        "bfill": ok,
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "count": ok,
        "cumcount": ok,
        "cummax": (
            cum_errors,
            "(category type does not support cummax operations|"
            "category dtype not supported|"
            "cummax is not supported for category dtype)",
        ),
        "cummin": (
            cum_errors,
            "(category type does not support cummin operations|"
            "category dtype not supported|"
            "cummin is not supported for category dtype)",
        ),
        "cumprod": (
            cum_errors,
            "(category type does not support cumprod operations|"
            "category dtype not supported|"
            "cumprod is not supported for category dtype)",
        ),
        "cumsum": (
            cum_errors,
            "(category type does not support cumsum operations|"
            "category dtype not supported|"
            "cumsum is not supported for category dtype)",
        ),
        "diff": (
            TypeError,
            r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
        ),
        "ffill": ok,
        "fillna": fillna_expected,
        "first": ok,
        "idxmax": ok,
        "idxmin": ok,
        "last": ok,
        "max": ok,
        "mean": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'mean'",
                    "category dtype does not support aggregation 'mean'",
                ]
            ),
        ),
        "median": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'median'",
                    "category dtype does not support aggregation 'median'",
                ]
            ),
        ),
        "min": ok,
        "ngroup": ok,
        "nunique": ok,
        "pct_change": (
            TypeError,
            r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'",
        ),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, "No matching signature found"),
        "rank": ok,
        "sem": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'sem'",
                    "category dtype does not support aggregation 'sem'",
                ]
            ),
        ),
        "shift": ok,
        "size": ok,
        "skew": (
            TypeError,
            "|".join(
                [
                    "dtype category does not support reduction 'skew'",
                    "category type does not support skew operations",
                ]
            ),
        ),
        "std": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'std'",
                    "category dtype does not support aggregation 'std'",
                ]
            ),
        ),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'var'",
                    "category dtype does not support aggregation 'var'",
                ]
            ),
        ),
    }
    klass, msg = expectations[groupby_func]

    # Only fillna is expected to emit a (deprecation) warning here.
    warn_msg = ""
    if groupby_func == "fillna":
        kind = "Series" if groupby_series else "DataFrame"
        warn_msg = f"{kind}GroupBy.fillna is deprecated"

    _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_category_udf(how, by, groupby_series, df_with_cat_col):
    """A TypeError raised inside a user function passed to agg/transform is
    raised to the caller with its message intact (categorical-column frame)."""
    # GH#50749
    gb = df_with_cat_col.groupby(by=by)
    if groupby_series:
        gb = gb["d"]

    def func(x):
        raise TypeError("Test error message")

    apply_udf = getattr(gb, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["agg", "transform"])
@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean])
def test_groupby_raises_category_np(
    how, by, groupby_series, groupby_func_np, df_with_cat_col
):
    """Check ``np.sum`` / ``np.mean`` passed to agg/transform on categorical data.

    Both are expected to raise ``TypeError``, and in both cases a
    ``FutureWarning`` naming the groupby method being used is expected.
    """
    # GH#50749
    df = df_with_cat_col
    gb = df.groupby(by=by)

    if groupby_series:
        gb = gb["d"]

    klass, msg = {
        np.sum: (TypeError, "category type does not support sum operations"),
        np.mean: (
            TypeError,
            "category dtype does not support aggregation 'mean'",
        ),
    }[groupby_func_np]

    # Use a real alternation group: the previous "[sum|mean]" was a character
    # class matching one character, so it did not check the method name.
    # The dot is escaped so it only matches a literal ".".
    kind = "Series" if groupby_series else "DataFrame"
    warn_msg = rf"using {kind}GroupBy\.(sum|mean)"
    _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg)
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category_on_category(
    how,
    by,
    groupby_series,
    groupby_func,
    observed,
    using_copy_on_write,
    df_with_cat_col,
):
    """Check every groupby op when column ``a`` is categorical as well as ``d``.

    Column ``a`` of the fixture frame is replaced by an ordered categorical
    with an unused category before grouping with ``observed=observed``. The
    table below maps each op name to ``(exception, message pattern)``;
    ``(None, "")`` means the op is called without expecting an exception.
    """
    # GH#50749
    frame = df_with_cat_col
    frame["a"] = Categorical(
        ["a", "a", "a", "a", "b", "b", "b", "b", "c"],
        categories=["a", "b", "c", "d"],
        ordered=True,
    )
    args = get_groupby_method_args(groupby_func, frame)
    gb = frame.groupby(by=by, observed=observed)

    if groupby_series:
        gb = gb["d"]

        if groupby_func == "corrwith":
            # The column-selected groupby has no corrwith, so there is
            # nothing further to call.
            assert not hasattr(gb, "corrwith")
            return

    empty_groups = not observed and any(group.empty for group in gb.groups.values())

    # The isinstance checks keep the list comparison from being evaluated
    # for `by` values that are not lists of strings.
    grouping_by_a_and_b = (
        isinstance(by, list) and isinstance(by[0], str) and by == ["a", "b"]
    )
    if not observed and how != "transform" and grouping_by_a_and_b:
        assert not empty_groups
        # TODO: empty_groups should be true due to unobserved categorical combinations
        empty_groups = True
    if how == "transform":
        # empty groups will be ignored
        empty_groups = False

    ok = (None, "")
    cum_errors = (NotImplementedError, TypeError)

    if using_copy_on_write:
        fillna_expected = ok  # no-op with CoW
    else:
        fillna_expected = (
            TypeError,
            r"Cannot setitem on a Categorical with a new category \(0\), "
            "set the categories first",
        )

    if empty_groups:
        idx_expected = (ValueError, "empty group due to unobserved categories")
    else:
        idx_expected = ok

    expectations = {
        "all": ok,
        "any": ok,
        "bfill": ok,
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "count": ok,
        "cumcount": ok,
        "cummax": (
            cum_errors,
            "(cummax is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cummax operations)",
        ),
        "cummin": (
            cum_errors,
            "(cummin is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cummin operations)",
        ),
        "cumprod": (
            cum_errors,
            "(cumprod is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cumprod operations)",
        ),
        "cumsum": (
            cum_errors,
            "(cumsum is not supported for category dtype|"
            "category dtype not supported|"
            "category type does not support cumsum operations)",
        ),
        "diff": (TypeError, "unsupported operand type"),
        "ffill": ok,
        "fillna": fillna_expected,
        "first": ok,
        "idxmax": idx_expected,
        "idxmin": idx_expected,
        "last": ok,
        "max": ok,
        "mean": (TypeError, "category dtype does not support aggregation 'mean'"),
        "median": (TypeError, "category dtype does not support aggregation 'median'"),
        "min": ok,
        "ngroup": ok,
        "nunique": ok,
        "pct_change": (TypeError, "unsupported operand type"),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, ""),
        "rank": ok,
        "sem": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'sem'",
                    "category dtype does not support aggregation 'sem'",
                ]
            ),
        ),
        "shift": ok,
        "size": ok,
        "skew": (
            TypeError,
            "|".join(
                [
                    "category type does not support skew operations",
                    "dtype category does not support reduction 'skew'",
                ]
            ),
        ),
        "std": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'std'",
                    "category dtype does not support aggregation 'std'",
                ]
            ),
        ),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "|".join(
                [
                    "'Categorical' .* does not support reduction 'var'",
                    "category dtype does not support aggregation 'var'",
                ]
            ),
        ),
    }
    klass, msg = expectations[groupby_func]

    # Only fillna is expected to emit a (deprecation) warning here.
    warn_msg = ""
    if groupby_func == "fillna":
        kind = "Series" if groupby_series else "DataFrame"
        warn_msg = f"{kind}GroupBy.fillna is deprecated"

    _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
|
||
|
|
||
|
|
||
|
def test_subsetting_columns_axis_1_raises():
    """Selecting a column from a groupby created with ``axis=1`` raises
    ValueError; creating that groupby emits a FutureWarning."""
    # GH 35443
    frame = DataFrame({"a": [1], "b": [2], "c": [3]})

    with tm.assert_produces_warning(
        FutureWarning, match="DataFrame.groupby with axis=1 is deprecated"
    ):
        grouped = frame.groupby("a", axis=1)

    with pytest.raises(ValueError, match="Cannot subset columns when using axis=1"):
        grouped["b"]
|