301 lines
10 KiB
Python
301 lines
10 KiB
Python
|
import inspect
|
||
|
import pydoc
|
||
|
|
||
|
import numpy as np
|
||
|
import pytest
|
||
|
|
||
|
import pandas as pd
|
||
|
from pandas import (
|
||
|
DataFrame,
|
||
|
Index,
|
||
|
Series,
|
||
|
date_range,
|
||
|
period_range,
|
||
|
timedelta_range,
|
||
|
)
|
||
|
import pandas._testing as tm
|
||
|
|
||
|
|
||
|
class TestSeriesMisc:
    """Miscellaneous Series API checks.

    Covers ``dir()``-based tab completion, hashing, axis aliases, ndarray
    compatibility, emptiness/size, ``attrs``, attribute access and the
    handling of the ``numeric_only`` keyword across Series methods.
    """

    def test_tab_completion(self):
        """A string Series lists ``str`` in ``dir()`` but not ``dt``/``cat``."""
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

    def test_tab_completion_dt(self):
        """A datetime Series lists ``dt`` in ``dir()`` but not ``str``/``cat``."""
        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

    def test_tab_completion_cat(self):
        """A categorical of strings lists ``cat`` and ``str`` but not ``dt``."""
        # Similarly for .cat, with the twist that .str / .dt are also listed
        # when the categories are of the matching type. Here: str categories.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

    def test_tab_completion_cat_str(self):
        """A categorical of datetimes lists ``cat`` and ``dt`` but not ``str``."""
        # same twist as the string categorical, but with datetime categories
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        """The public names on ``Series.cat`` are exactly the expected set."""
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        s = Series(list("aabbcde")).astype("category")
        # Only public (non-underscore) names take part in the comparison.
        results = sorted({r for r in s.cat.__dir__() if not r.startswith("_")})
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            Index(list("ab") * 5, dtype="category"),
            Index([str(i) for i in range(10)]),
            Index(["foo", "bar", "baz"] * 2),
            date_range("2020-01-01", periods=10),
            period_range("2020-01-01", periods=10, freq="D"),
            timedelta_range("1 day", periods=10),
            Index(np.arange(10), dtype=np.uint64),
            Index(np.arange(10), dtype=np.int64),
            Index(np.arange(10), dtype=np.float64),
            Index([True, False]),
            Index([f"a{i}" for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        """Identifier-like string labels of the index show up in ``dir()``.

        Only the first 100 unique level-0 labels are expected to be listed;
        any label past that must be absent.
        """
        # dir contains string-like values of the Index.
        s = Series(index=index, dtype=object)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                # Non-string or non-identifier labels are exempt.
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    @pytest.mark.parametrize("ser", [Series(dtype=object), Series([1])])
    def test_not_hashable(self, ser):
        """``hash()`` on a Series raises ``TypeError``."""
        msg = "unhashable type: 'Series'"
        with pytest.raises(TypeError, match=msg):
            hash(ser)

    def test_contains(self, datetime_series):
        """Every label of the fixture's index is contained in the Series."""
        tm.assert_contains_all(datetime_series.index, datetime_series)

    def test_axis_alias(self):
        """``"rows"`` is accepted as an alias of the ``"index"`` axis."""
        s = Series([1, 2, np.nan])
        tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        """``Series.index`` accessed on the class has a non-empty docstring."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_ndarray_compat(self):
        """Indexing a column by its ``idxmax`` label yields the column max."""
        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.default_rng(2).standard_normal((1000, 3)),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

    def test_ndarray_compat_like_func(self):
        """``np.ones_like`` on a Series gives float64 ones of the same length."""
        # using an ndarray like function
        s = Series(np.random.default_rng(2).standard_normal(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

    def test_ndarray_compat_ravel(self):
        """``Series.ravel`` warns as deprecated and matches ``values.ravel``."""
        # ravel
        s = Series(np.random.default_rng(2).standard_normal(10))
        with tm.assert_produces_warning(FutureWarning, match="ravel is deprecated"):
            result = s.ravel(order="F")
        tm.assert_almost_equal(result, s.values.ravel(order="F"))

    def test_empty_method(self):
        """A Series constructed without data reports ``empty``."""
        s_empty = Series(dtype=object)
        assert s_empty.empty

    @pytest.mark.parametrize("dtype", ["int64", object])
    def test_empty_method_full_series(self, dtype):
        """A Series with one index label is not ``empty``."""
        full_series = Series(index=[1], dtype=dtype)
        assert not full_series.empty

    @pytest.mark.parametrize("dtype", [None, "Int64"])
    def test_integer_series_size(self, dtype):
        """``size`` equals the number of elements for both tested dtypes."""
        # GH 25580
        s = Series(range(9), dtype=dtype)
        assert s.size == 9

    def test_attrs(self):
        """``attrs`` starts empty and is carried over to the result of ``+``."""
        s = Series([0, 1], name="abc")
        assert s.attrs == {}
        s.attrs["version"] = 1
        result = s + 1
        assert result.attrs == {"version": 1}

    def test_inspect_getmembers(self):
        """``inspect.getmembers`` on a Series emits the ``_data`` deprecation."""
        # GH38782
        pytest.importorskip("jinja2")
        ser = Series(dtype=object)
        msg = "Series._data is deprecated"
        with tm.assert_produces_warning(
            DeprecationWarning, match=msg, check_stacklevel=False
        ):
            inspect.getmembers(ser)

    def test_unknown_attribute(self):
        """Accessing an attribute that does not exist raises ``AttributeError``."""
        # GH#9680
        tdi = timedelta_range(start=0, periods=10, freq="1s")
        ser = Series(np.random.default_rng(2).normal(size=10), index=tdi)
        assert "foo" not in ser.__dict__
        msg = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=msg):
            ser.foo

    @pytest.mark.parametrize("op", ["year", "day", "second", "weekday"])
    def test_datetime_series_no_datelike_attrs(self, op, datetime_series):
        """Datetime-like field names are not attributes of the Series itself."""
        # GH#7206
        msg = f"'Series' object has no attribute '{op}'"
        with pytest.raises(AttributeError, match=msg):
            getattr(datetime_series, op)

    def test_series_datetimelike_attribute_access(self):
        """Index labels named like datetime fields are reachable as attributes."""
        # attribute access should still work!
        ser = Series({"year": 2000, "month": 1, "day": 10})
        assert ser.year == 2000
        assert ser.month == 1
        assert ser.day == 10

    def test_series_datetimelike_attribute_access_invalid(self):
        """A datetime field name that is not a label raises ``AttributeError``."""
        ser = Series({"year": 2000, "month": 1, "day": 10})
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            ser.weekday

    @pytest.mark.filterwarnings("ignore:Downcasting object dtype arrays:FutureWarning")
    @pytest.mark.parametrize(
        "kernel, has_numeric_only",
        [
            ("skew", True),
            ("var", True),
            ("all", False),
            ("prod", True),
            ("any", False),
            ("idxmin", False),
            ("quantile", False),
            ("idxmax", False),
            ("min", True),
            ("sem", True),
            ("mean", True),
            ("nunique", False),
            ("max", True),
            ("sum", True),
            ("count", False),
            ("median", True),
            ("std", True),
            ("backfill", False),
            ("rank", True),
            ("pct_change", False),
            ("cummax", False),
            ("shift", False),
            ("diff", False),
            ("cumsum", False),
            ("cummin", False),
            ("cumprod", False),
            ("fillna", False),
            ("ffill", False),
            ("pad", False),
            ("bfill", False),
            ("sample", False),
            ("tail", False),
            ("take", False),
            ("head", False),
            ("cov", False),
            ("corr", False),
        ],
    )
    @pytest.mark.parametrize("dtype", [bool, int, float, object])
    def test_numeric_only(self, kernel, has_numeric_only, dtype):
        """Check how each Series method reacts to ``numeric_only=True``.

        Parameters
        ----------
        kernel : str
            Name of the Series method under test.
        has_numeric_only : bool
            Whether the method is expected to accept ``numeric_only``.
        dtype : type
            dtype used to build the three-element input Series.

        Three outcomes are checked:

        * the method has no ``numeric_only`` keyword -> ``TypeError``;
        * the method has it but the Series is object dtype -> ``TypeError``
          with the "does not allow numeric_only=True" message;
        * otherwise ``numeric_only=True`` and ``numeric_only=False`` must
          give the same result.
        """
        # GH#47500
        ser = Series([0, 1, 1], dtype=dtype)

        # Positional arguments some kernels need before numeric_only.
        if kernel in ("corrwith", "corr", "cov"):
            args = (ser,)
        elif kernel == "nth":
            args = (0,)
        elif kernel == "fillna":
            args = (True,)
        elif kernel == "take":
            args = ([0],)
        elif kernel == "quantile":
            args = (0.5,)
        else:
            args = ()

        method = getattr(ser, kernel)
        if not has_numeric_only:
            msg = (
                "(got an unexpected keyword argument 'numeric_only'"
                "|too many arguments passed in)"
            )
            with pytest.raises(TypeError, match=msg):
                method(*args, numeric_only=True)
        elif dtype is object:
            msg = f"Series.{kernel} does not allow numeric_only=True with non-numeric"
            with pytest.raises(TypeError, match=msg):
                method(*args, numeric_only=True)
        else:
            result = method(*args, numeric_only=True)
            expected = method(*args, numeric_only=False)
            if isinstance(expected, Series):
                # transformer
                tm.assert_series_equal(result, expected)
            else:
                # reducer
                assert result == expected
|
||
|
|
||
|
|
||
|
@pytest.mark.parametrize("converter", [int, float, complex])
def test_float_int_deprecated(converter):
    """Converting a one-element Series with a scalar constructor warns.

    The conversion must emit a ``FutureWarning`` and still produce the same
    value as applying ``converter`` to the plain scalar ``1``.
    """
    # GH 51101
    single = Series([1])
    expected = converter(1)
    with tm.assert_produces_warning(FutureWarning):
        converted = converter(single)
    assert converted == expected
|