AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/pandas/tests/strings/test_api.py

import numpy as np
import pytest

from pandas import (
    CategoricalDtype,
    DataFrame,
    Index,
    MultiIndex,
    Series,
    _testing as tm,
    option_context,
)
from pandas.core.strings.accessor import StringMethods

# Hand-picked subset of the (inferred dtype, values) cases defined in
# pandas/conftest.py; these parametrize the fixture defined below.
_any_allowed_skipna_inferred_dtype = [
    ("string", ["a", np.nan, "c"]),
    ("bytes", [b"a", np.nan, b"c"]),
    ("empty", [np.nan, np.nan, np.nan]),
    ("empty", []),
    ("mixed-integer", ["a", np.nan, 2]),
]
# the inferred type name of every case doubles as its pytest id
ids = tuple(label for label, _ in _any_allowed_skipna_inferred_dtype)


@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)
def any_allowed_skipna_inferred_dtype(request):
    """
    Fixture for (inferred) dtypes allowed in StringMethods.__init__

    One case is produced per entry of ``_any_allowed_skipna_inferred_dtype``.
    The (inferred) types covered by those entries are:
    * 'string'
    * 'bytes'
    * 'empty' (once all-NaN, once zero-length)
    * 'mixed-integer'

    Returns
    -------
    inferred_dtype : str
        The string for the inferred dtype from _libs.lib.infer_dtype
    values : np.ndarray
        An array of object dtype that will be inferred to have
        `inferred_dtype`

    Examples
    --------
    >>> from pandas._libs import lib
    >>>
    >>> def test_something(any_allowed_skipna_inferred_dtype):
    ...     inferred_dtype, values = any_allowed_skipna_inferred_dtype
    ...     # will pass
    ...     assert lib.infer_dtype(values, skipna=True) == inferred_dtype
    ...
    ...     # constructor for .str-accessor will also pass
    ...     Series(values).str
    """
    label, raw_values = request.param

    # correctness of inference tested in tests/dtypes/test_inference.py;
    # here the raw list is only wrapped as object dtype to avoid casting
    return label, np.array(raw_values, dtype=object)


def test_api(any_string_dtype):
    """``.str`` is StringMethods on the Series class and on an instance."""
    # GH 6106, GH 9322
    class_level_accessor = Series.str
    assert class_level_accessor is StringMethods

    ser = Series([""], dtype=any_string_dtype)
    assert isinstance(ser.str, StringMethods)


def test_api_mi_raises():
    """Accessing ``.str`` on a MultiIndex raises; ``hasattr`` reports False."""
    # GH 23679
    multi = MultiIndex.from_arrays([["a", "b", "c"]])
    expected_msg = "Can only use .str accessor with Index, not MultiIndex"

    with pytest.raises(AttributeError, match=expected_msg):
        multi.str

    accessor_visible = hasattr(multi, "str")
    assert not accessor_visible


@pytest.mark.parametrize("dtype", [object, "category"])
def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype):
    """
    The ``.str`` accessor is constructible only for string-like inferred types.

    For every other inferred type, attribute access must raise
    AttributeError and ``hasattr`` must report False.
    """
    # one instance of parametrized fixture
    inferred_dtype, values = any_skipna_inferred_dtype

    # explicit dtype to avoid casting
    obj = index_or_series(values, dtype=dtype)

    passes_constructor = inferred_dtype in (
        "string",
        "unicode",
        "empty",
        "bytes",
        "mixed",
        "mixed-integer",
    )

    if not passes_constructor:
        # GH 9184, GH 23011, GH 23163
        with pytest.raises(
            AttributeError,
            match="Can only use .str accessor with string values.*",
        ):
            obj.str
        assert not hasattr(obj, "str")
        return

    # GH 6106
    assert isinstance(obj.str, StringMethods)


@pytest.mark.parametrize("dtype", [object, "category"])
def test_api_per_method(
    index_or_series,
    dtype,
    any_allowed_skipna_inferred_dtype,
    any_string_method,
    request,
):
    """
    Each string method either runs or raises TypeError per inferred dtype.

    Correctness of the individual methods is not checked here; the test
    only verifies that a method can be called on the (inferred) dtypes it
    supports and raises a TypeError with the expected message on the rest.
    Known-broken corner cases are marked xfail before the call is made.
    """
    # one instance of each parametrized fixture
    inferred_dtype, values = any_allowed_skipna_inferred_dtype
    method_name, args, kwargs = any_string_method

    # (exception type, reason) for a known failure, or None when none applies
    known_failure = None
    if index_or_series is Index:
        if values.size == 0:
            if method_name in ("partition", "rpartition") and kwargs.get(
                "expand", True
            ):
                known_failure = (TypeError, "Method cannot deal with empty Index")
            elif method_name == "split" and kwargs.get("expand"):
                known_failure = (
                    TypeError,
                    "Split fails on empty Series when expand=True",
                )
            elif method_name == "get_dummies":
                known_failure = (
                    ValueError,
                    "Need to fortify get_dummies corner cases",
                )
        elif (
            inferred_dtype == "empty"
            and dtype == object
            and method_name == "get_dummies"
        ):
            known_failure = (ValueError, "Need to fortify get_dummies corner cases")

    if known_failure is not None:
        exc_type, why = known_failure
        request.applymarker(pytest.mark.xfail(raises=exc_type, reason=why))

    # explicit dtype to avoid casting
    obj = index_or_series(values, dtype=dtype)
    bound_method = getattr(obj.str, method_name)

    allowed_types = ["string", "unicode", "empty"]
    if method_name in ("decode", "get", "len", "slice"):
        # only these methods accept bytes input
        allowed_types.append("bytes")
    # as of v0.23.4, all methods except 'cat' are very lenient with the
    # allowed data types, just returning NaN for entries that error.
    # This could be changed with an 'errors'-kwarg to the `str`-accessor,
    # see discussion in GH 13877
    if method_name != "cat":
        allowed_types.extend(["mixed", "mixed-integer"])

    if inferred_dtype not in allowed_types:
        # GH 23011, GH 23163
        expected_msg = (
            f"Cannot use .str.{method_name} with values of "
            f"inferred dtype {repr(inferred_dtype)}."
        )
        with pytest.raises(TypeError, match=expected_msg):
            bound_method(*args, **kwargs)
        return

    # xref GH 23555, GH 23556
    with option_context("future.no_silent_downcasting", True):
        bound_method(*args, **kwargs)  # works!


def test_api_for_categorical(any_string_method, any_string_dtype):
    """
    String methods on a categorical Series match the object-dtype result.

    The categorical is rebuilt with object-dtype categories, then each
    string method is called on it and on the plain object-dtype Series and
    the two results are compared.
    """
    # https://github.com/pandas-dev/pandas/issues/10661
    letters = Series(list("aabb"), dtype=any_string_dtype)
    strings = letters + " " + letters

    categorical = strings.astype("category")
    object_categories = categorical.dtype.categories.astype("object")
    categorical = categorical.astype(CategoricalDtype(object_categories))
    assert isinstance(categorical.str, StringMethods)

    method_name, args, kwargs = any_string_method

    result = getattr(categorical.str, method_name)(*args, **kwargs)
    expected = getattr(strings.astype("object").str, method_name)(*args, **kwargs)

    if isinstance(result, DataFrame):
        tm.assert_frame_equal(result, expected)
        return
    if isinstance(result, Series):
        tm.assert_series_equal(result, expected)
        return

    # str.cat(others=None) returns string, for example
    assert result == expected
lab 1 is done 2024-10-02 22:15:59 +04:00			`import numpy as np`
			`import pytest`

			`from pandas import (`
			`CategoricalDtype,`
			`DataFrame,`
			`Index,`
			`MultiIndex,`
			`Series,`
			`_testing as tm,`
			`option_context,`
			`)`
			`from pandas.core.strings.accessor import StringMethods`

			`# subset of the full set from pandas/conftest.py`
			`_any_allowed_skipna_inferred_dtype = [`
			`("string", ["a", np.nan, "c"]),`
			`("bytes", [b"a", np.nan, b"c"]),`
			`("empty", [np.nan, np.nan, np.nan]),`
			`("empty", []),`
			`("mixed-integer", ["a", np.nan, 2]),`
			`]`
			`ids, _ = zip(*_any_allowed_skipna_inferred_dtype) # use inferred type as id`


			`@pytest.fixture(params=_any_allowed_skipna_inferred_dtype, ids=ids)`
			`def any_allowed_skipna_inferred_dtype(request):`
			`"""`
			`Fixture for all (inferred) dtypes allowed in StringMethods.__init__`

			`The covered (inferred) types are:`
			`* 'string'`
			`* 'empty'`
			`* 'bytes'`
			`* 'mixed'`
			`* 'mixed-integer'`

			`Returns`
			`-------`
			`inferred_dtype : str`
			`The string for the inferred dtype from _libs.lib.infer_dtype`
			`values : np.ndarray`
			`An array of object dtype that will be inferred to have`
			`inferred_dtype`

			`Examples`
			`--------`
			`>>> from pandas._libs import lib`
			`>>>`
			`>>> def test_something(any_allowed_skipna_inferred_dtype):`
			`... inferred_dtype, values = any_allowed_skipna_inferred_dtype`
			`... # will pass`
			`... assert lib.infer_dtype(values, skipna=True) == inferred_dtype`
			`...`
			`... # constructor for .str-accessor will also pass`
			`... Series(values).str`
			`"""`
			`inferred_dtype, values = request.param`
			`values = np.array(values, dtype=object) # object dtype to avoid casting`

			`# correctness of inference tested in tests/dtypes/test_inference.py`
			`return inferred_dtype, values`


			`def test_api(any_string_dtype):`
			`# GH 6106, GH 9322`
			`assert Series.str is StringMethods`
			`assert isinstance(Series([""], dtype=any_string_dtype).str, StringMethods)`


			`def test_api_mi_raises():`
			`# GH 23679`
			`mi = MultiIndex.from_arrays([["a", "b", "c"]])`
			`msg = "Can only use .str accessor with Index, not MultiIndex"`
			`with pytest.raises(AttributeError, match=msg):`
			`mi.str`
			`assert not hasattr(mi, "str")`


			`@pytest.mark.parametrize("dtype", [object, "category"])`
			`def test_api_per_dtype(index_or_series, dtype, any_skipna_inferred_dtype):`
			`# one instance of parametrized fixture`
			`box = index_or_series`
			`inferred_dtype, values = any_skipna_inferred_dtype`

			`t = box(values, dtype=dtype) # explicit dtype to avoid casting`

			`types_passing_constructor = [`
			`"string",`
			`"unicode",`
			`"empty",`
			`"bytes",`
			`"mixed",`
			`"mixed-integer",`
			`]`
			`if inferred_dtype in types_passing_constructor:`
			`# GH 6106`
			`assert isinstance(t.str, StringMethods)`
			`else:`
			`# GH 9184, GH 23011, GH 23163`
			`msg = "Can only use .str accessor with string values.*"`
			`with pytest.raises(AttributeError, match=msg):`
			`t.str`
			`assert not hasattr(t, "str")`


			`@pytest.mark.parametrize("dtype", [object, "category"])`
			`def test_api_per_method(`
			`index_or_series,`
			`dtype,`
			`any_allowed_skipna_inferred_dtype,`
			`any_string_method,`
			`request,`
			`):`
			`# this test does not check correctness of the different methods,`
			`# just that the methods work on the specified (inferred) dtypes,`
			`# and raise on all others`
			`box = index_or_series`

			`# one instance of each parametrized fixture`
			`inferred_dtype, values = any_allowed_skipna_inferred_dtype`
			`method_name, args, kwargs = any_string_method`

			`reason = None`
			`if box is Index and values.size == 0:`
			`if method_name in ["partition", "rpartition"] and kwargs.get("expand", True):`
			`raises = TypeError`
			`reason = "Method cannot deal with empty Index"`
			`elif method_name == "split" and kwargs.get("expand", None):`
			`raises = TypeError`
			`reason = "Split fails on empty Series when expand=True"`
			`elif method_name == "get_dummies":`
			`raises = ValueError`
			`reason = "Need to fortify get_dummies corner cases"`

			`elif (`
			`box is Index`
			`and inferred_dtype == "empty"`
			`and dtype == object`
			`and method_name == "get_dummies"`
			`):`
			`raises = ValueError`
			`reason = "Need to fortify get_dummies corner cases"`

			`if reason is not None:`
			`mark = pytest.mark.xfail(raises=raises, reason=reason)`
			`request.applymarker(mark)`

			`t = box(values, dtype=dtype) # explicit dtype to avoid casting`
			`method = getattr(t.str, method_name)`

			`bytes_allowed = method_name in ["decode", "get", "len", "slice"]`
			`# as of v0.23.4, all methods except 'cat' are very lenient with the`
			`# allowed data types, just returning NaN for entries that error.`
			# This could be changed with an 'errors'-kwarg to the `str`-accessor,
			`# see discussion in GH 13877`
			`mixed_allowed = method_name not in ["cat"]`

			`allowed_types = (`
			`["string", "unicode", "empty"]`
			`+ ["bytes"] * bytes_allowed`
			`+ ["mixed", "mixed-integer"] * mixed_allowed`
			`)`

			`if inferred_dtype in allowed_types:`
			`# xref GH 23555, GH 23556`
			`with option_context("future.no_silent_downcasting", True):`
			`method(*args, **kwargs) # works!`
			`else:`
			`# GH 23011, GH 23163`
			`msg = (`
			`f"Cannot use .str.{method_name} with values of "`
			`f"inferred dtype {repr(inferred_dtype)}."`
			`)`
			`with pytest.raises(TypeError, match=msg):`
			`method(*args, **kwargs)`


			`def test_api_for_categorical(any_string_method, any_string_dtype):`
			`# https://github.com/pandas-dev/pandas/issues/10661`
			`s = Series(list("aabb"), dtype=any_string_dtype)`
			`s = s + " " + s`
			`c = s.astype("category")`
			`c = c.astype(CategoricalDtype(c.dtype.categories.astype("object")))`
			`assert isinstance(c.str, StringMethods)`

			`method_name, args, kwargs = any_string_method`

			`result = getattr(c.str, method_name)(*args, **kwargs)`
			`expected = getattr(s.astype("object").str, method_name)(*args, **kwargs)`

			`if isinstance(result, DataFrame):`
			`tm.assert_frame_equal(result, expected)`
			`elif isinstance(result, Series):`
			`tm.assert_series_equal(result, expected)`
			`else:`
			`# str.cat(others=None) returns string, for example`
			`assert result == expected`