# Tests for ``pandas.core.interchange.utils.dtype_to_arrow_c_fmt``.
import numpy as np
|
|
import pytest
|
|
|
|
import pandas as pd
|
|
from pandas.core.interchange.utils import dtype_to_arrow_c_fmt
|
|
|
|
# TODO: use ArrowSchema to get the reference C-string.
# At the time of writing, there is no way to access the ArrowSchema that holds
# a type's format string from Python. The only way to access it is to export
# the structure to a C pointer; see the DataType._export_to_c() method defined in
# https://github.com/apache/arrow/blob/master/python/pyarrow/types.pxi
|
|
|
|
|
|
@pytest.mark.parametrize(
    "pandas_dtype, c_string",
    [
        (np.dtype("bool"), "b"),
        (np.dtype("int8"), "c"),
        (np.dtype("uint8"), "C"),
        (np.dtype("int16"), "s"),
        (np.dtype("uint16"), "S"),
        (np.dtype("int32"), "i"),
        (np.dtype("uint32"), "I"),
        (np.dtype("int64"), "l"),
        (np.dtype("uint64"), "L"),
        (np.dtype("float16"), "e"),
        (np.dtype("float32"), "f"),
        (np.dtype("float64"), "g"),
        (pd.Series(["a"]).dtype, "u"),
        (
            pd.Series([0]).astype("datetime64[ns]").dtype,
            "tsn:",
        ),
        (pd.CategoricalDtype(["a"]), "l"),
        (np.dtype("O"), "u"),
    ],
)
def test_dtype_to_arrow_c_fmt(pandas_dtype, c_string):  # PR01
    """
    Test ``dtype_to_arrow_c_fmt`` utility function.

    Parameters
    ----------
    pandas_dtype : dtype
        The dtype object handed to ``dtype_to_arrow_c_fmt``.
    c_string : str
        The format string the function is expected to return for
        ``pandas_dtype``.
    """
    assert dtype_to_arrow_c_fmt(pandas_dtype) == c_string
|
|
|
|
|
|
@pytest.mark.parametrize(
    "pa_dtype, args_kwargs, c_string",
    [
        ["null", {}, "n"],
        ["bool_", {}, "b"],
        ["uint8", {}, "C"],
        ["uint16", {}, "S"],
        ["uint32", {}, "I"],
        ["uint64", {}, "L"],
        ["int8", {}, "c"],
        # NOTE(review): "S" is also the expectation for uint16 above, while the
        # NumPy int16 case in this file expects "s". Presumably this pins the
        # current implementation's mapping -- confirm against the Arrow C data
        # interface format strings before changing either side.
        ["int16", {}, "S"],
        ["int32", {}, "i"],
        ["int64", {}, "l"],
        ["float16", {}, "e"],
        ["float32", {}, "f"],
        ["float64", {}, "g"],
        ["string", {}, "u"],
        ["binary", {}, "z"],
        ["time32", ("s",), "tts"],
        ["time32", ("ms",), "ttm"],
        ["time64", ("us",), "ttu"],
        ["time64", ("ns",), "ttn"],
        ["date32", {}, "tdD"],
        ["date64", {}, "tdm"],
        ["timestamp", {"unit": "s"}, "tss:"],
        ["timestamp", {"unit": "ms"}, "tsm:"],
        ["timestamp", {"unit": "us"}, "tsu:"],
        ["timestamp", {"unit": "ns"}, "tsn:"],
        ["timestamp", {"unit": "ns", "tz": "UTC"}, "tsn:UTC"],
        ["duration", ("s",), "tDs"],
        ["duration", ("ms",), "tDm"],
        ["duration", ("us",), "tDu"],
        ["duration", ("ns",), "tDn"],
        ["decimal128", {"precision": 4, "scale": 2}, "d:4,2"],
    ],
)
def test_dtype_to_arrow_c_fmt_arrowdtype(pa_dtype, args_kwargs, c_string):
    """
    Test ``dtype_to_arrow_c_fmt`` with ``pd.ArrowDtype`` inputs.

    Parameters
    ----------
    pa_dtype : str
        Name of the ``pyarrow`` attribute that is called to build the type.
    args_kwargs : tuple or dict
        Arguments for that call: a tuple is unpacked positionally, a
        non-empty dict is unpacked as keywords, and an empty value means the
        attribute is called with no arguments.
    c_string : str
        The format string ``dtype_to_arrow_c_fmt`` is expected to return.
    """
    # GH 52323
    pa = pytest.importorskip("pyarrow")
    # Look the factory up once; only the call signature differs per case.
    type_factory = getattr(pa, pa_dtype)
    if not args_kwargs:
        pa_type = type_factory()
    elif isinstance(args_kwargs, tuple):
        pa_type = type_factory(*args_kwargs)
    else:
        pa_type = type_factory(**args_kwargs)
    arrow_type = pd.ArrowDtype(pa_type)
    assert dtype_to_arrow_c_fmt(arrow_type) == c_string
|