diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 0ff8209dcd4..1828c5ce97b 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -789,12 +789,11 @@ def __cuda_array_interface__(self) -> Mapping[str, Any]: def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: + ) -> pd.Index: if nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) elif arrow_type: raise NotImplementedError(f"{arrow_type=} is not implemented.") @@ -828,7 +827,7 @@ def to_pandas( data = pd.Categorical.from_codes( codes, categories=cats.to_pandas(), ordered=col.ordered ) - return pd.Series(data, index=index) + return pd.Index(data) def to_arrow(self) -> pa.Array: """Convert to PyArrow Array.""" diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index 59bae179497..68079371b85 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -190,10 +190,9 @@ def __repr__(self): def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: + ) -> pd.Index: """Convert object to pandas type. The default implementation falls back to PyArrow for the conversion. @@ -208,15 +207,9 @@ def to_pandas( raise NotImplementedError(f"{nullable=} is not implemented.") pa_array = self.to_arrow() if arrow_type: - return pd.Series( - pd.arrays.ArrowExtensionArray(pa_array), index=index - ) + return pd.Index(pd.arrays.ArrowExtensionArray(pa_array)) else: - pd_series = pa_array.to_pandas() - - if index is not None: - pd_series.index = index - return pd_series + return pd.Index(pa_array.to_pandas()) @property def values_host(self) -> "np.ndarray": diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index 27f31c8f500..057169aa7e1 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -840,27 +840,15 @@ def __init__( def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) - elif nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") - elif arrow_type: - return pd.Series( - pd.arrays.ArrowExtensionArray(self.to_arrow()), index=index - ) + ) -> pd.Index: + if arrow_type or nullable: + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) else: - series = self._local_time.to_pandas().dt.tz_localize( + return self._local_time.to_pandas().tz_localize( self.dtype.tz, ambiguous="NaT", nonexistent="NaT" ) - if index is not None: - series.index = index - return series def to_arrow(self): return pa.compute.assume_timezone( diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 7bd693966dc..f24ca3fdad1 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -1,6 +1,4 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. -from typing import Optional - import pandas as pd import pyarrow as pa @@ -109,28 +107,21 @@ def as_interval_column(self, dtype): def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: + ) -> pd.Index: # Note: This does not handle null values in the interval column. # However, this exact sequence (calling __from_arrow__ on the output of # self.to_arrow) is currently the best known way to convert interval # types into pandas (trying to convert the underlying numerical columns # directly is problematic), so we're stuck with this for now. - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) if nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) elif arrow_type: raise NotImplementedError(f"{arrow_type=} is not implemented.") pd_type = self.dtype.to_pandas() - return pd.Series( - pd_type.__from_arrow__(self.to_arrow()), index=index, dtype=pd_type - ) + return pd.Index(pd_type.__from_arrow__(self.to_arrow()), dtype=pd_type) def element_indexing(self, index: int): result = super().element_indexing(index) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index 1c2bcbef2ec..8f8ee46c796 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -292,25 +292,13 @@ def _transform_leaves(self, func, *args, **kwargs) -> Self: def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: - # Can't rely on Column.to_pandas implementation for lists. - # Need to perform `to_pylist` to preserve list types. - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) - if nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") - pa_array = self.to_arrow() - if arrow_type: - return pd.Series( - pd.arrays.ArrowExtensionArray(pa_array), index=index - ) + ) -> pd.Index: + if arrow_type or nullable: + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) else: - return pd.Series(pa_array.tolist(), dtype="object", index=index) + return pd.Index(self.to_arrow().tolist(), dtype="object") class ListMethods(ColumnMethods): diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index bab862f775f..fb413959eb9 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -674,18 +674,13 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase: def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: + ) -> pd.Index: if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) elif arrow_type: - return pd.Series( - pd.arrays.ArrowExtensionArray(self.to_arrow()), index=index - ) + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) elif ( nullable and ( @@ -697,11 +692,11 @@ def to_pandas( ): arrow_array = self.to_arrow() pandas_array = pandas_nullable_dtype.__from_arrow__(arrow_array) # type: ignore[attr-defined] - return pd.Series(pandas_array, copy=False, index=index) + return pd.Index(pandas_array, copy=False) elif self.dtype.kind in set("iuf") and not self.has_nulls(): - return pd.Series(self.values_host, copy=False, index=index) + return pd.Index(self.values_host, copy=False) else: - return super().to_pandas(index=index, nullable=nullable) + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) def _reduction_result_dtype(self, reduction_op: str) -> Dtype: col_dtype = self.dtype diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 40e58e14612..fd98d0dc163 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -5783,23 +5783,14 @@ def values(self) -> cupy.ndarray: def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) - if arrow_type: - return pd.Series( - pd.arrays.ArrowExtensionArray(self.to_arrow()), index=index - ) - elif nullable: + ) -> pd.Index: + if nullable and not arrow_type: pandas_array = pd.StringDtype().__from_arrow__(self.to_arrow()) - return pd.Series(pandas_array, copy=False, index=index) + return pd.Index(pandas_array, copy=False) else: - return super().to_pandas(index=index, nullable=nullable) + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) def can_cast_safely(self, to_dtype: Dtype) -> bool: to_dtype = cudf.api.types.dtype(to_dtype) diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index 1b2ffcc2700..6dd35570b95 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -2,7 +2,6 @@ from __future__ import annotations from functools import cached_property -from typing import Optional import pandas as pd import pyarrow as pa @@ -60,25 +59,15 @@ def to_arrow(self): def to_pandas( self, *, - index: Optional[pd.Index] = None, nullable: bool = False, arrow_type: bool = False, - ) -> pd.Series: + ) -> pd.Index: # We cannot go via Arrow's `to_pandas` because of the following issue: # https://issues.apache.org/jira/browse/ARROW-12680 - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) - elif nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") - pa_array = self.to_arrow() - if arrow_type: - return pd.Series( - pd.arrays.ArrowExtensionArray(pa_array), index=index - ) + if arrow_type or nullable: + return super().to_pandas(nullable=nullable, arrow_type=arrow_type) else: - return pd.Series(pa_array.tolist(), dtype="object", index=index) + return pd.Index(self.to_arrow().tolist(), dtype="object") @cached_property def memory_usage(self): diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 0fc36fa80e4..4c55b5427de 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -5321,9 +5321,7 @@ def to_pandas( """ out_index = self.index.to_pandas() out_data = { - i: col.to_pandas( - index=out_index, nullable=nullable, arrow_type=arrow_type - ) + i: col.to_pandas(nullable=nullable, arrow_type=arrow_type) for i, col in enumerate(self._data.columns) } diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index 9b4c5473438..4b09765fa46 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -1568,10 +1568,11 @@ def any(self): def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.Index: - return pd.Index( - self._values.to_pandas(nullable=nullable, arrow_type=arrow_type), - name=self.name, + result = self._column.to_pandas( + nullable=nullable, arrow_type=arrow_type ) + result.name = self.name + return result def append(self, other): if is_list_like(other): @@ -2191,23 +2192,10 @@ def isocalendar(self): def to_pandas( self, *, nullable: bool = False, arrow_type: bool = False ) -> pd.DatetimeIndex: - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) - elif nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") - - result = self._values.to_pandas(arrow_type=arrow_type) - if arrow_type: - return pd.Index(result, name=self.name) - else: - freq = ( - self._freq._maybe_as_fast_pandas_offset() - if self._freq is not None - else None - ) - return pd.DatetimeIndex(result, name=self.name, freq=freq) + result = super().to_pandas(nullable=nullable, arrow_type=arrow_type) + if not arrow_type and self._freq is not None: + result.freq = self._freq._maybe_as_fast_pandas_offset() + return result @_cudf_nvtx_annotate def _get_dt_field(self, field): @@ -2527,23 +2515,6 @@ def __getitem__(self, index): return pd.Timedelta(value) return value - @_cudf_nvtx_annotate - def to_pandas( - self, *, nullable: bool = False, arrow_type: bool = False - ) -> pd.TimedeltaIndex: - if arrow_type and nullable: - raise ValueError( - f"{arrow_type=} and {nullable=} cannot both be set." - ) - elif nullable: - raise NotImplementedError(f"{nullable=} is not implemented.") - - result = self._values.to_pandas(arrow_type=arrow_type) - if arrow_type: - return pd.Index(result, name=self.name) - else: - return pd.TimedeltaIndex(result, name=self.name) - @property # type: ignore @_cudf_nvtx_annotate def days(self): diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index a5b204ef346..169f7c11cf9 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2022,11 +2022,11 @@ def to_pandas( index = self.index.to_pandas() else: index = None # type: ignore[assignment] - s = self._column.to_pandas( - index=index, nullable=nullable, arrow_type=arrow_type + return pd.Series( + self._column.to_pandas(nullable=nullable, arrow_type=arrow_type), + index=index, + name=self.name, ) - s.name = self.name - return s @property # type: ignore @_cudf_nvtx_annotate diff --git a/python/cudf/cudf/tests/test_cuda_array_interface.py b/python/cudf/cudf/tests/test_cuda_array_interface.py index f98c3ad0475..06d63561fc1 100644 --- a/python/cudf/cudf/tests/test_cuda_array_interface.py +++ b/python/cudf/cudf/tests/test_cuda_array_interface.py @@ -175,12 +175,12 @@ def test_column_from_ephemeral_cupy_try_lose_reference(): a = cudf.Series(cupy.asarray([1, 2, 3]))._column a = cudf.core.column.as_column(a) b = cupy.asarray([1, 1, 1]) # noqa: F841 - assert_eq(pd.Series([1, 2, 3]), a.to_pandas()) + assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) a = cudf.Series(cupy.asarray([1, 2, 3]))._column a.name = "b" b = cupy.asarray([1, 1, 1]) # noqa: F841 - assert_eq(pd.Series([1, 2, 3]), a.to_pandas()) + assert_eq(pd.Index([1, 2, 3]), a.to_pandas()) @pytest.mark.xfail(