Skip to content

Commit

Permalink
Fixed dropping the geometry column
Browse files Browse the repository at this point in the history
  • Loading branch information
TomAugspurger committed Dec 21, 2024
1 parent f629462 commit 0d41c8a
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 0 deletions.
24 changes: 24 additions & 0 deletions dask_geopandas/_expr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import Literal

import dask_expr as dx

import geopandas


def _drop(df: geopandas.GeoDataFrame, columns, errors):
    """Return the result of ``df.drop`` for the given ``columns``.

    ``columns`` and ``errors`` are forwarded unchanged as keyword
    arguments to ``df.drop``; whatever that call returns is handed back
    to the caller.
    """
    dropped = df.drop(columns=columns, errors=errors)
    return dropped


def _validate_axis(axis=0, none_is_zero: bool = True) -> None | Literal[0, 1]:
if axis not in (0, 1, "index", "columns", None):
raise ValueError(f"No axis named {axis}")
# convert to numeric axis
numeric_axis: dict[str | None, Literal[0, 1]] = {"index": 0, "columns": 1}
if none_is_zero:
numeric_axis[None] = 0

return numeric_axis.get(axis, axis)


class Drop(dx.expr.Drop):
    """``dx.expr.Drop`` variant that uses this module's ``_drop`` as its operation.

    ``_drop`` calls ``df.drop(columns=..., errors=...)`` and returns the
    result.
    """

    # NOTE(review): presumably the base class's operation drops in place,
    # which is why it is replaced here -- confirm against dask_expr.
    operation = staticmethod(_drop)
20 changes: 20 additions & 0 deletions dask_geopandas/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import dask_geopandas

from ._expr import Drop, _validate_axis
from .geohash import _geohash
from .hilbert_distance import _hilbert_distance
from .morton_distance import _morton_distance
Expand Down Expand Up @@ -868,6 +869,25 @@ def explode(self, column=None, ignore_index=False, index_parts=None):
enforce_metadata=False,
)

@derived_from(geopandas.GeoDataFrame)
def drop(self, labels=None, axis=0, columns=None, errors="raise"):
    # https://github.com/geopandas/dask-geopandas/issues/321
    # Override to avoid an inplace drop, since we need
    # to convert from a GeoDataFrame to a DataFrame when dropping
    # the geometry column.
    #
    # Parameters: ``labels`` with ``axis=1`` (or ``axis="columns"``) and
    # ``columns`` are two ways of naming the columns to drop; ``errors``
    # is forwarded to the ``Drop`` expression unchanged.
    #
    # Raises TypeError when neither ``labels`` nor ``columns`` is given,
    # ValueError (from ``_validate_axis``) for an unknown axis, and
    # NotImplementedError for a row-wise drop (axis 0 without ``columns``).
    if columns is None and labels is None:
        raise TypeError("must either specify 'columns' or 'labels'")

    axis = _validate_axis(axis)

    if axis == 1:
        # Use an explicit ``None`` check rather than ``labels or columns``:
        # a falsy-but-valid label (e.g. a column named ``0`` or ``""``)
        # must not be discarded, and array-like labels (pandas Index,
        # numpy array) have no unambiguous truth value.
        if labels is not None:
            columns = labels
    elif axis == 0 and columns is None:
        raise NotImplementedError(
            "Drop currently only works for axis=1 or when columns is not None"
        )
    return new_collection(Drop(self, columns=columns, errors=errors))


from_geopandas = dx.from_pandas

Expand Down
12 changes: 12 additions & 0 deletions dask_geopandas/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1046,3 +1046,15 @@ def get_chunk(n):

expected = geopandas.GeoDataFrame({"col": [1, 1], "geometry": [Point(1, 1)] * 2})
assert_geodataframe_equal(ddf.compute(), expected)


def test_drop():
    # Regression test for
    # https://github.com/geopandas/dask-geopandas/issues/321
    gdf = geopandas.GeoDataFrame({"col": [1], "geometry": [Point(1, 1)]})
    ddf = dask_geopandas.from_geopandas(gdf, npartitions=1)

    # Dropping the geometry column must yield a plain dask DataFrame.
    without_geometry = ddf.drop(columns="geometry")
    assert type(without_geometry) is dd.DataFrame

    # Dropping any other column must keep the GeoDataFrame type.
    without_col = ddf.drop(columns="col")
    assert type(without_col) is dask_geopandas.GeoDataFrame

0 comments on commit 0d41c8a

Please sign in to comment.