Skip to content

Commit

Permalink
ENH: add from_wkt and from_wkb tools
Browse files Browse the repository at this point in the history
  • Loading branch information
giorgiobasile committed Jun 21, 2024
1 parent aa1b52f commit a629de2
Show file tree
Hide file tree
Showing 5 changed files with 134 additions and 0 deletions.
6 changes: 6 additions & 0 deletions dask_geopandas/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
if backends.QUERY_PLANNING_ON:
from .expr import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
Expand All @@ -13,6 +15,8 @@
else:
from .core import (
points_from_xy,
from_wkt,
from_wkb,
GeoDataFrame,
GeoSeries,
from_geopandas,
Expand All @@ -30,6 +34,8 @@

__all__ = [
"points_from_xy",
"from_wkt",
"from_wkb",
"GeoDataFrame",
"GeoSeries",
"from_geopandas",
Expand Down
50 changes: 50 additions & 0 deletions dask_geopandas/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,56 @@ def func(data, x, y, z):
)


def from_wkt(wkt, crs=None):
    """
    Convert a dask Series of WKT objects to a GeoSeries.

    Parameters
    ----------
    wkt : dask Series
        A dask Series containing WKT objects.
    crs : value, optional
        Coordinate Reference System of the geometry objects. Can be anything
        accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

    Returns
    -------
    GeoSeries
        Built partition by partition; each output partition reuses the index
        of the corresponding input partition.
    """

    def func(data):
        # Reuse the partition's own index so the output stays aligned with it.
        return geopandas.GeoSeries.from_wkt(data, index=data.index, crs=crs)

    # Give the meta the same CRS as the partitions so that metadata-based
    # inspection of the result (before compute) agrees with the computed data.
    meta = geopandas.GeoSeries(crs=crs)
    return wkt.map_partitions(func, meta=meta, token="from_wkt")


def from_wkb(wkb, crs=None):
    """
    Convert a dask Series of WKB objects to a GeoSeries.

    Parameters
    ----------
    wkb : dask Series
        A dask Series containing WKB objects.
    crs : value, optional
        Coordinate Reference System of the geometry objects. Can be anything
        accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

    Returns
    -------
    GeoSeries
        Built partition by partition; each output partition reuses the index
        of the corresponding input partition.
    """

    def func(data):
        # Reuse the partition's own index so the output stays aligned with it.
        return geopandas.GeoSeries.from_wkb(data, index=data.index, crs=crs)

    # Give the meta the same CRS as the partitions so that metadata-based
    # inspection of the result (before compute) agrees with the computed data.
    meta = geopandas.GeoSeries(crs=crs)
    return wkb.map_partitions(func, meta=meta, token="from_wkb")


for name in [
"area",
"geom_type",
Expand Down
50 changes: 50 additions & 0 deletions dask_geopandas/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -916,6 +916,56 @@ def func(data, x, y, z):
)


def from_wkt(wkt, crs=None):
    """
    Convert a dask Series of WKT objects to a GeoSeries.

    Parameters
    ----------
    wkt : dask Series
        A dask Series containing WKT objects.
    crs : value, optional
        Coordinate Reference System of the geometry objects. Can be anything
        accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

    Returns
    -------
    GeoSeries
        Built partition by partition; each output partition reuses the index
        of the corresponding input partition.
    """

    def func(data):
        # Reuse the partition's own index so the output stays aligned with it.
        return geopandas.GeoSeries.from_wkt(data, index=data.index, crs=crs)

    # Give the meta the same CRS as the partitions so that metadata-based
    # inspection of the result (before compute) agrees with the computed data.
    meta = geopandas.GeoSeries(crs=crs)
    return wkt.map_partitions(func, meta=meta, token="from_wkt")


def from_wkb(wkb, crs=None):
    """
    Convert a dask Series of WKB objects to a GeoSeries.

    Parameters
    ----------
    wkb : dask Series
        A dask Series containing WKB objects.
    crs : value, optional
        Coordinate Reference System of the geometry objects. Can be anything
        accepted by
        :meth:`pyproj.CRS.from_user_input() <pyproj.crs.CRS.from_user_input>`,
        such as an authority string (eg "EPSG:4326") or a WKT string.

    Returns
    -------
    GeoSeries
        Built partition by partition; each output partition reuses the index
        of the corresponding input partition.
    """

    def func(data):
        # Reuse the partition's own index so the output stays aligned with it.
        return geopandas.GeoSeries.from_wkb(data, index=data.index, crs=crs)

    # Give the meta the same CRS as the partitions so that metadata-based
    # inspection of the result (before compute) agrees with the computed data.
    meta = geopandas.GeoSeries(crs=crs)
    return wkb.map_partitions(func, meta=meta, token="from_wkb")


for name in [
"area",
"geom_type",
Expand Down
26 changes: 26 additions & 0 deletions dask_geopandas/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,32 @@ def test_points_from_xy_with_crs():
assert_geoseries_equal(actual.compute(), expected)


def test_from_wkt():
    """Check that ``dask_geopandas.from_wkt`` matches ``GeoSeries.from_wkt``."""
    polygons = [
        "POLYGON ((-64.8 32.3, -65.5 18.3, -80.3 25.2, -64.8 32.3))",
        "POLYGON ((-81.079102 35.496456, -81.166992 31.914868, -75.541992 31.914868, -75.629883 35.675147, -81.079102 35.496456))",  # noqa E501
    ]

    # Two partitions, so the conversion is exercised on more than one chunk.
    frame = dd.from_pandas(pd.DataFrame({"wkt": polygons}), npartitions=2)
    result = dask_geopandas.from_wkt(frame["wkt"], crs="EPSG:4326")
    assert isinstance(result, dask_geopandas.GeoSeries)

    # The in-memory geopandas conversion is the reference result.
    reference = geopandas.GeoSeries.from_wkt(polygons, crs="EPSG:4326")
    assert_geoseries_equal(result.compute(), reference)


def test_from_wkb():
    """Check that ``dask_geopandas.from_wkb`` matches ``GeoSeries.from_wkb``."""
    hex_polygons = [
        "0103000000010000000400000033333333333350c0666666666626404000000000006050c0cdcccccccc4c324033333333331354c0333333333333394033333333333350c06666666666264040",  # noqa E501
        "0103000000010000000500000016c3d501104554c095f3c5de8bbf414064ac36ffaf4a54c02c280cca34ea3f4064ac36ffafe252c02c280cca34ea3f409c53c90050e852c00b7f86376bd6414016c3d501104554c095f3c5de8bbf4140",  # noqa E501
    ]

    # Two partitions, so the conversion is exercised on more than one chunk.
    frame = dd.from_pandas(pd.DataFrame({"wkb": hex_polygons}), npartitions=2)
    result = dask_geopandas.from_wkb(frame["wkb"], crs="EPSG:4326")
    assert isinstance(result, dask_geopandas.GeoSeries)

    # The in-memory geopandas conversion is the reference result.
    reference = geopandas.GeoSeries.from_wkb(hex_polygons, crs="EPSG:4326")
    assert_geoseries_equal(result.compute(), reference)


def test_geodataframe_crs(geodf_points_crs):
df = geodf_points_crs
original = df.crs
Expand Down
2 changes: 2 additions & 0 deletions doc/source/docs/reference/tools.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ Tools
sjoin
clip
points_from_xy
from_wkt
from_wkb

0 comments on commit a629de2

Please sign in to comment.