Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: add to_wkb / from_wkb #68

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ Input/Output

from_wkt
to_wkt
fron_wkb
jorisvandenbossche marked this conversation as resolved.
Show resolved Hide resolved
to_wkb
from_geoarrow
to_geoarrow

Expand Down
92 changes: 92 additions & 0 deletions src/io.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,44 @@ class ToWKT {
std::shared_ptr<s2geog::WKTWriter> m_writer;
};

#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \
(S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2)
class FromWKB {
public:
FromWKB(bool oriented, bool planar, float tessellate_tolerance = 100.0) {
s2geog::geoarrow::ImportOptions options;
options.set_oriented(oriented);
if (planar) {
auto tol = S1Angle::Radians(tessellate_tolerance / EARTH_RADIUS_METERS);
options.set_tessellate_tolerance(tol);
}
m_reader = std::make_shared<s2geog::WKBReader>(options);
}

PyObjectGeography operator()(py::bytes a) const {
return make_py_geography(m_reader->ReadFeature(a));
}

private:
std::shared_ptr<s2geog::WKBReader> m_reader;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use unique_ptr instead of shared_ptr? (Also in ToWKB) Unless I'm missing something?

Copy link
Collaborator Author

@jorisvandenbossche jorisvandenbossche Nov 17, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might be that pybind11 requires this class to be copyable? (if you pass it as argument to vectorize)
And that then gives the typical "use of deleted function" compilation errors if using unique_ptr instead of shared_ptr

};

class ToWKB {
public:
ToWKB() {
m_writer = std::make_shared<s2geog::WKBWriter>();
}

py::bytes operator()(PyObjectGeography a) const {
return m_writer->WriteFeature(a.as_geog_ptr()->geog());
}

private:
std::shared_ptr<s2geog::WKBWriter> m_writer;
};

#endif

void init_io(py::module& m) {
m.def(
"from_wkt",
Expand Down Expand Up @@ -109,4 +147,58 @@ void init_io(py::module& m) {
The number of decimal places to include in the output.

)pbdoc");

#if defined(S2GEOGRAPHY_VERSION_MAJOR) && \
(S2GEOGRAPHY_VERSION_MAJOR >= 1 || S2GEOGRAPHY_VERSION_MINOR >= 2)

m.def(
"from_wkb",
[](py::array_t<py::bytes> a, bool oriented, bool planar, float tessellate_tolerance) {
return py::vectorize(FromWKB(oriented, planar, tessellate_tolerance))(std::move(a));
},
py::arg("a"),
py::arg("oriented") = false,
py::arg("planar") = false,
py::arg("tessellate_tolerance") = 100.0,
R"pbdoc(
Creates geographies from the Well-Known Bytes (WKB) representation.

Parameters
----------
a : bytes or array_like
WKB objects.
oriented : bool, default False
Set to True if polygon ring directions are known to be correct
(i.e., exterior rings are defined counter clockwise and interior
rings are defined clockwise).
By default (False), it will return the polygon with the smaller
area.
planar : bool, default False
If set to True, the edges of linestrings and polygons are assumed
to be linear on the plane. In that case, additional points will
be added to the line while creating the geography objects, to
ensure every point is within 100m of the original line.
By default (False), it is assumed that the edges are spherical
(i.e. represent the shortest path on the sphere between two points).
tessellate_tolerance : float, default 100.0
The maximum distance in meters that a point must be moved to
satisfy the planar edge constraint. This is only used if `planar`
is set to True.

)pbdoc");

m.def("to_wkb",
py::vectorize(ToWKB()),
py::arg("a"),
R"pbdoc(
Returns the WKB representation of each geography.

Parameters
----------
a : :py:class:`Geography` or array_like
Geography object(s)

)pbdoc");

#endif
}
29 changes: 29 additions & 0 deletions src/pybind11.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,18 @@ struct vectorize_arg<py::str> {
using type = conditional_t<vectorize, array_t<remove_cv_t<call_type>, array::forcecast>, T>;
};

template <>
struct vectorize_arg<py::bytes> {
using T = py::bytes;
// The wrapped function gets called with this type:
using call_type = T;
// Is this a vectorized argument?
static constexpr bool vectorize = true;
// Accept this type: an array for vectorized types, otherwise the type
// as-is:
using type = conditional_t<vectorize, array_t<remove_cv_t<call_type>, array::forcecast>, T>;
};

// Register PyObjectGeography and str as a valid numpy dtype (numpy.object alias)
// from: https://github.com/pybind/pybind11/pull/1152
template <>
Expand All @@ -166,6 +178,18 @@ struct npy_format_descriptor<py::str> {
}
};

template <>
struct npy_format_descriptor<py::bytes> {
static constexpr auto name = _("object");
enum { value = npy_api::NPY_OBJECT_ };
static pybind11::dtype dtype() {
if (auto ptr = npy_api::get().PyArray_DescrFromType_(value)) {
return reinterpret_borrow<pybind11::dtype>(ptr);
}
pybind11_fail("Unsupported buffer format!");
}
};

// Override signature type hint for vectorized Geography arguments
template <int Flags>
struct handle_type_name<array_t<spherely::PyObjectGeography, Flags>> {
Expand All @@ -191,6 +215,11 @@ object cast(T &&value) {
return value;
}

template <typename T, typename detail::enable_if_t<std::is_same<T, py::bytes>::value, int> = 0>
object cast(T &&value) {
return value;
}

} // namespace pybind11

#endif // SPHERELY_PYBIND11_H_
7 changes: 7 additions & 0 deletions src/spherely.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -207,13 +207,20 @@ area: _VFunc_Nin1optradius_Nout1[Literal["area"], float, float]
# io functions

to_wkt: _VFunc_Nin1_Nout1[Literal["to_wkt"], str, object]
to_wkb: _VFunc_Nin1_Nout1[Literal["to_wkb"], bytes, object]

def from_wkt(
a: Iterable[str],
oriented: bool = False,
planar: bool = False,
tessellate_tolerance: float = 100.0,
) -> npt.NDArray[Any]: ...
def from_wkb(
a: Iterable[bytes],
oriented: bool = False,
planar: bool = False,
tessellate_tolerance: float = 100.0,
) -> npt.NDArray[Any]: ...

class ArrowArrayExportable(Protocol):
def __arrow_c_array__(
Expand Down
129 changes: 121 additions & 8 deletions tests/test_io.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,18 @@
import struct

import numpy as np
import pytest
from packaging.version import Version

import spherely


needs_s2geography_0_2 = pytest.mark.skipif(
Version(spherely.__s2geography_version__) < Version("0.2.0"),
reason="Needs s2geography >= 0.2.0",
)


def test_from_wkt():
result = spherely.from_wkt(["POINT (1 1)", "POINT(2 2)", "POINT(3 3)"])
expected = spherely.points([1, 2, 3], [1, 2, 3])
Expand Down Expand Up @@ -47,10 +55,7 @@ def test_from_wkt_wrong_type():
)


@pytest.mark.skipif(
Version(spherely.__s2geography_version__) < Version("0.2.0"),
reason="Needs s2geography >= 0.2.0",
)
@needs_s2geography_0_2
def test_from_wkt_oriented():
# by default re-orients the inner ring
result = spherely.from_wkt(polygon_with_bad_hole_wkt)
Expand All @@ -64,10 +69,7 @@ def test_from_wkt_oriented():
spherely.from_wkt(polygon_with_bad_hole_wkt, oriented=True)


@pytest.mark.skipif(
Version(spherely.__s2geography_version__) < Version("0.2.0"),
reason="Needs s2geography >= 0.2.0",
)
@needs_s2geography_0_2
def test_from_wkt_planar():
result = spherely.from_wkt("LINESTRING (-64 45, 0 45)")
assert spherely.distance(result, spherely.point(-30.1, 45)) > 10000
Expand Down Expand Up @@ -108,3 +110,114 @@ def test_to_wkt_precision():

result = spherely.to_wkt(arr, precision=2)
assert result[0] == "POINT (0.12 0.57)"


POINT11_WKB = struct.pack("<BI2d", 1, 1, 1.0, 1.0)
NAN = struct.pack("<d", float("nan"))
POINT_NAN_WKB = struct.pack("<BI", 1, 1) + (NAN * 2)
MULTIPOINT_NAN_WKB = struct.pack("<BII", 1, 4, 1) + POINT_NAN_WKB
GEOMETRYCOLLECTION_NAN_WKB = struct.pack("<BII", 1, 7, 1) + POINT_NAN_WKB
INVALID_WKB = bytes.fromhex(
"01030000000100000002000000507daec600b1354100de02498e5e3d41306ea321fcb03541a011a53d905e3d41"
) # noqa: E501


@needs_s2geography_0_2
def test_from_wkb_point_empty():
result = spherely.from_wkb([POINT11_WKB, POINT_NAN_WKB, MULTIPOINT_NAN_WKB])
# empty MultiPoint is converted to empty Point
expected = spherely.from_wkt(["POINT (1 1)", "POINT EMPTY", "POINT EMPTY"])
assert spherely.equals(result, expected).all()

result = spherely.from_wkb(GEOMETRYCOLLECTION_NAN_WKB)
assert str(result) == "GEOMETRYCOLLECTION (POINT EMPTY)"


@needs_s2geography_0_2
def test_from_wkb_invalid():
with pytest.raises(RuntimeError, match="Expected endian byte"):
spherely.from_wkb(b"")

with pytest.raises(RuntimeError):
spherely.from_wkb([b"\x01\x01\x00\x00\x00\x00"])

# TODO should this raise an error?
# with pytest.raises(RuntimeError):
result = spherely.from_wkb(INVALID_WKB)
assert str(result) == "POLYGON ((108.7761 -10.2852, 108.7761 -10.2852))"


@needs_s2geography_0_2
def test_from_wkb_invalid_type():
with pytest.raises(TypeError, match="expected bytes, str found"):
spherely.from_wkb("POINT (1 1)")


@needs_s2geography_0_2
@pytest.mark.parametrize(
"geog",
[
spherely.point(45, 50),
spherely.multipoint([(5, 50), (6, 51)]),
spherely.linestring([(5, 50), (6, 51)]),
spherely.multilinestring([[(5, 50), (6, 51)], [(15, 60), (16, 61)]]),
spherely.polygon([(5, 50), (5, 60), (6, 60), (6, 51)]),
# with hole
spherely.polygon(
shell=[(5, 60), (6, 60), (6, 50), (5, 50)],
holes=[[(5.1, 59), (5.9, 59), (5.9, 51), (5.1, 51)]],
),
spherely.multipolygon(
[
spherely.polygon([(5, 50), (5, 60), (6, 60), (6, 51)]),
spherely.polygon([(10, 100), (10, 160), (11, 160), (11, 100)]),
]
),
spherely.collection([spherely.point(40, 50)]),
spherely.collection(
[
spherely.point(0, 0),
spherely.linestring([(0, 0), (1, 1)]),
spherely.polygon([(0, 0), (1, 0), (1, 1)]),
]
),
],
)
def test_wkb_roundtrip(geog):
wkb = spherely.to_wkb(geog)
result = spherely.from_wkb(wkb)
# roundtrip through Geography unit vector is not exact, so equals can fail
# TODO properly test this once `equals` supports snapping/precision
# assert spherely.equals(result, geog)
assert str(result) == str(geog)


@needs_s2geography_0_2
def test_from_wkb_oriented():
# WKB for POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0)) -> non-CCW box
wkb = bytes.fromhex(
"010300000001000000050000000000000000000000000000000000000000000000000000000000000000002440000000000000244000000000000024400000000000002440000000000000000000000000000000000000000000000000"
) # noqa: E501

result = spherely.from_wkb(wkb)
# by default re-oriented to take the smaller polygon
assert str(result) == "POLYGON ((10 0, 10 10, 0 10, 0 0, 10 0))"
assert spherely.within(spherely.point(5, 5), result)

result = spherely.from_wkb(wkb, oriented=True)
assert str(result) == "POLYGON ((0 0, 0 10, 10 10, 10 0, 0 0))"
assert not spherely.within(spherely.point(5, 5), result)


@needs_s2geography_0_2
def test_from_wkb_planar():
wkb = spherely.to_wkb(spherely.from_wkt("LINESTRING (-64 45, 0 45)"))

result = spherely.from_wkb(wkb)
assert spherely.distance(result, spherely.point(-30.1, 45)) > 10000

result = spherely.from_wkb(wkb, planar=True)
assert spherely.distance(result, spherely.point(-30.1, 45)) < 100

result = spherely.from_wkb(wkb, planar=True, tessellate_tolerance=10)
assert spherely.distance(result, spherely.point(-30.1, 45)) < 10
Loading