Skip to content

Commit

Permalink
Fix union array interop (#718)
Browse files Browse the repository at this point in the history
### Change list

- Fix DataType creation to match the spec, with hard-coded type ids.
- Don't include geoarrow metadata on the underlying arrays when exporting
to arrow-rs. Only include geoarrow metadata on the top-level
`geoarrow.geometry` array.
- We no longer need a `map` attribute in the struct because the ordering
of the fields is guaranteed by the spec now.
- Don't wrap the underlying arrays in an `Option`.

Closes #717, closes #714

Unblocks #646

Progress towards #679
  • Loading branch information
kylebarron authored Aug 26, 2024
1 parent 4655458 commit e6f900b
Show file tree
Hide file tree
Showing 6 changed files with 503 additions and 605 deletions.
242 changes: 96 additions & 146 deletions src/algorithm/native/cast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -737,10 +737,10 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
use GeoDataType::*;
match to_type {
Point(ct, Dimension::XY) => {
if self.has_line_string_2ds()
| self.has_polygon_2ds()
| self.has_multi_line_string_2ds()
| self.has_multi_polygon_2ds()
if self.has_line_strings()
| self.has_polygons()
| self.has_multi_line_strings()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}
Expand All @@ -753,28 +753,27 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
LineString(ct, Dimension::XY) => {
if self.has_points()
| self.has_polygon_2ds()
| self.has_multi_point_2ds()
| self.has_multi_polygon_2ds()
| self.has_polygons()
| self.has_multi_points()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.line_strings
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(multi_line_strings) = &self.multi_line_strings {
if multi_line_strings.geom_offsets.last().to_usize().unwrap()
!= multi_line_strings.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = multi_line_strings.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.geom_capacity += buffer_lengths.ring_capacity;
let mut capacity = self.line_strings.buffer_lengths();
if self
.multi_line_strings
.geom_offsets
.last()
.to_usize()
.unwrap()
!= self.multi_line_strings.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = self.multi_line_strings.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.geom_capacity += buffer_lengths.ring_capacity;

let mut builder = LineStringBuilder::<i32, 2>::with_capacity_and_options(
capacity,
Expand All @@ -787,28 +786,27 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
LargeLineString(ct, Dimension::XY) => {
if self.has_points()
| self.has_polygon_2ds()
| self.has_multi_point_2ds()
| self.has_multi_polygon_2ds()
| self.has_polygons()
| self.has_multi_points()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.line_strings
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(multi_line_strings) = &self.multi_line_strings {
if multi_line_strings.geom_offsets.last().to_usize().unwrap()
!= multi_line_strings.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = multi_line_strings.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.geom_capacity += buffer_lengths.ring_capacity;
let mut capacity = self.line_strings.buffer_lengths();
if self
.multi_line_strings
.geom_offsets
.last()
.to_usize()
.unwrap()
!= self.multi_line_strings.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = self.multi_line_strings.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.geom_capacity += buffer_lengths.ring_capacity;

let mut builder = LineStringBuilder::<i64, 2>::with_capacity_and_options(
capacity,
Expand All @@ -821,29 +819,23 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
Polygon(ct, Dimension::XY) => {
if self.has_points()
| self.has_line_string_2ds()
| self.has_multi_point_2ds()
| self.has_multi_line_string_2ds()
| self.has_line_strings()
| self.has_multi_points()
| self.has_multi_line_strings()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.polygons
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(multi_polygons) = &self.multi_polygons {
if multi_polygons.geom_offsets.last().to_usize().unwrap()
!= multi_polygons.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = multi_polygons.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.ring_capacity += buffer_lengths.ring_capacity;
capacity.geom_capacity += buffer_lengths.polygon_capacity;
let mut capacity = self.polygons.buffer_lengths();
if self.multi_polygons.geom_offsets.last().to_usize().unwrap()
!= self.multi_polygons.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = self.multi_polygons.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.ring_capacity += buffer_lengths.ring_capacity;
capacity.geom_capacity += buffer_lengths.polygon_capacity;

let mut builder = PolygonBuilder::<i32, 2>::with_capacity_and_options(
capacity,
Expand All @@ -856,29 +848,23 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
LargePolygon(ct, Dimension::XY) => {
if self.has_points()
| self.has_line_string_2ds()
| self.has_multi_point_2ds()
| self.has_multi_line_string_2ds()
| self.has_line_strings()
| self.has_multi_points()
| self.has_multi_line_strings()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.polygons
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(multi_polygons) = &self.multi_polygons {
if multi_polygons.geom_offsets.last().to_usize().unwrap()
!= multi_polygons.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = multi_polygons.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.ring_capacity += buffer_lengths.ring_capacity;
capacity.geom_capacity += buffer_lengths.polygon_capacity;
let mut capacity = self.polygons.buffer_lengths();
if self.multi_polygons.geom_offsets.last().to_usize().unwrap()
!= self.multi_polygons.len()
{
return Err(GeoArrowError::General("Unable to cast".to_string()));
}
let buffer_lengths = self.multi_polygons.buffer_lengths();
capacity.coord_capacity += buffer_lengths.coord_capacity;
capacity.ring_capacity += buffer_lengths.ring_capacity;
capacity.geom_capacity += buffer_lengths.polygon_capacity;

let mut builder = PolygonBuilder::<i64, 2>::with_capacity_and_options(
capacity,
Expand All @@ -890,24 +876,18 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
Ok(Arc::new(builder.finish()))
}
MultiPoint(ct, Dimension::XY) => {
if self.has_line_string_2ds()
| self.has_polygon_2ds()
| self.has_multi_line_string_2ds()
| self.has_multi_polygon_2ds()
if self.has_line_strings()
| self.has_polygons()
| self.has_multi_line_strings()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.multi_points
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(points) = &self.points {
// Hack: move to newtype
capacity.coord_capacity += points.buffer_lengths();
capacity.geom_capacity += points.buffer_lengths();
}
let mut capacity = self.multi_points.buffer_lengths();
// Hack: move to newtype
capacity.coord_capacity += self.points.buffer_lengths();
capacity.geom_capacity += self.points.buffer_lengths();

let mut builder = MultiPointBuilder::<i32, 2>::with_capacity_and_options(
capacity,
Expand All @@ -919,24 +899,18 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
Ok(Arc::new(builder.finish()))
}
LargeMultiPoint(ct, Dimension::XY) => {
if self.has_line_string_2ds()
| self.has_polygon_2ds()
| self.has_multi_line_string_2ds()
| self.has_multi_polygon_2ds()
if self.has_line_strings()
| self.has_polygons()
| self.has_multi_line_strings()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.multi_points
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(points) = &self.points {
// Hack: move to newtype
capacity.coord_capacity += points.buffer_lengths();
capacity.geom_capacity += points.buffer_lengths();
}
let mut capacity = self.multi_points.buffer_lengths();
// Hack: move to newtype
capacity.coord_capacity += self.points.buffer_lengths();
capacity.geom_capacity += self.points.buffer_lengths();

let mut builder = MultiPointBuilder::<i64, 2>::with_capacity_and_options(
capacity,
Expand All @@ -949,21 +923,15 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
MultiLineString(ct, Dimension::XY) => {
if self.has_points()
| self.has_polygon_2ds()
| self.has_multi_point_2ds()
| self.has_multi_polygon_2ds()
| self.has_polygons()
| self.has_multi_points()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.multi_line_strings
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(line_strings) = &self.line_strings {
capacity += line_strings.buffer_lengths();
}
let mut capacity = self.multi_line_strings.buffer_lengths();
capacity += self.line_strings.buffer_lengths();

let mut builder = MultiLineStringBuilder::<i32, 2>::with_capacity_and_options(
capacity,
Expand All @@ -976,21 +944,15 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
LargeMultiLineString(ct, Dimension::XY) => {
if self.has_points()
| self.has_polygon_2ds()
| self.has_multi_point_2ds()
| self.has_multi_polygon_2ds()
| self.has_polygons()
| self.has_multi_points()
| self.has_multi_polygons()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.multi_line_strings
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(line_strings) = &self.line_strings {
capacity += line_strings.buffer_lengths();
}
let mut capacity = self.multi_line_strings.buffer_lengths();
capacity += self.line_strings.buffer_lengths();

let mut builder = MultiLineStringBuilder::<i64, 2>::with_capacity_and_options(
capacity,
Expand All @@ -1003,21 +965,15 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
MultiPolygon(ct, Dimension::XY) => {
if self.has_points()
| self.has_line_string_2ds()
| self.has_multi_point_2ds()
| self.has_multi_line_string_2ds()
| self.has_line_strings()
| self.has_multi_points()
| self.has_multi_line_strings()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.multi_polygons
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(polygons) = &self.polygons {
capacity += polygons.buffer_lengths();
}
let mut capacity = self.multi_polygons.buffer_lengths();
capacity += self.polygons.buffer_lengths();

let mut builder = MultiPolygonBuilder::<i32, 2>::with_capacity_and_options(
capacity,
Expand All @@ -1030,21 +986,15 @@ impl<O: OffsetSizeTrait> Cast for MixedGeometryArray<O, 2> {
}
LargeMultiPolygon(ct, Dimension::XY) => {
if self.has_points()
| self.has_line_string_2ds()
| self.has_multi_point_2ds()
| self.has_multi_line_string_2ds()
| self.has_line_strings()
| self.has_multi_points()
| self.has_multi_line_strings()
{
return Err(GeoArrowError::General("".to_string()));
}

let mut capacity = self
.multi_polygons
.as_ref()
.map(|x| x.buffer_lengths())
.unwrap_or_default();
if let Some(polygons) = &self.polygons {
capacity += polygons.buffer_lengths();
}
let mut capacity = self.multi_polygons.buffer_lengths();
capacity += self.polygons.buffer_lengths();

let mut builder = MultiPolygonBuilder::<i64, 2>::with_capacity_and_options(
capacity,
Expand Down
Loading

0 comments on commit e6f900b

Please sign in to comment.