From ec7410ebaabd5c227a39bf30e41e00e55739dfdf Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 11:30:29 -0400 Subject: [PATCH 1/6] Infer geometry type and dimension from WKT --- src/infer_type.rs | 131 +++++++++++++++++++++++++++++++++++++ src/lib.rs | 4 ++ src/types/dimension.rs | 2 +- src/types/geometry_type.rs | 11 ++++ src/types/mod.rs | 4 +- 5 files changed, 150 insertions(+), 2 deletions(-) create mode 100644 src/infer_type.rs create mode 100644 src/types/geometry_type.rs diff --git a/src/infer_type.rs b/src/infer_type.rs new file mode 100644 index 0000000..f9286a2 --- /dev/null +++ b/src/infer_type.rs @@ -0,0 +1,131 @@ +use crate::types::{Dimension, GeometryType}; + +const POINT: &str = "POINT"; +const LINESTRING: &str = "LINESTRING"; +const POLYGON: &str = "POLYGON"; +const MULTIPOINT: &str = "MULTIPOINT"; +const MULTILINESTRING: &str = "MULTILINESTRING"; +const MULTIPOLYGON: &str = "MULTIPOLYGON"; +const GEOMETRYCOLLECTION: &str = "GEOMETRYCOLLECTION"; + +/// Infer the geometry type and dimension from an input WKT string slice. +/// +/// An `EMPTY` WKT object will return `None` in place of the dimension. 
+/// +/// ``` +/// use wkt::infer_type; +/// use wkt::types::{Dimension, GeometryType}; +/// +/// assert_eq!( +/// infer_type("POINT (10 20.1)").unwrap(), +/// (GeometryType::Point, Some(Dimension::XY)) +/// ); +/// +/// assert_eq!( +/// infer_type("POINT EMPTY").unwrap(), +/// (GeometryType::Point, None) +/// ); +/// ``` +pub fn infer_type(input: &str) -> Result<(GeometryType, Option<Dimension>), String> { + if let Some((prefix, _suffix)) = input.split_once("(") { + let prefix = prefix.to_uppercase(); + + let (geom_type, dim_str) = if let Some(dim_str) = prefix.strip_prefix(POINT) { + (GeometryType::Point, dim_str) + } else if let Some(dim_str) = prefix.strip_prefix(LINESTRING) { + (GeometryType::LineString, dim_str) + } else if let Some(dim_str) = prefix.strip_prefix(POLYGON) { + (GeometryType::Polygon, dim_str) + } else if let Some(dim_str) = prefix.strip_prefix(MULTIPOINT) { + (GeometryType::MultiPoint, dim_str) + } else if let Some(dim_str) = prefix.strip_prefix(MULTILINESTRING) { + (GeometryType::MultiLineString, dim_str) + } else if let Some(dim_str) = prefix.strip_prefix(MULTIPOLYGON) { + (GeometryType::MultiPolygon, dim_str) + } else if let Some(dim_str) = prefix.strip_prefix(GEOMETRYCOLLECTION) { + (GeometryType::GeometryCollection, dim_str) + } else { + return Err(format!("Unsupported WKT prefix {}", prefix)); + }; + + let dim = if dim_str.contains("ZM") { + Dimension::XYZM + } else if dim_str.contains("Z") { + Dimension::XYZ + } else if dim_str.contains("M") { + Dimension::XYM + } else { + Dimension::XY + }; + + Ok((geom_type, Some(dim))) + } else { + let input = input.to_uppercase(); + if !input.contains("EMPTY") { + return Err("Invalid WKT; no '(' character and not EMPTY".to_string()); + } + + if input.starts_with(POINT) { + Ok((GeometryType::Point, None)) + } else if input.starts_with(LINESTRING) { + Ok((GeometryType::LineString, None)) + } else if input.starts_with(POLYGON) { + Ok((GeometryType::Polygon, None)) + } else if input.starts_with(MULTIPOINT) { + 
Ok((GeometryType::MultiPoint, None)) + } else if input.starts_with(MULTILINESTRING) { + Ok((GeometryType::MultiLineString, None)) + } else if input.starts_with(MULTIPOLYGON) { + Ok((GeometryType::MultiPolygon, None)) + } else if input.starts_with(GEOMETRYCOLLECTION) { + Ok((GeometryType::GeometryCollection, None)) + } else { + return Err(format!("Unsupported WKT prefix {}", input)); + } + } +} + +#[cfg(test)] +mod test { + use super::*; + + #[test] + fn test_points() { + assert_eq!( + infer_type("POINT (10 20.1)").unwrap(), + (GeometryType::Point, Some(Dimension::XY)) + ); + assert_eq!( + infer_type("POINT Z (10 20.1 5)").unwrap(), + (GeometryType::Point, Some(Dimension::XYZ)) + ); + assert_eq!( + infer_type("POINT M (10 20.1 80)").unwrap(), + (GeometryType::Point, Some(Dimension::XYM)) + ); + assert_eq!( + infer_type("POINT ZM (10 20.1 5 80)").unwrap(), + (GeometryType::Point, Some(Dimension::XYZM)) + ); + } + + #[test] + fn lowercase_point() { + assert_eq!( + infer_type("point EMPTY").unwrap(), + (GeometryType::Point, None) + ); + } + + #[test] + fn test_empty() { + assert_eq!( + infer_type("POINT EMPTY").unwrap(), + (GeometryType::Point, None) + ); + assert_eq!( + infer_type("MULTIPOLYGON EMPTY").unwrap(), + (GeometryType::MultiPolygon, None) + ); + } +} diff --git a/src/lib.rs b/src/lib.rs index 2b97b13..ee6d881 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -94,6 +94,10 @@ mod tokenizer; /// `WKT` primitive types and collections pub mod types; +mod infer_type; + +pub use infer_type::infer_type; + #[cfg(feature = "geo-types")] extern crate geo_types; diff --git a/src/types/dimension.rs b/src/types/dimension.rs index b2cbf62..6d8f0bd 100644 --- a/src/types/dimension.rs +++ b/src/types/dimension.rs @@ -1,6 +1,6 @@ /// The dimension of geometry that we're parsing. 
#[allow(clippy::upper_case_acronyms)] -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Hash)] pub enum Dimension { #[default] XY, diff --git a/src/types/geometry_type.rs b/src/types/geometry_type.rs new file mode 100644 index 0000000..71a6559 --- /dev/null +++ b/src/types/geometry_type.rs @@ -0,0 +1,11 @@ +/// The geometry type of the WKT object +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +pub enum GeometryType { + Point, + LineString, + Polygon, + MultiPoint, + MultiLineString, + MultiPolygon, + GeometryCollection, +} diff --git a/src/types/mod.rs b/src/types/mod.rs index b0dfc87..05f9162 100644 --- a/src/types/mod.rs +++ b/src/types/mod.rs @@ -13,7 +13,8 @@ // limitations under the License. pub use self::coord::Coord; -pub(crate) use self::dimension::Dimension; +pub use self::dimension::Dimension; +pub use self::geometry_type::GeometryType; pub use self::geometrycollection::GeometryCollection; pub use self::linestring::LineString; pub use self::multilinestring::MultiLineString; @@ -24,6 +25,7 @@ pub use self::polygon::Polygon; mod coord; mod dimension; +mod geometry_type; mod geometrycollection; mod linestring; mod multilinestring; From 33c060fa6fa79f45010d752ad0ae99a22d71bedf Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Sun, 6 Oct 2024 11:32:21 -0400 Subject: [PATCH 2/6] update changes --- CHANGES.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGES.md b/CHANGES.md index d709377..0889282 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,5 +1,9 @@ # Changes +## Unreleased + +* Add an `infer_type` function to extract only the geometry type and dimension from a WKT string. + ## 0.11.0 - 2024-07-24 * Support parsing Z, M, and ZM WKT strings. 
From bcd1250b732953cf6edf3d7b8203a0573f662a66 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 7 Oct 2024 16:21:45 -0400 Subject: [PATCH 3/6] Allow leading whitespace --- src/infer_type.rs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/infer_type.rs b/src/infer_type.rs index f9286a2..6586d8f 100644 --- a/src/infer_type.rs +++ b/src/infer_type.rs @@ -65,19 +65,20 @@ pub fn infer_type(input: &str) -> Result<(GeometryType, Option<Dimension>), Stri return Err("Invalid WKT; no '(' character and not EMPTY".to_string()); } - if input.starts_with(POINT) { + // We use contains instead of starts_with to allow leading whitespace + if input.contains(POINT) { Ok((GeometryType::Point, None)) - } else if input.starts_with(LINESTRING) { + } else if input.contains(LINESTRING) { Ok((GeometryType::LineString, None)) - } else if input.starts_with(POLYGON) { + } else if input.contains(POLYGON) { Ok((GeometryType::Polygon, None)) - } else if input.starts_with(MULTIPOINT) { + } else if input.contains(MULTIPOINT) { Ok((GeometryType::MultiPoint, None)) - } else if input.starts_with(MULTILINESTRING) { + } else if input.contains(MULTILINESTRING) { Ok((GeometryType::MultiLineString, None)) - } else if input.starts_with(MULTIPOLYGON) { + } else if input.contains(MULTIPOLYGON) { Ok((GeometryType::MultiPolygon, None)) - } else if input.starts_with(GEOMETRYCOLLECTION) { + } else if input.contains(GEOMETRYCOLLECTION) { Ok((GeometryType::GeometryCollection, None)) } else { return Err(format!("Unsupported WKT prefix {}", input)); From 3aaf2007206c44dcfe08f4274cbc142871d403b5 Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 7 Oct 2024 19:10:28 -0400 Subject: [PATCH 4/6] use trim_start --- src/infer_type.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/infer_type.rs b/src/infer_type.rs index 6586d8f..581ed67 100644 --- a/src/infer_type.rs +++ b/src/infer_type.rs @@ -27,6 +27,8 @@ const GEOMETRYCOLLECTION: &str = 
"GEOMETRYCOLLECTION"; /// ); /// ``` pub fn infer_type(input: &str) -> Result<(GeometryType, Option<Dimension>), String> { + let input = input.trim_start(); + if let Some((prefix, _suffix)) = input.split_once("(") { let prefix = prefix.to_uppercase(); @@ -65,20 +67,19 @@ pub fn infer_type(input: &str) -> Result<(GeometryType, Option<Dimension>), Stri return Err("Invalid WKT; no '(' character and not EMPTY".to_string()); } - // We use contains instead of starts_with to allow leading whitespace - if input.contains(POINT) { + if input.starts_with(POINT) { Ok((GeometryType::Point, None)) - } else if input.contains(LINESTRING) { + } else if input.starts_with(LINESTRING) { Ok((GeometryType::LineString, None)) - } else if input.contains(POLYGON) { + } else if input.starts_with(POLYGON) { Ok((GeometryType::Polygon, None)) - } else if input.contains(MULTIPOINT) { + } else if input.starts_with(MULTIPOINT) { Ok((GeometryType::MultiPoint, None)) - } else if input.contains(MULTILINESTRING) { + } else if input.starts_with(MULTILINESTRING) { Ok((GeometryType::MultiLineString, None)) - } else if input.contains(MULTIPOLYGON) { + } else if input.starts_with(MULTIPOLYGON) { Ok((GeometryType::MultiPolygon, None)) - } else if input.contains(GEOMETRYCOLLECTION) { + } else if input.starts_with(GEOMETRYCOLLECTION) { Ok((GeometryType::GeometryCollection, None)) } else { return Err(format!("Unsupported WKT prefix {}", input)); From 56a7fcd37c2beb205db53d5ac7b8a897359b3ced Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 7 Oct 2024 19:11:16 -0400 Subject: [PATCH 5/6] add test with leading whitespace --- src/infer_type.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/infer_type.rs b/src/infer_type.rs index 581ed67..60322a9 100644 --- a/src/infer_type.rs +++ b/src/infer_type.rs @@ -111,6 +111,19 @@ mod test { ); } + #[test] + fn test_with_leading_whitespace() { + assert_eq!( + infer_type(" POINT (10 20.1)").unwrap(), + (GeometryType::Point, Some(Dimension::XY)) + ); + + assert_eq!( + 
infer_type("POINT EMPTY").unwrap(), + (GeometryType::Point, None) + ); + } + #[test] fn lowercase_point() { assert_eq!( From dbe53a23cf5a611169aff8a314072255b23f38ac Mon Sep 17 00:00:00 2001 From: Kyle Barron Date: Mon, 7 Oct 2024 21:30:16 -0400 Subject: [PATCH 6/6] fix test --- src/infer_type.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/infer_type.rs b/src/infer_type.rs index 60322a9..da904c7 100644 --- a/src/infer_type.rs +++ b/src/infer_type.rs @@ -119,7 +119,7 @@ mod test { ); assert_eq!( - infer_type("POINT EMPTY").unwrap(), + infer_type(" POINT EMPTY").unwrap(), (GeometryType::Point, None) ); }