From ca37ea716bf1ae2240d2aa66714bf0881e70ed82 Mon Sep 17 00:00:00 2001 From: Yuji Mise Date: Tue, 20 Aug 2024 14:48:32 +0900 Subject: [PATCH] feat: Refactor code to use slice references instead of owned strings --- nusamai-citygml/macros/src/derive.rs | 4 +- nusamai-citygml/src/parser.rs | 60 +++++++--------- nusamai-citygml/src/values.rs | 19 +++-- .../examples/parse_and_compress.rs | 72 ++++++++++--------- 4 files changed, 77 insertions(+), 78 deletions(-) diff --git a/nusamai-citygml/macros/src/derive.rs b/nusamai-citygml/macros/src/derive.rs index ac62b402b..bf6e36a9f 100644 --- a/nusamai-citygml/macros/src/derive.rs +++ b/nusamai-citygml/macros/src/derive.rs @@ -382,7 +382,7 @@ fn generate_citygml_impl_for_struct( #attr_parsing st.parse_children(move |st| { - let path = st.current_path(); + let path: &[u8] = &st.current_path(); let hash = (path.iter().skip(#HASH_CHAR_SKIP).take(#HASH_CHAR_TAKE).fold(5381u32, |a, c| a.wrapping_mul(33) ^ *c as u32) & #HASH_MASK) as u8; match (hash, path) { #(#child_arms)* @@ -512,7 +512,7 @@ fn generate_citygml_impl_for_enum( #[inline(never)] fn parse(&mut self, st: &mut ::nusamai_citygml::SubTreeReader) -> Result<(), ::nusamai_citygml::ParseError> { st.parse_children(|st| { - let path = st.current_path(); + let path: &[u8] = &st.current_path(); let hash = (path.iter().skip(#HASH_CHAR_SKIP).take(#HASH_CHAR_TAKE).fold(5381u32, |a, c| a.wrapping_mul(33) ^ *c as u32) & #HASH_MASK) as u8; match (hash, path) { #(#child_arms)* diff --git a/nusamai-citygml/src/parser.rs b/nusamai-citygml/src/parser.rs index a92867e4f..f24ae1678 100644 --- a/nusamai-citygml/src/parser.rs +++ b/nusamai-citygml/src/parser.rs @@ -48,12 +48,8 @@ pub struct CityGmlReader<'a> { struct InternalState<'a> { /// Buffer holding the current path path_buf: Vec, - /// Buffer holding the properties - property_buf: Vec, /// Stack of indices of slashes '/' in `path_buf` path_stack_indices: Vec, - /// Stack of indices of slashes '/' in `property_buf` - property_stack_indices: Vec, /// General purpose buffer 1 buf1: Vec, /// General purpose buffer 2 @@ -74,9 +70,7 @@ impl<'a> InternalState<'a> { fn new(context: ParseContext<'a>) -> Self { Self { path_buf: Vec::new(), - property_buf: Vec::new(), path_stack_indices: Vec::new(), - property_stack_indices: Vec::new(), buf1: Vec::new(), buf2: Vec::new(), fp_buf: Vec::new(), @@ -241,33 +235,21 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> { let Some(start) = &self.state.current_start else { panic!("parse_attributes() must be called immediately after encountering a start tag."); }; - self.state - .property_stack_indices - .push(self.state.property_buf.len()); - - self.state.property_buf.push(b'/'); - let (nsres, localname) = self.reader.resolve_element(start.name()); - self.state - .property_buf - .extend(wellknown_prefix_from_nsres(&nsres)); - self.state.property_buf.extend(localname.as_ref()); - self.state.property_buf.push(b'['); - + let mut property_buf = Vec::new(); + property_buf.push(b'['); self.state.buf1.clear(); self.state.buf1.push(b'@'); for (index, attr) in start.attributes().flatten().enumerate() { if index > 0 { - self.state.property_buf.push(b','); + property_buf.push(b','); } let (nsres, localname) = self.reader.resolve_attribute(attr.key); self.state.buf1.extend(wellknown_prefix_from_nsres(&nsres)); self.state.buf1.extend(localname.as_ref()); - self.state - .property_buf - .extend(wellknown_prefix_from_nsres(&nsres)); - self.state.property_buf.extend(localname.as_ref()); - self.state.property_buf.extend(b"="); - self.state.property_buf.extend(attr.value.as_ref()); + property_buf.extend(wellknown_prefix_from_nsres(&nsres)); + property_buf.extend(localname.as_ref()); + property_buf.extend(b"="); + property_buf.extend(attr.value.as_ref()); logic( self.state.buf1.as_ref(), // attribute path "@nsprefix:name" @@ -276,7 +258,8 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> { )?; self.state.buf1.truncate(1); } - self.state.property_buf.push(b']'); + property_buf.push(b']'); + self.state.path_buf.extend(&property_buf); Ok(()) } @@ -306,11 +289,22 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> { } /// Gets the current sub-tree path to the current element. - pub fn current_path(&self) -> &[u8] { + pub fn current_path(&self) -> Vec { if self.path_start + 1 < self.state.path_buf.len() { - &self.state.path_buf[self.path_start + 1..] + let current_path = &self.state.path_buf[self.path_start + 1..]; + let path = String::from_utf8_lossy(current_path); + if let Some(captures) = PROPERTY_PATTERN.captures(path.to_string().as_str()) { + if let Some(value) = captures.get(1).map(|m| m.as_str()) { + let result = value.to_string(); + result.into_bytes() + } else { + current_path.to_vec() + } + } else { + current_path.to_vec() + } } else { - b"" + Vec::new() } } @@ -370,13 +364,9 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> { let start = self.state.path_stack_indices[self.state.path_stack_indices.len() - 2]; let end = self.state.path_stack_indices[self.state.path_stack_indices.len() - 1]; let before_tag = &paths[start + 1..end]; - let properties = String::from_utf8_lossy(self.state.property_buf.as_ref()); - for caps in PROPERTY_PATTERN.captures_iter(&properties) { + for caps in PROPERTY_PATTERN.captures_iter(before_tag) { let tag = &caps[1]; - if tag != before_tag { - continue; - } let inner_content = &caps[2]; for kv_caps in PROPERTY_KEY_VALUE_PATTERN.captures_iter(inner_content) { let key = &kv_caps[1]; @@ -414,11 +404,9 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> { return Ok(()); } // FIXME } - self.state .path_buf .truncate(self.state.path_stack_indices.pop().unwrap()); - Ok(()) } diff --git a/nusamai-citygml/src/values.rs b/nusamai-citygml/src/values.rs index cfabad705..4380bb19e 100644 --- a/nusamai-citygml/src/values.rs +++ b/nusamai-citygml/src/values.rs @@ -545,9 +545,10 @@ impl CityGmlElement for Option { #[inline(never)] fn parse(&mut self, st: &mut SubTreeReader) -> Result<(), ParseError> { if self.is_some() { + let current_path: &[u8] = &st.current_path(); return Err(ParseError::SchemaViolation(format!( "{} must not occur two or more times.", - String::from_utf8_lossy(st.current_path()), + String::from_utf8_lossy(current_path), ))); } let mut v: T = Default::default(); @@ -629,13 +630,14 @@ impl CityGmlElement for Envelope { // TODO: parse CRS URI st.parse_children(|st| { - match st.current_path() { + let current_path: &[u8] = &st.current_path(); + match current_path { b"gml:lowerCorner" => self.lower_corner.parse(st)?, b"gml:upperCorner" => self.upper_corner.parse(st)?, _ => { return Err(ParseError::SchemaViolation(format!( "Expected gml:lowerCorner or gml:upperCorner, but got {}", - String::from_utf8_lossy(st.current_path()), + String::from_utf8_lossy(current_path), ))) } } @@ -670,7 +672,8 @@ pub struct GenericAttribute { impl CityGmlElement for GenericAttribute { #[inline(never)] fn parse(&mut self, st: &mut SubTreeReader) -> Result<(), ParseError> { - match st.current_path() { + let current_path: &[u8] = &st.current_path(); + match current_path { b"gen:stringAttribute" | b"gen:StringAttribute" => { self.string_attrs.push(parse_value(st)?) } @@ -690,7 +693,7 @@ impl CityGmlElement for GenericAttribute { _ => { return Err(ParseError::SchemaViolation(format!( "generic attributes are expected but found {}", - String::from_utf8_lossy(st.current_path()), + String::from_utf8_lossy(current_path), ))) } } @@ -780,7 +783,8 @@ where Ok(()) })?; st.parse_children(|st| { - match st.current_path() { + let current_path: &[u8] = &st.current_path(); + match current_path { // CityGML 3.0 b"gen:name" => { name = Some(st.parse_text()?.to_string()); @@ -815,7 +819,8 @@ fn parse_generic_set( Ok(()) })?; st.parse_children(|st| { - match st.current_path() { + let current_path: &[u8] = &st.current_path(); + match current_path { b"gen:name" => { name = Some(st.parse_text()?.to_string()); } diff --git a/nusamai-plateau/examples/parse_and_compress.rs b/nusamai-plateau/examples/parse_and_compress.rs index 8fe83f901..357101d59 100644 --- a/nusamai-plateau/examples/parse_and_compress.rs +++ b/nusamai-plateau/examples/parse_and_compress.rs @@ -17,41 +17,47 @@ fn example_toplevel_dispatcher( ) -> Result<(), ParseError> { let bincode_params = bincode::config::standard(); - match st.parse_children(|st| match st.current_path() { - b"core:cityObjectMember" => { - let mut cityobj: nusamai_plateau::models::TopLevelCityObject = Default::default(); - cityobj.parse(st)?; - let geometries = st.collect_geometries(); - - if let Some(root) = cityobj.into_object() { - let obj = self::TopLevelCityObject { root, geometries }; - - // print top-level city object - // println!( - // "vertices={} polygons={}", - // toplevel_cityobj.geometries.vertices.len(), - // toplevel_cityobj.geometries.polygons.len() - // ); - // println!("TLCO: {:#?}", toplevel_cityobj); - // println!("{}", serde_json::to_string(&toplevel_cityobj).unwrap()); - - // serialize with bincode - let start = encoded_data.len(); - bincode::serde::encode_into_std_write(obj, encoded_data, bincode_params).unwrap(); - encoded_sizes.push(encoded_data.len() - start); - } + let parse_result = { + let path: &[u8] = &st.current_path(); + match path { + b"core:cityObjectMember" => { + let mut cityobj: nusamai_plateau::models::TopLevelCityObject = Default::default(); + cityobj.parse(st)?; + let geometries = st.collect_geometries(); + + if let Some(root) = cityobj.into_object() { + let obj = self::TopLevelCityObject { root, geometries }; + + // print top-level city object + // println!( + // "vertices={} polygons={}", + // toplevel_cityobj.geometries.vertices.len(), + // toplevel_cityobj.geometries.polygons.len() + // ); + // println!("TLCO: {:#?}", toplevel_cityobj); + // println!("{}", serde_json::to_string(&toplevel_cityobj).unwrap()); + + // serialize with bincode + let start = encoded_data.len(); + bincode::serde::encode_into_std_write(obj, encoded_data, bincode_params) + .unwrap(); + encoded_sizes.push(encoded_data.len() - start); + } - Ok(()) - } - b"gml:boundedBy" | b"app:appearanceMember" => { - st.skip_current_element()?; - Ok(()) + Ok(()) + } + b"gml:boundedBy" | b"app:appearanceMember" => { + st.skip_current_element()?; + Ok(()) + } + other => Err(ParseError::SchemaViolation(format!( + "Unrecognized element {}", + String::from_utf8_lossy(other) + ))), } - other => Err(ParseError::SchemaViolation(format!( - "Unrecognized element {}", - String::from_utf8_lossy(other) - ))), - }) { + }; + + match parse_result { Ok(_) => Ok(()), Err(e) => { println!("Err: {:?}", e);