Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Refactor code to use slice references instead of owned strings #20

Merged
merged 1 commit into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions nusamai-citygml/macros/src/derive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,7 +382,7 @@ fn generate_citygml_impl_for_struct(
#attr_parsing

st.parse_children(move |st| {
let path = st.current_path();
let path: &[u8] = &st.current_path();
let hash = (path.iter().skip(#HASH_CHAR_SKIP).take(#HASH_CHAR_TAKE).fold(5381u32, |a, c| a.wrapping_mul(33) ^ *c as u32) & #HASH_MASK) as u8;
match (hash, path) {
#(#child_arms)*
Expand Down Expand Up @@ -512,7 +512,7 @@ fn generate_citygml_impl_for_enum(
#[inline(never)]
fn parse<R: ::std::io::BufRead>(&mut self, st: &mut ::nusamai_citygml::SubTreeReader<R>) -> Result<(), ::nusamai_citygml::ParseError> {
st.parse_children(|st| {
let path = st.current_path();
let path: &[u8] = &st.current_path();
let hash = (path.iter().skip(#HASH_CHAR_SKIP).take(#HASH_CHAR_TAKE).fold(5381u32, |a, c| a.wrapping_mul(33) ^ *c as u32) & #HASH_MASK) as u8;
match (hash, path) {
#(#child_arms)*
Expand Down
60 changes: 24 additions & 36 deletions nusamai-citygml/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,8 @@ pub struct CityGmlReader<'a> {
struct InternalState<'a> {
/// Buffer holding the current path
path_buf: Vec<u8>,
/// Buffer holding the properties
property_buf: Vec<u8>,
/// Stack of indices of slashes '/' in `path_buf`
path_stack_indices: Vec<usize>,
/// Stack of indices of slashes '/' in `property_buf`
property_stack_indices: Vec<usize>,
/// General purpose buffer 1
buf1: Vec<u8>,
/// General purpose buffer 2
Expand All @@ -74,9 +70,7 @@ impl<'a> InternalState<'a> {
fn new(context: ParseContext<'a>) -> Self {
Self {
path_buf: Vec::new(),
property_buf: Vec::new(),
path_stack_indices: Vec::new(),
property_stack_indices: Vec::new(),
buf1: Vec::new(),
buf2: Vec::new(),
fp_buf: Vec::new(),
Expand Down Expand Up @@ -241,33 +235,21 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> {
let Some(start) = &self.state.current_start else {
panic!("parse_attributes() must be called immediately after encountering a start tag.");
};
self.state
.property_stack_indices
.push(self.state.property_buf.len());

self.state.property_buf.push(b'/');
let (nsres, localname) = self.reader.resolve_element(start.name());
self.state
.property_buf
.extend(wellknown_prefix_from_nsres(&nsres));
self.state.property_buf.extend(localname.as_ref());
self.state.property_buf.push(b'[');

let mut property_buf = Vec::new();
property_buf.push(b'[');
self.state.buf1.clear();
self.state.buf1.push(b'@');
for (index, attr) in start.attributes().flatten().enumerate() {
if index > 0 {
self.state.property_buf.push(b',');
property_buf.push(b',');
}
let (nsres, localname) = self.reader.resolve_attribute(attr.key);
self.state.buf1.extend(wellknown_prefix_from_nsres(&nsres));
self.state.buf1.extend(localname.as_ref());
self.state
.property_buf
.extend(wellknown_prefix_from_nsres(&nsres));
self.state.property_buf.extend(localname.as_ref());
self.state.property_buf.extend(b"=");
self.state.property_buf.extend(attr.value.as_ref());
property_buf.extend(wellknown_prefix_from_nsres(&nsres));
property_buf.extend(localname.as_ref());
property_buf.extend(b"=");
property_buf.extend(attr.value.as_ref());

logic(
self.state.buf1.as_ref(), // attribute path "@nsprefix:name"
Expand All @@ -276,7 +258,8 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> {
)?;
self.state.buf1.truncate(1);
}
self.state.property_buf.push(b']');
property_buf.push(b']');
self.state.path_buf.extend(&property_buf);
Ok(())
}

Expand Down Expand Up @@ -306,11 +289,22 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> {
}

/// Gets the current sub-tree path to the current element.
pub fn current_path(&self) -> &[u8] {
pub fn current_path(&self) -> Vec<u8> {
if self.path_start + 1 < self.state.path_buf.len() {
&self.state.path_buf[self.path_start + 1..]
let current_path = &self.state.path_buf[self.path_start + 1..];
let path = String::from_utf8_lossy(current_path);
if let Some(captures) = PROPERTY_PATTERN.captures(path.to_string().as_str()) {
if let Some(value) = captures.get(1).map(|m| m.as_str()) {
let result = value.to_string();
result.into_bytes()
} else {
current_path.to_vec()
}
} else {
current_path.to_vec()
}
} else {
b""
Vec::new()
}
}

Expand Down Expand Up @@ -370,13 +364,9 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> {
let start = self.state.path_stack_indices[self.state.path_stack_indices.len() - 2];
let end = self.state.path_stack_indices[self.state.path_stack_indices.len() - 1];
let before_tag = &paths[start + 1..end];
let properties = String::from_utf8_lossy(self.state.property_buf.as_ref());

for caps in PROPERTY_PATTERN.captures_iter(&properties) {
for caps in PROPERTY_PATTERN.captures_iter(before_tag) {
let tag = &caps[1];
if tag != before_tag {
continue;
}
let inner_content = &caps[2];
for kv_caps in PROPERTY_KEY_VALUE_PATTERN.captures_iter(inner_content) {
let key = &kv_caps[1];
Expand Down Expand Up @@ -414,11 +404,9 @@ impl<'b, R: BufRead> SubTreeReader<'_, 'b, R> {
return Ok(());
} // FIXME
}

self.state
.path_buf
.truncate(self.state.path_stack_indices.pop().unwrap());

Ok(())
}

Expand Down
19 changes: 12 additions & 7 deletions nusamai-citygml/src/values.rs
Original file line number Diff line number Diff line change
Expand Up @@ -545,9 +545,10 @@ impl<T: CityGmlElement + Default> CityGmlElement for Option<T> {
#[inline(never)]
fn parse<R: BufRead>(&mut self, st: &mut SubTreeReader<R>) -> Result<(), ParseError> {
if self.is_some() {
let current_path: &[u8] = &st.current_path();
return Err(ParseError::SchemaViolation(format!(
"{} must not occur two or more times.",
String::from_utf8_lossy(st.current_path()),
String::from_utf8_lossy(current_path),
)));
}
let mut v: T = Default::default();
Expand Down Expand Up @@ -629,13 +630,14 @@ impl CityGmlElement for Envelope {
// TODO: parse CRS URI

st.parse_children(|st| {
match st.current_path() {
let current_path: &[u8] = &st.current_path();
match current_path {
b"gml:lowerCorner" => self.lower_corner.parse(st)?,
b"gml:upperCorner" => self.upper_corner.parse(st)?,
_ => {
return Err(ParseError::SchemaViolation(format!(
"Expected gml:lowerCorner or gml:upperCorner, but got {}",
String::from_utf8_lossy(st.current_path()),
String::from_utf8_lossy(current_path),
)))
}
}
Expand Down Expand Up @@ -670,7 +672,8 @@ pub struct GenericAttribute {
impl CityGmlElement for GenericAttribute {
#[inline(never)]
fn parse<R: BufRead>(&mut self, st: &mut SubTreeReader<R>) -> Result<(), ParseError> {
match st.current_path() {
let current_path: &[u8] = &st.current_path();
match current_path {
b"gen:stringAttribute" | b"gen:StringAttribute" => {
self.string_attrs.push(parse_value(st)?)
}
Expand All @@ -690,7 +693,7 @@ impl CityGmlElement for GenericAttribute {
_ => {
return Err(ParseError::SchemaViolation(format!(
"generic attributes are expected but found {}",
String::from_utf8_lossy(st.current_path()),
String::from_utf8_lossy(current_path),
)))
}
}
Expand Down Expand Up @@ -780,7 +783,8 @@ where
Ok(())
})?;
st.parse_children(|st| {
match st.current_path() {
let current_path: &[u8] = &st.current_path();
match current_path {
// CityGML 3.0
b"gen:name" => {
name = Some(st.parse_text()?.to_string());
Expand Down Expand Up @@ -815,7 +819,8 @@ fn parse_generic_set<R: BufRead>(
Ok(())
})?;
st.parse_children(|st| {
match st.current_path() {
let current_path: &[u8] = &st.current_path();
match current_path {
b"gen:name" => {
name = Some(st.parse_text()?.to_string());
}
Expand Down
72 changes: 39 additions & 33 deletions nusamai-plateau/examples/parse_and_compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,41 +17,47 @@ fn example_toplevel_dispatcher<R: BufRead>(
) -> Result<(), ParseError> {
let bincode_params = bincode::config::standard();

match st.parse_children(|st| match st.current_path() {
b"core:cityObjectMember" => {
let mut cityobj: nusamai_plateau::models::TopLevelCityObject = Default::default();
cityobj.parse(st)?;
let geometries = st.collect_geometries();

if let Some(root) = cityobj.into_object() {
let obj = self::TopLevelCityObject { root, geometries };

// print top-level city object
// println!(
// "vertices={} polygons={}",
// toplevel_cityobj.geometries.vertices.len(),
// toplevel_cityobj.geometries.polygons.len()
// );
// println!("TLCO: {:#?}", toplevel_cityobj);
// println!("{}", serde_json::to_string(&toplevel_cityobj).unwrap());

// serialize with bincode
let start = encoded_data.len();
bincode::serde::encode_into_std_write(obj, encoded_data, bincode_params).unwrap();
encoded_sizes.push(encoded_data.len() - start);
}
let parse_result = {
let path: &[u8] = &st.current_path();
match path {
b"core:cityObjectMember" => {
let mut cityobj: nusamai_plateau::models::TopLevelCityObject = Default::default();
cityobj.parse(st)?;
let geometries = st.collect_geometries();

if let Some(root) = cityobj.into_object() {
let obj = self::TopLevelCityObject { root, geometries };

// print top-level city object
// println!(
// "vertices={} polygons={}",
// toplevel_cityobj.geometries.vertices.len(),
// toplevel_cityobj.geometries.polygons.len()
// );
// println!("TLCO: {:#?}", toplevel_cityobj);
// println!("{}", serde_json::to_string(&toplevel_cityobj).unwrap());

// serialize with bincode
let start = encoded_data.len();
bincode::serde::encode_into_std_write(obj, encoded_data, bincode_params)
.unwrap();
encoded_sizes.push(encoded_data.len() - start);
}

Ok(())
}
b"gml:boundedBy" | b"app:appearanceMember" => {
st.skip_current_element()?;
Ok(())
Ok(())
}
b"gml:boundedBy" | b"app:appearanceMember" => {
st.skip_current_element()?;
Ok(())
}
other => Err(ParseError::SchemaViolation(format!(
"Unrecognized element {}",
String::from_utf8_lossy(other)
))),
}
other => Err(ParseError::SchemaViolation(format!(
"Unrecognized element {}",
String::from_utf8_lossy(other)
))),
}) {
};

match parse_result {
Ok(_) => Ok(()),
Err(e) => {
println!("Err: {:?}", e);
Expand Down