diff --git a/src/fbs/header.fbs b/src/fbs/header.fbs index 096745e..ac7392c 100644 --- a/src/fbs/header.fbs +++ b/src/fbs/header.fbs @@ -20,9 +20,11 @@ enum ColumnType: ubyte { Json, // General JSON type intended to be application specific DateTime, // ISO 8601 date time Binary // General binary type intended to be application specific + // Array // Array of values } table Column { + index: ushort; // Column index (0 = first column) name: string (required); // Column name type: ColumnType; // Column type title: string; // Column title diff --git a/src/rust/benches/read.rs b/src/rust/benches/read.rs index 15b97bb..3c1353e 100644 --- a/src/rust/benches/read.rs +++ b/src/rust/benches/read.rs @@ -19,7 +19,7 @@ fn read_fcb(path: &str) -> Result<(u64, u64, u64)> { let mut multi_surface_count = 0; let mut other_count = 0; let mut feat_num = 0; - while let Ok(Some(feat_buf)) = reader.next() { + while let Some(feat_buf) = reader.next()? { let feature = feat_buf.cur_feature(); feature .objects() @@ -97,13 +97,13 @@ mod tests { } const DATASETS: &[(&str, (&str, &str))] = &[ - ( - "3DBAG", - ( - "benchmark_data/3DBAG.fcb", - "benchmark_data/3DBAG.city.jsonl", - ), - ), + // ( + // "3DBAG", + // ( + // "benchmark_data/3DBAG.fcb", + // "benchmark_data/3DBAG.city.jsonl", + // ), + // ), // ( // "3DBV", // ("benchmark_data/3DBV.fcb", "benchmark_data/3DBV.city.jsonl"), @@ -122,13 +122,13 @@ const DATASETS: &[(&str, (&str, &str))] = &[ // "benchmark_data/Ingolstadt.city.jsonl", // ), // ), - // ( - // "Montreal", - // ( - // "benchmark_data/Montreal.fcb", - // "benchmark_data/Montreal.city.jsonl", - // ), - // ), + ( + "Montreal", + ( + "benchmark_data/Montreal.fcb", + "benchmark_data/Montreal.city.jsonl", + ), + ), // ( // "NYC", // ("benchmark_data/NYC.fcb", "benchmark_data/NYC.city.jsonl"), diff --git a/src/rust/makefile b/src/rust/makefile index dfa7923..b354e2a 100644 --- a/src/rust/makefile +++ b/src/rust/makefile @@ -11,7 +11,8 @@ pre-commit: .PHONY: encode encode: - cargo run --bin flatcitybuf_cli serialize -i tests/data/delft.city.jsonl -o temp/delft.fcb + cargo run --bin flatcitybuf_cli serialize -i tests/data/delft.city.jsonl -o temp/delft_attr.fcb +# cargo run --bin flatcitybuf_cli serialize -i tests/data/delft.city.jsonl -o temp/delft.fcb .PHONY: decode decode: diff --git a/src/rust/src/bin/read.rs b/src/rust/src/bin/read.rs index 4fe32de..86f31a1 100644 --- a/src/rust/src/bin/read.rs +++ b/src/rust/src/bin/read.rs @@ -1,4 +1,4 @@ -use flatcitybuf::fcb_deserializer::{to_cj_feature, to_cj_metadata}; +use flatcitybuf::fcb_deserializer::to_cj_metadata; use flatcitybuf::FcbReader; use std::error::Error; use std::fs::File; @@ -20,16 +20,15 @@ fn read_file() -> Result<(), Box> { let mut reader = FcbReader::open(inputreader)?.select_all()?; let header = reader.header(); let cj = to_cj_metadata(&header)?; - let mut features = Vec::new(); let feat_count = header.features_count(); let mut feat_num = 0; - while let Ok(Some(feat_buf)) = reader.next() { - let feature = feat_buf.cur_feature(); + while let Some(feat_buf) = reader.next()? { + let feature = feat_buf.cur_cj_feature()?; if feat_num == 0 { println!("feature: {:?}", feature); } - features.push(to_cj_feature(feature)?); + features.push(feature); feat_num += 1; if feat_num >= feat_count { break; diff --git a/src/rust/src/bin/write.rs b/src/rust/src/bin/write.rs index 1e1e37b..32774a3 100644 --- a/src/rust/src/bin/write.rs +++ b/src/rust/src/bin/write.rs @@ -1,3 +1,4 @@ +use flatcitybuf::attribute::{AttributeSchema, AttributeSchemaMethods}; use flatcitybuf::header_writer::{HeaderMetadata, HeaderWriterOptions}; use flatcitybuf::{read_cityjson_from_reader, CJType, CJTypeKind, CityJSONSeq, FcbWriter}; use std::error::Error; @@ -28,7 +29,15 @@ fn write_file() -> Result<(), Box> { write_index: false, header_metadata, }); - let mut fcb = FcbWriter::new(cj, header_options, features.first())?; + let mut attr_schema = AttributeSchema::new(); + for feature in features.iter() { + for (_, co) in feature.city_objects.iter() { + if let Some(attributes) = &co.attributes { + attr_schema.add_attributes(attributes); + } + } + } + let mut fcb = FcbWriter::new(cj, header_options, features.first(), Some(&attr_schema))?; fcb.write_feature()?; for feature in features.iter().skip(1) { fcb.add_feature(feature)?; diff --git a/src/rust/src/error.rs b/src/rust/src/error.rs index ccbe6b2..b17fa5c 100644 --- a/src/rust/src/error.rs +++ b/src/rust/src/error.rs @@ -1,67 +1,67 @@ -use flatbuffers::InvalidFlatbuffer; -use std::fmt::{Display, Formatter}; +// use flatbuffers::InvalidFlatbuffer; +// use std::fmt::{Display, Formatter}; -#[derive(Debug)] -pub enum Error { - MissingMagicBytes, - NoIndex, - // #[cfg(feature = "http")] - // HttpClient(http_range_client::HttpError), - IllegalHeaderSize(usize), - InvalidFlatbuffer(InvalidFlatbuffer), - IO(std::io::Error), -} -pub type Result = std::result::Result; - -impl Display for Error { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - match self { - Error::MissingMagicBytes => "Missing magic bytes. Is this an fgb file?".fmt(f), - Error::NoIndex => "Index missing".fmt(f), - // #[cfg(feature = "http")] - // Error::HttpClient(http_client) => http_client.fmt(f), - Error::IllegalHeaderSize(size) => write!(f, "Illegal header size: {size}"), - Error::InvalidFlatbuffer(invalid_flatbuffer) => invalid_flatbuffer.fmt(f), - Error::IO(io) => io.fmt(f), - } - } -} +// #[derive(Debug)] +// pub enum Error { +// MissingMagicBytes, +// NoIndex, +// // #[cfg(feature = "http")] +// // HttpClient(http_range_client::HttpError), +// IllegalHeaderSize(usize), +// InvalidFlatbuffer(InvalidFlatbuffer), +// IO(std::io::Error), +// } +// pub type Result = std::result::Result; -impl std::error::Error for Error {} +// impl Display for Error { +// fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { +// match self { +// Error::MissingMagicBytes => "Missing magic bytes. Is this an fgb file?".fmt(f), +// Error::NoIndex => "Index missing".fmt(f), +// // #[cfg(feature = "http")] +// // Error::HttpClient(http_client) => http_client.fmt(f), +// Error::IllegalHeaderSize(size) => write!(f, "Illegal header size: {size}"), +// Error::InvalidFlatbuffer(invalid_flatbuffer) => invalid_flatbuffer.fmt(f), +// Error::IO(io) => io.fmt(f), +// } +// } +// } -impl From for Error { - fn from(value: std::io::Error) -> Self { - Self::IO(value) - } -} +// impl std::error::Error for Error {} -impl From for Error { - fn from(value: InvalidFlatbuffer) -> Self { - Error::InvalidFlatbuffer(value) - } -} +// impl From for Error { +// fn from(value: std::io::Error) -> Self { +// Self::IO(value) +// } +// } -// #[cfg(feature = "http")] -// impl From for Error { -// fn from(value: http_range_client::HttpError) -> Self { -// Error::HttpClient(value) +// impl From for Error { +// fn from(value: InvalidFlatbuffer) -> Self { +// Error::InvalidFlatbuffer(value) // } // } -#[derive(Debug)] -pub enum CityJSONError { - MissingField(&'static str), - // ParseError(String), - // InvalidData(&'static str), -} -impl std::error::Error for CityJSONError {} +// // #[cfg(feature = "http")] +// // impl From for Error { +// // fn from(value: http_range_client::HttpError) -> Self { +// // Error::HttpClient(value) +// // } +// // } + +// #[derive(Debug)] +// pub enum CityJSONError { +// MissingField(&'static str), +// // ParseError(String), +// // InvalidData(&'static str), +// } +// impl std::error::Error for CityJSONError {} -impl std::fmt::Display for CityJSONError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - CityJSONError::MissingField(field) => write!(f, "Missing field: {}", field), - // CityJSONError::ParseError(err) => write!(f, "Parse error: {}", err), - // CityJSONError::InvalidData(msg) => write!(f, "Invalid data: {}", msg), - } - } -} +// impl std::fmt::Display for CityJSONError { +// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { +// match self { +// CityJSONError::MissingField(field) => write!(f, "Missing field: {}", field), +// // CityJSONError::ParseError(err) => write!(f, "Parse error: {}", err), +// // CityJSONError::InvalidData(msg) => write!(f, "Invalid data: {}", msg), +// } +// } +// } diff --git a/src/rust/src/fcb_serde/fcb_deserializer.rs b/src/rust/src/fcb_serde/fcb_deserializer.rs index a886508..87c06b4 100644 --- a/src/rust/src/fcb_serde/fcb_deserializer.rs +++ b/src/rust/src/fcb_serde/fcb_deserializer.rs @@ -6,6 +6,7 @@ use crate::{ header_generated::*, }; use anyhow::{Context, Result}; +use byteorder::{ByteOrder, LittleEndian}; use cjseq::{ Address as CjAddress, CityJSON, CityJSONFeature, CityObject as CjCityObject, Geometry as CjGeometry, Metadata as CjMetadata, PointOfContact as CjPointOfContact, @@ -135,7 +136,135 @@ pub(crate) fn to_cj_co_type(co_type: CityObjectType) -> String { } } -pub fn to_cj_feature(feature: CityFeature) -> Result { +pub fn decode_attributes( + columns: flatbuffers::Vector<'_, flatbuffers::ForwardsUOffset>>, + attributes: flatbuffers::Vector<'_, u8>, +) -> serde_json::Value { + let mut map = serde_json::Map::new(); + let bytes = attributes.bytes(); + let mut offset = 0; + while offset < bytes.len() { + let col_index = LittleEndian::read_u16(&bytes[offset..offset + size_of::()]) as u16; + offset += size_of::(); + if col_index >= columns.len() as u16 { + panic!("column index out of range"); //TODO: handle this as an error + } + let column = columns.iter().find(|c| c.index() == col_index); + if column.is_none() { + panic!("column not found"); //TODO: handle this as an error + } + let column = column.unwrap(); + match column.type_() { + ColumnType::Int => { + map.insert( + column.name().to_string(), + serde_json::Value::Number(serde_json::Number::from(LittleEndian::read_i32( + &bytes[offset..offset + size_of::()], + ))), + ); + offset += size_of::(); + } + ColumnType::UInt => { + map.insert( + column.name().to_string(), + serde_json::Value::Number(serde_json::Number::from(LittleEndian::read_u32( + &bytes[offset..offset + size_of::()], + ))), + ); + offset += size_of::(); + } + ColumnType::Bool => { + map.insert( + column.name().to_string(), + serde_json::Value::Bool(bytes[offset] != 0), + ); + offset += size_of::(); + } + ColumnType::Short => { + map.insert( + column.name().to_string(), + serde_json::Value::Number(serde_json::Number::from(LittleEndian::read_i16( + &bytes[offset..offset + size_of::()], + ))), + ); + offset += size_of::(); + } + ColumnType::UShort => { + map.insert( + column.name().to_string(), + serde_json::Value::Number(serde_json::Number::from(LittleEndian::read_u16( + &bytes[offset..offset + size_of::()], + ))), + ); + offset += size_of::(); + } + ColumnType::Long => { + map.insert( + column.name().to_string(), + serde_json::Value::Number(serde_json::Number::from(LittleEndian::read_i64( + &bytes[offset..offset + size_of::()], + ))), + ); + offset += size_of::(); + } + ColumnType::ULong => { + map.insert( + column.name().to_string(), + serde_json::Value::Number(serde_json::Number::from(LittleEndian::read_u64( + &bytes[offset..offset + size_of::()], + ))), + ); + offset += size_of::(); + } + ColumnType::Float => { + let f = LittleEndian::read_f32(&bytes[offset..offset + size_of::()]); + if let Some(num) = serde_json::Number::from_f64(f as f64) { + map.insert(column.name().to_string(), serde_json::Value::Number(num)); + } + offset += size_of::(); + } + ColumnType::Double => { + let f = LittleEndian::read_f64(&bytes[offset..offset + size_of::()]); + if let Some(num) = serde_json::Number::from_f64(f) { + map.insert(column.name().to_string(), serde_json::Value::Number(num)); + } + offset += size_of::(); + } + ColumnType::String => { + let len = LittleEndian::read_u32(&bytes[offset..offset + size_of::()]); + offset += size_of::(); + let s = String::from_utf8(bytes[offset..offset + len as usize].to_vec()) + .unwrap_or_default(); + map.insert(column.name().to_string(), serde_json::Value::String(s)); + offset += len as usize; + } + ColumnType::Json => { + let len = LittleEndian::read_u32(&bytes[offset..offset + size_of::()]); + offset += size_of::(); + let s = String::from_utf8(bytes[offset..offset + len as usize].to_vec()) + .unwrap_or_default(); + map.insert(column.name().to_string(), serde_json::from_str(&s).unwrap()); + offset += len as usize; + } + + // TODO: handle other column types + _ => unreachable!(), + } + } + + // check if there is any column that is not in the map, and set it to null + for col in columns.iter() { + if !map.contains_key(col.name()) { + map.insert(col.name().to_string(), serde_json::Value::Null); + } + } + serde_json::Value::Object(map) +} + +pub fn to_cj_feature( + feature: CityFeature, + root_attr_schema: Option>>>, +) -> Result { let mut cj = CityJSONFeature::new(); cj.id = feature.id().to_string(); @@ -159,17 +288,30 @@ pub fn to_cj_feature(feature: CityFeature) -> Result { .collect::>() }); + let mut attributes = None; + if root_attr_schema.is_none() && co.columns().is_none() { + attributes = None; + } else { + attributes = co.attributes().map(|a| { + decode_attributes(co.columns().unwrap_or(root_attr_schema.unwrap()), a) + }); + } + + let children_roles = co + .children_roles() + .map(|c| c.iter().map(|s| s.to_string()).collect()); + let cjco = CjCityObject::new( to_cj_co_type(co.type_()).to_string(), geographical_extent, - None, + attributes, geometries, co.children() .map(|c| c.iter().map(|s| s.to_string()).collect()), + children_roles, co.parents() .map(|p| p.iter().map(|s| s.to_string()).collect()), None, - None, ); (co.id().to_string(), cjco) }) diff --git a/src/rust/src/fcb_serde/fcb_serializer.rs b/src/rust/src/fcb_serde/fcb_serializer.rs index c47f7db..620377e 100644 --- a/src/rust/src/fcb_serde/fcb_serializer.rs +++ b/src/rust/src/fcb_serde/fcb_serializer.rs @@ -1,4 +1,4 @@ -use crate::error::CityJSONError; +use crate::attribute::{encode_attributes_with_schema, AttributeSchema, AttributeSchemaMethods}; use crate::feature_generated::{ CityFeature, CityFeatureArgs, CityObject, CityObjectArgs, CityObjectType, Geometry, GeometryArgs, GeometryType, SemanticObject, SemanticObjectArgs, SemanticSurfaceType, Vertex, @@ -8,12 +8,14 @@ use crate::header_generated::{ GeographicalExtent, Header, HeaderArgs, ReferenceSystem, ReferenceSystemArgs, Transform, Vector, }; use crate::header_writer::HeaderMetadata; +use crate::{Column, ColumnArgs}; use cjseq::{ CityJSON, CityObject as CjCityObject, Geometry as CjGeometry, GeometryType as CjGeometryType, Metadata as CjMetadata, Transform as CjTransform, }; use flatbuffers::FlatBufferBuilder; +use serde_json::Value; /// ----------------------------------- /// Serializer for Header @@ -30,12 +32,18 @@ pub fn to_fcb_header<'a>( fbb: &mut flatbuffers::FlatBufferBuilder<'a>, cj: &CityJSON, header_metadata: HeaderMetadata, + attr_schema: &AttributeSchema, ) -> flatbuffers::WIPOffset> { let metadata = cj .metadata .as_ref() - .ok_or(CityJSONError::MissingField("metadata")) + .ok_or(anyhow::anyhow!("metadata is missing")) .unwrap(); + // let metadata = cj + // .metadata + // .as_ref() + // .ok_or(anyhow::anyhow!(Error::MissingField("metadata".to_string()))) + // .unwrap(); let reference_system = to_fcb_reference_system(fbb, metadata); let transform = to_fcb_transform(&cj.transform); let geographical_extent = metadata @@ -45,7 +53,7 @@ pub fn to_fcb_header<'a>( let header_args = HeaderArgs { version: Some(fbb.create_string(&cj.version)), transform: Some(&transform), - columns: None, + columns: Some(to_fcb_columns(fbb, attr_schema)), features_count: header_metadata.features_count, geographical_extent: geographical_extent.as_ref(), reference_system, @@ -314,6 +322,7 @@ pub fn to_fcb_city_object<'a>( fbb: &mut flatbuffers::FlatBufferBuilder<'a>, id: &str, co: &CjCityObject, + attr_schema: &AttributeSchema, ) -> flatbuffers::WIPOffset> { let id = Some(fbb.create_string(id)); @@ -332,8 +341,23 @@ pub fn to_fcb_city_object<'a>( }); geometries.map(|geometries| fbb.create_vector(&geometries)) }; - // let attributes = Some(self.fbb.create_vector(co.attributes)); - // let columns = Some(self.fbb.create_vector(co.columns)); + + let attributes_and_columns = co + .attributes + .as_ref() + .map(|attr| { + if !attr.is_object() { + return (None, None); + } + let (attr_vec, own_schema) = to_fcb_attribute(fbb, attr, attr_schema); + let columns = own_schema.map(|schema| to_fcb_columns(fbb, &schema)); + (Some(attr_vec), columns) + }) + .unwrap_or((None, None)); + + let (attributes, columns) = attributes_and_columns; + + // todo: check if truncate is needed let children = { let children_strings = co .children @@ -342,15 +366,14 @@ pub fn to_fcb_city_object<'a>( children_strings.map(|children_strings| fbb.create_vector(&children_strings)) }; - // let children_roles = { - // let children_roles_strings: Vec<_> = co - // .childre - // .iter() - // .map(|s| self.fbb.create_string(s)) - // .collect(); - // Some(self.fbb.create_vector(&children_roles_strings)) - // }; - let children_roles = None; // TODO: implement this later + let children_roles = { + let children_roles_strings = co + .children_roles + .as_ref() + .map(|c| c.iter().map(|r| fbb.create_string(r)).collect::>()); + children_roles_strings + .map(|children_roles_strings| fbb.create_vector(&children_roles_strings)) + }; let parents = { let parents_strings = co @@ -367,8 +390,8 @@ pub fn to_fcb_city_object<'a>( type_, geographical_extent: geographical_extent.as_ref(), geometry: geometries, - attributes: None, - columns: None, + attributes, + columns, children, children_roles, parents, @@ -474,11 +497,62 @@ pub(crate) fn to_fcb_geometry<'a>( ) } +pub fn to_fcb_columns<'a>( + fbb: &mut FlatBufferBuilder<'a>, + attr_schema: &AttributeSchema, +) -> flatbuffers::WIPOffset>>> { + let mut sorted_schema: Vec<_> = attr_schema.iter().collect(); + sorted_schema.sort_by_key(|(_, (index, _))| *index); + let columns_vec = sorted_schema + .iter() + .map(|(name, (index, column_type))| { + let name = fbb.create_string(name); + Column::create( + fbb, + &ColumnArgs { + name: Some(name), + index: *index, + type_: *column_type, + ..Default::default() + }, + ) + }) + .collect::>(); + fbb.create_vector(&columns_vec) +} + +pub fn to_fcb_attribute<'a>( + fbb: &mut FlatBufferBuilder<'a>, + attr: &Value, + schema: &AttributeSchema, +) -> ( + flatbuffers::WIPOffset>, + Option, +) { + let mut is_own_schema = false; + for (key, _) in attr.as_object().unwrap().iter() { + if !schema.contains_key(key) { + is_own_schema = true; + } + } + if is_own_schema { + let mut own_schema = AttributeSchema::new(); + own_schema.add_attributes(attr); + let encoded = encode_attributes_with_schema(attr, &own_schema); + (fbb.create_vector(&encoded), Some(own_schema)) + } else { + let encoded = encode_attributes_with_schema(attr, schema); + (fbb.create_vector(&encoded), None) + } +} + #[cfg(test)] mod tests { use super::*; + use crate::fcb_serde::fcb_deserializer::to_cj_co_type; use crate::feature_generated::root_as_city_feature; + use anyhow::Result; use cjseq::CityJSONFeature; use flatbuffers::FlatBufferBuilder; @@ -489,12 +563,19 @@ mod tests { r#"{"type":"CityJSONFeature","id":"NL.IMBAG.Pand.0503100000005156","CityObjects":{"NL.IMBAG.Pand.0503100000005156-0":{"type":"BuildingPart","attributes":{},"geometry":[{"type":"Solid","lod":"1.2","boundaries":[[[[6,1,0,5,4,3,7,8]],[[9,5,0,10]],[[10,0,1,11]],[[12,3,4,13]],[[13,4,5,9]],[[14,7,3,12]],[[15,8,7,14]],[[16,6,8,15]],[[11,1,6,16]],[[11,16,15,14,12,13,9,10]]]],"semantics":{"surfaces":[{"type":"GroundSurface"},{"type":"RoofSurface"},{"on_footprint_edge":true,"type":"WallSurface"},{"on_footprint_edge":false,"type":"WallSurface"}],"values":[[0,2,2,2,2,2,2,2,2,1]]}},{"type":"Solid","lod":"1.3","boundaries":[[[[3,7,8,6,1,17,0,5,4,18]],[[19,5,0,20]],[[21,22,17,1,23]],[[24,7,3,25]],[[26,8,7,24]],[[20,0,17,43]],[[44,45,43,46]],[[47,4,5,36]],[[48,18,4,47]],[[39,1,6,49]],[[41,3,18,48,50]],[[46,43,17,35,38]],[[49,6,8,42]],[[51,52,45,44]],[[53,54,55]],[[54,53,56]],[[50,48,52,51]],[[53,55,38,39,49,42]],[[54,56,44,46,38,55]],[[50,51,44,56,53,42,40,41]],[[52,48,47,36,37,43,45]]]],"semantics":{"surfaces":[{"type":"GroundSurface"},{"type":"RoofSurface"},{"on_footprint_edge":true,"type":"WallSurface"},{"on_footprint_edge":false,"type":"WallSurface"}],"values":[[0,2,2,2,2,2,3,2,2,2,2,2,3,3,1,1]]}},{"type":"Solid","lod":"2.2","boundaries":[[[[1,35,17,0,5,4,18,3,7,8,6]],[[36,5,0,37]],[[38,35,1,39]],[[40,7,3,41]],[[42,8,7,40]],[[37,0,17,43]],[[44,45,43,46]],[[47,4,5,36]],[[48,18,4,47]],[[39,1,6,49]],[[41,3,18,48,50]],[[46,43,17,35,38]],[[49,6,8,42]],[[51,52,45,44]],[[53,54,55]],[[54,53,56]],[[50,48,52,51]],[[53,55,38,39,49,42]],[[54,56,44,46,38,55]],[[50,51,44,56,53,42,40,41]],[[52,48,47,36,37,43,45]]]],"semantics":{"surfaces":[{"type":"GroundSurface"},{"type":"RoofSurface"},{"on_footprint_edge":true,"type":"WallSurface"},{"on_footprint_edge":false,"type":"WallSurface"}],"values":[[0,2,2,2,2,2,3,2,2,2,2,2,2,3,3,3,3,1,1,1,1]]}}],"parents":["NL.IMBAG.Pand.0503100000005156"]},"NL.IMBAG.Pand.0503100000005156":{"type":"Building","geographicalExtent":[84734.8046875,446636.5625,0.6919999718666077,84746.9453125,446651.0625,11.119057655334473],"attributes":{"b3_bag_bag_overlap":0.0,"b3_bouwlagen":3,"b3_dak_type":"slanted","b3_h_dak_50p":8.609999656677246,"b3_h_dak_70p":9.239999771118164,"b3_h_dak_max":10.970000267028809,"b3_h_dak_min":3.890000104904175,"b3_h_maaiveld":0.6919999718666077,"b3_kas_warenhuis":false,"b3_mutatie_ahn3_ahn4":false,"b3_nodata_fractie_ahn3":0.002518891589716077,"b3_nodata_fractie_ahn4":0.0,"b3_nodata_radius_ahn3":0.359510600566864,"b3_nodata_radius_ahn4":0.34349295496940613,"b3_opp_buitenmuur":165.03,"b3_opp_dak_plat":51.38,"b3_opp_dak_schuin":63.5,"b3_opp_grond":99.21,"b3_opp_scheidingsmuur":129.53,"b3_puntdichtheid_ahn3":16.353534698486328,"b3_puntdichtheid_ahn4":46.19647216796875,"b3_pw_bron":"AHN4","b3_pw_datum":2020,"b3_pw_selectie_reden":"PREFERRED_AND_LATEST","b3_reconstructie_onvolledig":false,"b3_rmse_lod12":3.2317864894866943,"b3_rmse_lod13":0.642620861530304,"b3_rmse_lod22":0.09925124794244766,"b3_val3dity_lod12":"[]","b3_val3dity_lod13":"[]","b3_val3dity_lod22":"[]","b3_volume_lod12":845.0095825195312,"b3_volume_lod13":657.8263549804688,"b3_volume_lod22":636.9927368164062,"begingeldigheid":"1999-04-28","documentdatum":"1999-04-28","documentnummer":"408040.tif","eindgeldigheid":null,"eindregistratie":null,"geconstateerd":false,"identificatie":"NL.IMBAG.Pand.0503100000005156","oorspronkelijkbouwjaar":2000,"status":"Pand in gebruik","tijdstipeindregistratielv":null,"tijdstipinactief":null,"tijdstipinactieflv":null,"tijdstipnietbaglv":null,"tijdstipregistratie":"2010-10-13T12:29:24Z","tijdstipregistratielv":"2010-10-13T12:30:50Z","voorkomenidentificatie":1},"geometry":[{"type":"MultiSurface","lod":"0","boundaries":[[[0,1,2,3,4,5]]]}],"children":["NL.IMBAG.Pand.0503100000005156-0"]}},"vertices":[[-353581,253246,-44957],[-348730,242291,-44957],[-343550,244604,-44957],[-344288,246257,-44957],[-341437,247537,-44957],[-345635,256798,-44957],[-343558,244600,-44957],[-343662,244854,-44957],[-343926,244734,-44957],[-345635,256798,-36439],[-353581,253246,-36439],[-348730,242291,-36439],[-344288,246257,-36439],[-341437,247537,-36439],[-343662,244854,-36439],[-343926,244734,-36439],[-343558,244600,-36439],[-352596,251020,-44957],[-344083,246349,-44957],[-345635,256798,-41490],[-353581,253246,-41490],[-352596,251020,-35952],[-352596,251020,-41490],[-348730,242291,-35952],[-343662,244854,-35952],[-344288,246257,-35952],[-343926,244734,-35952],[-347233,253386,-35952],[-347233,253386,-41490],[-341437,247537,-41490],[-344083,246349,-41490],[-343558,244600,-35952],[-344083,246349,-35952],[-347089,253741,-35952],[-347089,253741,-41490],[-350613,246543,-44957],[-345635,256798,-41507],[-353581,253246,-41516],[-350613,246543,-34688],[-348730,242291,-36953],[-343662,244854,-37089],[-344288,246257,-37099],[-343926,244734,-36944],[-352596,251020,-41514],[-347233,253386,-37262],[-347233,253386,-41508],[-352596,251020,-37264],[-341437,247537,-41498],[-344083,246349,-41501],[-343558,244600,-37083],[-344083,246349,-37212],[-347089,253741,-37402],[-347089,253741,-41508],[-349425,246738,-34864],[-349425,246738,-34529],[-349862,246897,-34699],[-349238,248437,-35307]]}"#, )?; + let mut attr_schema = AttributeSchema::new(); + for (_, co) in cj_city_feature.city_objects.iter() { + if let Some(attr) = &co.attributes { + attr_schema.add_attributes(attr); + } + } + // Create FlatBuffer and encode let mut fbb = FlatBufferBuilder::new(); let city_objects_buf: Vec<_> = cj_city_feature .city_objects .iter() - .map(|(id, co)| to_fcb_city_object(&mut fbb, id, co)) + .map(|(id, co)| to_fcb_city_object(&mut fbb, id, co, &attr_schema)) .collect(); let city_feature = to_fcb_city_feature( &mut fbb, @@ -614,4 +695,79 @@ mod tests { Ok(()) } + + #[test] + fn test_encode_attributes() -> Result<()> { + // let json_data = json!({ + // "attributes": { + // "int": -1, + // "uint": 1, + // "bool": true, + // "float": 1.0, + // "string": "hoge", + // "array": [1, 2, 3], + // "json": { + // "hoge": "fuga" + // }, + // "null": null + // } + // }); + // let attrs = &json_data["attributes"]; + + // // Test case 1: Using common schema + // { + // let mut fbb = FlatBufferBuilder::new(); + // let mut common_schema = AttributeSchema::new(); + // common_schema.add_attributes(attrs); + + // let columns = to_fcb_columns(&mut fbb, &common_schema); + // let header = Header::create( + // &mut fbb, + // &HeaderArgs { + // columns: Some(columns), + // ..Default::default() + // }, + // ); + + // fbb.finish(header, None); + // let finished_data = fbb.finished_data(); + // let header_buf = root_as_header(finished_data).unwrap(); + + // // let feature = + + // let encoded = encode_attributes_with_schema(attrs, &common_schema); + + // // Verify encoded data + // assert!(!encoded.is_empty()); + + // let decoded = decode_attributes(header_buf.columns().unwrap(), encoded.); + // assert_eq!(attrs, &decoded); + // } + + // // Test case 2: Using own schema + // { + // let mut fbb = FlatBufferBuilder::new(); + // let (offset, schema) = to_fcb_attribute(&mut fbb, attrs, &AttributeSchema::new()); + + // // Verify schema is returned for own schema case + // assert!(schema.is_some()); + // let schema = schema.unwrap(); + + // // Verify schema contains expected types + // assert_eq!(schema.get("int"), Some(&ColumnType::Int)); + // assert_eq!(schema.get("uint"), Some(&ColumnType::UInt)); + // assert_eq!(schema.get("bool"), Some(&ColumnType::Bool)); + // assert_eq!(schema.get("float"), Some(&ColumnType::Float)); + // assert_eq!(schema.get("string"), Some(&ColumnType::String)); + // assert_eq!(schema.get("json"), Some(&ColumnType::Json)); + + // // Get the encoded data + // let data = fbb.finished_data(); + // assert!(!data.is_empty()); + // // First 2 bytes should be 1 (true) for own schema + // assert_eq!(&data[0..2], &[1, 0]); + // } + + Ok(()) + } } diff --git a/src/rust/src/header_generated.rs b/src/rust/src/header_generated.rs index f4dcf6f..5838fbb 100644 --- a/src/rust/src/header_generated.rs +++ b/src/rust/src/header_generated.rs @@ -503,16 +503,17 @@ impl<'a> flatbuffers::Follow<'a> for Column<'a> { } impl<'a> Column<'a> { - pub const VT_NAME: flatbuffers::VOffsetT = 4; - pub const VT_TYPE_: flatbuffers::VOffsetT = 6; - pub const VT_TITLE: flatbuffers::VOffsetT = 8; - pub const VT_DESCRIPTION: flatbuffers::VOffsetT = 10; - pub const VT_PRECISION: flatbuffers::VOffsetT = 12; - pub const VT_SCALE: flatbuffers::VOffsetT = 14; - pub const VT_NULLABLE: flatbuffers::VOffsetT = 16; - pub const VT_UNIQUE: flatbuffers::VOffsetT = 18; - pub const VT_PRIMARY_KEY: flatbuffers::VOffsetT = 20; - pub const VT_METADATA: flatbuffers::VOffsetT = 22; + pub const VT_INDEX: flatbuffers::VOffsetT = 4; + pub const VT_NAME: flatbuffers::VOffsetT = 6; + pub const VT_TYPE_: flatbuffers::VOffsetT = 8; + pub const VT_TITLE: flatbuffers::VOffsetT = 10; + pub const VT_DESCRIPTION: flatbuffers::VOffsetT = 12; + pub const VT_PRECISION: flatbuffers::VOffsetT = 14; + pub const VT_SCALE: flatbuffers::VOffsetT = 16; + pub const VT_NULLABLE: flatbuffers::VOffsetT = 18; + pub const VT_UNIQUE: flatbuffers::VOffsetT = 20; + pub const VT_PRIMARY_KEY: flatbuffers::VOffsetT = 22; + pub const VT_METADATA: flatbuffers::VOffsetT = 24; #[inline] pub unsafe fn init_from_table(table: flatbuffers::Table<'a>) -> Self { @@ -538,6 +539,7 @@ impl<'a> Column<'a> { if let Some(x) = args.name { builder.add_name(x); } + builder.add_index(args.index); builder.add_primary_key(args.primary_key); builder.add_unique(args.unique); builder.add_nullable(args.nullable); @@ -545,6 +547,13 @@ impl<'a> Column<'a> { builder.finish() } + #[inline] + pub fn index(&self) -> u16 { + // Safety: + // Created from valid Table for this object + // which contains a valid value in this slot + unsafe { self._tab.get::(Column::VT_INDEX, Some(0)).unwrap() } + } #[inline] pub fn name(&self) -> &'a str { // Safety: @@ -658,6 +667,7 @@ impl flatbuffers::Verifiable for Column<'_> { ) -> Result<(), flatbuffers::InvalidFlatbuffer> { use self::flatbuffers::Verifiable; v.visit_table(pos)? + .visit_field::("index", Self::VT_INDEX, false)? .visit_field::>("name", Self::VT_NAME, true)? .visit_field::("type_", Self::VT_TYPE_, false)? .visit_field::>("title", Self::VT_TITLE, false)? @@ -681,6 +691,7 @@ impl flatbuffers::Verifiable for Column<'_> { } } pub struct ColumnArgs<'a> { + pub index: u16, pub name: Option>, pub type_: ColumnType, pub title: Option>, @@ -696,6 +707,7 @@ impl<'a> Default for ColumnArgs<'a> { #[inline] fn default() -> Self { ColumnArgs { + index: 0, name: None, // required field type_: ColumnType::Byte, title: None, @@ -715,6 +727,10 @@ pub struct ColumnBuilder<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> { start_: flatbuffers::WIPOffset, } impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ColumnBuilder<'a, 'b, A> { + #[inline] + pub fn add_index(&mut self, index: u16) { + self.fbb_.push_slot::(Column::VT_INDEX, index, 0); + } #[inline] pub fn add_name(&mut self, name: flatbuffers::WIPOffset<&'b str>) { self.fbb_ @@ -783,6 +799,7 @@ impl<'a: 'b, 'b, A: flatbuffers::Allocator + 'a> ColumnBuilder<'a, 'b, A> { impl core::fmt::Debug for Column<'_> { fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result { let mut ds = f.debug_struct("Column"); + ds.field("index", &self.index()); ds.field("name", &self.name()); ds.field("type_", &self.type_()); ds.field("title", &self.title()); diff --git a/src/rust/src/main.rs b/src/rust/src/main.rs index c53134a..4f95bdd 100644 --- a/src/rust/src/main.rs +++ b/src/rust/src/main.rs @@ -1,6 +1,7 @@ use anyhow::Result; use clap::{Parser, Subcommand}; use flatcitybuf::{ + attribute::{AttributeSchema, AttributeSchemaMethods}, fcb_deserializer, header_writer::{HeaderMetadata, HeaderWriterOptions}, read_cityjson_from_reader, CJType, CJTypeKind, CityJSONSeq, FcbReader, FcbWriter, @@ -74,6 +75,15 @@ fn serialize(input: &str, output: &str) -> Result<()> { }; let CityJSONSeq { cj, features } = cj_seq; + let mut attr_schema = AttributeSchema::new(); + for feature in features.iter() { + for (_, co) in feature.city_objects.iter() { + if let Some(attributes) = &co.attributes { + attr_schema.add_attributes(attributes); + } + } + } + let header_metadata = HeaderMetadata { features_count: features.len() as u64, }; @@ -81,10 +91,19 @@ fn serialize(input: &str, output: &str) -> Result<()> { write_index: false, header_metadata, }); - let mut fcb = FcbWriter::new(cj, header_options, features.first())?; + let mut fcb = FcbWriter::new( + cj, + header_options, + None, + if attr_schema.is_empty() { + None + } else { + Some(&attr_schema) + }, + )?; fcb.write_feature()?; - for feature in features.iter().skip(1) { + for feature in features.iter() { fcb.add_feature(feature)?; } fcb.write(writer)?; @@ -106,12 +125,13 @@ fn deserialize(input: &str, output: &str) -> Result<()> { // Write header writeln!(writer, "{}", serde_json::to_string(&cj)?)?; + let root_attr_schema = header.columns(); // Write features let feat_count = header.features_count(); let mut feat_num = 0; while let Ok(Some(feat_buf)) = fcb_reader.next() { let feature = feat_buf.cur_feature(); - let cj_feature = fcb_deserializer::to_cj_feature(feature)?; + let cj_feature = fcb_deserializer::to_cj_feature(feature, None)?; writeln!(writer, "{}", serde_json::to_string(&cj_feature)?)?; feat_num += 1; diff --git a/src/rust/src/reader/mod.rs b/src/rust/src/reader/mod.rs index 3daff8e..a7f827d 100644 --- a/src/rust/src/reader/mod.rs +++ b/src/rust/src/reader/mod.rs @@ -1,11 +1,13 @@ mod city_buffer; use city_buffer::FcbBuffer; +use cjseq::CityJSONFeature; -use crate::error::{Error, Result}; +use crate::fcb_deserializer::to_cj_feature; use crate::feature_generated::{size_prefixed_root_as_city_feature, CityFeature}; use crate::header_generated::*; use crate::{check_magic_bytes, HEADER_MAX_BUFFER_SIZE}; +use anyhow::{anyhow, Result}; use fallible_streaming_iterator::FallibleStreamingIterator; use std::io::{Read, Seek, SeekFrom, Write}; @@ -66,14 +68,14 @@ impl FcbReader { let mut magic_buf: [u8; 8] = [0; 8]; reader.read_exact(&mut magic_buf)?; if !check_magic_bytes(&magic_buf) { - return Err(Error::MissingMagicBytes); + return Err(anyhow!("Missing magic bytes. Is this an fgb file?")); } let mut size_buf: [u8; 4] = [0; 4]; // MEMO: 4 bytes for size prefix. This is comvention for FlatBuffers's size_prefixed_root reader.read_exact(&mut size_buf)?; let header_size = u32::from_le_bytes(size_buf) as usize; if header_size > HEADER_MAX_BUFFER_SIZE || header_size < 8 { - return Err(Error::IllegalHeaderSize(header_size)); + return Err(anyhow!("Illegal header size: {header_size}")); } let mut header_buf = Vec::with_capacity(header_size + 4); @@ -206,6 +208,12 @@ impl FcbReader { self.buffer.header() } + pub fn root_attr_schema( + &self, + ) -> Option>> { + self.buffer.header().columns() + } + fn index_size(&self) -> u64 { 0 // let header = self.buffer.header(); @@ -220,7 +228,7 @@ impl FcbReader { impl FallibleStreamingIterator for FeatureIter { type Item = FcbBuffer; - type Error = Error; + type Error = anyhow::Error; fn advance(&mut self) -> Result<()> { if self.advance_finished() { @@ -252,7 +260,7 @@ impl FallibleStreamingIterator for FeatureIter { impl FallibleStreamingIterator for FeatureIter { type Item = FcbBuffer; - type Error = Error; + type Error = anyhow::Error; fn advance(&mut self) -> Result<()> { if self.advance_finished() { @@ -287,6 +295,13 @@ impl FeatureIter { self.buffer.feature() } + pub fn cur_cj_feature(&self) -> Result { + let fcb_feature = self.buffer.feature(); + let root_attr_schema = self.buffer.header().columns(); + + to_cj_feature(fcb_feature, root_attr_schema) + } + pub fn get_features(&mut self) -> Result> { // let mut features: Vec = Vec::new(); // let mut count = 0; @@ -324,10 +339,16 @@ impl FeatureIter { } impl FeatureIter { - /// Return current feature pub fn cur_feature(&self) -> CityFeature { self.buffer.feature() } + /// Return current feature + pub fn cur_cj_feature(&self) -> Result { + let fcb_feature = self.buffer.feature(); + let root_attr_schema = self.buffer.header().columns(); + + to_cj_feature(fcb_feature, root_attr_schema) + } pub fn get_features(&mut self, out: impl Write) -> Result<()> { // println!("get features"); @@ -392,6 +413,12 @@ impl FeatureIter { self.buffer.header() } + pub fn root_attr_schema( + &self, + ) -> Option>> { + self.buffer.header().columns() + } + // pub fn features(&self) -> CityFeature { // self.buffer.feature() // } diff --git a/src/rust/src/writer/attribute.rs b/src/rust/src/writer/attribute.rs new file mode 100644 index 0000000..c61e968 --- /dev/null +++ b/src/rust/src/writer/attribute.rs @@ -0,0 +1,214 @@ +use crate::header_generated::ColumnType; +use byteorder::{ByteOrder, LittleEndian}; +use serde_json::Value; +use std::{collections::HashMap, u16}; + +pub type AttributeSchema = HashMap; + +pub trait AttributeSchemaMethods { + fn add_attributes(&mut self, attrs: &Value); +} + +impl AttributeSchemaMethods for AttributeSchema { + fn add_attributes(&mut self, attrs: &Value) { + if !attrs.is_object() { + self.insert("json".to_string(), (self.len() as u16, ColumnType::Json)); + return; + } + + let map = attrs.as_object().unwrap(); + for (key, val) in map.iter() { + if !self.contains_key(key) && !val.is_null() { + if let Some(coltype) = guess_type(val) { + self.insert(key.clone(), (self.len() as u16, coltype)); + } + } + } + } +} + +/// Naive type-guessing. You could use your schema or logic as in your Python code. +fn guess_type(value: &Value) -> Option { + match value { + Value::Bool(_) => Some(ColumnType::Bool), + Value::Number(n) => { + if n.is_f64() { + Some(ColumnType::Double) + } else if n.is_u64() { + Some(ColumnType::ULong) + } else if n.is_i64() { + Some(ColumnType::Long) + } else { + Some(ColumnType::ULong) //TODO: check if this is correct. To accurately guess the type, we need to know the range of the value. But, to do that, we need to read all the data. + } + } + Value::String(_) => Some(ColumnType::String), + Value::Array(_) => Some(ColumnType::Json), + Value::Object(_) => Some(ColumnType::Json), + _ => None, + } +} + +pub fn attr_size(coltype: &ColumnType, colval: &Value) -> usize { + match *coltype { + ColumnType::Byte => size_of::(), + ColumnType::UByte => size_of::(), + ColumnType::Bool => size_of::(), + ColumnType::Short => size_of::(), + ColumnType::UShort => size_of::(), + ColumnType::Int => size_of::(), + ColumnType::UInt => size_of::(), + ColumnType::Long => size_of::(), + ColumnType::ULong => size_of::(), + ColumnType::Float => size_of::(), + ColumnType::Double => size_of::(), + ColumnType::String | ColumnType::DateTime => { + size_of::() + colval.as_str().unwrap().len() + } + ColumnType::Json => { + let json = serde_json::to_string(colval).unwrap_or_default(); + size_of::() + json.as_bytes().len() + } + ColumnType::Binary => size_of::() + colval.as_str().unwrap().len(), //TODO: check if this is correct + _ => unreachable!(), + } +} + +pub fn encode_attributes_with_schema(attr: &Value, schema: &AttributeSchema) -> Vec { + let mut out = Vec::new(); + let mut sorted_schema: Vec<_> = schema.iter().collect(); + sorted_schema.sort_by_key(|(_, (index, _))| *index); + + for (name, (index, coltype)) in sorted_schema { + let (_, val) = attr + .as_object() + .unwrap() + .iter() + .find(|(k, _)| *k == name) + .unwrap(); + + if val.is_null() { + continue; + } + + let mut offset = out.len(); + let attr_size = attr_size(coltype, val); + + // Reserve space for index and value + out.resize(offset + size_of::() + attr_size, 0); + + // Write index + LittleEndian::write_u16(&mut out[offset..], *index); + offset += size_of::(); + + match *coltype { + ColumnType::Bool => { + let b = val.as_bool().unwrap_or(false); + out[offset] = b as u8; + } + ColumnType::Int => { + let i = val.as_i64().unwrap_or(0); + LittleEndian::write_i32(&mut out[offset..], i as i32); + } + ColumnType::UInt => { + let i = val.as_u64().unwrap_or(0); + LittleEndian::write_u32(&mut out[offset..], i as u32); + } + ColumnType::Byte => { + let b = val.as_i64().unwrap_or(0); + out[offset] = b as u8; + } + ColumnType::UByte => { + let b = val.as_u64().unwrap_or(0); + out[offset] = b as u8; + } + + ColumnType::Short => { + let i = val.as_i64().unwrap_or(0); + LittleEndian::write_i16(&mut out[offset..], i as i16); + } + ColumnType::UShort => { + let i = val.as_u64().unwrap_or(0); + LittleEndian::write_u16(&mut out[offset..], i as u16); + } + + ColumnType::Long => { + let i = val.as_i64().unwrap_or(0); + LittleEndian::write_i64(&mut out[offset..], i); + } + ColumnType::ULong => { + let i = val.as_u64().unwrap_or(0); + LittleEndian::write_u64(&mut out[offset..], i); + } + ColumnType::Float => { + let f = val.as_f64().unwrap_or(0.0); + LittleEndian::write_f32(&mut out[offset..], f as f32); + } + ColumnType::Double => { + let f = val.as_f64().unwrap_or(0.0); + LittleEndian::write_f64(&mut out[offset..], f); + } + ColumnType::String | ColumnType::DateTime => { + let s = val.as_str().unwrap_or(""); + LittleEndian::write_u32(&mut out[offset..], s.len() as u32); + out[offset + size_of::()..offset + size_of::() + s.len()] + .copy_from_slice(s.as_bytes()); + } + ColumnType::Json => { + let json = serde_json::to_string(val).unwrap_or_default(); + LittleEndian::write_u32(&mut out[offset..], json.len() as u32); + out[offset + size_of::()..offset + size_of::() + json.len()] + .copy_from_slice(json.as_bytes()); + } + ColumnType::Binary => { + let s = val.as_str().unwrap_or(""); + LittleEndian::write_u32(&mut out[offset..], s.len() as u32); + out[offset + size_of::()..offset + size_of::() + s.len()] + .copy_from_slice(s.as_bytes()); + } + _ => unreachable!(), + } + } + out +} + +#[cfg(test)] +mod tests { + use super::*; + + use anyhow::Result; + use serde_json::json; + + #[test] + fn test_add_attributes() -> Result<()> { + let json_data = json!({ + "attributes": { + "int": -10, + "uint": 5, + "bool": true, + "float": 1.0, + "string": "hoge", + "array": [1, 2, 3], + "json": { + "hoge": "fuga" + }, + "null": null + } + }); + + let mut attr_schema: AttributeSchema = AttributeSchema::new(); + + attr_schema.add_attributes(&json_data["attributes"]); + + // Check if the schema contains the expected keys and types + assert_eq!(attr_schema.get("int").unwrap().1, ColumnType::Int); + assert_eq!(attr_schema.get("uint").unwrap().1, ColumnType::UInt); + assert_eq!(attr_schema.get("bool").unwrap().1, ColumnType::Bool); + assert_eq!(attr_schema.get("float").unwrap().1, ColumnType::Float); + assert_eq!(attr_schema.get("string").unwrap().1, ColumnType::String); + assert_eq!(attr_schema.get("array").unwrap().1, ColumnType::Json); + assert_eq!(attr_schema.get("json").unwrap().1, ColumnType::Json); + + Ok(()) + } +} diff --git a/src/rust/src/writer/feature_writer.rs b/src/rust/src/writer/feature_writer.rs index fbcfc17..fed2561 100644 --- a/src/rust/src/writer/feature_writer.rs +++ b/src/rust/src/writer/feature_writer.rs @@ -2,6 +2,8 @@ use cjseq::CityJSONFeature; use crate::fcb_serde::fcb_serializer::*; +use super::attribute::AttributeSchema; + /// A writer that converts CityJSON features to FlatBuffers format /// /// This struct handles the serialization of CityJSON features into a binary @@ -11,6 +13,8 @@ pub struct FeatureWriter<'a> { city_feature: &'a CityJSONFeature, /// The FlatBuffers builder instance used for serialization fbb: flatbuffers::FlatBufferBuilder<'a>, + + attr_schema: AttributeSchema, } impl<'a> FeatureWriter<'a> { @@ -19,10 +23,14 @@ impl<'a> FeatureWriter<'a> { /// # Arguments /// /// * `city_feature` - A reference to the CityJSON feature to be serialized - pub fn new(city_feature: &'a CityJSONFeature) -> FeatureWriter<'a> { + pub fn new( + city_feature: &'a CityJSONFeature, + attr_schema: AttributeSchema, + ) -> FeatureWriter<'a> { FeatureWriter { city_feature, fbb: flatbuffers::FlatBufferBuilder::new(), + attr_schema, } } @@ -39,7 +47,7 @@ impl<'a> FeatureWriter<'a> { .city_feature .city_objects .iter() - .map(|(id, co)| to_fcb_city_object(&mut self.fbb, id, co)) + .map(|(id, co)| to_fcb_city_object(&mut self.fbb, id, co, &self.attr_schema)) .collect(); let cf_buf = to_fcb_city_feature( &mut self.fbb, diff --git a/src/rust/src/writer/header_writer.rs b/src/rust/src/writer/header_writer.rs index be2a228..685d4c6 100644 --- a/src/rust/src/writer/header_writer.rs +++ b/src/rust/src/writer/header_writer.rs @@ -1,10 +1,9 @@ -use crate::error::CityJSONError; -use crate::header_generated::{ - GeographicalExtent, Header, HeaderArgs, ReferenceSystem, ReferenceSystemArgs, Transform, Vector, -}; -use cjseq::{CityJSON, Metadata as CJMetadata, Transform as CjTransform}; +use crate::fcb_serializer::to_fcb_header; +use cjseq::CityJSON; use flatbuffers::FlatBufferBuilder; +use super::attribute::AttributeSchema; + /// Writer for converting CityJSON header information to FlatBuffers format pub struct HeaderWriter<'a> { /// FlatBuffers builder instance @@ -13,6 +12,8 @@ pub struct HeaderWriter<'a> { cj: CityJSON, /// Configuration options for header writing header_options: HeaderWriterOptions, + /// Attribute schema + attr_schema: AttributeSchema, } /// Configuration options for header writing process @@ -45,8 +46,12 @@ impl<'a> HeaderWriter<'a> { /// /// * `cj` - The CityJSON data to write /// * `header_options` - Optional configuration for the header writing process - pub fn new(cj: CityJSON, header_options: Option) -> HeaderWriter<'a> { - Self::new_with_options(header_options.unwrap_or_default(), cj) + pub fn new( + cj: CityJSON, + header_options: Option, + attr_schema: AttributeSchema, + ) -> HeaderWriter<'a> { + Self::new_with_options(header_options.unwrap_or_default(), cj, attr_schema) } /// Creates a new HeaderWriter with specific configuration @@ -55,13 +60,18 @@ impl<'a> HeaderWriter<'a> { /// /// * `options` - Configuration for the header writing process /// * `cj` - The CityJSON data to write - pub fn new_with_options(options: HeaderWriterOptions, cj: CityJSON) -> HeaderWriter<'a> { + pub fn new_with_options( + options: HeaderWriterOptions, + cj: CityJSON, + attr_schema: AttributeSchema, + ) -> HeaderWriter<'a> { let fbb = FlatBufferBuilder::new(); HeaderWriter { fbb, cj, header_options: options, + attr_schema, } } @@ -71,185 +81,13 @@ impl<'a> HeaderWriter<'a> { /// /// A size-prefixed FlatBuffer containing the serialized header pub fn finish_to_header(mut self) -> Vec { - let header = self.create_header(); + let header = to_fcb_header( + &mut self.fbb, + &self.cj, + self.header_options.header_metadata, + &self.attr_schema, + ); self.fbb.finish_size_prefixed(header, None); self.fbb.finished_data().to_vec() } - - /// Creates the header structure in FlatBuffers format - /// - /// # Panics - /// - /// Panics if required metadata fields are missing - fn create_header(&mut self) -> flatbuffers::WIPOffset> { - let metadata = self - .cj - .metadata - .as_ref() - .ok_or(CityJSONError::MissingField("metadata")) - .unwrap(); - let reference_system = Self::reference_system(&mut self.fbb, metadata); - let transform = Self::transform(&self.cj.transform); - let geographical_extent = metadata - .geographical_extent - .as_ref() - .map(Self::geographical_extent); - let features_count = self.header_options.header_metadata.features_count; - let header_args = HeaderArgs { - version: Some(self.fbb.create_string(&self.cj.version)), - transform: Some(&transform), - columns: None, - features_count, - geographical_extent: geographical_extent.as_ref(), - reference_system, - identifier: metadata - .identifier - .as_ref() - .map(|i| self.fbb.create_string(i)), - reference_date: metadata - .reference_date - .as_ref() - .map(|r| self.fbb.create_string(r)), - title: metadata.title.as_ref().map(|t| self.fbb.create_string(t)), - poc_contact_name: metadata - .point_of_contact - .as_ref() - .map(|poc| self.fbb.create_string(&poc.contact_name)), - poc_contact_type: metadata.point_of_contact.as_ref().and_then(|poc| { - poc.contact_type - .as_ref() - .map(|ct| self.fbb.create_string(ct)) - }), - poc_role: metadata - .point_of_contact - .as_ref() - .and_then(|poc| poc.role.as_ref().map(|r| self.fbb.create_string(r))), - poc_phone: metadata - .point_of_contact - .as_ref() - .and_then(|poc| poc.phone.as_ref().map(|p| self.fbb.create_string(p))), - poc_email: metadata - .point_of_contact - .as_ref() - .map(|poc| self.fbb.create_string(&poc.email_address)), - poc_website: metadata - .point_of_contact - .as_ref() - .and_then(|poc| poc.website.as_ref().map(|w| self.fbb.create_string(w))), - poc_address_thoroughfare_number: metadata.point_of_contact.as_ref().and_then(|poc| { - poc.address - .as_ref() - .map(|a| self.fbb.create_string(&a.thoroughfare_number.to_string())) - }), - poc_address_thoroughfare_name: metadata.point_of_contact.as_ref().map(|poc| { - self.fbb.create_string( - &poc.address - .as_ref() - .map(|a| a.thoroughfare_name.clone()) - .unwrap_or_default(), - ) - }), - poc_address_locality: metadata.point_of_contact.as_ref().map(|poc| { - self.fbb.create_string( - &poc.address - .as_ref() - .map(|a| a.locality.clone()) - .unwrap_or_default(), - ) - }), - poc_address_postcode: metadata.point_of_contact.as_ref().map(|poc| { - self.fbb.create_string( - &poc.address - .as_ref() - .map(|a| a.postal_code.clone()) - .unwrap_or_default(), - ) - }), - poc_address_country: metadata.point_of_contact.as_ref().map(|poc| { - self.fbb.create_string( - &poc.address - .as_ref() - .map(|a| a.country.clone()) - .unwrap_or_default(), - ) - }), - attributes: None, - }; - - Header::create(&mut self.fbb, &header_args) - } - - /// Converts CityJSON geographical extent to FlatBuffers format - /// - /// # Arguments - /// - /// * `geographical_extent` - Array of 6 values representing min/max coordinates - fn geographical_extent(geographical_extent: &[f64; 6]) -> GeographicalExtent { - let min = Vector::new( - geographical_extent[0], - geographical_extent[1], - geographical_extent[2], - ); - let max = Vector::new( - geographical_extent[3], - geographical_extent[4], - geographical_extent[5], - ); - GeographicalExtent::new(&min, &max) - } - - /// Converts CityJSON transform to FlatBuffers format - /// - /// # Arguments - /// - /// * `transform` - CityJSON transform data containing scale and translate values - fn transform(transform: &CjTransform) -> Transform { - let scale = Vector::new(transform.scale[0], transform.scale[1], transform.scale[2]); - let translate = Vector::new( - transform.translate[0], - transform.translate[1], - transform.translate[2], - ); - Transform::new(&scale, &translate) - } - - /// Creates a reference system entry in FlatBuffers format - /// - /// # Arguments - /// - /// * `fbb` - FlatBuffers builder - /// * `metadata` - CityJSON metadata containing reference system information - /// - /// # Returns - /// - /// Optional reference system offset in the FlatBuffer - fn reference_system( - fbb: &mut FlatBufferBuilder<'a>, - metadata: &CJMetadata, - ) -> Option>> { - metadata.reference_system.as_ref().map(|ref_sys| { - let authority = Some(fbb.create_string(&ref_sys.authority)); - - let version = ref_sys.version.parse::().unwrap_or_else(|e| { - println!("Failed to parse version: {}", e); - 0 - }); - let code = ref_sys.code.parse::().unwrap_or_else(|e| { - println!("Failed to parse code: {}", e); - 0 - }); - - let code_string = None; // TODO: implement code_string - - ReferenceSystem::create( - fbb, - &ReferenceSystemArgs { - authority, - version, - code, - code_string, - }, - ) - }) - } } diff --git a/src/rust/src/writer/mod.rs b/src/rust/src/writer/mod.rs index f27b2fd..3341cf3 100644 --- a/src/rust/src/writer/mod.rs +++ b/src/rust/src/writer/mod.rs @@ -1,11 +1,13 @@ -use crate::error::Result; use crate::MAGIC_BYTES; +use anyhow::Result; +use attribute::AttributeSchema; use cjseq::{CityJSON, CityJSONFeature}; use feature_writer::FeatureWriter; use header_writer::{HeaderWriter, HeaderWriterOptions}; use std::fs::File; use std::io::{BufWriter, Read, Seek, Write}; +pub mod attribute; pub mod feature_writer; pub mod geometry_encoderdecoder; pub mod header_writer; @@ -22,6 +24,8 @@ pub struct FcbWriter<'a> { header_writer: HeaderWriter<'a>, /// Optional writer for features feat_writer: Option>, + + attr_schema: AttributeSchema, } impl<'a> FcbWriter<'a> { @@ -40,13 +44,18 @@ impl<'a> FcbWriter<'a> { cj: CityJSON, header_option: Option, first_feature: Option<&'a CityJSONFeature>, + attr_schema: Option<&AttributeSchema>, ) -> Result { - let header_writer = HeaderWriter::new(cj, header_option); - let feat_writer = first_feature.map(FeatureWriter::new); + let owned_schema = AttributeSchema::new(); + let attr_schema = attr_schema.unwrap_or(&owned_schema); + + let header_writer = HeaderWriter::new(cj, header_option, attr_schema.clone()); // if attr_schema is None, instantiate an empty one + let feat_writer = first_feature.map(|feat| FeatureWriter::new(feat, attr_schema.clone())); Ok(Self { header_writer, feat_writer, tmpout: BufWriter::new(tempfile::tempfile()?), + attr_schema: attr_schema.clone(), }) } diff --git a/src/rust/tests/e2e.rs b/src/rust/tests/e2e.rs index 1b76e8c..29a301d 100644 --- a/src/rust/tests/e2e.rs +++ b/src/rust/tests/e2e.rs @@ -1,5 +1,6 @@ use anyhow::Result; use flatcitybuf::{ + attribute::{AttributeSchema, AttributeSchemaMethods}, fcb_deserializer, header_writer::{HeaderMetadata, HeaderWriterOptions}, read_cityjson_from_reader, CJType, CJTypeKind, FcbReader, FcbWriter, @@ -36,6 +37,14 @@ fn test_cityjson_serialization_cycle() -> Result<()> { let header_metadata = HeaderMetadata { features_count: original_cj_seq.features.len() as u64, }; + let mut attr_schema = AttributeSchema::new(); + for feature in original_cj_seq.features.iter() { + for (_, co) in feature.city_objects.iter() { + if let Some(attributes) = &co.attributes { + attr_schema.add_attributes(attributes); + } + } + } let mut fcb = FcbWriter::new( original_cj_seq.cj.clone(), Some(HeaderWriterOptions { @@ -43,6 +52,7 @@ fn test_cityjson_serialization_cycle() -> Result<()> { header_metadata, }), original_cj_seq.features.first(), + Some(&attr_schema), )?; fcb.write_feature()?; for feature in original_cj_seq.features.iter().skip(1) { @@ -65,8 +75,8 @@ fn test_cityjson_serialization_cycle() -> Result<()> { let feat_count = header.features_count(); let mut feat_num = 0; while let Ok(Some(feat_buf)) = reader.next() { - let feature = feat_buf.cur_feature(); - deserialized_features.push(fcb_deserializer::to_cj_feature(feature)?); + let feature = feat_buf.cur_cj_feature()?; + deserialized_features.push(feature); feat_num += 1; if feat_num >= feat_count { break; @@ -207,3 +217,78 @@ fn test_cityjson_serialization_cycle() -> Result<()> { Ok(()) } + +// #[test] +// fn test_attribute_serialization() -> Result<()> { +// let json_data = json!({ +// "attributes": { +// "int": -1, +// "uint": 1, +// "bool": true, +// "float": 1.0, +// "string": "hoge", +// "array": [1, 2, 3], +// "json": { +// "hoge": "fuga" +// }, +// "null": null +// } +// }); +// let attrs = &json_data["attributes"]; + +// // Test case 1: Using common schema +// { +// let mut fbb = FlatBufferBuilder::new(); +// let mut common_schema = AttributeSchema::new(); +// common_schema.add_attributes(attrs); + +// let columns = to_fcb_columns(&mut fbb, &common_schema); +// let header = Header::create( +// &mut fbb, +// &HeaderArgs { +// columns: Some(columns), +// ..Default::default() +// }, +// ); + +// fbb.finish(header, None); +// let finished_data = fbb.finished_data(); +// let header_buf = root_as_header(finished_data).unwrap(); + +// // let feature = + +// let encoded = encode_attributes_with_schema(attrs, &common_schema); + +// // Verify encoded data +// assert!(!encoded.is_empty()); + +// let decoded = decode_attributes(header_buf.columns().unwrap(), encoded.); +// assert_eq!(attrs, &decoded); +// } + +// // Test case 2: Using own schema +// { +// let mut fbb = FlatBufferBuilder::new(); +// let (offset, schema) = to_fcb_attribute(&mut fbb, attrs, &AttributeSchema::new()); + +// // Verify schema is returned for own schema case +// assert!(schema.is_some()); +// let schema = schema.unwrap(); + +// // Verify schema contains expected types +// assert_eq!(schema.get("int"), Some(&ColumnType::Int)); +// assert_eq!(schema.get("uint"), Some(&ColumnType::UInt)); +// assert_eq!(schema.get("bool"), Some(&ColumnType::Bool)); +// assert_eq!(schema.get("float"), Some(&ColumnType::Float)); +// assert_eq!(schema.get("string"), Some(&ColumnType::String)); +// assert_eq!(schema.get("json"), Some(&ColumnType::Json)); + +// // Get the encoded data +// let data = fbb.finished_data(); +// assert!(!data.is_empty()); +// // First 2 bytes should be 1 (true) for own schema +// assert_eq!(&data[0..2], &[1, 0]); +// } + +// Ok(()) +// } diff --git a/src/rust/tests/serde.rs b/src/rust/tests/serde.rs new file mode 100644 index 0000000..032653d --- /dev/null +++ b/src/rust/tests/serde.rs @@ -0,0 +1,105 @@ +use anyhow::Result; +use flatbuffers::FlatBufferBuilder; +use flatcitybuf::{ + attribute::{AttributeSchema, AttributeSchemaMethods}, + fcb_deserializer::decode_attributes, + fcb_serializer::{to_fcb_attribute, to_fcb_columns}, + root_as_city_feature, root_as_header, CityFeature, CityFeatureArgs, CityObject, CityObjectArgs, + Header, HeaderArgs, +}; +use serde_json::json; + +#[test] +fn test_attribute_serialization() -> Result<()> { + let json_data = json!({ + "attributes": { + "int": -10, + "uint": 5, + "bool": true, + "float": 1.0, + "string": "hoge", + "array": [1, 2, 3], + "json": { + "hoge": "fuga" + }, + "exceptional": null + } + }); + let schema = json!({ + "attributes": { + "int": -10, + "uint": 5, + "bool": true, + "float": 1.0, + "string": "hoge", + "array": [1, 2, 3], + "json": { + "hoge": "fuga" + }, + "exceptional": 1000 + } + }); + let attrs = &json_data["attributes"]; + let attr_schema = &schema["attributes"]; + + // Test case 1: Using common schema + { + let mut fbb = FlatBufferBuilder::new(); + let mut common_schema = AttributeSchema::new(); + common_schema.add_attributes(attr_schema); + + let columns = to_fcb_columns(&mut fbb, &common_schema); + let header = { + let version = fbb.create_string("1.0.0"); + Header::create( + &mut fbb, + &HeaderArgs { + version: Some(version), + columns: Some(columns), + ..Default::default() + }, + ) + }; + fbb.finish(header, None); + let finished_data = fbb.finished_data(); + let header_buf = root_as_header(finished_data).unwrap(); + let mut fbb = FlatBufferBuilder::new(); + let feature = { + let (attr_buf, _) = to_fcb_attribute(&mut fbb, attrs, &common_schema); + let city_object = { + let id = fbb.create_string("hoge"); + CityObject::create( + &mut fbb, + &CityObjectArgs { + id: Some(id), + attributes: Some(attr_buf), + ..Default::default() + }, + ) + }; + let objects = fbb.create_vector(&[city_object]); + let cf_id = fbb.create_string("hoge"); + CityFeature::create( + &mut fbb, + &CityFeatureArgs { + id: Some(cf_id), + objects: Some(objects), + ..Default::default() + }, + ) + }; + + fbb.finish(feature, None); + + let finished_data = fbb.finished_data(); + let feature_buf = root_as_city_feature(finished_data).unwrap(); + let attributes = feature_buf.objects().unwrap().get(0).attributes().unwrap(); + // Verify encoded data + assert!(!attributes.is_empty()); + + let decoded = decode_attributes(header_buf.columns().unwrap(), attributes); + assert_eq!(attrs, &decoded); + } + + Ok(()) +}