From 30978c8d5012decb31fa0f9fde2bfab393fb2ced Mon Sep 17 00:00:00 2001 From: shumon84 Date: Fri, 13 Dec 2024 13:17:15 +0900 Subject: [PATCH] wip --- engine/Cargo.lock | 155 +++++++++- engine/runtime/xml2-macro/Cargo.toml | 12 + engine/runtime/xml2-macro/src/lib.rs | 73 +++++ engine/runtime/xml2-macro/src/tuple.rs | 146 +++++++++ engine/runtime/xml2-macro/src/union.rs | 106 +++++++ engine/runtime/xml2/Cargo.toml | 19 ++ engine/runtime/xml2/src/lib.rs | 2 + engine/runtime/xml2/src/parser.rs | 59 ++++ engine/runtime/xml2/src/parser/all.rs | 41 +++ engine/runtime/xml2/src/parser/any.rs | 17 ++ .../runtime/xml2/src/parser/any_attribute.rs | 17 ++ engine/runtime/xml2/src/parser/attribute.rs | 80 +++++ .../xml2/src/parser/attribute_group.rs | 52 ++++ engine/runtime/xml2/src/parser/choice.rs | 25 ++ .../xml2/src/parser/complex_content.rs | 17 ++ .../runtime/xml2/src/parser/complex_type.rs | 82 +++++ engine/runtime/xml2/src/parser/constants.rs | 19 ++ engine/runtime/xml2/src/parser/element.rs | 180 +++++++++++ engine/runtime/xml2/src/parser/extension.rs | 91 ++++++ engine/runtime/xml2/src/parser/import.rs | 14 + engine/runtime/xml2/src/parser/list.rs | 31 ++ engine/runtime/xml2/src/parser/node_parser.rs | 49 +++ engine/runtime/xml2/src/parser/restriction.rs | 145 +++++++++ engine/runtime/xml2/src/parser/schema.rs | 39 +++ engine/runtime/xml2/src/parser/sequence.rs | 41 +++ .../runtime/xml2/src/parser/simple_content.rs | 17 ++ engine/runtime/xml2/src/parser/simple_type.rs | 34 +++ engine/runtime/xml2/src/parser/types.rs | 233 ++++++++++++++ engine/runtime/xml2/src/parser/union.rs | 79 +++++ engine/runtime/xml2/src/parser/utils.rs | 79 +++++ .../runtime/xml2/src/parser/xsd_elements.rs | 225 ++++++++++++++ engine/runtime/xml2/src/types.rs | 35 +++ engine/runtime/xml2/src/types/date.rs | 90 ++++++ engine/runtime/xml2/src/types/datetime.rs | 50 ++++ .../runtime/xml2/src/types/datetimestamp.rs | 40 +++ engine/runtime/xml2/src/types/decimal.rs | 31 ++ 
engine/runtime/xml2/src/types/duration.rs | 283 ++++++++++++++++++ engine/runtime/xml2/src/types/gday.rs | 83 +++++ engine/runtime/xml2/src/types/gmonth.rs | 83 +++++ engine/runtime/xml2/src/types/gmonthday.rs | 123 ++++++++ engine/runtime/xml2/src/types/gyear.rs | 102 +++++++ engine/runtime/xml2/src/types/gyearmonth.rs | 139 +++++++++ engine/runtime/xml2/src/types/integer.rs | 34 +++ .../xml2/src/types/negative_integer.rs | 39 +++ .../xml2/src/types/non_negative_integer.rs | 39 +++ .../xml2/src/types/non_positive_integer.rs | 39 +++ .../xml2/src/types/positive_integer.rs | 39 +++ engine/runtime/xml2/src/types/time.rs | 94 ++++++ engine/runtime/xml2/src/types/utils.rs | 30 ++ engine/runtime/xml2/src/types/yaserde.rs | 47 +++ engine/runtime/xml2/testcase/sample.xml | 23 ++ 51 files changed, 3615 insertions(+), 7 deletions(-) create mode 100644 engine/runtime/xml2-macro/Cargo.toml create mode 100644 engine/runtime/xml2-macro/src/lib.rs create mode 100644 engine/runtime/xml2-macro/src/tuple.rs create mode 100644 engine/runtime/xml2-macro/src/union.rs create mode 100644 engine/runtime/xml2/Cargo.toml create mode 100644 engine/runtime/xml2/src/lib.rs create mode 100644 engine/runtime/xml2/src/parser.rs create mode 100644 engine/runtime/xml2/src/parser/all.rs create mode 100644 engine/runtime/xml2/src/parser/any.rs create mode 100644 engine/runtime/xml2/src/parser/any_attribute.rs create mode 100644 engine/runtime/xml2/src/parser/attribute.rs create mode 100644 engine/runtime/xml2/src/parser/attribute_group.rs create mode 100644 engine/runtime/xml2/src/parser/choice.rs create mode 100644 engine/runtime/xml2/src/parser/complex_content.rs create mode 100644 engine/runtime/xml2/src/parser/complex_type.rs create mode 100644 engine/runtime/xml2/src/parser/constants.rs create mode 100644 engine/runtime/xml2/src/parser/element.rs create mode 100644 engine/runtime/xml2/src/parser/extension.rs create mode 100644 engine/runtime/xml2/src/parser/import.rs create mode 100644 
engine/runtime/xml2/src/parser/list.rs create mode 100644 engine/runtime/xml2/src/parser/node_parser.rs create mode 100644 engine/runtime/xml2/src/parser/restriction.rs create mode 100644 engine/runtime/xml2/src/parser/schema.rs create mode 100644 engine/runtime/xml2/src/parser/sequence.rs create mode 100644 engine/runtime/xml2/src/parser/simple_content.rs create mode 100644 engine/runtime/xml2/src/parser/simple_type.rs create mode 100644 engine/runtime/xml2/src/parser/types.rs create mode 100644 engine/runtime/xml2/src/parser/union.rs create mode 100644 engine/runtime/xml2/src/parser/utils.rs create mode 100644 engine/runtime/xml2/src/parser/xsd_elements.rs create mode 100644 engine/runtime/xml2/src/types.rs create mode 100644 engine/runtime/xml2/src/types/date.rs create mode 100644 engine/runtime/xml2/src/types/datetime.rs create mode 100644 engine/runtime/xml2/src/types/datetimestamp.rs create mode 100644 engine/runtime/xml2/src/types/decimal.rs create mode 100644 engine/runtime/xml2/src/types/duration.rs create mode 100644 engine/runtime/xml2/src/types/gday.rs create mode 100644 engine/runtime/xml2/src/types/gmonth.rs create mode 100644 engine/runtime/xml2/src/types/gmonthday.rs create mode 100644 engine/runtime/xml2/src/types/gyear.rs create mode 100644 engine/runtime/xml2/src/types/gyearmonth.rs create mode 100644 engine/runtime/xml2/src/types/integer.rs create mode 100644 engine/runtime/xml2/src/types/negative_integer.rs create mode 100644 engine/runtime/xml2/src/types/non_negative_integer.rs create mode 100644 engine/runtime/xml2/src/types/non_positive_integer.rs create mode 100644 engine/runtime/xml2/src/types/positive_integer.rs create mode 100644 engine/runtime/xml2/src/types/time.rs create mode 100644 engine/runtime/xml2/src/types/utils.rs create mode 100644 engine/runtime/xml2/src/types/yaserde.rs create mode 100644 engine/runtime/xml2/testcase/sample.xml diff --git a/engine/Cargo.lock b/engine/Cargo.lock index 4b9b16b16..c4837972f 100644 --- 
a/engine/Cargo.lock +++ b/engine/Cargo.lock @@ -501,6 +501,19 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" +[[package]] +name = "bigdecimal" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f850665a0385e070b64c38d2354e6c104c8479c59868d1e48a0c13ee2c7a1c1" +dependencies = [ + "autocfg", + "libm", + "num-bigint", + "num-integer", + "num-traits", +] + [[package]] name = "bincode" version = "1.3.3" @@ -1733,7 +1746,7 @@ checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" dependencies = [ "libc", "redox_users", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -2351,6 +2364,15 @@ dependencies = [ "thiserror 1.0.69", ] +[[package]] +name = "getopts" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14dbbfd5c71d70241ecf9e6f13737f7b5ce823821063188d7e46c41d371eebd5" +dependencies = [ + "unicode-width", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -2419,7 +2441,7 @@ dependencies = [ "gobject-sys", "libc", "system-deps 6.2.2", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -3502,6 +3524,16 @@ dependencies = [ "simple_asn1", ] +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +dependencies = [ + "winapi 0.2.8", + "winapi-build", +] + [[package]] name = "kinded" version = "0.3.0" @@ -4081,7 +4113,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" dependencies = [ "overload", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -6458,6 +6490,12 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"cbf4a6aa5f6d6888f39e980649f3ad6b666acdce1d78e95b8a2cb076e687ae30" +[[package]] +name = "roxmltree" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c20b6793b5c2fa6553b250154b78d6d0db37e72700ae35fad9387a46f487c97" + [[package]] name = "rsa" version = "0.9.6" @@ -6905,6 +6943,18 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_tokenstream" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64060d864397305347a78851c51588fd283767e7e7589829e8121d65512340f1" +dependencies = [ + "proc-macro2", + "quote", + "serde", + "syn 2.0.87", +] + [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -7194,7 +7244,7 @@ checksum = "b6e022d0b998abfe5c3782c1f03551a596269450ccd677ea51c56f8b214610e8" dependencies = [ "is-terminal", "slog", - "term", + "term 0.7.0", "thread_local", "time", ] @@ -7216,7 +7266,7 @@ dependencies = [ "slog-stdlog", "slog-term", "trackable", - "winapi", + "winapi 0.3.9", "windows-acl", ] @@ -7834,6 +7884,16 @@ dependencies = [ "utf-8", ] +[[package]] +name = "term" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2077e54d38055cf1ca0fd7933a2e00cd3ec8f6fed352b2a377f06dcdaaf3281" +dependencies = [ + "kernel32-sys", + "winapi 0.2.8", +] + [[package]] name = "term" version = "0.7.0" @@ -7842,7 +7902,17 @@ checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f" dependencies = [ "dirs-next", "rustversion", - "winapi", + "winapi 0.3.9", +] + +[[package]] +name = "text-diff" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "309238dd66f8bf11a20d015b727b926f294a13fcb8d56770bb984e7a22c43897" +dependencies = [ + "getopts", + "term 0.2.14", ] [[package]] @@ -9412,6 +9482,12 @@ version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c168940144dd21fd8046987c16a46a33d5fc84eec29ef9dcddc2ac9e31526b7c" 
+[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" + [[package]] name = "winapi" version = "0.3.9" @@ -9422,6 +9498,12 @@ dependencies = [ "winapi-x86_64-pc-windows-gnu", ] +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" + [[package]] name = "winapi-i686-pc-windows-gnu" version = "0.4.0" @@ -9488,7 +9570,7 @@ dependencies = [ "field-offset", "libc", "widestring", - "winapi", + "winapi 0.3.9", ] [[package]] @@ -9955,6 +10037,39 @@ dependencies = [ "rustix", ] +[[package]] +name = "xml-rs" +version = "0.8.23" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "af310deaae937e48a26602b730250b4949e125f468f11e6990be3e5304ddd96f" + +[[package]] +name = "xml2" +version = "0.1.0" +dependencies = [ + "Inflector", + "anyhow", + "bigdecimal", + "chrono", + "num-bigint", + "roxmltree", + "syn 2.0.87", + "text-diff", + "xml-rs", + "xml2-macro", + "yaserde", + "yaserde_derive", +] + +[[package]] +name = "xml2-macro" +version = "0.1.0" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "xxhash-rust" version = "0.8.12" @@ -10002,6 +10117,32 @@ dependencies = [ "linked-hash-map", ] +[[package]] +name = "yaserde" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bfa0d2b420fd005aa9b6f99f9584ebd964e6865d7ca787304cc1a3366c39231" +dependencies = [ + "log", + "xml-rs", +] + +[[package]] +name = "yaserde_derive" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f785831c0e09e0f1a83f917054fd59c088f6561db5b2a42c1c3e1687329325f" +dependencies = [ + "heck 0.5.0", + "log", + "proc-macro2", + "quote", + "serde", + "serde_tokenstream", + "syn 
2.0.87", + "xml-rs", +] + [[package]] name = "yoke" version = "0.7.5" diff --git a/engine/runtime/xml2-macro/Cargo.toml b/engine/runtime/xml2-macro/Cargo.toml new file mode 100644 index 000000000..7757a06a4 --- /dev/null +++ b/engine/runtime/xml2-macro/Cargo.toml @@ -0,0 +1,12 @@ +[package] +edition = "2018" +name = "xml2-macro" +version = "0.1.0" + +[dependencies] +proc-macro2 = "1" +quote = "1" +syn = "2" + +[lib] +proc-macro = true diff --git a/engine/runtime/xml2-macro/src/lib.rs b/engine/runtime/xml2-macro/src/lib.rs new file mode 100644 index 000000000..b7b288927 --- /dev/null +++ b/engine/runtime/xml2-macro/src/lib.rs @@ -0,0 +1,73 @@ +use proc_macro::TokenStream; +use quote::quote; +use syn::{parse_macro_input, DeriveInput}; + +mod tuple; +mod union; + +#[proc_macro_derive(UtilsTupleIo)] +pub fn tuple_serde(input: TokenStream) -> TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + tuple::serde(&ast) + .unwrap_or_else(|err| err.to_compile_error()) + .into() +} + +// Adds YaSerialize and YaDeserialize implementations for types that support FromStr and Display traits. +#[proc_macro_derive(UtilsDefaultSerde)] +pub fn default_serde(input: TokenStream) -> TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + + let struct_name = &ast.ident; + let struct_name_literal = &ast.ident.to_string(); + + let serde = quote! 
{ + impl ::yaserde::YaSerialize for #struct_name { + fn serialize( + &self, + writer: &mut ::yaserde::ser::Serializer, + ) -> ::std::result::Result<(), ::std::string::String> { + crate::types::yaserde::serialize( + self, + #struct_name_literal, + writer, |s| s.to_string(), + ) + } + + fn serialize_attributes( + &self, + attributes: ::std::vec::Vec<::xml::attribute::OwnedAttribute>, + namespace: ::xml::namespace::Namespace, + ) -> ::std::result::Result< + ( + ::std::vec::Vec<::xml::attribute::OwnedAttribute>, + ::xml::namespace::Namespace, + ), + ::std::string::String, + > { + Ok((attributes, namespace)) + } + } + + impl ::yaserde::YaDeserialize for #struct_name { + fn deserialize( + reader: &mut ::yaserde::de::Deserializer, + ) -> ::std::result::Result { + crate::types::yaserde::deserialize( + reader, + |s| #struct_name::from_str(s).map_err(|e| e.to_string()), + ) + } + } + }; + + serde.into() +} + +#[proc_macro_derive(UtilsUnionSerDe)] +pub fn union_serde(input: TokenStream) -> TokenStream { + let ast = parse_macro_input!(input as DeriveInput); + union::serde(&ast) + .unwrap_or_else(|err| err.to_compile_error()) + .into() +} diff --git a/engine/runtime/xml2-macro/src/tuple.rs b/engine/runtime/xml2-macro/src/tuple.rs new file mode 100644 index 000000000..6131d2279 --- /dev/null +++ b/engine/runtime/xml2-macro/src/tuple.rs @@ -0,0 +1,146 @@ +use proc_macro2::TokenStream; +use quote::quote; +use syn::spanned::Spanned; + +enum Type<'a> { + Simple(&'a syn::Path), + String(&'a syn::Path), + Struct(&'a syn::Path), + Vec(&'a syn::Path), +} + +pub fn serde(ast: &syn::DeriveInput) -> syn::Result { + let from_str = from_str(ast)?; + let display = display(ast)?; + + Ok(quote! { + #from_str + #display + }) +} + +fn from_str(ast: &syn::DeriveInput) -> syn::Result { + let convert = match extract_field_type(ast) { + Type::String(_) => quote! { s.to_string() }, + Type::Struct(ty) | Type::Simple(ty) => { + quote! 
{ <#ty as ::std::str::FromStr>::from_str(s).map_err(|e| e.to_string())? } + } + Type::Vec(subtype) => match Type::from_path(subtype) { + Type::String(subtype) | Type::Struct(subtype) | Type::Simple(subtype) => quote! { + s.split_whitespace() + .filter_map(|s| <#subtype as ::std::str::FromStr>::from_str(s).ok()) + .collect() + }, + _ => { + return Err(syn::Error::new( + subtype.span(), + "Not implemented for this subtype", + )) + } + }, + }; + + let struct_name = &ast.ident; + + Ok(quote! { + impl ::std::str::FromStr for #struct_name { + type Err = ::std::string::String; + + fn from_str(s: &::std::primitive::str) -> ::std::result::Result { + Ok(#struct_name(#convert)) + } + } + }) +} + +fn display(ast: &syn::DeriveInput) -> syn::Result { + let write = match extract_field_type(ast) { + Type::String(_) | Type::Simple(_) | Type::Struct(_) => quote! { + write!(f, "{}", self.0) + }, + Type::Vec(subtype) => match Type::from_path(subtype) { + Type::String(_) | Type::Simple(_) | Type::Struct(_) => quote! { + let mut it = self.0.iter(); + if let Some(val) = it.next() { + write!(f, "{}", val)?; + } + for val in it { + write!(f, " {}", val)?; + } + + Ok(()) + }, + _ => { + return Err(syn::Error::new( + subtype.span(), + "Not implemented for this subtype", + )) + } + }, + }; + + let struct_name = &ast.ident; + + Ok(quote! 
{ + impl std::fmt::Display for #struct_name { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + #write + } + } + }) +} + +impl Type<'_> { + pub fn from_path(path: &syn::Path) -> Type { + match path + .segments + .last() + .expect("Empty type") + .ident + .to_string() + .as_str() + { + "bool" | "i8" | "u8" | "i16" | "u16" | "i32" | "u32" | "i64" | "u64" | "f32" + | "f64" => Type::Simple(path), + "String" => Type::String(path), + "Vec" => Type::Vec( + extract_subtype(path.segments.last().expect("Missing subtype")) + .expect("Vec subtype not found"), + ), + _ => Type::Struct(path), + } + } +} + +fn extract_field_type(ast: &syn::DeriveInput) -> Type { + match &ast.data { + syn::Data::Struct(data_struct) => { + let field_path = extract_field_path(data_struct).expect("Bad field count or type"); + + Type::from_path(field_path) + } + _ => unimplemented!("Implemented only for structs"), + } +} + +fn extract_field_path(data_struct: &syn::DataStruct) -> Option<&syn::Path> { + if let syn::Fields::Unnamed(fields) = &data_struct.fields { + if let Some(field) = fields.unnamed.first() { + if let syn::Type::Path(path) = &field.ty { + return Some(&path.path); + } + } + } + + None +} + +fn extract_subtype(path: &syn::PathSegment) -> Option<&syn::Path> { + if let syn::PathArguments::AngleBracketed(args) = &path.arguments { + if let Some(syn::GenericArgument::Type(syn::Type::Path(path))) = args.args.last() { + return Some(&path.path); + } + } + + None +} diff --git a/engine/runtime/xml2-macro/src/union.rs b/engine/runtime/xml2-macro/src/union.rs new file mode 100644 index 000000000..37f0508f9 --- /dev/null +++ b/engine/runtime/xml2-macro/src/union.rs @@ -0,0 +1,106 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; + +pub fn serde(ast: &syn::DeriveInput) -> syn::Result { + let struct_name = &ast.ident; + let struct_name_literal = &ast.ident.to_string(); + + let variants = match &ast.data { + syn::Data::Enum(data_enum) => data_enum + .variants + .iter() 
+ .filter(|variant| &variant.ident.to_string() != "__Unknown__") + .map(|variant| { + let subtype = match &variant.fields { + syn::Fields::Unnamed(fields) => { + &fields + .unnamed + .first() + .ok_or_else(|| { + syn::Error::new_spanned( + fields, + "One unnamed field per variant is expected", + ) + })? + .ty + } + fields => { + return Err(syn::Error::new_spanned( + fields, + "Only unnamed fields are supported", + )); + } + }; + + Ok((&variant.ident, subtype)) + }) + .collect::>>()?, + _ => { + return Err(syn::Error::new( + Span::call_site(), + "This macro can only be used on enums", + )); + } + }; + + let ser_variants = variants + .iter() + .map(|(ident, _subtype)| { + quote! { + #struct_name::#ident(val) => val.to_string(), + } + }) + .collect::(); + + let de_variants = variants + .iter() + .map(|(ident, subtype)| { + quote! { + if let Ok(de) = s.parse::<#subtype>() { + return Ok(#struct_name::#ident(de)) + } + } + }) + .collect::(); + + Ok(quote! { + impl ::yaserde::YaSerialize for #struct_name { + fn serialize( + &self, + writer: &mut ::yaserde::ser::Serializer, + ) -> ::std::result::Result<(), ::std::string::String> { + crate::types::yaserde::serialize(self, #struct_name_literal, writer, |s| { + match s { + #ser_variants + #struct_name::__Unknown__(_) => "".to_string() + } + }) + } + + fn serialize_attributes( + &self, + attributes: ::std::vec::Vec<::xml::attribute::OwnedAttribute>, + namespace: ::xml::namespace::Namespace, + ) -> ::std::result::Result< + ( + Vec<::xml::attribute::OwnedAttribute>, + ::xml::namespace::Namespace, + ), + ::std::string::String, + > { + Ok((attributes, namespace)) + } + } + + impl ::yaserde::YaDeserialize for #struct_name { + fn deserialize( + reader: &mut ::yaserde::de::Deserializer, + ) -> ::std::result::Result { + crate::types::yaserde::deserialize(reader, |s| { + #de_variants + Ok(#struct_name::__Unknown__(s.to_string())) + }) + } + } + }) +} diff --git a/engine/runtime/xml2/Cargo.toml b/engine/runtime/xml2/Cargo.toml new file 
mode 100644 index 000000000..0555aa6f4 --- /dev/null +++ b/engine/runtime/xml2/Cargo.toml @@ -0,0 +1,19 @@ +[package] +edition = "2021" +name = "xml2" +version = "0.1.0" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[dependencies] +Inflector = "0.11" +anyhow = "1.0" +bigdecimal = "0.4" +chrono = "0.4" +num-bigint = "0.4" +roxmltree = "0.20" +syn = { version = "2", features = ["extra-traits", "full"] } +text-diff = "0.4" +xml-rs = "0.8" +xml2-macro = { path = "../xml2-macro" } +yaserde = "0.12" +yaserde_derive = "0.12" diff --git a/engine/runtime/xml2/src/lib.rs b/engine/runtime/xml2/src/lib.rs new file mode 100644 index 000000000..07295a2b4 --- /dev/null +++ b/engine/runtime/xml2/src/lib.rs @@ -0,0 +1,2 @@ +pub mod parser; +pub mod types; diff --git a/engine/runtime/xml2/src/parser.rs b/engine/runtime/xml2/src/parser.rs new file mode 100644 index 000000000..9519cbc2a --- /dev/null +++ b/engine/runtime/xml2/src/parser.rs @@ -0,0 +1,59 @@ +mod all; +mod any; +mod any_attribute; +mod attribute; +mod attribute_group; +mod choice; +mod complex_content; +mod complex_type; +pub mod constants; +mod element; +mod extension; +mod import; +mod list; +mod node_parser; +mod restriction; +pub mod schema; +mod sequence; +mod simple_content; +mod simple_type; +pub mod types; +mod union; +mod utils; +pub mod xsd_elements; + +use std::collections::HashMap; + +use crate::parser::{ + schema::parse_schema, + types::{RsEntity, RsFile}, +}; + +pub fn parse(text: &str) -> anyhow::Result { + let doc = roxmltree::Document::parse(text)?; + let root = doc.root(); + let mut map = HashMap::new(); + let schema = root + .children() + .filter(|e| e.is_element()) + .last() + .ok_or(anyhow::anyhow!("No schema found"))?; + let schema_rs = parse_schema(&schema); + for ty in &schema_rs.types { + if let RsEntity::Struct(st) = ty { + map.extend(st.get_types_map()); + } + } + for ag in &schema_rs.attribute_groups { + if let RsEntity::Struct(st) = ag { 
+ map.extend(st.get_types_map()); + } + } + for ty in &schema_rs.types { + if let RsEntity::Struct(st) = ty { + st.extend_base(&map); + st.extend_attribute_group(&map); + } + } + Ok(schema_rs) +} diff --git a/engine/runtime/xml2/src/parser/all.rs b/engine/runtime/xml2/src/parser/all.rs new file mode 100644 index 000000000..04d3e9bfe --- /dev/null +++ b/engine/runtime/xml2/src/parser/all.rs @@ -0,0 +1,41 @@ +use std::cell::RefCell; + +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::{RsEntity, Struct, StructField, TypeModifier}, + utils::{enum_to_field, get_documentation, get_parent_name}, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_all(node: &Node, parent: &Node) -> RsEntity { + let name = get_parent_name(node); + RsEntity::Struct(Struct { + name: name.into(), + comment: get_documentation(parent), + subtypes: vec![], + fields: RefCell::new(elements_to_fields(node, name)), + ..Default::default() + }) +} + +fn elements_to_fields(choice: &Node, parent_name: &str) -> Vec { + choice + .children() + .filter(|n| n.is_element() && n.xsd_type() != ElementType::Annotation) + .map(|n| match parse_node(&n, choice) { + RsEntity::StructField(mut sf) => { + if sf.type_name.ends_with(parent_name) { + sf.type_modifiers.push(TypeModifier::Recursive) + } + sf + } + RsEntity::Enum(mut en) => { + en.name = format!("{}Choice", parent_name); + enum_to_field(en) + } + _ => unreachable!("\nError: {:?}\n{:?}", n, parse_node(&n, choice)), + }) + .collect() +} diff --git a/engine/runtime/xml2/src/parser/any.rs b/engine/runtime/xml2/src/parser/any.rs new file mode 100644 index 000000000..b73cbcb9f --- /dev/null +++ b/engine/runtime/xml2/src/parser/any.rs @@ -0,0 +1,17 @@ +use roxmltree::Node; + +use crate::parser::{ + types::{RsEntity, StructField, StructFieldSource, TypeModifier}, + utils::get_documentation, +}; + +pub fn parse_any(node: &Node) -> RsEntity { + RsEntity::StructField(StructField { + name: "any".to_string(), + type_name: 
"String".to_string(), + comment: get_documentation(node), + source: StructFieldSource::Element, + type_modifiers: vec![TypeModifier::Empty], + ..Default::default() + }) +} diff --git a/engine/runtime/xml2/src/parser/any_attribute.rs b/engine/runtime/xml2/src/parser/any_attribute.rs new file mode 100644 index 000000000..b3a89f5f7 --- /dev/null +++ b/engine/runtime/xml2/src/parser/any_attribute.rs @@ -0,0 +1,17 @@ +use roxmltree::Node; + +use crate::parser::{ + types::{RsEntity, StructField, StructFieldSource, TypeModifier}, + utils::get_documentation, +}; + +pub fn parse_any_attribute(node: &Node) -> RsEntity { + RsEntity::StructField(StructField { + name: "any_attribute".to_string(), + type_name: "String".to_string(), + comment: get_documentation(node), + source: StructFieldSource::Attribute, + type_modifiers: vec![TypeModifier::Empty], + ..Default::default() + }) +} diff --git a/engine/runtime/xml2/src/parser/attribute.rs b/engine/runtime/xml2/src/parser/attribute.rs new file mode 100644 index 000000000..85ba7f37e --- /dev/null +++ b/engine/runtime/xml2/src/parser/attribute.rs @@ -0,0 +1,80 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::{Alias, RsEntity, Struct, StructField, StructFieldSource, TypeModifier}, + utils::get_documentation, + xsd_elements::{ElementType, UseType, XsdNode}, +}; + +pub fn parse_attribute(node: &Node, parent: &Node) -> RsEntity { + if parent.xsd_type() == ElementType::Schema { + return parse_global_attribute(node); + } + + let name = node + .attr_name() + .or_else(|| node.attr_ref()) + .expect("All attributes have name or ref") + .to_string(); + + let type_name = node + .attr_type() + .or_else(|| node.attr_ref()) + .unwrap_or("String") + .to_string(); + + let type_modifier = match node.attr_use() { + UseType::Optional => TypeModifier::Option, + UseType::Prohibited => TypeModifier::Empty, + UseType::Required => TypeModifier::None, + }; + + RsEntity::StructField(StructField { + type_name, + comment: 
get_documentation(node), + subtypes: vec![], + name, + source: StructFieldSource::Attribute, + type_modifiers: vec![type_modifier], + }) +} + +fn parse_global_attribute(node: &Node) -> RsEntity { + if let Some(reference) = node.attr_ref() { + return RsEntity::Alias(Alias { + name: reference.to_string(), + original: reference.to_string(), + comment: get_documentation(node), + ..Default::default() + }); + } + + let name = node + .attr_name() + .unwrap_or_else(|| panic!("Name attribute required. {:?}", node)); + + if let Some(ty) = node.attr_type() { + return RsEntity::Alias(Alias { + name: name.to_string(), + original: ty.to_string(), + comment: get_documentation(node), + ..Default::default() + }); + } + + if let Some(content) = node + .children() + .filter(|n| n.is_element() && n.xsd_type() == ElementType::SimpleType) + .last() + { + let mut entity = parse_node(&content, node); + entity.set_name(name); + return entity; + } + + RsEntity::Struct(Struct { + name: name.to_string(), + ..Default::default() + }) +} diff --git a/engine/runtime/xml2/src/parser/attribute_group.rs b/engine/runtime/xml2/src/parser/attribute_group.rs new file mode 100644 index 000000000..8fb2f80c5 --- /dev/null +++ b/engine/runtime/xml2/src/parser/attribute_group.rs @@ -0,0 +1,52 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::{Alias, RsEntity, Struct, StructField}, + utils::get_documentation, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_attribute_group(node: &Node, parent: &Node) -> RsEntity { + if parent.xsd_type() == ElementType::Schema { + return parse_global_attribute_group(node); + } + + let reference = node + .attr_ref() + .expect("Non-global attributeGroups must be references.") + .to_string(); + + RsEntity::Alias(Alias { + name: reference.to_string(), + original: reference, + comment: get_documentation(node), + ..Default::default() + }) +} + +fn parse_global_attribute_group(node: &Node) -> RsEntity { + let name = node + .attr_name() + 
.unwrap_or_else(|| panic!("Name attribute required. {:?}", node)); + + let fields = attributes_to_fields(node); + + RsEntity::Struct(Struct { + name: name.to_string(), + fields: std::cell::RefCell::new(fields), + ..Default::default() + }) +} + +pub fn attributes_to_fields(node: &Node) -> Vec { + node.children() + .filter(|n| { + n.xsd_type() == ElementType::Attribute || n.xsd_type() == ElementType::AnyAttribute + }) + .map(|n| match parse_node(&n, node) { + RsEntity::StructField(sf) => sf, + _ => unreachable!("Invalid attribute parsing: {:?}", n), + }) + .collect() +} diff --git a/engine/runtime/xml2/src/parser/choice.rs b/engine/runtime/xml2/src/parser/choice.rs new file mode 100644 index 000000000..f28661304 --- /dev/null +++ b/engine/runtime/xml2/src/parser/choice.rs @@ -0,0 +1,25 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::{Enum, EnumSource, RsEntity}, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_choice(choice: &Node) -> RsEntity { + let enum_cases = choice + .children() + .filter(|n| n.is_element() && n.xsd_type() == ElementType::Element) + .map(|n| match parse_node(&n, choice) { + RsEntity::EnumCase(case) => case, + _ => unreachable!("Elements in choice must be a enum variants"), + }) + .collect(); + + RsEntity::Enum(Enum { + cases: enum_cases, + type_name: "String".to_string(), + source: EnumSource::Choice, + ..Default::default() + }) +} diff --git a/engine/runtime/xml2/src/parser/complex_content.rs b/engine/runtime/xml2/src/parser/complex_content.rs new file mode 100644 index 000000000..641e2457c --- /dev/null +++ b/engine/runtime/xml2/src/parser/complex_content.rs @@ -0,0 +1,17 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::RsEntity, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_complex_content(node: &Node) -> RsEntity { + let content = node + .children() + .filter(|n| n.is_element() && n.xsd_type() != ElementType::Annotation) + .last() + 
.expect("Content in complexContent required"); + + parse_node(&content, node) +} diff --git a/engine/runtime/xml2/src/parser/complex_type.rs b/engine/runtime/xml2/src/parser/complex_type.rs new file mode 100644 index 000000000..3b2715593 --- /dev/null +++ b/engine/runtime/xml2/src/parser/complex_type.rs @@ -0,0 +1,82 @@ +use std::cell::RefCell; + +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::{RsEntity, Struct, StructField, StructFieldSource}, + utils::{ + attribute_groups_to_aliases, attributes_to_fields, get_documentation, get_parent_name, + }, + xsd_elements::{ElementType, XsdNode}, +}; + +const AVAILABLE_CONTENT_TYPES: [ElementType; 6] = [ + ElementType::All, //No in ONVIF + ElementType::Choice, + ElementType::ComplexContent, + ElementType::Group, //No in ONVIF + ElementType::Sequence, + ElementType::SimpleContent, +]; + +pub fn parse_complex_type(node: &Node, parent: &Node) -> RsEntity { + let name = if parent.xsd_type() == ElementType::Schema { + node.attr_name() + .expect("Name required if the complexType element is a child of the schema element") + } else { + get_parent_name(node) + }; + + let mut fields = attributes_to_fields(node); + + let content = node + .children() + .filter(|n| n.is_element() && AVAILABLE_CONTENT_TYPES.contains(&n.xsd_type())) + .last(); + + if content.is_none() + || content + .unwrap() + .children() + .filter(|n| n.is_element()) + .count() + == 0 + { + // attribute のみの場合 + return RsEntity::Struct(Struct { + fields: RefCell::new(fields), + attribute_groups: RefCell::new(attribute_groups_to_aliases(node)), + comment: get_documentation(node), + subtypes: vec![], + name: name.to_string(), + }); + } + let content_node = content.unwrap(); + + let mut res = parse_node(&content_node, node); + match &mut res { + RsEntity::Struct(st) => { + st.fields.borrow_mut().append(&mut fields); + st.name = name.to_string(); + } + RsEntity::Enum(en) => { + en.name = format!("{}Choice", name); + fields.push(StructField { 
+ name: en.name.clone(), + type_name: en.name.clone(), + source: StructFieldSource::Choice, + ..Default::default() + }); + en.subtypes = vec![RsEntity::Struct(Struct { + name: name.to_string(), + subtypes: vec![], + comment: get_documentation(node), + fields: RefCell::new(fields), + attribute_groups: RefCell::new(attribute_groups_to_aliases(node)), + })]; + } + _ => (), + }; + res +} diff --git a/engine/runtime/xml2/src/parser/constants.rs b/engine/runtime/xml2/src/parser/constants.rs new file mode 100644 index 000000000..010812755 --- /dev/null +++ b/engine/runtime/xml2/src/parser/constants.rs @@ -0,0 +1,19 @@ +pub mod tag { + pub const BASE: &str = "__base__"; +} + +pub mod attribute { + pub const NAME: &str = "name"; + pub const TYPE: &str = "type"; + pub const REF: &str = "ref"; + pub const VALUE: &str = "value"; + pub const ITEM_TYPE: &str = "itemType"; + pub const BASE: &str = "base"; + pub const USE: &str = "use"; + pub const TARGET_NAMESPACE: &str = "targetNamespace"; + pub const NAMESPACE: &str = "namespace"; + pub const SCHEMA_LOCATION: &str = "schemaLocation"; + pub const MIN_OCCURS: &str = "minOccurs"; + pub const MAX_OCCURS: &str = "maxOccurs"; + pub const MEMBER_TYPES: &str = "memberTypes"; +} diff --git a/engine/runtime/xml2/src/parser/element.rs b/engine/runtime/xml2/src/parser/element.rs new file mode 100644 index 000000000..fff968924 --- /dev/null +++ b/engine/runtime/xml2/src/parser/element.rs @@ -0,0 +1,180 @@ +use roxmltree::Node; + +use crate::parser::{ + constants::attribute, + node_parser::parse_node, + types::{ + Alias, EnumCase, EnumSource, RsEntity, Struct, StructField, StructFieldSource, TypeModifier, + }, + utils::get_documentation, + xsd_elements::{max_occurs, min_occurs, ElementType, MaxOccurs, XsdNode}, +}; + +const SUPPORTED_CONTENT_TYPES: [ElementType; 2] = + [ElementType::SimpleType, ElementType::ComplexType]; + +pub fn parse_element(node: &Node, parent: &Node) -> RsEntity { + match parent.xsd_type() { + ElementType::Schema => 
parse_global_element(node), + ElementType::All => parse_field_of_sequence(node, parent), + ElementType::Sequence => parse_field_of_sequence(node, parent), + ElementType::Choice => parse_case_of_choice(node), + _ => element_default(node), + } +} + +fn element_default(node: &Node) -> RsEntity { + let ty = node.attr_type().unwrap_or("UNSUPPORTED"); + RsEntity::Alias(Alias { + name: "UNSUPPORTED".into(), + original: ty.into(), + comment: get_documentation(node), + subtypes: vec![], + }) +} + +fn parse_case_of_choice(element: &Node) -> RsEntity { + if element.has_attribute(attribute::REF) { + let ref_attr = element.attr_ref().unwrap(); + + return RsEntity::EnumCase(EnumCase { + name: ref_attr.to_string(), + value: String::default(), + type_name: Some(ref_attr.to_string()), + comment: get_documentation(element), + type_modifiers: vec![element_modifier(element)], + source: EnumSource::Choice, + }); + } + + let name = element.attr_name().unwrap_or("UNSUPPORTED_ELEMENT_NAME"); + + if element.has_attribute(attribute::TYPE) { + return RsEntity::EnumCase(EnumCase { + name: name.to_string(), + value: String::default(), + type_name: Some(element.attr_type().unwrap().to_string()), + comment: get_documentation(element), + type_modifiers: vec![element_modifier(element)], + source: EnumSource::Choice, + }); + } + + RsEntity::EnumCase(EnumCase { + name: name.to_string(), + value: String::default(), + type_name: None, + comment: get_documentation(element), + type_modifiers: vec![element_modifier(element)], + source: EnumSource::Choice, + }) +} + +fn parse_field_of_sequence(node: &Node, _: &Node) -> RsEntity { + let name = node + .attr_name() + .unwrap_or_else(|| node.attr_ref().unwrap_or("UNSUPPORTED_ELEMENT_NAME")) + .to_string(); + + if node.has_attribute(attribute::TYPE) || node.has_attribute(attribute::REF) { + let type_name = node + .attr_type() + .unwrap_or_else(|| node.attr_ref().unwrap_or("String")) + .to_string(); + + return RsEntity::StructField(StructField { + name, + 
type_name, + comment: get_documentation(node), + source: StructFieldSource::Element, + type_modifiers: vec![element_modifier(node)], + ..Default::default() + }); + } + + let content_node = node + .children() + .filter(|n| SUPPORTED_CONTENT_TYPES.contains(&n.xsd_type())) + .last() + .unwrap_or_else(|| { + panic!( + "Must have content if no 'type' or 'ref' attribute: {:?}", + node + ) + }); + + let mut field_type = parse_node(&content_node, node); + + field_type.set_name(format!("{}Type", name).as_str()); + + RsEntity::StructField(StructField { + name, + type_name: field_type.name().to_string(), + comment: get_documentation(node), + subtypes: vec![field_type], + source: StructFieldSource::Element, + type_modifiers: vec![element_modifier(node)], + }) +} + +fn parse_global_element(node: &Node) -> RsEntity { + let name = node + .attr_name() + .expect("Name required if the element is a child of the schema"); + + if node.has_attribute(attribute::TYPE) { + return RsEntity::Alias(Alias { + name: name.into(), + original: node.attr_type().unwrap().into(), + comment: get_documentation(node), + subtypes: vec![], + }); + } + + let content_node = node + .children() + .filter(|n| SUPPORTED_CONTENT_TYPES.contains(&n.xsd_type())) + .last(); + + if let Some(content) = content_node { + let mut content_entity = parse_node(&content, node); + content_entity.set_name(name); + return content_entity; + } + + RsEntity::Struct(Struct { + name: name.to_string(), + comment: get_documentation(node), + ..Default::default() + }) +} + +pub fn element_modifier(node: &Node) -> TypeModifier { + let min = min_occurs(node); + let max = max_occurs(node); + match min { + 0 => match max { + MaxOccurs::None => TypeModifier::Option, + MaxOccurs::Unbounded => TypeModifier::Array, + MaxOccurs::Bounded(val) => { + if val > 1 { + TypeModifier::Array + } else { + TypeModifier::Option /* minOccurs=0 with an explicit maxOccurs<=1 is still optional, same as the MaxOccurs::None arm */ + } + } + }, + 1 => match max { + MaxOccurs::None => TypeModifier::None, + MaxOccurs::Unbounded => TypeModifier::Array, + 
MaxOccurs::Bounded(val) => { + if val > 1 { + TypeModifier::Array + } else { + TypeModifier::None + } + } + }, + _ => TypeModifier::Array, + } +} diff --git a/engine/runtime/xml2/src/parser/extension.rs b/engine/runtime/xml2/src/parser/extension.rs new file mode 100644 index 000000000..e62bd5c4b --- /dev/null +++ b/engine/runtime/xml2/src/parser/extension.rs @@ -0,0 +1,91 @@ +use std::cell::RefCell; + +use roxmltree::Node; + +use crate::parser::{ + constants::{attribute, tag}, + node_parser::parse_node, + types::{RsEntity, Struct, StructField, StructFieldSource}, + utils::{attribute_groups_to_aliases, attributes_to_fields, get_base, get_documentation}, + xsd_elements::{ElementType, ExtensionType, XsdNode}, +}; + +const AVAILABLE_CONTENT_TYPES: [ElementType; 6] = [ + ElementType::All, + ElementType::Attribute, + ElementType::AttributeGroup, + ElementType::Choice, + ElementType::Group, + ElementType::Sequence, +]; + +pub fn parse_extension(node: &Node, _: &Node) -> RsEntity { + use ElementType::Extension; + match node.xsd_type() { + Extension(ExtensionType::SimpleContent) => simple_content_extension(node), + Extension(ExtensionType::ComplexContent) => complex_content_extension(node), + _ => unreachable!("Invalid extension node: {:#?}", node), + } +} + +fn simple_content_extension(node: &Node) -> RsEntity { + let base = get_base(node); + let mut fields = attributes_to_fields(node); + + fields.push(StructField { + name: tag::BASE.to_string(), + type_name: base.to_string(), + comment: get_documentation(node), + source: StructFieldSource::Base, + ..Default::default() + }); + + RsEntity::Struct(Struct { + name: String::default(), + subtypes: vec![], + comment: get_documentation(node), + fields: RefCell::new(fields), + attribute_groups: RefCell::new(attribute_groups_to_aliases(node)), + }) +} + +fn complex_content_extension(node: &Node) -> RsEntity { + let base = node + .attribute(attribute::BASE) + .expect("The base value is required"); + + let mut fields = 
attributes_to_fields(node); + + fields.push(StructField { + name: tag::BASE.to_string(), + type_name: base.to_string(), + comment: get_documentation(node), + source: StructFieldSource::Base, + ..Default::default() + }); + + let content = node + .children() + .filter(|n| { + n.is_element() + && n.xsd_type() != ElementType::Attribute + && AVAILABLE_CONTENT_TYPES.contains(&n.xsd_type()) + }) + .last(); + + if let Some(cont) = content { + let mut res = parse_node(&cont, node); + if let RsEntity::Struct(s) = &mut res { + s.fields.borrow_mut().append(&mut fields); + s.comment = get_documentation(node); + return res; + } + } + + RsEntity::Struct(Struct { + comment: get_documentation(node), + fields: RefCell::new(fields), + attribute_groups: RefCell::new(attribute_groups_to_aliases(node)), + ..Default::default() + }) +} diff --git a/engine/runtime/xml2/src/parser/import.rs b/engine/runtime/xml2/src/parser/import.rs new file mode 100644 index 000000000..c19d9937c --- /dev/null +++ b/engine/runtime/xml2/src/parser/import.rs @@ -0,0 +1,14 @@ +use roxmltree::Node; + +use crate::parser::{ + constants::attribute, + types::{Import, RsEntity}, +}; + +pub fn parse_import(node: &Node) -> RsEntity { + RsEntity::Import(Import { + name: node.attribute(attribute::NAMESPACE).unwrap_or("").into(), + location: node.attribute(attribute::SCHEMA_LOCATION).unwrap_or("").into(), + comment: None, + }) +} diff --git a/engine/runtime/xml2/src/parser/list.rs b/engine/runtime/xml2/src/parser/list.rs new file mode 100644 index 000000000..a5db2c152 --- /dev/null +++ b/engine/runtime/xml2/src/parser/list.rs @@ -0,0 +1,31 @@ +use roxmltree::Node; + +use crate::parser::{ + constants::attribute, + node_parser::parse_node, + types::{RsEntity, TupleStruct, TypeModifier}, + utils::find_child, +}; + +pub fn parse_list(list: &Node) -> RsEntity { + let mut result = match list.attribute(attribute::ITEM_TYPE) { + Some(item_type) => TupleStruct { type_name: item_type.to_string(), ..Default::default() }, + None => 
{ + let nested_simple_type = find_child(list, "simpleType").expect( + "itemType not allowed if the content contains a simpleType element. Otherwise, required." + ); + + match parse_node(&nested_simple_type, list) { + RsEntity::Enum(en) => TupleStruct { + type_name: en.name.clone(), + subtypes: vec![RsEntity::Enum(en)], + ..Default::default() + }, + RsEntity::TupleStruct(ts) => ts, + _ => unreachable!(), + } + } + }; + result.type_modifiers.push(TypeModifier::Array); + RsEntity::TupleStruct(result) +} diff --git a/engine/runtime/xml2/src/parser/node_parser.rs b/engine/runtime/xml2/src/parser/node_parser.rs new file mode 100644 index 000000000..2cc021951 --- /dev/null +++ b/engine/runtime/xml2/src/parser/node_parser.rs @@ -0,0 +1,49 @@ +use roxmltree::Node; + +use crate::parser::{ + all::parse_all, + any::parse_any, + any_attribute::parse_any_attribute, + attribute::parse_attribute, + attribute_group::parse_attribute_group, + choice::parse_choice, + complex_content::parse_complex_content, + complex_type::parse_complex_type, + element::parse_element, + extension::parse_extension, + import::parse_import, + list::parse_list, + restriction::parse_restriction, + sequence::parse_sequence, + simple_content::parse_simple_content, + simple_type::parse_simple_type, + types::RsEntity, + union::parse_union, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_node(node: &Node, parent: &Node) -> RsEntity { + use ElementType::*; + + match node.xsd_type() { + All => parse_all(node, parent), + Any => parse_any(node), + AnyAttribute => parse_any_attribute(node), + Attribute => parse_attribute(node, parent), + AttributeGroup => parse_attribute_group(node, parent), + Choice => parse_choice(node), + ComplexContent => parse_complex_content(node), + ComplexType => parse_complex_type(node, parent), + Element => parse_element(node, parent), + Extension(_) => parse_extension(node, parent), + Import | Include => parse_import(node), + List => parse_list(node), + Restriction(_) => 
parse_restriction(node, parent), + Sequence => parse_sequence(node, parent), + SimpleContent => parse_simple_content(node), + SimpleType => parse_simple_type(node, parent), + Union => parse_union(node), + + _ => unreachable!("Unsupported node:\n {:?}\nparent = {:?}\n", node, node.parent()), + } +} diff --git a/engine/runtime/xml2/src/parser/restriction.rs b/engine/runtime/xml2/src/parser/restriction.rs new file mode 100644 index 000000000..cba3a85b5 --- /dev/null +++ b/engine/runtime/xml2/src/parser/restriction.rs @@ -0,0 +1,145 @@ +use std::cell::RefCell; + +use roxmltree::Node; + +use crate::parser::{ + constants::tag, + node_parser::parse_node, + types::{ + Enum, EnumCase, EnumSource, Facet, RsEntity, Struct, StructField, StructFieldSource, + TupleStruct, + }, + utils::{ + attribute_groups_to_aliases, attributes_to_fields, get_base, get_documentation, + get_parent_name, + }, + xsd_elements::{ElementType, FacetType, RestrictionType, XsdNode}, +}; + +const AVAILABLE_CONTENT_TYPES: [ElementType; 7] = [ + ElementType::All, // Not presented in ONVIF + ElementType::AnyAttribute, + ElementType::Attribute, + ElementType::AttributeGroup, // Not presented in ONVIF + ElementType::Choice, // Not presented in ONVIF + ElementType::Group, // Not presented in ONVIF + ElementType::Sequence, // Not presented in ONVIF +]; + +pub fn parse_restriction(node: &Node, _: &Node) -> RsEntity { + use ElementType::Restriction; + match node.xsd_type() { + Restriction(RestrictionType::SimpleType) => simple_type_restriction(node), + Restriction(RestrictionType::SimpleContent) => simple_content_restriction(node), + Restriction(RestrictionType::ComplexContent) => complex_content_restriction(node), + _ => unreachable!("Invalid restriction node: {:#?}", node), + } +} + +fn simple_type_restriction(node: &Node) -> RsEntity { + let base = get_base(node); + let facets = facets(node); + + if is_simple_enumerations(node) { + let cases = cases(facets.as_ref()); + if !cases.is_empty() { + return 
RsEntity::Enum(Enum { + name: format!("{}Enum", get_parent_name(node)), + cases, + type_name: base.to_string(), + source: EnumSource::Restriction, + ..Default::default() + }); + } + }; + + RsEntity::TupleStruct(TupleStruct { + type_name: base.to_string(), + facets, + ..Default::default() + }) +} + +fn simple_content_restriction(node: &Node) -> RsEntity { + unimplemented!("\n{:?}\n", node) +} + +fn complex_content_restriction(node: &Node) -> RsEntity { + let base = get_base(node); + let mut fields = attributes_to_fields(node); + + fields.push(StructField { + name: tag::BASE.to_string(), + type_name: base.to_string(), + comment: get_documentation(node), + source: StructFieldSource::Base, + ..Default::default() + }); + + let content = node + .children() + .filter(|n| { + n.is_element() + && n.xsd_type() != ElementType::Attribute + && AVAILABLE_CONTENT_TYPES.contains(&n.xsd_type()) + }) + .last(); + + if let Some(cont) = content { + let mut res = parse_node(&cont, node); + if let RsEntity::Struct(s) = &mut res { + s.comment = get_documentation(node); + s.fields.borrow_mut().append(&mut fields); + return res; + } + } + + RsEntity::Struct(Struct { + comment: get_documentation(node), + fields: RefCell::new(fields), + attribute_groups: RefCell::new(attribute_groups_to_aliases(node)), + ..Default::default() + }) +} + +fn facets(node: &Node) -> Vec { + node.children() + .filter_map(|n| match n.xsd_type() { + ElementType::Facet(x) => Some(Facet { + facet_type: x, + comment: get_documentation(&n), + }), + _ => None, + }) + .collect() +} + +fn cases(facets: &[Facet]) -> Vec { + facets + .iter() + .filter_map(|f| match &f.facet_type { + FacetType::Enumeration(value) => Some(EnumCase { + comment: f.comment.clone(), + name: value.clone(), + value: value.clone(), + type_name: None, + type_modifiers: vec![], + source: EnumSource::Restriction, + }), + _ => None, + }) + .collect() +} + +fn is_simple_enumerations(node: &Node) -> bool { + node.children() + .filter(|n| 
matches!(n.xsd_type(), ElementType::Facet(FacetType::Enumeration(_)))) + .all(|n| is_simple_enumeration(&n)) +} + +fn is_simple_enumeration(node: &Node) -> bool { + let val = node + .attr_value() + .expect("Value required for xsd:enumeration"); + !val.is_empty() && val.chars().all(|c| c.is_alphanumeric() || c == '-') +} diff --git a/engine/runtime/xml2/src/parser/schema.rs b/engine/runtime/xml2/src/parser/schema.rs new file mode 100644 index 000000000..e70189f67 --- /dev/null +++ b/engine/runtime/xml2/src/parser/schema.rs @@ -0,0 +1,39 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::RsFile, + utils::target_namespace, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_schema<'input>(schema: &Node<'_, 'input>) -> RsFile<'input> { + let mut xsd_namespaces = schema + .namespaces() + .filter(|namespace| namespace.uri() == "http://www.w3.org/2001/XMLSchema"); + + RsFile { + name: "".into(), + namespace: None, + target_ns: target_namespace(schema).cloned(), + xsd_ns: xsd_namespaces + .clone() + .find(|namespace| namespace.name().is_some()) + .or_else(|| xsd_namespaces.next()) + .cloned(), + types: schema + .children() + .filter(|n| { + n.is_element() + && n.xsd_type() != ElementType::Annotation + && n.xsd_type() != ElementType::AttributeGroup + }) + .map(|node| parse_node(&node, schema)) + .collect(), + attribute_groups: schema + .children() + .filter(|n| n.is_element() && n.xsd_type() == ElementType::AttributeGroup) + .map(|node| parse_node(&node, schema)) + .collect(), + } +} diff --git a/engine/runtime/xml2/src/parser/sequence.rs b/engine/runtime/xml2/src/parser/sequence.rs new file mode 100644 index 000000000..48b8a1929 --- /dev/null +++ b/engine/runtime/xml2/src/parser/sequence.rs @@ -0,0 +1,41 @@ +use std::cell::RefCell; + +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::{RsEntity, Struct, StructField, TypeModifier}, + utils::{enum_to_field, get_documentation, get_parent_name}, + 
xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_sequence(sequence: &Node, parent: &Node) -> RsEntity { + let name = get_parent_name(sequence); + RsEntity::Struct(Struct { + name: name.into(), + comment: get_documentation(parent), + subtypes: vec![], + fields: RefCell::new(elements_to_fields(sequence, name)), + ..Default::default() + }) +} + +fn elements_to_fields(sequence: &Node, parent_name: &str) -> Vec { + sequence + .children() + .filter(|n| n.is_element() && n.xsd_type() != ElementType::Annotation) + .map(|n| match parse_node(&n, sequence) { + RsEntity::StructField(mut sf) => { + if sf.type_name.ends_with(parent_name) { + sf.type_modifiers.push(TypeModifier::Recursive) + } + sf + } + RsEntity::Enum(mut en) => { + en.name = format!("{}Choice", parent_name); + enum_to_field(en) + } + _ => unreachable!("\nError: {:?}\n{:?}", n, parse_node(&n, sequence)), + }) + .collect() +} diff --git a/engine/runtime/xml2/src/parser/simple_content.rs b/engine/runtime/xml2/src/parser/simple_content.rs new file mode 100644 index 000000000..4e3bd3b17 --- /dev/null +++ b/engine/runtime/xml2/src/parser/simple_content.rs @@ -0,0 +1,17 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::RsEntity, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_simple_content(node: &Node) -> RsEntity { + let content = node + .children() + .filter(|n| n.is_element() && n.xsd_type() != ElementType::Annotation) + .last() + .expect("Content in simpleContent required"); + + parse_node(&content, node) +} diff --git a/engine/runtime/xml2/src/parser/simple_type.rs b/engine/runtime/xml2/src/parser/simple_type.rs new file mode 100644 index 000000000..1686db2e2 --- /dev/null +++ b/engine/runtime/xml2/src/parser/simple_type.rs @@ -0,0 +1,34 @@ +use roxmltree::Node; + +use crate::parser::{ + node_parser::parse_node, + types::RsEntity, + utils::get_documentation, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_simple_type(node: &Node, parent: &Node) 
-> RsEntity { + let name = node.attr_name(); + + assert_eq!( + parent.xsd_type() == ElementType::Schema, + name.is_some(), + "Name required if the simpleType element is a child of the schema element, and not allowed at other times" + ); + + let content = node + .children() + .filter(|n| n.is_element() && n.xsd_type() != ElementType::Annotation) + .last() + .expect( + "Simple types must be defined in one of the following ways: [Union, List, Restriction]", + ); + + let mut content_type = parse_node(&content, node); + + if let Some(n) = name { + content_type.set_name(n); + } + content_type.set_comment(get_documentation(node)); + content_type +} diff --git a/engine/runtime/xml2/src/parser/types.rs b/engine/runtime/xml2/src/parser/types.rs new file mode 100644 index 000000000..1a88243b1 --- /dev/null +++ b/engine/runtime/xml2/src/parser/types.rs @@ -0,0 +1,233 @@ +use std::{cell::RefCell, collections::HashMap}; + +use roxmltree::Namespace; + +use crate::parser::{constants::tag, xsd_elements::FacetType}; + +#[derive(Debug, Clone, Default)] +pub struct RsFile<'input> { + pub name: String, + pub namespace: Option, + pub types: Vec, + pub attribute_groups: Vec, + pub target_ns: Option>, + pub xsd_ns: Option>, +} + +#[derive(Debug, Default, Clone)] +pub struct Struct { + pub name: String, + pub comment: Option, + pub fields: RefCell>, + pub attribute_groups: RefCell>, + pub subtypes: Vec, +} + +impl Struct { + pub fn get_types_map(&self) -> HashMap<&String, &Self> { + let mut map = HashMap::new(); + map.insert(&self.name, self); + for ty in &self.subtypes { + if let RsEntity::Struct(st) = ty { + map.extend(st.get_types_map()); + } + } + map + } + + pub fn extend_base(&self, types: &HashMap<&String, &Self>) { + self.fields.borrow_mut().iter_mut().for_each(|f| f.extend_base(types)); + + let mut fields = self + .fields + .borrow() + .iter() + .filter(|f| f.name.as_str() == tag::BASE) + .flat_map(|f| { + let key = f.type_name.split(':').last().unwrap().to_string(); + 
types.get(&key).map(|s| s.fields.borrow().clone()).unwrap_or_default() + }) + .filter(|f| { + //TODO: remove this workaround for fields names clash + !self.fields.borrow().iter().any(|field| field.name == f.name) + }) + .collect::>(); + + self.fields.borrow_mut().append(&mut fields); + + self.fields.borrow_mut().retain(|field| field.name.as_str() != tag::BASE); + + for subtype in &self.subtypes { + if let RsEntity::Struct(s) = subtype { + s.extend_base(types); + } + } + } + + pub fn extend_attribute_group(&self, types: &HashMap<&String, &Self>) { + let mut fields = self + .attribute_groups + .borrow() + .iter() + .flat_map(|f| { + let key = f.original.split(':').last().unwrap().to_string(); + types.get(&key).map(|s| s.fields.borrow().clone()).unwrap_or_default() + }) + .collect::>(); + + self.fields.borrow_mut().append(&mut fields); + } +} + +#[derive(Debug, Clone, Default)] +pub struct StructField { + pub name: String, + pub type_name: String, + pub comment: Option, + pub subtypes: Vec, + pub source: StructFieldSource, + pub type_modifiers: Vec, +} + +impl StructField { + pub fn extend_base(&mut self, types: &HashMap<&String, &Struct>) { + for subtype in &mut self.subtypes { + if let RsEntity::Struct(st) = subtype { + st.extend_base(types); + } + } + } +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Debug, Clone, Default)] +pub enum StructFieldSource { + Attribute, + Element, + Base, + Choice, + #[default] + NA, +} + +#[derive(Debug, Clone)] +pub struct Facet { + pub facet_type: FacetType, + pub comment: Option, +} + +#[derive(Debug, Clone, Default)] +pub struct TupleStruct { + pub name: String, + pub comment: Option, + pub type_name: String, + pub subtypes: Vec, + pub type_modifiers: Vec, + pub facets: Vec, +} + +#[derive(Debug, Clone, Default)] +pub struct Enum { + pub name: String, + pub cases: Vec, + pub comment: Option, + pub type_name: String, + pub subtypes: Vec, + pub source: EnumSource, +} + +#[allow(clippy::upper_case_acronyms)] +#[derive(Debug, 
Clone, PartialEq, Default)] +pub enum EnumSource { + Restriction, + Choice, + Union, + #[default] + NA, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum TypeModifier { + None, + Array, + Option, + Recursive, + Empty, +} + +#[derive(Debug, Clone, Default)] +pub struct EnumCase { + pub name: String, + pub comment: Option, + pub value: String, + pub type_name: Option, + pub type_modifiers: Vec, + pub source: EnumSource, +} + +#[derive(Debug, Clone, Default)] +pub struct Alias { + pub name: String, + pub original: String, + pub comment: Option, + pub subtypes: Vec, +} + +#[derive(Debug, Clone)] +pub struct Import { + pub name: String, + pub location: String, + pub comment: Option, +} + +#[derive(Debug, Clone)] +pub enum RsEntity { + Struct(Struct), + StructField(StructField), + TupleStruct(TupleStruct), + Enum(Enum), + EnumCase(EnumCase), + Alias(Alias), + Import(Import), +} + +impl RsEntity { + pub fn name(&self) -> &str { + use RsEntity::*; + match self { + Struct(s) => s.name.as_str(), + TupleStruct(tp) => tp.name.as_str(), + Enum(e) => e.name.as_str(), + EnumCase(ec) => ec.name.as_str(), + Alias(al) => al.name.as_str(), + StructField(sf) => sf.name.as_str(), + Import(im) => im.name.as_str(), + } + } + + pub fn set_name(&mut self, name: &str) { + use RsEntity::*; + match self { + Struct(s) => s.name = name.to_string(), + TupleStruct(tp) => tp.name = name.to_string(), + Enum(e) => e.name = name.to_string(), + EnumCase(ec) => ec.name = name.to_string(), + Alias(al) => al.name = name.to_string(), + StructField(sf) => sf.name = name.to_string(), + Import(im) => im.name = name.to_string(), + } + } + + pub fn set_comment(&mut self, comment: Option) { + use RsEntity::*; + match self { + Struct(s) => s.comment = comment, + TupleStruct(tp) => tp.comment = comment, + Enum(e) => e.comment = comment, + EnumCase(ec) => ec.comment = comment, + Alias(al) => al.comment = comment, + StructField(sf) => sf.comment = comment, + Import(im) => im.comment = comment, + } + } +} diff 
--git a/engine/runtime/xml2/src/parser/union.rs b/engine/runtime/xml2/src/parser/union.rs new file mode 100644 index 000000000..12a890d02 --- /dev/null +++ b/engine/runtime/xml2/src/parser/union.rs @@ -0,0 +1,79 @@ +use std::cell::RefCell; + +use roxmltree::Node; + +use crate::parser::{ + constants::attribute, + node_parser::parse_node, + types::{Enum, EnumCase, EnumSource, RsEntity, Struct}, + utils::{attributes_to_fields, enum_to_field, get_documentation, get_parent_name}, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn parse_union(union: &Node) -> RsEntity { + let mut cases = union + .attribute(attribute::MEMBER_TYPES) + .map(create_enum_cases) + .unwrap_or_default(); + + let subtypes = union + .children() + .filter(|e| e.is_element() && e.xsd_type() == ElementType::SimpleType) + .enumerate() + .map(|st| enum_subtype_from_node(&st.1, union, st.0)) + .collect::>(); + + cases.append( + &mut subtypes + .iter() + .enumerate() + .map(|val| EnumCase { + name: format!("EnumCase_{}", val.0), + type_name: Some(val.1.name().to_string()), + source: EnumSource::Union, + ..Default::default() + }) + .collect(), + ); + + let mut union_enum = Enum { + cases, + subtypes, + comment: get_documentation(union), + type_name: "String".into(), + source: EnumSource::Union, + ..Default::default() + }; + + let mut fields = attributes_to_fields(union); + + if fields.is_empty() { + RsEntity::Enum(union_enum) + } else { + union_enum.name = format!("{}Choice", get_parent_name(union)); + fields.push(enum_to_field(union_enum)); + RsEntity::Struct(Struct { + fields: RefCell::new(fields), + ..Default::default() + }) + } +} + +fn create_enum_cases(member_types: &str) -> Vec { + member_types + .split(' ') + .filter(|s| !s.is_empty()) + .map(|mt| EnumCase { + name: mt.to_string(), + type_name: Some(mt.to_string()), + source: EnumSource::Union, + ..Default::default() + }) + .collect() +} + +fn enum_subtype_from_node(node: &Node, parent: &Node, index: usize) -> RsEntity { + let mut entity = 
parse_node(node, parent); + entity.set_name(format!("EnumCaseType_{}", index).as_str()); + entity +} diff --git a/engine/runtime/xml2/src/parser/utils.rs b/engine/runtime/xml2/src/parser/utils.rs new file mode 100644 index 000000000..4f35e01d2 --- /dev/null +++ b/engine/runtime/xml2/src/parser/utils.rs @@ -0,0 +1,79 @@ +use std::str; + +use roxmltree::{Namespace, Node}; + +use crate::parser::{ + constants::attribute, + node_parser::parse_node, + types::{Alias, Enum, RsEntity, StructField, StructFieldSource}, + xsd_elements::{ElementType, XsdNode}, +}; + +pub fn target_namespace<'a, 'input>(node: &Node<'a, 'input>) -> Option<&'a Namespace<'input>> { + match node.attribute(attribute::TARGET_NAMESPACE) { + Some(tn) => node.namespaces().find(|a| a.uri() == tn), + None => None, + } +} + +pub fn find_child<'a, 'input>(node: &Node<'a, 'input>, tag_name: &str) -> Option> { + node.children().find(|e| e.is_element() && e.tag_name().name() == tag_name) +} + +pub fn get_documentation(node: &Node<'_, '_>) -> Option { + find_child(node, "annotation") + .and_then(|node| find_child(&node, "documentation")) + .and_then(|node| node.text().map(|s| s.to_string())) +} + +pub fn get_parent_name<'a>(node: &Node<'a, '_>) -> &'a str { + match node.parent_element() { + Some(parent) => { + if parent.xsd_type() == ElementType::Schema { + return "SchemaElement"; + } + + match parent.attribute(attribute::NAME) { + Some(s) => s, + None => get_parent_name(&parent), + } + } + None => "UnsupportedName", + } +} + +pub fn get_base<'a>(node: &Node<'a, '_>) -> &'a str { + node.attribute(attribute::BASE).expect("The base value is required") +} + +pub fn attributes_to_fields(node: &Node) -> Vec { + node.children() + .filter(|n| { + n.xsd_type() == ElementType::Attribute || n.xsd_type() == ElementType::AnyAttribute + }) + .map(|n| match parse_node(&n, node) { + RsEntity::StructField(sf) => sf, + _ => unreachable!("Invalid attribute parsing: {:?}", n), + }) + .collect() +} + +pub fn 
attribute_groups_to_aliases(node: &Node) -> Vec { + node.children() + .filter(|n| n.xsd_type() == ElementType::AttributeGroup) + .map(|n| match parse_node(&n, node) { + RsEntity::Alias(a) => a, + _ => unreachable!("Invalid attribute group parsing: {:?}", n), + }) + .collect() +} + +pub fn enum_to_field(en: Enum) -> StructField { + StructField { + name: en.name.clone(), + type_name: en.name.clone(), + subtypes: vec![RsEntity::Enum(en)], + source: StructFieldSource::Element, + ..Default::default() + } +} diff --git a/engine/runtime/xml2/src/parser/xsd_elements.rs b/engine/runtime/xml2/src/parser/xsd_elements.rs new file mode 100644 index 000000000..bbeda2189 --- /dev/null +++ b/engine/runtime/xml2/src/parser/xsd_elements.rs @@ -0,0 +1,225 @@ +use crate::parser::constants::attribute; + +#[derive(Debug, PartialEq)] +pub enum ElementType { + All, + Annotation, + Any, + AnyAttribute, + AppInfo, + Attribute, + AttributeGroup, + Choice, + ComplexContent, + ComplexType, + Documentation, + Element, + Extension(ExtensionType), + Field, + Group, + Import, + Include, + Key, + KeyRef, + List, + Notation, + Redefine, + Restriction(RestrictionType), + Schema, + Selector, + Sequence, + SimpleContent, + SimpleType, + Union, + Unique, + Facet(FacetType), + UnknownElement(String), + XsdError(String), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum FacetType { + Enumeration(String), + FractionDigits(String), + Length(String), + MaxExclusive(String), + MaxInclusive(String), + MaxLength(String), + MinExclusive(String), + MinInclusive(String), + MinLength(String), + Pattern(String), + TotalDigits(String), + WhiteSpace(WhiteSpace), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum WhiteSpace { + Preserve, + Replace, + Collapse, +} + +#[derive(Debug, PartialEq)] +pub enum ExtensionType { + ComplexContent, + SimpleContent, +} + +#[derive(Debug, PartialEq)] +pub enum RestrictionType { + ComplexContent, + SimpleContent, + SimpleType, +} + +pub trait XsdNode { + fn xsd_type(&self) -> 
ElementType; + fn attr_name(&self) -> Option<&str>; + fn attr_type(&self) -> Option<&str>; + fn attr_ref(&self) -> Option<&str>; + fn attr_use(&self) -> UseType; + fn attr_value(&self) -> Option<&str>; +} + +impl<'a> XsdNode for roxmltree::Node<'a, '_> { + fn xsd_type(&self) -> ElementType { + use ElementType::*; + match self.tag_name().name() { + "all" => All, + "annotation" => Annotation, + "any" => Any, + "anyAttribute" => AnyAttribute, + "appinfo" | "appInfo" => AppInfo, /* XSD spells the tag "appinfo"; the camelCase form is kept for compatibility */ + "attribute" => Attribute, + "attributeGroup" => AttributeGroup, + "choice" => Choice, + "complexContent" => ComplexContent, + "complexType" => ComplexType, + "documentation" => Documentation, + "element" => Element, + "extension" => match self.parent_element() { + Some(parent) => match parent.tag_name().name() { + "complexContent" => Extension(ExtensionType::ComplexContent), + "simpleContent" => Extension(ExtensionType::SimpleContent), + _ => XsdError(format!( + "Unsupported parent type ({}) for 'extension' element", + parent.tag_name().name() + )), + }, + None => XsdError("'extension' element must have a parent".to_string()), + }, + "field" => Field, + "group" => Group, + "import" => Import, + "include" => Include, + "key" => Key, + "keyref" | "keyRef" => KeyRef, /* XSD spells the tag "keyref"; the camelCase form is kept for compatibility */ + "list" => List, + "notation" => Notation, + "redefine" => Redefine, + "restriction" => match self.parent_element() { + Some(parent) => match parent.tag_name().name() { + "complexContent" => Restriction(RestrictionType::ComplexContent), + "simpleContent" => Restriction(RestrictionType::SimpleContent), + "simpleType" => Restriction(RestrictionType::SimpleType), + _ => XsdError(format!( + "Unsupported parent type ({}) for 'restriction' element", + parent.tag_name().name() + )), + }, + None => XsdError("'restriction' element must have a parent".to_string()), + }, + "schema" => Schema, + "selector" => Selector, + "sequence" => Sequence, + "simpleContent" => SimpleContent, + "simpleType" => SimpleType, + "union" => Union, + "unique" => Unique, + + 
"enumeration" => Facet(FacetType::Enumeration(get_string_value(self))), + "fractionDigits" => Facet(FacetType::FractionDigits(get_string_value(self))), + "length" => Facet(FacetType::Length(get_string_value(self))), + "maxExclusive" => Facet(FacetType::MaxExclusive(get_string_value(self))), + "maxInclusive" => Facet(FacetType::MaxInclusive(get_string_value(self))), + "maxLength" => Facet(FacetType::MaxLength(get_string_value(self))), + "minExclusive" => Facet(FacetType::MinExclusive(get_string_value(self))), + "minInclusive" => Facet(FacetType::MinInclusive(get_string_value(self))), + "minLength" => Facet(FacetType::MinLength(get_string_value(self))), + "pattern" => Facet(FacetType::Pattern(get_string_value(self))), + "totalDigits" => Facet(FacetType::TotalDigits(get_string_value(self))), + "whiteSpace" => match self.attr_value() { + Some(val) => match val { + "preserve" => Facet(FacetType::WhiteSpace(WhiteSpace::Preserve)), + "replace" => Facet(FacetType::WhiteSpace(WhiteSpace::Replace)), + "collapse" => Facet(FacetType::WhiteSpace(WhiteSpace::Collapse)), + x => unreachable!("Invalid WhiteSpace value: {}.\n {:?}", x, self), + }, + None => unreachable!("Value is required for facets"), + }, + + _ => UnknownElement(self.tag_name().name().to_string()), + } + } + + fn attr_name(&self) -> Option<&str> { + self.attribute(attribute::NAME) + } + + fn attr_type(&self) -> Option<&str> { + self.attribute(attribute::TYPE) + } + + fn attr_ref(&self) -> Option<&str> { + self.attribute(attribute::REF) + } + + fn attr_use(&self) -> UseType { + match self.attribute(attribute::USE).unwrap_or("optional") { + "optional" => UseType::Optional, + "prohibited" => UseType::Prohibited, + "required" => UseType::Required, + _ => unreachable!( + "If 'use' specified, this attribute must have one of the following values [optional, prohibited, required]" + ) + } + } + + fn attr_value(&self) -> Option<&str> { + self.attribute(attribute::VALUE) + } +} + +fn get_string_value(node: &roxmltree::Node) 
-> String { + node.attr_value() + .map(|s| s.to_string()) + .unwrap_or_else(|| panic!("Value is required. {:?}", node)) +} + +pub enum UseType { + Required, + Prohibited, + Optional, +} + +pub type MinOccurs = usize; +pub enum MaxOccurs { + Bounded(usize), + Unbounded, + None, +} + +pub fn min_occurs(node: &roxmltree::Node) -> MinOccurs { + node.attribute(attribute::MIN_OCCURS).and_then(|v| v.parse::().ok()).unwrap_or(1) +} + +pub fn max_occurs(node: &roxmltree::Node) -> MaxOccurs { + match node.attribute(attribute::MAX_OCCURS) { + Some(s) => match s { + "unbounded" => MaxOccurs::Unbounded, + s => s.parse::().ok().map(MaxOccurs::Bounded).unwrap_or(MaxOccurs::None), + }, + None => MaxOccurs::None, + } +} diff --git a/engine/runtime/xml2/src/types.rs b/engine/runtime/xml2/src/types.rs new file mode 100644 index 000000000..776ae6abc --- /dev/null +++ b/engine/runtime/xml2/src/types.rs @@ -0,0 +1,35 @@ +pub mod date; +pub use date::*; +pub mod datetime; +pub use datetime::*; +pub mod datetimestamp; +pub use datetimestamp::*; +pub mod decimal; +pub use decimal::*; +pub mod duration; +pub use duration::*; +pub mod gday; +pub use gday::*; +pub mod gmonth; +pub use gmonth::*; +pub mod gmonthday; +pub use gmonthday::*; +pub mod gyear; +pub use gyearmonth::*; +pub mod gyearmonth; +pub use gyear::*; +pub mod integer; +pub use integer::*; +pub mod negative_integer; +pub use negative_integer::*; +pub mod non_negative_integer; +pub use non_negative_integer::*; +pub mod non_positive_integer; +pub use non_positive_integer::*; +pub mod positive_integer; +pub use positive_integer::*; +pub mod time; +pub use time::*; +pub mod utils; +pub use utils::*; +pub mod yaserde; diff --git a/engine/runtime/xml2/src/types/date.rs b/engine/runtime/xml2/src/types/date.rs new file mode 100644 index 000000000..6df789910 --- /dev/null +++ b/engine/runtime/xml2/src/types/date.rs @@ -0,0 +1,90 @@ +use std::{fmt, str::FromStr}; + +use chrono::{format::strftime::StrftimeItems, FixedOffset, NaiveDate}; 
+use xml2_macro::UtilsDefaultSerde; + +use crate::types::utils::parse_timezone; + +#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)] +pub struct Date { + pub value: NaiveDate, + pub timezone: Option, +} + +impl Date { + pub fn from_chrono_naive_date(date: NaiveDate) -> Self { + Date { + value: date, + timezone: None, + } + } + + pub fn to_chrono_naive_date(&self) -> NaiveDate { + self.value + } +} + +impl Default for Date { + fn default() -> Date { + Self { + value: NaiveDate::from_ymd_opt(1, 1, 1).unwrap(), + timezone: None, + } + } +} + +impl FromStr for Date { + type Err = String; + + fn from_str(s: &str) -> Result { + fn parse_naive_date(s: &str) -> Result { + NaiveDate::parse_from_str(s, "%Y-%m-%d").map_err(|e| e.to_string()) + } + + if let Some(s) = s.strip_suffix('Z') { + return Ok(Date { + value: parse_naive_date(s)?, + timezone: Some(FixedOffset::east_opt(0).unwrap()), + }); + } + + if s.contains('+') { + if s.matches('+').count() > 1 { + return Err("bad date format".to_string()); + } + + let idx: usize = s.match_indices('+').collect::>()[0].0; + let date_token = &s[..idx]; + let tz_token = &s[idx..]; + return Ok(Date { + value: parse_naive_date(date_token)?, + timezone: Some(parse_timezone(tz_token)?), + }); + } + + if s.matches('-').count() == 3 { + let idx: usize = s.match_indices('-').collect::>()[2].0; + let date_token = &s[..idx]; + let tz_token = &s[idx..]; + return Ok(Date { + value: parse_naive_date(date_token)?, + timezone: Some(parse_timezone(tz_token)?), + }); + } + + Ok(Date { + value: parse_naive_date(s)?, + timezone: None, + }) + } +} + +impl fmt::Display for Date { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let fmt = StrftimeItems::new("%Y-%m-%d"); + match self.timezone { + Some(tz) => write!(f, "{}{}", self.value.format_with_items(fmt.clone()), tz), + None => write!(f, "{}", self.value.format_with_items(fmt.clone())), + } + } +} diff --git a/engine/runtime/xml2/src/types/datetime.rs 
b/engine/runtime/xml2/src/types/datetime.rs new file mode 100644 index 000000000..8492b743e --- /dev/null +++ b/engine/runtime/xml2/src/types/datetime.rs @@ -0,0 +1,50 @@ +use std::{fmt, str::FromStr}; + +use chrono::{format::ParseError, DateTime as CDateTime, FixedOffset}; +use xml2_macro::UtilsDefaultSerde; + +#[derive(PartialEq, PartialOrd, Debug, Clone, UtilsDefaultSerde)] +pub struct DateTime { + pub value: CDateTime, +} + +impl DateTime { + pub fn from_chrono_datetime(datetime: CDateTime) -> Self { + DateTime { value: datetime } + } + + pub fn to_chrono_datetime(&self) -> CDateTime { + self.value + } +} + +impl Default for DateTime { + fn default() -> DateTime { + Self { + value: CDateTime::parse_from_rfc3339("0001-01-01T00:00:00Z").unwrap(), + } + } +} + +impl FromStr for DateTime { + type Err = ParseError; + + fn from_str(s: &str) -> Result { + let tz_provided = s.ends_with('Z') || s.contains('+') || s.matches('-').count() == 3; + let s_with_timezone = if tz_provided { + s.to_string() + } else { + format!("{}Z", s) + }; + match CDateTime::parse_from_rfc3339(&s_with_timezone) { + Ok(cdt) => Ok(DateTime { value: cdt }), + Err(err) => Err(err), + } + } +} + +impl fmt::Display for DateTime { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.value.to_rfc3339()) + } +} diff --git a/engine/runtime/xml2/src/types/datetimestamp.rs b/engine/runtime/xml2/src/types/datetimestamp.rs new file mode 100644 index 000000000..7c10332c5 --- /dev/null +++ b/engine/runtime/xml2/src/types/datetimestamp.rs @@ -0,0 +1,40 @@ +use std::{fmt, str::FromStr}; + +use chrono::{format::ParseError, DateTime as CDateTime, FixedOffset}; +use xml2_macro::UtilsDefaultSerde; + +use crate::types::datetime::DateTime; + +#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)] +pub struct DateTimeStamp { + pub value: DateTime, +} + +impl DateTimeStamp { + pub fn from_chrono_datetime(datetime: CDateTime) -> Self { + DateTimeStamp { + value: 
DateTime::from_chrono_datetime(datetime), + } + } + + pub fn to_chrono_datetime(&self) -> CDateTime { + self.value.to_chrono_datetime() + } +} + +impl FromStr for DateTimeStamp { + type Err = ParseError; + + fn from_str(s: &str) -> Result { + match CDateTime::parse_from_rfc3339(s) { + Ok(cdt) => Ok(DateTimeStamp::from_chrono_datetime(cdt)), + Err(err) => Err(err), + } + } +} + +impl fmt::Display for DateTimeStamp { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.value) + } +} diff --git a/engine/runtime/xml2/src/types/decimal.rs b/engine/runtime/xml2/src/types/decimal.rs new file mode 100644 index 000000000..c0b2c8dc7 --- /dev/null +++ b/engine/runtime/xml2/src/types/decimal.rs @@ -0,0 +1,31 @@ +use std::{fmt, str::FromStr}; + +use bigdecimal::{BigDecimal, ParseBigDecimalError}; +use xml2_macro::UtilsDefaultSerde; + +#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)] +pub struct Decimal(pub BigDecimal); + +impl Decimal { + pub fn from_bigdecimal(bigdecimal: BigDecimal) -> Self { + Decimal(bigdecimal) + } + + pub fn to_bigdecimal(&self) -> BigDecimal { + self.0.clone() + } +} + +impl FromStr for Decimal { + type Err = ParseBigDecimalError; + + fn from_str(s: &str) -> Result { + Ok(Decimal(BigDecimal::from_str(s)?)) + } +} + +impl fmt::Display for Decimal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/engine/runtime/xml2/src/types/duration.rs b/engine/runtime/xml2/src/types/duration.rs new file mode 100644 index 000000000..6c647ca99 --- /dev/null +++ b/engine/runtime/xml2/src/types/duration.rs @@ -0,0 +1,283 @@ +use std::{fmt, fmt::Write, str::FromStr}; + +use xml2_macro::UtilsDefaultSerde; + +#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)] +pub struct Duration { + pub is_negative: bool, + + pub years: u64, + pub months: u64, + pub days: u64, + + pub hours: u64, + pub minutes: u64, + pub seconds: f64, +} + +impl Duration { + pub 
fn to_std_duration(&self) -> Result { + if self.years > 0 || self.months > 0 { + Err("Duration with months or years require a starting date to be converted".into()) + } else { + let secs = self.seconds as u64; + + let nanos = ((self.seconds - secs as f64) * 1e9) as u32; + let secs = secs + 60 * (self.minutes + 60 * (self.hours + 24 * self.days)); + + Ok(std::time::Duration::new(secs, nanos)) + } + } + + // TODO: from_std_duration +} + +impl FromStr for Duration { + type Err = String; + + fn from_str(s: &str) -> Result { + fn fill_component( + context: &mut ParsingContext, + component: &mut u64, + idx: i32, + name: &str, + symbol: char, + ) -> Result<(), String> { + if context.is_number_empty { + return Err(format!( + "No value is specified for {}, so '{}' must not be present", + name, symbol + )); + } + + if context.is_dot_found { + return Err("Only the seconds can be expressed as a decimal".into()); + } + + if context.last_filled_component >= idx { + return Err("Bad order of duration components".into()); + } + + *component = context.number; + context.last_filled_component = idx; + context.number = 0; + context.is_number_empty = true; + + Ok(()) + } + + fn fill_seconds( + context: &mut ParsingContext, + seconds: &mut f64, + ) -> Result<(), &'static str> { + if context.is_number_empty { + return Err("No value is specified for seconds, so 'S' must not be present"); + } + + if context.is_dot_found && context.denom == 1 { + return Err("At least one digit must follow the decimal point if it appears"); + } + + if context.last_filled_component >= 6 { + return Err("Bad order of duration components"); + } + + *seconds = context.number as f64 + context.numer as f64 / context.denom as f64; + context.last_filled_component = 6; + context.number = 0; + context.is_number_empty = true; + + Ok(()) + } + + let mut dur: Duration = Default::default(); + let mut context = ParsingContext::new(); + + if s.is_empty() { + return Ok(dur); + } + + for (i, c) in s.chars().enumerate() { + 
match c { + '-' => { + if i == 0 { + dur.is_negative = true; + } else { + return Err("The minus sign must appear first".into()); + } + } + 'P' => { + if i == 0 || i == 1 && dur.is_negative { + context.is_p_found = true; + } else { + return Err("Symbol 'P' occurred at the wrong position".into()); + } + } + 'T' => { + if context.is_t_found { + return Err("Symbol 'T' occurred twice".into()); + } + + if context.number > 0 { + return Err("Symbol 'T' occurred after a number".into()); + } + + context.is_t_found = true; + context.last_filled_component = 3; + } + + // Duration components: + 'Y' => { + fill_component(&mut context, &mut dur.years, 1, "years", 'Y')?; + } + 'M' => { + if context.is_t_found { + fill_component(&mut context, &mut dur.minutes, 5, "minutes", 'M')?; + } else { + fill_component(&mut context, &mut dur.months, 2, "months", 'M')?; + } + } + 'D' => { + fill_component(&mut context, &mut dur.days, 3, "days", 'D')?; + } + 'H' => { + if !context.is_t_found { + return Err("No symbol 'T' found before hours components".into()); + } + fill_component(&mut context, &mut dur.hours, 4, "hours", 'H')?; + } + 'S' => { + if !context.is_t_found { + return Err("No symbol 'T' found before seconds components".into()); + } + fill_seconds(&mut context, &mut dur.seconds)?; + } + + // Number: + '.' 
=> { + if context.is_dot_found { + return Err("Dot occurred twice".into()); + } + + if context.is_number_empty { + return Err("No digits before dot".into()); + } + + context.is_dot_found = true; + } + digit => { + if !digit.is_ascii_digit() { + return Err("Incorrect character occurred".into()); + } + + if context.is_dot_found { + context.numer *= 10; + context.numer += + digit.to_digit(10).expect("error converting a digit") as u64; + context.denom *= 10; + } else { + context.number *= 10; + context.number += + digit.to_digit(10).expect("error converting a digit") as u64; + context.is_number_empty = false; + } + } + } + } + + if context.number > 0 { + return Err("Number at the end of the string".into()); + } + + if !context.is_p_found { + return Err("'P' must always be present".into()); + } + + if context.last_filled_component == 0 { + return Err("At least one number and designator are required".into()); + } + + if context.last_filled_component <= 3 && context.is_t_found { + return Err("No time items are present, so 'T' must not be present".into()); + } + + Ok(dur) + } +} + +impl fmt::Display for Duration { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let mut s = if self.is_negative { + "-P".to_string() + } else { + "P".to_string() + }; + + let mut date_str = String::new(); + if self.years > 0 { + write!(&mut date_str, "{}Y", self.years)?; + } + if self.months > 0 { + write!(&mut date_str, "{}M", self.months)?; + } + if self.days > 0 { + write!(&mut date_str, "{}D", self.days)?; + } + + let mut time_str = String::new(); + if self.hours > 0 { + write!(&mut time_str, "{}H", self.hours)?; + } + if self.minutes > 0 { + write!(&mut time_str, "{}M", self.minutes)?; + } + if self.seconds > 0.0 { + write!(&mut time_str, "{}S", self.seconds)?; + } + + if time_str.is_empty() { + if date_str.is_empty() { + s.push_str("0Y"); + } else { + s.push_str(&date_str); + } + } else { + s.push_str(&date_str); + s.push('T'); + s.push_str(&time_str); + } + + write!(f, "{}", 
s) + } +} + +struct ParsingContext { + is_p_found: bool, + is_t_found: bool, + last_filled_component: i32, + + number: u64, + is_number_empty: bool, + + is_dot_found: bool, + + numer: u64, + denom: u64, +} + +impl ParsingContext { + pub fn new() -> ParsingContext { + ParsingContext { + is_p_found: false, + is_t_found: false, + last_filled_component: 0, + + number: 0, + is_number_empty: true, + + is_dot_found: false, + numer: 0, + denom: 1, + } + } +} diff --git a/engine/runtime/xml2/src/types/gday.rs b/engine/runtime/xml2/src/types/gday.rs new file mode 100644 index 000000000..7b7e5ea48 --- /dev/null +++ b/engine/runtime/xml2/src/types/gday.rs @@ -0,0 +1,83 @@ +use std::{fmt, str::FromStr}; + +use chrono::FixedOffset; +use xml2_macro::UtilsDefaultSerde; + +use crate::types::utils::parse_timezone; + +#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)] +pub struct GDay { + pub value: i32, + pub timezone: Option, +} + +impl GDay { + pub fn new(day: i32, timezone: Option) -> Result { + if !(1..=31).contains(&day) { + return Err("gDay value should lie between 1 and 31".to_string()); + } + Ok(GDay { + value: day, + timezone, + }) + } +} + +impl Default for GDay { + fn default() -> GDay { + Self { + value: 1, + timezone: None, + } + } +} + +impl FromStr for GDay { + type Err = String; + + fn from_str(s: &str) -> Result { + fn parse_value(s: &str) -> Result { + if s.len() != 5 || &s[0..3] != "---" { + return Err("bad gDay format".to_string()); + } + let token = &s[3..5]; + if !token.chars().all(|c| c.is_ascii_digit()) { + return Err("bad gDay format".to_string()); + } + token.parse::().map_err(|e| e.to_string()) + } + + if let Some(s) = s.strip_suffix('Z') { + return GDay::new(parse_value(s)?, Some(FixedOffset::east_opt(0).unwrap())); + } + + if s.contains('+') { + if s.matches('+').count() > 1 { + return Err("bad gDay format".to_string()); + } + + let idx: usize = s.match_indices('+').collect::>()[0].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + return 
GDay::new(parse_value(value_token)?, Some(parse_timezone(tz_token)?)); + } + + if s.matches('-').count() == 4 { + let idx: usize = s.match_indices('-').collect::>()[3].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + return GDay::new(parse_value(value_token)?, Some(parse_timezone(tz_token)?)); + } + + GDay::new(parse_value(s)?, None) + } +} + +impl fmt::Display for GDay { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.timezone { + Some(tz) => write!(f, "---{:02}{}", self.value, tz), + None => write!(f, "---{:02}", self.value), + } + } +} diff --git a/engine/runtime/xml2/src/types/gmonth.rs b/engine/runtime/xml2/src/types/gmonth.rs new file mode 100644 index 000000000..d5bab05ae --- /dev/null +++ b/engine/runtime/xml2/src/types/gmonth.rs @@ -0,0 +1,83 @@ +use std::{fmt, str::FromStr}; + +use chrono::FixedOffset; +use xml2_macro::UtilsDefaultSerde; + +use crate::types::utils::parse_timezone; + +#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)] +pub struct GMonth { + pub value: i32, + pub timezone: Option, +} + +impl GMonth { + pub fn new(month: i32, timezone: Option) -> Result { + if !(1..=12).contains(&month) { + return Err("GMonth value should lie between 1 and 12".to_string()); + } + Ok(GMonth { + value: month, + timezone, + }) + } +} + +impl Default for GMonth { + fn default() -> GMonth { + Self { + value: 1, + timezone: None, + } + } +} + +impl FromStr for GMonth { + type Err = String; + + fn from_str(s: &str) -> Result { + fn parse_value(s: &str) -> Result { + if s.len() != 4 || &s[0..2] != "--" { + return Err("bad gMonth format".to_string()); + } + let token = &s[2..4]; + if !token.chars().all(|c| c.is_ascii_digit()) { + return Err("bad gMonth format".to_string()); + } + token.parse::().map_err(|e| e.to_string()) + } + + if let Some(s) = s.strip_suffix('Z') { + return GMonth::new(parse_value(s)?, Some(FixedOffset::east_opt(0).unwrap())); + } + + if s.contains('+') { + if s.matches('+').count() > 1 { + return Err("bad 
gMonth format".to_string()); + } + + let idx: usize = s.match_indices('+').collect::>()[0].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + return GMonth::new(parse_value(value_token)?, Some(parse_timezone(tz_token)?)); + } + + if s.matches('-').count() == 3 { + let idx: usize = s.match_indices('-').collect::>()[2].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + return GMonth::new(parse_value(value_token)?, Some(parse_timezone(tz_token)?)); + } + + GMonth::new(parse_value(s)?, None) + } +} + +impl fmt::Display for GMonth { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.timezone { + Some(tz) => write!(f, "--{:02}{}", self.value, tz), + None => write!(f, "--{:02}", self.value), + } + } +} diff --git a/engine/runtime/xml2/src/types/gmonthday.rs b/engine/runtime/xml2/src/types/gmonthday.rs new file mode 100644 index 000000000..9acec0706 --- /dev/null +++ b/engine/runtime/xml2/src/types/gmonthday.rs @@ -0,0 +1,123 @@ +use std::{fmt, str::FromStr}; + +use chrono::FixedOffset; +use xml2_macro::UtilsDefaultSerde; + +use crate::types::{gday::GDay, gmonth::GMonth, utils::parse_timezone}; + +#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)] +pub struct GMonthDay { + pub month: i32, + pub day: i32, + pub timezone: Option, +} + +impl GMonthDay { + pub fn new(month: i32, day: i32, timezone: Option) -> Result { + if !(1..=12).contains(&month) { + return Err("Month value within GMonthDay should lie between 1 and 12".to_string()); + } + + if !(1..=31).contains(&day) { + return Err("Day value within GMonthDay should lie between 1 and 31".to_string()); + } + + const MONTH_MAX_LEN: [i32; 12] = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]; + if day > MONTH_MAX_LEN[month as usize - 1] { + return Err("Day value within GMonthDay is to big for specified month".to_string()); + } + + Ok(GMonthDay { + month, + day, + timezone, + }) + } + + pub fn gmonth(self) -> GMonth { + GMonth { + value: self.month, + timezone: self.timezone, + 
} + } + + pub fn gday(self) -> GDay { + GDay { + value: self.day, + timezone: self.timezone, + } + } +} + +impl Default for GMonthDay { + fn default() -> GMonthDay { + Self { + month: 1, + day: 1, + timezone: None, + } + } +} + +impl FromStr for GMonthDay { + type Err = String; + + fn from_str(s: &str) -> Result { + fn parse_value(s: &str) -> Result<(i32, i32), String> { + if s.len() != 7 || &s[0..2] != "--" || &s[4..5] != "-" { + return Err("bad gMonthDay format".to_string()); + } + + let month_token = &s[2..4]; + if !month_token.chars().all(|c| c.is_ascii_digit()) { + return Err("bad month format within gMonthDay".to_string()); + } + let month = month_token.parse::().map_err(|e| e.to_string())?; + + let day_token = &s[5..7]; + if !day_token.chars().all(|c| c.is_ascii_digit()) { + return Err("bad day format within gMonthDay".to_string()); + } + let day = day_token.parse::().map_err(|e| e.to_string())?; + + Ok((month, day)) + } + + if let Some(s) = s.strip_suffix('Z') { + let (month, day) = parse_value(s)?; + return GMonthDay::new(month, day, Some(FixedOffset::east_opt(0).unwrap())); + } + + if s.contains('+') { + if s.matches('+').count() > 1 { + return Err("bad gMonthDay format".to_string()); + } + + let idx: usize = s.match_indices('+').collect::>()[0].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + let (month, day) = parse_value(value_token)?; + return GMonthDay::new(month, day, Some(parse_timezone(tz_token)?)); + } + + if s.matches('-').count() == 4 { + let idx: usize = s.match_indices('-').collect::>()[3].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + let (month, day) = parse_value(value_token)?; + return GMonthDay::new(month, day, Some(parse_timezone(tz_token)?)); + } + + let (month, day) = parse_value(s)?; + GMonthDay::new(month, day, None) + } +} + +impl fmt::Display for GMonthDay { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self.timezone { + Some(tz) => write!(f, "--{:02}-{:02}{}", self.month, 
self.day, tz), + None => write!(f, "--{:02}-{:02}", self.month, self.day), + } + } +} diff --git a/engine/runtime/xml2/src/types/gyear.rs b/engine/runtime/xml2/src/types/gyear.rs new file mode 100644 index 000000000..e66396340 --- /dev/null +++ b/engine/runtime/xml2/src/types/gyear.rs @@ -0,0 +1,102 @@ +use std::{fmt, str::FromStr}; + +use chrono::FixedOffset; +use xml2_macro::UtilsDefaultSerde; + +use crate::types::utils::parse_timezone; + +#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)] +pub struct GYear { + pub value: i32, + pub timezone: Option, +} + +impl GYear { + pub fn new(year: i32, timezone: Option) -> Result { + if year == 0 { + return Err("bad gYear format: year 0 occurred".to_string()); + } + Ok(GYear { + value: year, + timezone, + }) + } +} + +impl Default for GYear { + fn default() -> GYear { + Self { + value: 1, + timezone: None, + } + } +} + +impl FromStr for GYear { + type Err = String; + + fn from_str(s: &str) -> Result { + if let Some(s) = s.strip_prefix('-') { + let mut gyear = parse_str_positive(s)?; + gyear.value *= -1; + return Ok(gyear); + } + parse_str_positive(s) + } +} + +fn parse_str_positive(s: &str) -> Result { + fn parse_value(s: &str) -> Result { + if s.len() < 4 { + return Err("bad gYear format: to short".to_string()); + } + if !s.chars().all(|c| c.is_ascii_digit()) { + return Err("bad gYear format".to_string()); + } + s.parse::().map_err(|e| e.to_string()) + } + + if let Some(s) = s.strip_suffix('Z') { + return GYear::new(parse_value(s)?, Some(FixedOffset::east_opt(0).unwrap())); + } + + if s.contains('+') { + if s.matches('+').count() > 1 { + return Err("bad gYear format".to_string()); + } + + let idx: usize = s.match_indices('+').collect::>()[0].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + return GYear::new(parse_value(value_token)?, Some(parse_timezone(tz_token)?)); + } + + if s.contains('-') { + if s.matches('-').count() > 1 { + return Err("bad gYear format".to_string()); + } + + let idx: usize = 
s.match_indices('-').collect::>()[0].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + return GYear::new(parse_value(value_token)?, Some(parse_timezone(tz_token)?)); + } + + GYear::new(parse_value(s)?, None) +} + +impl fmt::Display for GYear { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.value > 0 { + match self.timezone { + Some(tz) => write!(f, "{:04}{}", self.value, tz), + None => write!(f, "{:04}", self.value), + } + } else { + match self.timezone { + Some(tz) => write!(f, "-{:04}{}", -self.value, tz), + None => write!(f, "-{:04}", -self.value), + } + } + } +} diff --git a/engine/runtime/xml2/src/types/gyearmonth.rs b/engine/runtime/xml2/src/types/gyearmonth.rs new file mode 100644 index 000000000..2e890f6f6 --- /dev/null +++ b/engine/runtime/xml2/src/types/gyearmonth.rs @@ -0,0 +1,139 @@ +use std::{fmt, str::FromStr}; + +use chrono::FixedOffset; +use xml2_macro::UtilsDefaultSerde; + +use crate::types::{gmonth::GMonth, gyear::GYear, utils::parse_timezone}; + +#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)] +pub struct GYearMonth { + pub year: i32, + pub month: i32, + pub timezone: Option, +} + +impl GYearMonth { + pub fn new(year: i32, month: i32, timezone: Option) -> Result { + if year == 0 { + return Err("bad gYear format: year 0 occurred".to_string()); + } + + if !(1..=12).contains(&month) { + return Err("Month value within GYearMonth should lie between 1 and 12".to_string()); + } + + Ok(GYearMonth { + year, + month, + timezone, + }) + } + + pub fn gyear(self) -> GYear { + GYear { + value: self.year, + timezone: self.timezone, + } + } + + pub fn gmonth(self) -> GMonth { + GMonth { + value: self.month, + timezone: self.timezone, + } + } +} + +impl Default for GYearMonth { + fn default() -> GYearMonth { + Self { + year: 1, + month: 1, + timezone: None, + } + } +} + +impl FromStr for GYearMonth { + type Err = String; + + fn from_str(s: &str) -> Result { + if let Some(s) = s.strip_prefix('-') { + let mut gyearmonth = 
parse_str_positive(s)?; + gyearmonth.year *= -1; + return Ok(gyearmonth); + } + parse_str_positive(s) + } +} + +fn parse_str_positive(s: &str) -> Result { + fn parse_value(s: &str) -> Result<(i32, i32), String> { + if s.matches('-').count() != 1 { + return Err("bad gYearMonth format".to_string()); + } + + let idx: usize = s.match_indices('-').collect::>()[0].0; + let year_token = &s[..idx]; + let month_token = &s[idx + 1..]; + if year_token.len() < 4 || month_token.len() != 2 { + return Err("bad gYearMonth format".to_string()); + } + + if !year_token.chars().all(|c| c.is_ascii_digit()) { + return Err("bad year format within gYearMonth".to_string()); + } + let year = year_token.parse::().map_err(|e| e.to_string())?; + + if !month_token.chars().all(|c| c.is_ascii_digit()) { + return Err("bad month format within gYearMonth".to_string()); + } + let month = month_token.parse::().map_err(|e| e.to_string())?; + + Ok((year, month)) + } + + if let Some(s) = s.strip_suffix('Z') { + let (year, month) = parse_value(s)?; + return GYearMonth::new(year, month, Some(FixedOffset::east_opt(0).unwrap())); + } + + if s.contains('+') { + if s.matches('+').count() > 1 { + return Err("bad gMonthDay format".to_string()); + } + + let idx: usize = s.match_indices('+').collect::>()[0].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + let (year, month) = parse_value(value_token)?; + return GYearMonth::new(year, month, Some(parse_timezone(tz_token)?)); + } + + if s.matches('-').count() == 2 { + let idx: usize = s.match_indices('-').collect::>()[1].0; + let value_token = &s[..idx]; + let tz_token = &s[idx..]; + let (year, month) = parse_value(value_token)?; + return GYearMonth::new(year, month, Some(parse_timezone(tz_token)?)); + } + + let (year, month) = parse_value(s)?; + GYearMonth::new(year, month, None) +} + +impl fmt::Display for GYearMonth { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.year > 0 { + match self.timezone { + Some(tz) => write!(f, 
"{:04}-{:02}{}", self.year, self.month, tz), + None => write!(f, "{:04}-{:02}", self.year, self.month), + } + } else { + match self.timezone { + Some(tz) => write!(f, "-{:04}-{:02}{}", -self.year, self.month, tz), + None => write!(f, "-{:04}-{:02}", -self.year, self.month), + } + } + } +} diff --git a/engine/runtime/xml2/src/types/integer.rs b/engine/runtime/xml2/src/types/integer.rs new file mode 100644 index 000000000..a67b3fe9b --- /dev/null +++ b/engine/runtime/xml2/src/types/integer.rs @@ -0,0 +1,34 @@ +use std::{fmt, str::FromStr}; + +use num_bigint::{BigInt, ParseBigIntError, ToBigInt}; +use xml2_macro::UtilsDefaultSerde; + +// https://www.w3.org/TR/xmlschema-2/#integer +#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)] +pub struct Integer(pub BigInt); + +impl Integer { + pub fn from_bigint(bigint: BigInt) -> Self { + Integer(bigint) + } +} + +impl ToBigInt for Integer { + fn to_bigint(&self) -> Option { + Some(self.0.clone()) + } +} + +impl FromStr for Integer { + type Err = ParseBigIntError; + + fn from_str(s: &str) -> Result { + Ok(Integer(BigInt::from_str(s)?)) + } +} + +impl fmt::Display for Integer { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0.to_str_radix(10)) + } +} diff --git a/engine/runtime/xml2/src/types/negative_integer.rs b/engine/runtime/xml2/src/types/negative_integer.rs new file mode 100644 index 000000000..596f1dbbf --- /dev/null +++ b/engine/runtime/xml2/src/types/negative_integer.rs @@ -0,0 +1,39 @@ +use std::{fmt, str::FromStr}; + +use num_bigint::{BigInt, ToBigInt}; +use xml2_macro::UtilsDefaultSerde; + +// https://www.w3.org/TR/xmlschema-2/#negativeInteger +#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)] +pub struct NegativeInteger(pub BigInt); + +impl NegativeInteger { + pub fn from_bigint(bigint: BigInt) -> Self { + NegativeInteger(bigint) + } +} + +impl ToBigInt for NegativeInteger { + fn to_bigint(&self) -> Option { + Some(self.0.clone()) + } 
+} + +impl FromStr for NegativeInteger { + type Err = String; + + fn from_str(s: &str) -> Result { + let value = BigInt::from_str(s).map_err(|e| e.to_string())?; + if value >= 0.to_bigint().unwrap() { + Err("Bad value for NegativeInteger".to_string()) + } else { + Ok(NegativeInteger(value)) + } + } +} + +impl fmt::Display for NegativeInteger { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0.to_str_radix(10)) + } +} diff --git a/engine/runtime/xml2/src/types/non_negative_integer.rs b/engine/runtime/xml2/src/types/non_negative_integer.rs new file mode 100644 index 000000000..ff7b1bbf3 --- /dev/null +++ b/engine/runtime/xml2/src/types/non_negative_integer.rs @@ -0,0 +1,39 @@ +use std::{fmt, str::FromStr}; + +use num_bigint::{BigUint, ToBigUint}; +use xml2_macro::UtilsDefaultSerde; + +// https://www.w3.org/TR/xmlschema-2/#nonNegativeInteger +#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)] +pub struct NonNegativeInteger(pub BigUint); + +impl NonNegativeInteger { + pub fn from_biguint(bigint: BigUint) -> Self { + NonNegativeInteger(bigint) + } +} + +impl ToBigUint for NonNegativeInteger { + fn to_biguint(&self) -> Option { + Some(self.0.clone()) + } +} + +impl FromStr for NonNegativeInteger { + type Err = String; + + fn from_str(s: &str) -> Result { + let value = BigUint::from_str(s).map_err(|e| e.to_string())?; + if value < 0.to_biguint().unwrap() { + Err("Bad value for NonNegativeInteger".to_string()) + } else { + Ok(NonNegativeInteger(value)) + } + } +} + +impl fmt::Display for NonNegativeInteger { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0.to_str_radix(10)) + } +} diff --git a/engine/runtime/xml2/src/types/non_positive_integer.rs b/engine/runtime/xml2/src/types/non_positive_integer.rs new file mode 100644 index 000000000..403c3e4ba --- /dev/null +++ b/engine/runtime/xml2/src/types/non_positive_integer.rs @@ -0,0 +1,39 @@ +use std::{fmt, str::FromStr}; + +use 
num_bigint::{BigInt, ToBigInt};
use xml2_macro::UtilsDefaultSerde;

/// Arbitrary-precision integer for the XML Schema `nonPositiveInteger` type.
///
/// See <https://www.w3.org/TR/xmlschema-2/#nonPositiveInteger>.
#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)]
pub struct NonPositiveInteger(pub BigInt);

impl NonPositiveInteger {
    /// Wraps an existing [`BigInt`].
    ///
    /// No range check happens here; only [`FromStr`] rejects positive values.
    pub fn from_bigint(bigint: BigInt) -> Self {
        NonPositiveInteger(bigint)
    }
}

impl ToBigInt for NonPositiveInteger {
    /// Returns a copy of the wrapped value; never `None`.
    fn to_bigint(&self) -> Option<BigInt> {
        Some(self.0.clone())
    }
}

impl FromStr for NonPositiveInteger {
    type Err = String;

    /// Parses `s` as an integer and accepts it only when it is zero or below.
    ///
    /// # Errors
    /// Returns the stringified [`BigInt`] parse error, or
    /// `"Bad value for NonPositiveInteger"` when the value is positive.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let value = BigInt::from_str(s).map_err(|e| e.to_string())?;
        if value > BigInt::from(0) {
            Err("Bad value for NonPositiveInteger".to_string())
        } else {
            Ok(NonPositiveInteger(value))
        }
    }
}

impl fmt::Display for NonPositiveInteger {
    /// Writes the value in base 10.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `BigInt`'s own `Display` is decimal, so no intermediate `String` is needed.
        write!(f, "{}", self.0)
    }
}

// ---- engine/runtime/xml2/src/types/positive_integer.rs ----
use std::{fmt, str::FromStr};

use num_bigint::{BigUint, ToBigUint};
use xml2_macro::UtilsDefaultSerde;

/// Arbitrary-precision unsigned integer for the XML Schema `positiveInteger`
/// type.
///
/// See <https://www.w3.org/TR/xmlschema-2/#positiveInteger>.
#[derive(Default, Clone, PartialEq, PartialOrd, Debug, UtilsDefaultSerde)]
pub struct PositiveInteger(pub BigUint);

impl PositiveInteger {
    /// Wraps an existing [`BigUint`].
    ///
    /// No range check happens here; only [`FromStr`] rejects zero.
    pub fn from_biguint(bigint: BigUint) -> Self {
        PositiveInteger(bigint)
    }
}

impl ToBigUint for PositiveInteger {
    /// Returns a copy of the wrapped value; never `None`.
    fn to_biguint(&self) -> Option<BigUint> {
        Some(self.0.clone())
    }
}

impl FromStr for PositiveInteger {
    type Err = String;

    /// Parses `s` as an unsigned integer and accepts it only when non-zero.
    ///
    /// # Errors
    /// Returns the stringified [`BigUint`] parse error, or
    /// `"Bad value for PositiveInteger"` when the value is zero.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        let value = BigUint::from_str(s).map_err(|e| e.to_string())?;
        // For an unsigned value "<= 0" can only mean "== 0".
        if value == BigUint::from(0u32) {
            Err("Bad value for PositiveInteger".to_string())
        } else {
            Ok(PositiveInteger(value))
        }
    }
}

impl fmt::Display for PositiveInteger {
    /// Writes the value in base 10.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.0)
    }
}

// ---- engine/runtime/xml2/src/types/time.rs ----
use std::{fmt, str::FromStr};

use chrono::{format::strftime::StrftimeItems, FixedOffset, NaiveTime};
use xml2_macro::UtilsDefaultSerde;

use crate::types::utils::parse_timezone;

/// A time of day with an optional fixed UTC offset.
#[derive(PartialEq, Debug, Clone, UtilsDefaultSerde)]
pub struct Time {
    /// The wall-clock time, without offset information.
    pub value: NaiveTime,
    /// The UTC offset, or `None` when the lexical form carried no timezone.
    pub timezone: Option<FixedOffset>,
}

impl Time {
    /// Builds a `Time` from a [`NaiveTime`], leaving the timezone unset.
    pub fn from_chrono_naive_time(time: NaiveTime) -> Self {
        Time {
            value: time,
            timezone: None,
        }
    }

    /// Returns the wrapped [`NaiveTime`]; the timezone is not applied.
    pub fn to_chrono_naive_time(&self) -> NaiveTime {
        self.value
    }
}

impl Default for Time {
    /// Midnight (`00:00:00`) with no timezone.
    fn default() -> Time {
        Self {
            value: NaiveTime::from_hms_opt(0, 0, 0).expect("00:00:00 is a valid time"),
            timezone: None,
        }
    }
}

impl FromStr for Time {
    type Err = String;

    /// Parses `HH:MM:SS`, optionally followed by `Z` or a `+hh:mm` / `-hh:mm`
    /// offset.
    ///
    /// # Errors
    /// Returns a message when the time part does not match `%H:%M:%S`, when
    /// the offset is rejected by [`parse_timezone`], or when the offset sign
    /// occurs more than once.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        fn parse_naive_time(s: &str) -> Result<NaiveTime, String> {
            NaiveTime::parse_from_str(s, "%H:%M:%S").map_err(|e| e.to_string())
        }

        // Splits `s` at the first `sign`; `None` when `sign` does not occur.
        fn split_at_sign(s: &str, sign: char) -> Option<Result<(&str, &str), String>> {
            let idx = s.find(sign)?;
            let (time_token, tz_token) = s.split_at(idx);
            // `sign` is ASCII, so skipping one byte stays on a char boundary.
            if tz_token[1..].contains(sign) {
                return Some(Err("bad time format".to_string()));
            }
            Some(Ok((time_token, tz_token)))
        }

        if let Some(time_token) = s.strip_suffix('Z') {
            return Ok(Time {
                value: parse_naive_time(time_token)?,
                timezone: Some(FixedOffset::east_opt(0).expect("zero offset is valid")),
            });
        }

        // '+' is tried before '-', matching the original branch order.
        for sign in "+-".chars() {
            if let Some(split) = split_at_sign(s, sign) {
                let (time_token, tz_token) = split?;
                return Ok(Time {
                    value: parse_naive_time(time_token)?,
                    timezone: Some(parse_timezone(tz_token)?),
                });
            }
        }

        Ok(Time {
            value: parse_naive_time(s)?,
            timezone: None,
        })
    }
}

impl fmt::Display for Time {
    /// Writes `HH:MM:SS`, followed by the offset when one is set.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let time = self
            .value
            .format_with_items(StrftimeItems::new("%H:%M:%S"));
        match self.timezone {
            Some(tz) => write!(f, "{}{}", time, tz),
            None => write!(f, "{}", time),
        }
    }
}

// ---- engine/runtime/xml2/src/types/utils.rs ----
use chrono::FixedOffset;

/// Parses an ISO 8601 timezone designator: `Z`, `+hh:mm` or `-hh:mm`.
///
/// Hours and minutes must each be exactly two ASCII digits, minutes must be
/// below 60, and the offset may not exceed 14:00.
///
/// # Errors
/// Returns a message when the input is empty, malformed, out of range, or
/// does not start with `+` or `-`. Unlike slicing with `s[1..]`, empty input
/// and a multi-byte first character produce an error instead of a panic.
pub fn parse_timezone(s: &str) -> Result<FixedOffset, String> {
    if s == "Z" {
        return Ok(FixedOffset::east_opt(0).expect("zero offset is valid"));
    }

    // Take the sign as a whole `char` so the remainder always starts on a
    // character boundary.
    let mut chars = s.chars();
    let sign = match chars.next() {
        Some(c) => c,
        None => return Err("bad timezone format".to_string()),
    };
    let rest = chars.as_str();

    let (hh, mm) = match rest.split_once(':') {
        Some(parts) => parts,
        None => return Err("bad timezone format".to_string()),
    };
    // A second ':' ends up in `mm` and is rejected by the checks below.
    let two_digits = |t: &str| t.len() == 2 && t.chars().all(|c| c.is_ascii_digit());
    if !two_digits(hh) || !two_digits(mm) {
        return Err("bad timezone format".to_string());
    }

    let hours = hh.parse::<i32>().map_err(|e| e.to_string())?;
    let minutes = mm.parse::<i32>().map_err(|e| e.to_string())?;

    if hours > 14 || (hours == 14 && minutes != 0) || minutes >= 60 {
        return Err("bad timezone format: out of range".to_string());
    }

    let offset_secs = 60 * (60 * hours + minutes);
    match sign {
        '+' => FixedOffset::east_opt(offset_secs)
            .ok_or_else(|| "Seconds out of bound".to_owned()),
        '-' => FixedOffset::west_opt(offset_secs)
            .ok_or_else(|| "Seconds out of bound".to_owned()),
        _ => Err("bad timezone format: timezone should start with '+' or '-'".to_string()),
    }
}

// ---- engine/runtime/xml2/src/types/yaserde.rs ----
use std::io::{Read, Write};

use yaserde::{de, ser};

/// Writes `self_bypass` as a single text-only XML element.
///
/// * `self_bypass` - the value to serialize.
/// * `default_name` - element name used when the writer has no start-event
///   name of its own.
/// * `writer` - the yaserde serializer to write into.
/// * `ser_fn` - converts the value into the element's character data.
///
/// The start and end tags are skipped when `writer.skip_start_end()` is true;
/// the character data is always written.
///
/// # Errors
/// Returns a fixed message naming the write (start, value or end) that failed.
pub fn serialize<S, W: Write>(
    self_bypass: &S,
    default_name: &str,
    writer: &mut ser::Serializer<W>,
    ser_fn: impl FnOnce(&S) -> String,
) -> Result<(), String> {
    let name = writer
        .get_start_event_name()
        .unwrap_or_else(|| default_name.to_string());

    if !writer.skip_start_end() {
        writer
            .write(xml::writer::XmlEvent::start_element(name.as_str()))
            .map_err(|_e| "Start element write failed".to_string())?;
    }

    let text = ser_fn(self_bypass);
    writer
        .write(xml::writer::XmlEvent::characters(text.as_str()))
        .map_err(|_e| "Element value write failed".to_string())?;

    if !writer.skip_start_end() {
        writer
            .write(xml::writer::XmlEvent::end_element())
            .map_err(|_e| "End element write failed".to_string())?;
    }

    Ok(())
}

/// Reads one element's text through `de_fn`.
///
/// The next event must be a start element, which is consumed. If the event
/// after it is character data, that text is passed to `de_fn`; otherwise
/// `de_fn` receives an empty string. The character event is only peeked, not
/// consumed, by this function.
///
/// # Errors
/// Returns `"Start element not found"` when the next event is not a start
/// element, and otherwise whatever `next_event` or `de_fn` returns.
pub fn deserialize<S, R: Read>(
    reader: &mut de::Deserializer<R>,
    de_fn: impl FnOnce(&str) -> Result<S, String>,
) -> Result<S, String> {
    if !matches!(
        reader.peek(),
        Ok(xml::reader::XmlEvent::StartElement { .. })
    ) {
        return Err("Start element not found".to_string());
    }
    reader.next_event()?;

    match reader.peek() {
        Ok(xml::reader::XmlEvent::Characters(text)) => de_fn(text),
        _ => de_fn(""),
    }
}

diff --git a/engine/runtime/xml2/testcase/sample.xml b/engine/runtime/xml2/testcase/sample.xml new file mode 100644 index 000000000..494ef93cd --- /dev/null +++ b/engine/runtime/xml2/testcase/sample.xml @@ -0,0 +1,23 @@ + + + + Charter Group
+ 100 Main + Framingham + MA + 01701 +
+
+ 720 Prospect + Framingham + MA + 01701 +
+
+ 120 Ridge + MA + 01760 +
+
+