From dca2f9b8de367417cc32b242ecfe64d3f69c0989 Mon Sep 17 00:00:00 2001 From: giacomociti Date: Wed, 14 Sep 2016 22:25:22 +0200 Subject: [PATCH] recursive schema support --- src/FSharp.Data.Xsd/Xml/XmlProvider.fs | 2 +- src/FSharp.Data.Xsd/Xml/XsdInference.fs | 225 +++++++++--------- .../FSharp.Data.Xsd.ProviderTests/Examples.fs | 12 +- 3 files changed, 114 insertions(+), 125 deletions(-) diff --git a/src/FSharp.Data.Xsd/Xml/XmlProvider.fs b/src/FSharp.Data.Xsd/Xml/XmlProvider.fs index da6a54a..d1ceb44 100644 --- a/src/FSharp.Data.Xsd/Xml/XmlProvider.fs +++ b/src/FSharp.Data.Xsd/Xml/XmlProvider.fs @@ -83,7 +83,7 @@ type public XmlProvider(cfg:TypeProviderConfig) as this = using (IO.logTime "TypeGeneration" sample) <| fun _ -> - let ctx = XmlGenerationContext.Create(cultureStr, tpType, globalInference, bindingContext) + let ctx = XmlGenerationContext.Create(cultureStr, tpType, (*globalInference*)true, bindingContext) let result = XmlTypeBuilder.generateXmlType ctx inferedType { GeneratedType = tpType diff --git a/src/FSharp.Data.Xsd/Xml/XsdInference.fs b/src/FSharp.Data.Xsd/Xml/XsdInference.fs index 4204b90..a6f4451 100644 --- a/src/FSharp.Data.Xsd/Xml/XsdInference.fs +++ b/src/FSharp.Data.Xsd/Xml/XsdInference.fs @@ -8,14 +8,12 @@ // valid elements according to the definitions in the given schema. // The InferedType derived from a schema should be essentialy the same as one // infered from a significant set of valid samples. -// With this perspective we can support some XSD leveraging the existing functionalities. +// Adopting this perspective we can support XSD leveraging the existing functionalities. // The implementation uses a simplfied XSD model to split the task of deriving an InferedType: // - element definitions in xsd files map to this simplified xsd model // - instances of this xsd model map to InferedType. - - namespace ProviderImplementation open System.Xml @@ -27,15 +25,17 @@ module XsdModel = type IsOptional = bool type Occurs = decimal * decimal + // reference equality and mutable type allows for cycles + [] type XsdElement = { Name: XmlQualifiedName - Type: XsdType + mutable Type: XsdType SubstitutionGroup: XsdElement list IsAbstract: bool IsNillable: bool } and XsdType = SimpleType of XmlTypeCode | ComplexType of XsdComplexType - and XsdComplexType = + and [] XsdComplexType = { Attributes: (XmlQualifiedName * XmlTypeCode * IsOptional) list Contents: XsdContent } @@ -93,57 +93,41 @@ module XsdParsing = fun elm -> if subst'.ContainsKey elm then subst'.Item elm else [] - // worth memoizing? - let hasCycles element = - let items = System.Collections.Generic.HashSet() - let rec closure (obj: XmlSchemaObject) = - let nav innerObj = - if items.Add innerObj then closure innerObj - match obj with - | :? XmlSchemaElement as e -> - if e.RefName.IsEmpty then - nav e.ElementSchemaType - (getSubst e) |> Seq.iter nav - else nav (getElm e.RefName) - | :? XmlSchemaComplexType as c -> - nav c.ContentTypeParticle - | :? XmlSchemaGroupRef as r -> - nav r.Particle - | :? XmlSchemaGroupBase as x -> - x.Items - |> ofType - |> Seq.iter nav - | _ -> () - closure element - items.Contains element - + let elements = System.Collections.Generic.Dictionary() member x.getElement name = getElm name member x.getSubstitutions elm = getSubst elm - member x.isRecursive elm = hasCycles elm + member x.Elements = elements + open XsdModel let rec parseElement (ctx: ParsingContext) elm = + match ctx.Elements.TryGetValue elm with + | true, x -> x + | _ -> let substitutionGroup = ctx.getSubstitutions elm |> List.filter (fun x -> x <> elm) |> List.map (parseElement ctx) - let elementType = - if ctx.isRecursive elm - then // empty content - ComplexType { Attributes = []; Contents = ComplexContent Empty } - else - match elm.ElementSchemaType with - | :? XmlSchemaSimpleType as x -> SimpleType x.Datatype.TypeCode - | :? XmlSchemaComplexType as x -> ComplexType (parseComplexType ctx x) - | x -> failwithf "unknown ElementSchemaType: %A" x - { Name = elm.QualifiedName - Type = elementType - SubstitutionGroup = substitutionGroup - IsAbstract = elm.IsAbstract - IsNillable = elm.IsNillable } - + // another attempt in case the element is put while parsing substitution groups + match ctx.Elements.TryGetValue elm with + | true, x -> x + | _ -> + let result = + { Name = elm.QualifiedName + Type = XsdType.SimpleType XmlTypeCode.None // temporary dummy value + SubstitutionGroup = substitutionGroup + IsAbstract = elm.IsAbstract + IsNillable = elm.IsNillable } + ctx.Elements.Add(elm, result) + // computing the real type after filling the dictionary allows for cycles + result.Type <- + match elm.ElementSchemaType with + | :? XmlSchemaSimpleType as x -> SimpleType x.Datatype.TypeCode + | :? XmlSchemaComplexType as x -> ComplexType (parseComplexType ctx x) + | x -> failwithf "unknown ElementSchemaType: %A" x + result and parseComplexType (ctx: ParsingContext) (x: XmlSchemaComplexType) = { Attributes = @@ -174,21 +158,23 @@ module XsdParsing = group.Items |> ofType |> Seq.map (parseParticle ctx) - |> List.ofSeq // beware of recursive schemas + |> List.ofSeq match group with | :? XmlSchemaAll -> All (occurs, particles) | :? XmlSchemaChoice -> Choice (occurs, particles) | :? XmlSchemaSequence -> Sequence (occurs, particles) | _ -> failwithf "unknown group base: %A" group - match par with - | :? XmlSchemaAny -> Any occurs - | :? XmlSchemaGroupBase as grp -> parseParticles grp - | :? XmlSchemaGroupRef as grpRef -> parseParticle ctx grpRef.Particle - | :? XmlSchemaElement as elm -> - let e = if elm.RefName.IsEmpty then elm else ctx.getElement elm.RefName - Element (occurs, parseElement ctx e) - | _ -> Empty // XmlSchemaParticle.EmptyParticle + let result = + match par with + | :? XmlSchemaAny -> Any occurs + | :? XmlSchemaGroupBase as grp -> parseParticles grp + | :? XmlSchemaGroupRef as grpRef -> parseParticle ctx grpRef.Particle + | :? XmlSchemaElement as elm -> + let e = if elm.RefName.IsEmpty then elm else ctx.getElement elm.RefName + Element (occurs, parseElement ctx e) + | _ -> Empty // XmlSchemaParticle.EmptyParticle + result let getElements schema = let ctx = ParsingContext schema @@ -240,7 +226,9 @@ module XsdParsing = cache.Set(uri, value) value |> fun value -> - // what if it's not UTF8? + // what if it's not UTF8? probably we should peek the xml string + // looking for an encoding declaration. + // the best solution would be to have the cache store the raw bytes let bytes = System.Text.Encoding.UTF8.GetBytes value new System.IO.MemoryStream(bytes) :> obj else base.GetEntity(absoluteUri, role, ofObjectToReturn) @@ -299,8 +287,65 @@ module XsdInference = let nil = { InferedProperty.Name = "{http://www.w3.org/2001/XMLSchema-instance}nil" Type = InferedType.Primitive(typeof, None, true) } + type Ctx = System.Collections.Generic.Dictionary + + // derives an InferedType for an element definition + let rec inferElementType (ctx: Ctx) (elm: XsdElement) = + let name = getElementName elm + if elm.IsAbstract + then InferedType.Record(name, [], optional = false) + else + match elm.Type with + | SimpleType typeCode -> + let ty = InferedType.Primitive (getType typeCode, None, elm.IsNillable) + let prop = { InferedProperty.Name = ""; Type = ty } + let props = if elm.IsNillable then [prop; nil] else [prop] + InferedType.Record(name, props, optional = false) + | ComplexType cty -> + let props = inferProperties ctx cty + let props = + if elm.IsNillable then + for prop in props do + prop.Type <- prop.Type.EnsuresHandlesMissingValues false + nil::props + else props + InferedType.Record(name, props, optional = false) + + + and inferProperties (ctx: Ctx) cty = + let attrs: InferedProperty list = + cty.Attributes + |> List.map (fun (name, typeCode, optional) -> + { Name = name.Name + Type = InferedType.Primitive (getType typeCode, None, optional) } ) + + match cty.Contents with + | SimpleContent typeCode -> + let body = { InferedProperty.Name = "" + Type = InferedType.Primitive (getType typeCode, None, false)} + body::attrs + | ComplexContent xsdParticle -> + let body = + if ctx.ContainsKey cty then ctx.Item cty else + let result = { InferedProperty.Name = ""; Type = InferedType.Top } + ctx.Add(cty, result) + let getRecordTag (e:XsdElement) = InferedTypeTag.Record(getElementName e) + result.Type <- + match getElements ctx Single xsdParticle with + | [] -> InferedType.Null + | items -> + let tags = items |> List.map (fst >> getRecordTag) + let types = + items + |> Seq.zip tags + |> Seq.map (fun (tag, (e, m)) -> tag, (m, inferElementType ctx e)) + |> Map.ofSeq + InferedType.Collection(tags, types) + result + if body.Type = InferedType.Null then attrs else body::attrs + // collects element definitions in a particle - let rec getElements parentMultiplicity = function + and getElements (ctx: Ctx) parentMultiplicity = function | XsdParticle.Element(occ, elm) -> let mult = combineMultiplicity(parentMultiplicity, getMultiplicity occ) match elm.IsAbstract, elm.SubstitutionGroup with @@ -311,78 +356,22 @@ module XsdInference = | XsdParticle.Sequence (occ, particles) | XsdParticle.All (occ, particles) -> let mult = combineMultiplicity(parentMultiplicity, getMultiplicity occ) - particles |> List.collect (getElements mult) + particles |> List.collect (getElements ctx mult) | XsdParticle.Choice (occ, particles) -> let mult = makeOptional (getMultiplicity occ) let mult' = combineMultiplicity(parentMultiplicity, mult) - particles |> List.collect (getElements mult') + particles |> List.collect (getElements ctx mult') | XsdParticle.Empty -> [] | XsdParticle.Any _ -> [] - // derives an InferedType for an element definition - and inferElementType (elm: XsdElement) = - let name = getElementName elm - if elm.IsAbstract - then InferedType.Record(name, [], optional = false) - else - match elm.Type with - | SimpleType typeCode -> - let ty = InferedType.Primitive (getType typeCode, None, elm.IsNillable) - let prop = { InferedProperty.Name = ""; Type = ty } - let props = if elm.IsNillable then [prop; nil] else [prop] - InferedType.Record(name, props, optional = false) - | ComplexType cty -> - let props = inferProperties cty - let props = - if elm.IsNillable then - for prop in props do - prop.Type <- prop.Type.EnsuresHandlesMissingValues false - nil::props - else props - InferedType.Record(name, props, optional = false) - - and inferElements = function + let inferElements elms = + match elms |> List.filter (fun elm -> not elm.IsAbstract) with | [] -> failwith "No suitable element definition found in the schema." - | [elm] -> inferElementType elm + | [elm] -> inferElementType (Ctx()) elm | elms -> elms |> List.map (fun elm -> - InferedTypeTag.Record (getElementName elm), inferElementType elm) + InferedTypeTag.Record (getElementName elm), inferElementType (Ctx()) elm) |> Map.ofList |> InferedType.Heterogeneous - - - and inferProperties cty = - let attrs: InferedProperty list = - cty.Attributes - |> List.map (fun (name, typeCode, optional) -> - { Name = name.Name - Type = InferedType.Primitive (getType typeCode, None, optional) } ) - match cty.Contents with - | SimpleContent typeCode -> - let ty = InferedType.Primitive (getType typeCode, None, false) - { Name = ""; Type = ty }::attrs - | ComplexContent xsdParticle -> - match inferParticle InferedMultiplicity.Single xsdParticle with - | InferedTypeTag.Null, _ -> attrs // empty content - | _tag, (_mul, ty) -> { Name = ""; Type = ty }::attrs - - - and inferParticle (parentMultiplicity: InferedMultiplicity) particle = - let getRecordTag (e:XsdElement) = InferedTypeTag.Record(getElementName e) - match getElements parentMultiplicity particle with - | [] -> - InferedTypeTag.Null, - (InferedMultiplicity.OptionalSingle, InferedType.Null) - | items -> - let tags = items |> List.map (fst >> getRecordTag) - - let types = - items - |> Seq.zip tags - |> Seq.map (fun (tag, (e, m)) -> tag, (m, inferElementType e)) - |> Map.ofSeq - InferedTypeTag.Collection, - (parentMultiplicity, InferedType.Collection(tags, types)) - diff --git a/tests/FSharp.Data.Xsd.ProviderTests/Examples.fs b/tests/FSharp.Data.Xsd.ProviderTests/Examples.fs index 3e446af..a6c1f27 100644 --- a/tests/FSharp.Data.Xsd.ProviderTests/Examples.fs +++ b/tests/FSharp.Data.Xsd.ProviderTests/Examples.fs @@ -151,7 +151,7 @@ type recursiveElements = XmlProvider [] -let ``recursive elements have only the XElement property``() = +let ``recursive elements are supported``() = let doc = recursiveElements.Parse """ @@ -165,11 +165,11 @@ let ``recursive elements have only the XElement property``() = printfn "%A" doc.XElement match doc.Bold, doc.Italic, doc.Underline with | None, Some x, None -> - x.XElement.Elements(XName.Get "bold") |> Seq.length |> should equal 2 - x.XElement.Elements(XName.Get "italic") |> Seq.length |> should equal 0 - x.XElement.Elements(XName.Get "underline") |> Seq.length |> should equal 1 - - + x.Bolds.Length |> should equal 2 + x.Italics.Length |>should equal 0 + x.Underlines.Length |> should equal 1 + x.Bolds.[1].Bolds.Length |> should equal 1 + | _ -> failwith "unexpected"