Skip to content

Commit

Permalink
recursive schema support
Browse files Browse the repository at this point in the history
  • Loading branch information
giacomociti committed Sep 14, 2016
1 parent 7388b40 commit dca2f9b
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 125 deletions.
2 changes: 1 addition & 1 deletion src/FSharp.Data.Xsd/Xml/XmlProvider.fs
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ type public XmlProvider(cfg:TypeProviderConfig) as this =

using (IO.logTime "TypeGeneration" sample) <| fun _ ->

let ctx = XmlGenerationContext.Create(cultureStr, tpType, globalInference, bindingContext)
let ctx = XmlGenerationContext.Create(cultureStr, tpType, (*globalInference*)true, bindingContext)
let result = XmlTypeBuilder.generateXmlType ctx inferedType

{ GeneratedType = tpType
Expand Down
225 changes: 107 additions & 118 deletions src/FSharp.Data.Xsd/Xml/XsdInference.fs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,12 @@
// valid elements according to the definitions in the given schema.
// The InferedType derived from a schema should be essentialy the same as one
// infered from a significant set of valid samples.
// With this perspective we can support some XSD leveraging the existing functionalities.
// Adopting this perspective we can support XSD leveraging the existing functionalities.
// The implementation uses a simplfied XSD model to split the task of deriving an InferedType:
// - element definitions in xsd files map to this simplified xsd model
// - instances of this xsd model map to InferedType.




namespace ProviderImplementation

open System.Xml
Expand All @@ -27,15 +25,17 @@ module XsdModel =
type IsOptional = bool
type Occurs = decimal * decimal

// reference equality and mutable type allows for cycles
[<ReferenceEquality>]
type XsdElement = { Name: XmlQualifiedName
Type: XsdType
mutable Type: XsdType
SubstitutionGroup: XsdElement list
IsAbstract: bool
IsNillable: bool }

and XsdType = SimpleType of XmlTypeCode | ComplexType of XsdComplexType

and XsdComplexType =
and [<ReferenceEquality>] XsdComplexType =
{ Attributes: (XmlQualifiedName * XmlTypeCode * IsOptional) list
Contents: XsdContent }

Expand Down Expand Up @@ -93,57 +93,41 @@ module XsdParsing =
fun elm -> if subst'.ContainsKey elm then subst'.Item elm else []


// worth memoizing?
let hasCycles element =
let items = System.Collections.Generic.HashSet<XmlSchemaObject>()
let rec closure (obj: XmlSchemaObject) =
let nav innerObj =
if items.Add innerObj then closure innerObj
match obj with
| :? XmlSchemaElement as e ->
if e.RefName.IsEmpty then
nav e.ElementSchemaType
(getSubst e) |> Seq.iter nav
else nav (getElm e.RefName)
| :? XmlSchemaComplexType as c ->
nav c.ContentTypeParticle
| :? XmlSchemaGroupRef as r ->
nav r.Particle
| :? XmlSchemaGroupBase as x ->
x.Items
|> ofType<XmlSchemaObject>
|> Seq.iter nav
| _ -> ()
closure element
items.Contains element

let elements = System.Collections.Generic.Dictionary<XmlSchemaElement, XsdModel.XsdElement>()

member x.getElement name = getElm name
member x.getSubstitutions elm = getSubst elm
member x.isRecursive elm = hasCycles elm
member x.Elements = elements


open XsdModel

let rec parseElement (ctx: ParsingContext) elm =
match ctx.Elements.TryGetValue elm with
| true, x -> x
| _ ->
let substitutionGroup =
ctx.getSubstitutions elm
|> List.filter (fun x -> x <> elm)
|> List.map (parseElement ctx)
let elementType =
if ctx.isRecursive elm
then // empty content
ComplexType { Attributes = []; Contents = ComplexContent Empty }
else
match elm.ElementSchemaType with
| :? XmlSchemaSimpleType as x -> SimpleType x.Datatype.TypeCode
| :? XmlSchemaComplexType as x -> ComplexType (parseComplexType ctx x)
| x -> failwithf "unknown ElementSchemaType: %A" x
{ Name = elm.QualifiedName
Type = elementType
SubstitutionGroup = substitutionGroup
IsAbstract = elm.IsAbstract
IsNillable = elm.IsNillable }

// another attempt in case the element is put while parsing substitution groups
match ctx.Elements.TryGetValue elm with
| true, x -> x
| _ ->
let result =
{ Name = elm.QualifiedName
Type = XsdType.SimpleType XmlTypeCode.None // temporary dummy value
SubstitutionGroup = substitutionGroup
IsAbstract = elm.IsAbstract
IsNillable = elm.IsNillable }
ctx.Elements.Add(elm, result)
// computing the real type after filling the dictionary allows for cycles
result.Type <-
match elm.ElementSchemaType with
| :? XmlSchemaSimpleType as x -> SimpleType x.Datatype.TypeCode
| :? XmlSchemaComplexType as x -> ComplexType (parseComplexType ctx x)
| x -> failwithf "unknown ElementSchemaType: %A" x
result

and parseComplexType (ctx: ParsingContext) (x: XmlSchemaComplexType) =
{ Attributes =
Expand Down Expand Up @@ -174,21 +158,23 @@ module XsdParsing =
group.Items
|> ofType<XmlSchemaParticle>
|> Seq.map (parseParticle ctx)
|> List.ofSeq // beware of recursive schemas
|> List.ofSeq
match group with
| :? XmlSchemaAll -> All (occurs, particles)
| :? XmlSchemaChoice -> Choice (occurs, particles)
| :? XmlSchemaSequence -> Sequence (occurs, particles)
| _ -> failwithf "unknown group base: %A" group

match par with
| :? XmlSchemaAny -> Any occurs
| :? XmlSchemaGroupBase as grp -> parseParticles grp
| :? XmlSchemaGroupRef as grpRef -> parseParticle ctx grpRef.Particle
| :? XmlSchemaElement as elm ->
let e = if elm.RefName.IsEmpty then elm else ctx.getElement elm.RefName
Element (occurs, parseElement ctx e)
| _ -> Empty // XmlSchemaParticle.EmptyParticle
let result =
match par with
| :? XmlSchemaAny -> Any occurs
| :? XmlSchemaGroupBase as grp -> parseParticles grp
| :? XmlSchemaGroupRef as grpRef -> parseParticle ctx grpRef.Particle
| :? XmlSchemaElement as elm ->
let e = if elm.RefName.IsEmpty then elm else ctx.getElement elm.RefName
Element (occurs, parseElement ctx e)
| _ -> Empty // XmlSchemaParticle.EmptyParticle
result

let getElements schema =
let ctx = ParsingContext schema
Expand Down Expand Up @@ -240,7 +226,9 @@ module XsdParsing =
cache.Set(uri, value)
value
|> fun value ->
// what if it's not UTF8?
// what if it's not UTF8? probably we should peek the xml string
// looking for an encoding declaration.
// the best solution would be to have the cache store the raw bytes
let bytes = System.Text.Encoding.UTF8.GetBytes value
new System.IO.MemoryStream(bytes) :> obj
else base.GetEntity(absoluteUri, role, ofObjectToReturn)
Expand Down Expand Up @@ -299,8 +287,65 @@ module XsdInference =
let nil = { InferedProperty.Name = "{http://www.w3.org/2001/XMLSchema-instance}nil"
Type = InferedType.Primitive(typeof<bool>, None, true) }

type Ctx = System.Collections.Generic.Dictionary<XsdComplexType, InferedProperty>

// derives an InferedType for an element definition
let rec inferElementType (ctx: Ctx) (elm: XsdElement) =
let name = getElementName elm
if elm.IsAbstract
then InferedType.Record(name, [], optional = false)
else
match elm.Type with
| SimpleType typeCode ->
let ty = InferedType.Primitive (getType typeCode, None, elm.IsNillable)
let prop = { InferedProperty.Name = ""; Type = ty }
let props = if elm.IsNillable then [prop; nil] else [prop]
InferedType.Record(name, props, optional = false)
| ComplexType cty ->
let props = inferProperties ctx cty
let props =
if elm.IsNillable then
for prop in props do
prop.Type <- prop.Type.EnsuresHandlesMissingValues false
nil::props
else props
InferedType.Record(name, props, optional = false)


and inferProperties (ctx: Ctx) cty =
let attrs: InferedProperty list =
cty.Attributes
|> List.map (fun (name, typeCode, optional) ->
{ Name = name.Name
Type = InferedType.Primitive (getType typeCode, None, optional) } )

match cty.Contents with
| SimpleContent typeCode ->
let body = { InferedProperty.Name = ""
Type = InferedType.Primitive (getType typeCode, None, false)}
body::attrs
| ComplexContent xsdParticle ->
let body =
if ctx.ContainsKey cty then ctx.Item cty else
let result = { InferedProperty.Name = ""; Type = InferedType.Top }
ctx.Add(cty, result)
let getRecordTag (e:XsdElement) = InferedTypeTag.Record(getElementName e)
result.Type <-
match getElements ctx Single xsdParticle with
| [] -> InferedType.Null
| items ->
let tags = items |> List.map (fst >> getRecordTag)
let types =
items
|> Seq.zip tags
|> Seq.map (fun (tag, (e, m)) -> tag, (m, inferElementType ctx e))
|> Map.ofSeq
InferedType.Collection(tags, types)
result
if body.Type = InferedType.Null then attrs else body::attrs

// collects element definitions in a particle
let rec getElements parentMultiplicity = function
and getElements (ctx: Ctx) parentMultiplicity = function
| XsdParticle.Element(occ, elm) ->
let mult = combineMultiplicity(parentMultiplicity, getMultiplicity occ)
match elm.IsAbstract, elm.SubstitutionGroup with
Expand All @@ -311,78 +356,22 @@ module XsdInference =
| XsdParticle.Sequence (occ, particles)
| XsdParticle.All (occ, particles) ->
let mult = combineMultiplicity(parentMultiplicity, getMultiplicity occ)
particles |> List.collect (getElements mult)
particles |> List.collect (getElements ctx mult)
| XsdParticle.Choice (occ, particles) ->
let mult = makeOptional (getMultiplicity occ)
let mult' = combineMultiplicity(parentMultiplicity, mult)
particles |> List.collect (getElements mult')
particles |> List.collect (getElements ctx mult')
| XsdParticle.Empty -> []
| XsdParticle.Any _ -> []


// derives an InferedType for an element definition
and inferElementType (elm: XsdElement) =
let name = getElementName elm
if elm.IsAbstract
then InferedType.Record(name, [], optional = false)
else
match elm.Type with
| SimpleType typeCode ->
let ty = InferedType.Primitive (getType typeCode, None, elm.IsNillable)
let prop = { InferedProperty.Name = ""; Type = ty }
let props = if elm.IsNillable then [prop; nil] else [prop]
InferedType.Record(name, props, optional = false)
| ComplexType cty ->
let props = inferProperties cty
let props =
if elm.IsNillable then
for prop in props do
prop.Type <- prop.Type.EnsuresHandlesMissingValues false
nil::props
else props
InferedType.Record(name, props, optional = false)

and inferElements = function
let inferElements elms =
match elms |> List.filter (fun elm -> not elm.IsAbstract) with
| [] -> failwith "No suitable element definition found in the schema."
| [elm] -> inferElementType elm
| [elm] -> inferElementType (Ctx()) elm
| elms ->
elms
|> List.map (fun elm ->
InferedTypeTag.Record (getElementName elm), inferElementType elm)
InferedTypeTag.Record (getElementName elm), inferElementType (Ctx()) elm)
|> Map.ofList
|> InferedType.Heterogeneous


and inferProperties cty =
let attrs: InferedProperty list =
cty.Attributes
|> List.map (fun (name, typeCode, optional) ->
{ Name = name.Name
Type = InferedType.Primitive (getType typeCode, None, optional) } )
match cty.Contents with
| SimpleContent typeCode ->
let ty = InferedType.Primitive (getType typeCode, None, false)
{ Name = ""; Type = ty }::attrs
| ComplexContent xsdParticle ->
match inferParticle InferedMultiplicity.Single xsdParticle with
| InferedTypeTag.Null, _ -> attrs // empty content
| _tag, (_mul, ty) -> { Name = ""; Type = ty }::attrs


and inferParticle (parentMultiplicity: InferedMultiplicity) particle =
let getRecordTag (e:XsdElement) = InferedTypeTag.Record(getElementName e)
match getElements parentMultiplicity particle with
| [] ->
InferedTypeTag.Null,
(InferedMultiplicity.OptionalSingle, InferedType.Null)
| items ->
let tags = items |> List.map (fst >> getRecordTag)

let types =
items
|> Seq.zip tags
|> Seq.map (fun (tag, (e, m)) -> tag, (m, inferElementType e))
|> Map.ofSeq
InferedTypeTag.Collection,
(parentMultiplicity, InferedType.Collection(tags, types))

12 changes: 6 additions & 6 deletions tests/FSharp.Data.Xsd.ProviderTests/Examples.fs
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ type recursiveElements = XmlProvider<Schema = """
""">

[<Test>]
let ``recursive elements have only the XElement property``() =
let ``recursive elements are supported``() =
let doc = recursiveElements.Parse """
<italic>
<bold></bold>
Expand All @@ -165,11 +165,11 @@ let ``recursive elements have only the XElement property``() =
printfn "%A" doc.XElement
match doc.Bold, doc.Italic, doc.Underline with
| None, Some x, None ->
x.XElement.Elements(XName.Get "bold") |> Seq.length |> should equal 2
x.XElement.Elements(XName.Get "italic") |> Seq.length |> should equal 0
x.XElement.Elements(XName.Get "underline") |> Seq.length |> should equal 1


x.Bolds.Length |> should equal 2
x.Italics.Length |>should equal 0
x.Underlines.Length |> should equal 1
x.Bolds.[1].Bolds.Length |> should equal 1
| _ -> failwith "unexpected"


Expand Down

0 comments on commit dca2f9b

Please sign in to comment.