Skip to content

Commit

Permalink
Propagate xml:base to resolve URIs
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris00 committed Dec 16, 2014
1 parent 63db77c commit 7536f94
Show file tree
Hide file tree
Showing 12 changed files with 551 additions and 394 deletions.
301 changes: 168 additions & 133 deletions lib/syndic_atom.ml

Large diffs are not rendered by default.

23 changes: 17 additions & 6 deletions lib/syndic_atom.mli
Original file line number Diff line number Diff line change
Expand Up @@ -475,13 +475,19 @@ val feed :
?subtitle:subtitle ->
id:id -> title:title -> updated:updated -> entry list -> feed

val parse : Xmlm.input -> feed
val parse : ?xmlbase: Uri.t -> Xmlm.input -> feed
(** [parse xml] returns the feed corresponding to [xml]. Beware that
[xml] is mutable, so when the parsing fails, one have to create a
new copy of [xml] to use it with another function.
[xml] is mutable, so when the parsing fails, one has to create a
new copy of [xml] to use it with another function. If you
retrieve [xml] from a URL, you should use that URL as [~xmlbase].
Raise [Error.Expected], [Expected_Data] or [Error.Duplicate_Link]
if [xml] is not a valid Atom document. *)
if [xml] is not a valid Atom document.
@param xmlbase default xml:base to resolve relative URLs (of
course xml:base attributes in the XML Atom document take
precedence over this).
See {{:http://www.w3.org/TR/xmlbase/}XML Base}. *)

val to_xml : feed -> Syndic_xml.t
(** [to_xml f] converts the feed [f] to an XML tree. *)
Expand All @@ -502,10 +508,15 @@ val aggregate : ?id:id -> ?updated:updated -> ?subtitle:subtitle ->

(**/**)

type person = [ `Email of string | `Name of string | `URI of string ] list
type uri = Uri.t option * string
(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not
[None], gives the base URI against which [uri] must be resolved if
it is relative. *)

type person = [ `Email of string | `Name of string | `URI of uri ] list

(** Analysis without verification, enjoy ! *)
val unsafe : Xmlm.input ->
val unsafe : ?xmlbase: Uri.t -> Xmlm.input ->
[> `Feed of
[> `Author of person
| `Category of
Expand Down
34 changes: 21 additions & 13 deletions lib/syndic_common.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ module XML = struct

exception Ignore_namespace

type node = Xmlm.pos * Xmlm.tag * t list

let generate_catcher
?(namespaces=[""])
?(attr_producer=[])
Expand All @@ -19,33 +21,39 @@ module XML = struct
try Some (List.assoc name map)
with _ -> None
in
let rec catch_attr acc pos = function
let rec catch_attr ~xmlbase acc pos = function
| (("http://www.w3.org/XML/1998/namespace", "base"), new_base) :: r ->
let xmlbase = base ~parent:xmlbase new_base in
catch_attr ~xmlbase acc pos r
| attr :: r -> begin
match get_producer (get_attr_name attr) attr_producer with
| Some f when in_namespaces attr ->
catch_attr ((f (get_attr_value attr)) :: acc) pos r
| _ -> catch_attr acc pos r end
| [] -> acc
let acc = (f ~xmlbase (get_attr_value attr)) :: acc in

This comment has been minimized.

Copy link
@dsheets

dsheets Jan 6, 2015

I believe this causes an attribute order dependence between xml:base and other attributes that use relative URIs. If I'm reading the w3.org spec correctly, this should not occur and the xml:base for an element should be used on all of its relative reference attributes. I: a) haven't tested this, b) may be reading the spec wrong, c) don't know if you've somehow handled this elsewhere.

This comment has been minimized.

Copy link
@Chris00

Chris00 Jan 6, 2015

Author Owner

I think you are right. I'll check this tomorrow (xml:base is only supposed to be used for sub-nodes, right?).

This comment has been minimized.

Copy link
@Chris00

Chris00 Jan 7, 2015

Author Owner

The ref says “The base URI for a URI reference appearing in any other attribute value, including default attribute values, is the base URI of the element bearing the attribute.” so the value of xml:node has to be extracted before processing other attributes. Will do that.

catch_attr ~xmlbase acc pos r
| _ -> catch_attr ~xmlbase acc pos r end
| [] -> xmlbase, acc
in
let rec catch_datas acc = function
let rec catch_datas ~xmlbase acc = function
| Node (pos, tag, datas) :: r ->
begin match get_producer (get_tag_name tag) data_producer with
| Some f when in_namespaces tag ->
catch_datas ((f (pos, tag, datas)) :: acc) r
| _ -> catch_datas acc r end
let acc = (f ~xmlbase (pos, tag, datas)) :: acc in
catch_datas ~xmlbase acc r
| _ -> catch_datas ~xmlbase acc r end
| Data (pos, str) :: r ->
begin match leaf_producer with
| Some f -> catch_datas ((f pos str) :: acc) r
| None -> catch_datas acc r end
| Some f -> catch_datas ~xmlbase ((f ~xmlbase pos str) :: acc) r
| None -> catch_datas ~xmlbase acc r end
| [] -> acc
in
let generate (pos, tag, datas) =
maker ~pos (catch_attr (catch_datas [] datas) pos (get_attrs tag))
let generate ~xmlbase ((pos, tag, datas): node) =
let xmlbase, acc = catch_attr ~xmlbase [] pos (get_attrs tag) in
maker ~pos (catch_datas ~xmlbase acc datas)
in generate

let dummy_of_xml ~ctor =
let leaf_producer pos data = ctor data in
let head ~pos = function [] -> ctor ""
let leaf_producer ~xmlbase pos data = ctor ~xmlbase data in
let head ~pos = function [] -> ctor ~xmlbase:None ""
| x :: r -> x in
generate_catcher ~leaf_producer head
end
Expand Down
17 changes: 10 additions & 7 deletions lib/syndic_common.mli
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
module XML : sig
type t = Syndic_xml.t

type node = Xmlm.pos * Xmlm.tag * t list

val generate_catcher :
?namespaces:string list ->
?attr_producer:(string * (string -> 'a)) list ->
?data_producer:(string * (Xmlm.pos * Xmlm.tag * t list -> 'a)) list ->
?leaf_producer:(Xmlm.pos -> string -> 'a) ->
(pos: Xmlm.pos -> 'a list -> 'b) -> Xmlm.pos * Xmlm.tag * t list -> 'b

val dummy_of_xml : ctor:(string -> 'a) ->
Xmlm.pos * Xmlm.tag * t list -> 'a
?attr_producer: (string * (xmlbase: Uri.t option -> string -> 'a)) list ->
?data_producer: (string * (xmlbase: Uri.t option -> node -> 'a)) list ->
?leaf_producer: (xmlbase: Uri.t option -> Xmlm.pos -> string -> 'a) ->
(pos: Xmlm.pos -> 'a list -> 'b) ->
xmlbase: Uri.t option -> node -> 'b

val dummy_of_xml : ctor:(xmlbase: Uri.t option -> string -> 'a) ->
xmlbase: Uri.t option -> node -> 'a
end

module Util : sig
Expand Down
132 changes: 76 additions & 56 deletions lib/syndic_opml1.ml
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,15 @@ let string_of_xml name (pos, _, datas) =
try get_leaf datas
with Not_found -> raise (Error.Error (pos, name ^ " must not be empty"))

let title_of_xml a = `Title(string_of_xml "<title>" a)
let title_of_xml ~xmlbase a = `Title(string_of_xml "<title>" a)

let owner_name_of_xml a = `OwnerName(string_of_xml "<ownerName>" a)
let owner_name_of_xml ~xmlbase a =
`OwnerName(string_of_xml "<ownerName>" a)

let owner_email_of_xml a = `OwnerEmail(string_of_xml "<ownerEmail>" a)
let owner_email_of_xml ~xmlbase a =
`OwnerEmail(string_of_xml "<ownerEmail>" a)

let expansion_state_of_xml (pos, _, datas) =
let expansion_state_of_xml ~xmlbase (pos, _, datas) =
let explode s =
let rec aux acc i =
if i = String.length s then acc
Expand Down Expand Up @@ -73,16 +75,20 @@ let int_of_xml name (pos, _, datas) =
with Not_found -> raise (Error.Error (pos, name ^ " must not be empty"))
| Failure _ -> raise (Error.Error (pos, name ^ " must be an integer"))

let vert_scroll_state_of_xml a =
let vert_scroll_state_of_xml ~xmlbase a =
`VertScrollState(int_of_xml "<vertScrollState>" a)

let window_top_of_xml a = `WindowTop(int_of_xml "<windowTop>" a)
let window_top_of_xml ~xmlbase a =
`WindowTop(int_of_xml "<windowTop>" a)

let window_left_of_xml a = `WindowLeft(int_of_xml "<windowLeft>" a)
let window_left_of_xml ~xmlbase a =
`WindowLeft(int_of_xml "<windowLeft>" a)

let window_bottom_of_xml a = `WindowBottom(int_of_xml "<windowBotton>" a)
let window_bottom_of_xml ~xmlbase a =
`WindowBottom(int_of_xml "<windowBotton>" a)

let window_right_of_xml a = `WindowRight(int_of_xml "<windowRight>" a)
let window_right_of_xml ~xmlbase a =
`WindowRight(int_of_xml "<windowRight>" a)

type head' = [
| `Title of string
Expand Down Expand Up @@ -178,8 +184,11 @@ let date_of_xml name (pos, _, datas) =
try Date.of_rfc822 d
with _ -> raise (Error.Error (pos, sprintf "Date %S incorrect" d))

let date_created_of_xml a = `DateCreated (date_of_xml "<dateCreated>" a)
let date_modified_of_xml a = `DateModified (date_of_xml "<dateModified>" a)
let date_created_of_xml ~xmlbase a =
`DateCreated (date_of_xml "<dateCreated>" a)

let date_modified_of_xml ~xmlbase a =
`DateModified (date_of_xml "<dateModified>" a)

let head_of_xml =
let data_producer = [
Expand All @@ -201,17 +210,17 @@ let head_of_xml =

let head_of_xml' =
let data_producer = [
"title", dummy_of_xml ~ctor:(fun a -> `Title a);
"dateCreated", dummy_of_xml ~ctor:(fun a -> `DateCreated a);
"dateModified", dummy_of_xml ~ctor:(fun a -> `DateModified a);
"ownerName", dummy_of_xml ~ctor:(fun a -> `OwnerName a);
"ownerEmail", dummy_of_xml ~ctor:(fun a -> `OwnerEmail a);
"expansionState", dummy_of_xml ~ctor:(fun a -> `ExpansionSate a);
"vertScrollState", dummy_of_xml ~ctor:(fun a -> `VertScrollState a);
"windowTop", dummy_of_xml ~ctor:(fun a -> `WindowTop a);
"windowLeft", dummy_of_xml ~ctor:(fun a -> `WindowLeft a);
"windowBottom", dummy_of_xml ~ctor:(fun a -> `WindowBottom a);
"windowRight", dummy_of_xml ~ctor:(fun a -> `WindowRight a)
"title", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Title a);
"dateCreated", dummy_of_xml ~ctor:(fun ~xmlbase a -> `DateCreated a);
"dateModified", dummy_of_xml ~ctor:(fun ~xmlbase a -> `DateModified a);

This comment has been minimized.

Copy link
@dsheets

dsheets Jan 6, 2015

Why don't these use the _of_xml functions on lines 187-191 above?

This comment has been minimized.

Copy link
@Chris00

Chris00 Jan 6, 2015

Author Owner

Because it is the prime version and we do not want it to fail because the date has an incorrect format. We just want to return the structure of the document with verbatim content.

"ownerName", dummy_of_xml ~ctor:(fun ~xmlbase a -> `OwnerName a);
"ownerEmail", dummy_of_xml ~ctor:(fun ~xmlbase a -> `OwnerEmail a);
"expansionState", dummy_of_xml ~ctor:(fun ~xmlbase a -> `ExpansionSate a);
"vertScrollState", dummy_of_xml ~ctor:(fun ~xmlbase a -> `VertScrollState a);
"windowTop", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowTop a);
"windowLeft", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowLeft a);
"windowBottom", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowBottom a);
"windowRight", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowRight a)
] in
generate_catcher
~data_producer
Expand All @@ -234,7 +243,8 @@ let outline ?typ ?(is_comment=false) ?(is_breakpoint=false) ?xml_url ?html_url
{ text; typ; is_comment; is_breakpoint; xml_url; html_url;
attrs; outlines }

let rec outline_of_node (pos, ((_outline, attributes): Xmlm.tag), datas) =
let rec outline_of_node ~xmlbase ((pos, ((_, attributes)), datas): node) =
let new_xmlbase = ref xmlbase in
let text = ref ""
and typ = ref None
and is_comment = ref false
Expand All @@ -243,30 +253,33 @@ let rec outline_of_node (pos, ((_outline, attributes): Xmlm.tag), datas) =
and html_url = ref None
and attrs = ref []
and outlines = ref [] in
let process_attrs ((_, name), v) = match name with
| "text" -> text := v
| "type" -> typ := Some v
| "isComment" ->
let process_attrs (name, v) = match name with
| ("http://www.w3.org/XML/1998/namespace", "base") ->
new_xmlbase := XML.base ~parent:xmlbase v
| (_, "text") -> text := v
| (_, "type") -> typ := Some v
| (_, "isComment") ->
(try is_comment := bool_of_string v
with _ -> raise(Error.Error (pos, "<isComment> must have true or \
false value.")))
| "isBreakpoint" ->
| (_, "isBreakpoint") ->
(try is_breakpoint := bool_of_string v
with _ -> raise (Error.Error (pos, "<isBreakpoint> must have true \
or false value.")))
| "xmlUrl" ->
(try xml_url := Some(Uri.of_string v)
| (_, "xmlUrl") ->
(try xml_url := Some( XML.resolve ~xmlbase (Uri.of_string v))
with _ -> raise(Error.Error(pos, "<xmlUrl> content must be an URL")))
| "htmlUrl" ->
(try html_url := Some(Uri.of_string v)
| (_, "htmlUrl") ->
(try html_url := Some( XML.resolve ~xmlbase (Uri.of_string v))
with _ -> raise(Error.Error(pos, "<htmlUrl> content must be an URL")))
| _ ->
| (_, name) ->
attrs := (name, v) :: !attrs in
List.iter process_attrs attributes;
let process_outlines = function
| XML.Node (p, (((ns, name), _) as t), d) ->
if ns = "" && name = "outline" then
outlines := outline_of_node (p, t, d) :: !outlines
outlines := outline_of_node ~xmlbase:!new_xmlbase (p, t, d)
:: !outlines
| XML.Data _ -> () in
List.iter process_outlines datas;
{ text = !text;
Expand All @@ -279,27 +292,32 @@ let rec outline_of_node (pos, ((_outline, attributes): Xmlm.tag), datas) =
outlines = !outlines;
}

let outline_of_xml a = `Outline(outline_of_node a)

let rec outline_of_node' (pos, ((_outline, attributes): Xmlm.tag), datas) =
let el_of_attrs ((_, name), v) = match name with
| "text" -> `Text v
| "type" -> `Type v
| "isComment" -> `IsComment v
| "isBreakpoint" -> `IsBreakpoint v
| "xmlUrl" -> `XML_url v
| "htmlUrl" -> `HTML_url v
| _ -> `Attr (name, v) in
let el = ref (List.map el_of_attrs attributes) in
let outline_of_xml ~xmlbase a = `Outline(outline_of_node ~xmlbase a)

let rec outline_of_node' ~xmlbase ((pos, ((_, attributes)), datas): node) =
let new_xmlbase = ref xmlbase in
let el = ref [] in
let el_of_attrs (name, v) = match name with
| ("http://www.w3.org/XML/1998/namespace", "base") ->
new_xmlbase := XML.base ~parent:xmlbase v
| (_, "text") -> el := `Text v :: !el
| (_, "type") -> el := `Type v :: !el
| (_, "isComment") -> el := `IsComment v :: !el
| (_, "isBreakpoint") -> el := `IsBreakpoint v :: !el
| (_, "xmlUrl") -> el := `XML_url(xmlbase, v) :: !el
| (_, "htmlUrl") -> el := `HTML_url(xmlbase, v) :: !el
| (_, name) -> el := `Attr (name, v) :: !el in
List.iter el_of_attrs attributes;
let process_outlines = function
| XML.Node (p, (((ns, name), _) as t), d) ->
if ns = "" && name = "outline" then
el := `Outline(outline_of_node' (p, t, d)) :: !el
el := `Outline(outline_of_node' ~xmlbase:!new_xmlbase (p, t, d))
:: !el
| XML.Data _ -> () in
List.iter process_outlines datas;
!el

let outline_of_xml' a = `Outline(outline_of_node' a)
let outline_of_xml' ~xmlbase a = `Outline(outline_of_node' ~xmlbase a)

type body = outline list

Expand Down Expand Up @@ -358,7 +376,7 @@ let make_opml ~pos (l : [< opml'] list) =

let opml_of_xml =
let attr_producer = [
"version", (fun a -> `Version a)
"version", (fun ~xmlbase a -> `Version a)
] in
let data_producer = [
"head", head_of_xml;
Expand All @@ -371,7 +389,7 @@ let opml_of_xml =

let opml_of_xml' =
let attr_producer = [
"version", (fun a -> `Version a)
"version", (fun ~xmlbase a -> `Version a)
] in
let data_producer = [
"head", head_of_xml';
Expand All @@ -385,14 +403,14 @@ let opml_of_xml' =
let find_opml l =
find (function XML.Node (_, t, _) -> tag_is t "opml" | _ -> false) l

let parse input =
let parse ?xmlbase input =
match XML.of_xmlm input |> snd with
| XML.Node (pos, tag, data) ->
if tag_is tag "opml" then
opml_of_xml (pos, tag, data)
opml_of_xml ~xmlbase (pos, tag, data)
else
begin match find_opml data with
| Some (XML.Node (p, t, d)) -> opml_of_xml (p, t, d)
| Some (XML.Node (p, t, d)) -> opml_of_xml ~xmlbase (p, t, d)
| _ -> raise (Error.Error ((0, 0),
"document MUST contains exactly one <opml> \
element"))
Expand All @@ -401,14 +419,16 @@ let parse input =
"document MUST contains exactly one <opml> \
element"))

let unsafe input =
type uri = Uri.t option * string

let unsafe ?xmlbase input =
match XML.of_xmlm input |> snd with
| XML.Node (pos, tag, data) ->
if tag_is tag "opml" then
`Opml (opml_of_xml' (pos, tag, data))
`Opml (opml_of_xml' ~xmlbase (pos, tag, data))
else
begin match find_opml data with
| Some (XML.Node (p, t, d)) -> `Opml (opml_of_xml' (p, t, d))
| Some (XML.Node (p, t, d)) -> `Opml (opml_of_xml' ~xmlbase (p, t, d))
| _ -> `Opml []
end
| _ -> `Opml []
Expand Down
Loading

0 comments on commit 7536f94

Please sign in to comment.