From 7536f9414da5fb15f1cdf85178644bafd5fbcd84 Mon Sep 17 00:00:00 2001 From: Christophe Troestler Date: Tue, 16 Dec 2014 23:22:39 +0100 Subject: [PATCH] Propagate xml:base to resolve URIs See https://github.com/Cumulus/Syndic/issues/32 as well as https://github.com/ocaml/platform-blog/issues/12 --- lib/syndic_atom.ml | 301 +++++++++++++++++++++++------------------- lib/syndic_atom.mli | 23 +++- lib/syndic_common.ml | 34 +++-- lib/syndic_common.mli | 17 ++- lib/syndic_opml1.ml | 132 ++++++++++-------- lib/syndic_opml1.mli | 13 +- lib/syndic_rss1.ml | 84 ++++++------ lib/syndic_rss1.mli | 39 +++--- lib/syndic_rss2.ml | 259 +++++++++++++++++++++--------------- lib/syndic_rss2.mli | 29 ++-- lib/syndic_xml.ml | 9 ++ lib/syndic_xml.mli | 5 + 12 files changed, 551 insertions(+), 394 deletions(-) diff --git a/lib/syndic_atom.ml b/lib/syndic_atom.ml index 49d934b..1b954c3 100644 --- a/lib/syndic_atom.ml +++ b/lib/syndic_atom.ml @@ -30,7 +30,7 @@ let link ?type_media ?hreflang ?title ?length ~rel href = { href ; rel ; type_media ; hreflang ; title ; length } type link' = [ - | `HREF of string + | `HREF of Uri.t | `Rel of string | `Type of string | `HREFLang of string @@ -88,6 +88,8 @@ type author = email: string option; } +let dummy_author = { name = ""; uri = None; email = None } + let author ?uri ?email name = { uri ; email ; name } @@ -103,7 +105,7 @@ let make_person datas ~pos (l : [< person'] list) = | Some (`Name s) -> s | _ -> (* The spec mandates that name - but severay feed just do name *) + but several feeds just do name *) get_leaf datas in (* element atom:uri { atomUri }? *) let uri = match find (function `URI _ -> true | _ -> false) l with @@ -120,17 +122,17 @@ let make_person datas ~pos (l : [< person'] list) = let make_author datas ~pos a = `Author(make_person datas ~pos a) -let person_name_of_xml (pos, tag, datas) = +let person_name_of_xml ~xmlbase (pos, tag, datas) = `Name(try get_leaf datas with Not_found -> "") (* mandatory ? *) -let person_uri_of_xml (pos, tag, datas) = - try `URI(Uri.of_string (get_leaf datas)) +let person_uri_of_xml ~xmlbase (pos, tag, datas) = + try `URI(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> raise (Error.Error (pos, "The content of MUST be \ a non-empty string")) -let person_email_of_xml (pos, tag, datas) = +let person_email_of_xml ~xmlbase (pos, tag, datas) = `Email(try get_leaf datas with Not_found -> "") (* mandatory ? *) @@ -151,16 +153,17 @@ let person_data_producer = [ ("uri", person_uri_of_xml); ("email", person_email_of_xml); ] -let author_of_xml ((_, _, datas) as xml) = +let author_of_xml ~xmlbase ((_, _, datas) as xml) = generate_catcher ~namespaces ~data_producer:person_data_producer - (make_author datas) xml + (make_author datas) ~xmlbase xml -type person = [ `Email of string | `Name of string | `URI of string ] list +type uri = Uri.t option * string +type person = [ `Email of string | `Name of string | `URI of uri ] list let person_data_producer' = [ - ("name", dummy_of_xml ~ctor:(fun a -> `Name a)); - ("uri", dummy_of_xml ~ctor:(fun a -> `URI a)); - ("email", dummy_of_xml ~ctor:(fun a -> `Email a)); + ("name", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Name a)); + ("uri", dummy_of_xml ~ctor:(fun ~xmlbase a -> `URI(xmlbase, a))); + ("email", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Email a)); ] let author_of_xml' = generate_catcher ~namespaces ~data_producer:person_data_producer' @@ -178,7 +181,7 @@ let category ?scheme ?label term = type category' = [ | `Term of string - | `Scheme of string + | `Scheme of Uri.t | `Label of string ] @@ -194,7 +197,7 @@ let make_category ~pos (l : [< category'] list) = (* attribute scheme { atomUri }? *) let scheme = match find (function `Scheme _ -> true | _ -> false) l with - | Some (`Scheme u) -> Some (Uri.of_string u) + | Some (`Scheme u) -> Some u | _ -> None in (* attribute label { text }? *) @@ -204,6 +207,8 @@ let make_category ~pos (l : [< category'] list) = in `Category({ term; scheme; label; } : category) +let scheme_of_xml ~xmlbase a = + `Scheme(XML.resolve ~xmlbase (Uri.of_string a)) (* atomCategory = element atom:category { @@ -214,21 +219,25 @@ let make_category ~pos (l : [< category'] list) = undefinedContent } *) -let category_of_xml, category_of_xml' = - let attr_producer = [ - ("term", (fun a -> `Term a)); - ("scheme", (fun a -> `Scheme a)); - ("label", (fun a -> `Label a)) - ] in - generate_catcher ~attr_producer make_category, +let category_attr_producer = [ + ("term", (fun ~xmlbase a -> `Term a)); + ("label", (fun ~xmlbase a -> `Label a)) + ] +let category_of_xml = + let attr_producer = ("scheme", scheme_of_xml) :: category_attr_producer in + generate_catcher ~attr_producer make_category + +let category_of_xml' = + let attr_producer = ("scheme", (fun ~xmlbase a -> `Scheme a)) + :: category_attr_producer in generate_catcher ~attr_producer (fun ~pos x -> `Category x) let make_contributor datas ~pos a = `Contributor(make_person datas ~pos a) -let contributor_of_xml ((_, _, datas) as xml) = +let contributor_of_xml ~xmlbase ((_, _, datas) as xml) = generate_catcher ~namespaces ~data_producer:person_data_producer - (make_contributor datas) xml + (make_contributor datas) ~xmlbase xml let contributor_of_xml' = generate_catcher ~namespaces ~data_producer:person_data_producer' @@ -244,7 +253,7 @@ type generator = let generator ?uri ?version content = { uri ; version ; content } type generator' = [ - | `URI of string + | `URI of Uri.t | `Version of string | `Content of string ] @@ -264,11 +273,14 @@ let make_generator ~pos (l : [< generator'] list) = in (* attribute uri { atomUri }? *) let uri = match find (function `URI _ -> true | _ -> false) l with - | Some ((`URI u)) -> Some (Uri.of_string u) + | Some (`URI u) -> Some u | _ -> None in `Generator({ version; uri; content; } : generator) +let generator_uri_of_xml ~xmlbase a = + `URI(XML.resolve ~xmlbase (Uri.of_string a)) + (* atomGenerator = element atom:generator { atomCommonAttributes, attribute uri { atomUri }?, @@ -276,23 +288,29 @@ let make_generator ~pos (l : [< generator'] list) = text } *) -let generator_of_xml, generator_of_xml' = +let generator_of_xml = + let attr_producer = [ + ("version", (fun ~xmlbase a -> `Version a)); + ("uri", generator_uri_of_xml); + ] in + let leaf_producer ~xmlbase pos data = `Content data in + generate_catcher ~attr_producer ~leaf_producer make_generator + +let generator_of_xml' = let attr_producer = [ - ("version", (fun a -> `Version a)); - ("uri", (fun a -> `URI a)); + ("version", (fun ~xmlbase a -> `Version a)); + ("uri", (fun ~xmlbase a -> `URI a)); ] in - let leaf_producer pos data = `Content data in - generate_catcher ~attr_producer ~leaf_producer make_generator, + let leaf_producer ~xmlbase pos data = `Content data in generate_catcher ~attr_producer ~leaf_producer (fun ~pos x -> `Generator x) type icon = Uri.t -type icon' = [ `URI of string ] -let make_icon ~pos (l : [< icon'] list) = +let make_icon ~pos (l : Uri.t list) = (** (atomUri) *) - let uri = match find (fun (`URI _) -> true) l with - | Some (`URI u) -> (Uri.of_string u) - | _ -> raise (Error.Error (pos, + let uri = match l with + | u :: _ -> u + | [] -> raise (Error.Error (pos, "The content of MUST be \ a non-empty string")) in @@ -302,9 +320,13 @@ let make_icon ~pos (l : [< icon'] list) = atomCommonAttributes, } *) -let icon_of_xml, icon_of_xml' = - let leaf_producer pos data = `URI data in - generate_catcher ~leaf_producer make_icon, +let icon_of_xml = + let leaf_producer ~xmlbase pos data = + XML.resolve ~xmlbase (Uri.of_string data) in + generate_catcher ~leaf_producer make_icon + +let icon_of_xml' = + let leaf_producer ~xmlbase pos data = `URI data in generate_catcher ~leaf_producer (fun ~pos x -> `Icon x) @@ -326,7 +348,7 @@ let make_id ~pos (l : string list) = } *) let id_of_xml, id_of_xml' = - let leaf_producer pos data = data in + let leaf_producer ~xmlbase pos data = data in generate_catcher ~leaf_producer make_id, generate_catcher ~leaf_producer (fun ~pos x -> `ID x) @@ -336,12 +358,16 @@ let rel_of_string s = match String.lowercase (String.trim s) with | "self" -> Self | "enclosure" -> Enclosure | "via" -> Via - | uri -> Link (Uri.of_string uri) (* RFC 4287 § 4.2.7.2 *) + | uri -> + (* RFC 4287 § 4.2.7.2: the use of a relative reference other than + a simple name is not allowed. Thus no need to resolve against + xml:base. *) + Link (Uri.of_string uri) let make_link ~pos (l : [< link'] list) = (* attribute href { atomUri } *) let href = match find (function `HREF _ -> true | _ -> false) l with - | Some (`HREF u) -> (Uri.of_string u) + | Some (`HREF u) -> u | _ -> raise (Error.Error (pos, "Link elements MUST have a 'href' \ @@ -375,6 +401,9 @@ let make_link ~pos (l : [< link'] list) = in `Link({ href; rel; type_media; hreflang; title; length; } : link) +let link_href_of_xml ~xmlbase a = + `HREF(XML.resolve ~xmlbase (Uri.of_string a)) + (* atomLink = element atom:link { atomCommonAttributes, @@ -387,26 +416,30 @@ let make_link ~pos (l : [< link'] list) = undefinedContent } *) -let link_of_xml, link_of_xml' = - let attr_producer = [ - ("href", (fun a -> `HREF a)); - ("rel", (fun a -> `Rel a)); - ("type", (fun a -> `Type a)); - ("hreflang", (fun a -> `HREFLang a)); - ("title", (fun a -> `Title a)); - ("length", (fun a -> `Length a)); - ] in - generate_catcher ~attr_producer make_link, +let link_attr_producer = [ + ("rel", (fun ~xmlbase a -> `Rel a)); + ("type", (fun ~xmlbase a -> `Type a)); + ("hreflang", (fun ~xmlbase a -> `HREFLang a)); + ("title", (fun ~xmlbase a -> `Title a)); + ("length", (fun ~xmlbase a -> `Length a)); + ] + +let link_of_xml = + let attr_producer = ("href", link_href_of_xml) :: link_attr_producer in + generate_catcher ~attr_producer make_link + +let link_of_xml' = + let attr_producer = ("href", (fun ~xmlbase a -> `HREF a)) + :: link_attr_producer in generate_catcher ~attr_producer (fun ~pos x -> `Link x) type logo = Uri.t -type logo' = [ `URI of string ] -let make_logo ~pos (l : [< logo'] list) = +let make_logo ~pos (l : Uri.t list) = (* (atomUri) *) - let uri = match find (fun (`URI _) -> true) l with - | Some (`URI u) -> (Uri.of_string u) - | _ -> raise (Error.Error (pos, + let uri = match l with + | u :: _ -> u + | [] -> raise (Error.Error (pos, "The content of MUST be \ a non-empty string")) in @@ -417,9 +450,13 @@ let make_logo ~pos (l : [< logo'] list) = (atomUri) } *) -let logo_of_xml, logo_of_xml' = - let leaf_producer pos data = `URI data in - generate_catcher ~leaf_producer make_logo, +let logo_of_xml = + let leaf_producer ~xmlbase pos data = + XML.resolve ~xmlbase (Uri.of_string data) in + generate_catcher ~leaf_producer make_logo + +let logo_of_xml' = + let leaf_producer ~xmlbase pos data = `URI data in generate_catcher ~leaf_producer (fun ~pos x -> `Logo x) type published = Date.t @@ -437,32 +474,35 @@ let make_published ~pos (l : [< published'] list) = (* atomPublished = element atom:published { atomDateConstruct } *) let published_of_xml, published_of_xml' = - let leaf_producer pos data = `Date data in + let leaf_producer ~xmlbase pos data = `Date data in generate_catcher ~leaf_producer make_published, generate_catcher ~leaf_producer (fun ~pos x -> `Published x) type rights = text_construct -let rights_of_xml a = `Rights(text_construct_of_xml a) +let rights_of_xml ~xmlbase a = `Rights(text_construct_of_xml a) (* atomRights = element atom:rights { atomTextConstruct } *) -let rights_of_xml' ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = +let rights_of_xml' ~xmlbase + ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = `Rights(data) type title = text_construct -let title_of_xml a = `Title(text_construct_of_xml a) +let title_of_xml ~xmlbase a = `Title(text_construct_of_xml a) (* atomTitle = element atom:title { atomTextConstruct } *) -let title_of_xml' ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = +let title_of_xml' ~xmlbase + ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = `Title data type subtitle = text_construct -let subtitle_of_xml a = `Subtitle(text_construct_of_xml a) +let subtitle_of_xml ~xmlbase a = `Subtitle(text_construct_of_xml a) (* atomSubtitle = element atom:subtitle { atomTextConstruct } *) -let subtitle_of_xml' ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = +let subtitle_of_xml' ~xmlbase + ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = `Subtitle data type updated = Date.t @@ -480,7 +520,7 @@ let make_updated ~pos (l : [< updated'] list) = (* atomUpdated = element atom:updated { atomDateConstruct } *) let updated_of_xml, updated_of_xml' = - let leaf_producer pos data = `Date data in + let leaf_producer ~xmlbase pos data = `Date data in generate_catcher ~leaf_producer make_updated, generate_catcher ~leaf_producer (fun ~pos x -> `Updated x) @@ -524,20 +564,10 @@ type source' = [ | `Updated of updated ] -let make_source ~entry_authors ~pos (l : [< source'] list) = +let make_source ~pos (l : [< source'] list) = (* atomAuthor* *) let authors = - List.fold_left - (fun acc -> function `Author x -> x :: acc | _ -> acc) [] l in - let authors = match authors, entry_authors with - | _ :: _, _ -> authors - | [], _ :: _ -> entry_authors - | [], [] -> - raise (Error.Error (pos, - " elements MUST contains one or more \ - elements")) - (* XXX: no see this rule in RFC *) - in + List.fold_left (fun acc -> function `Author x -> x :: acc | _ -> acc) [] l in (* atomCategory* *) let categories = List.fold_left @@ -597,18 +627,18 @@ let make_source ~entry_authors ~pos (l : [< source'] list) = | Some (`Updated d) -> Some d | _ -> None in - ({ authors; - categories; - contributors; - generator; - icon; - id; - links; - logo; - rights; - subtitle; - title; - updated; } : source) + `Source ({ authors; + categories; + contributors; + generator; + icon; + id; + links; + logo; + rights; + subtitle; + title; + updated; } : source) (* atomSource = element atom:source { @@ -643,11 +673,10 @@ let source_of_xml = ("title", title_of_xml); ("updated", updated_of_xml); ] in - fun ~entry_authors -> generate_catcher ~namespaces ~data_producer - (make_source ~entry_authors) + make_source let source_of_xml' = let data_producer = [ @@ -715,7 +744,8 @@ type content' = [ | atomInlineOtherContent | atomOutOfLineContent *) -let content_of_xml ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = +let content_of_xml ~xmlbase + ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = (* MIME ::= attribute type { "text" | "html" }? | attribute type { "xhtml" } | attribute type { atomMediaType }? *) @@ -726,7 +756,7 @@ let content_of_xml ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = let mime = match find (fun a -> attr_is a "type") attr with | Some(_, ty) -> Some ty | None -> None in - `Content(Src(mime, Uri.of_string src)) + `Content(Src(mime, XML.resolve ~xmlbase (Uri.of_string src))) | None -> (* (text)* * | xhtmlDiv @@ -738,7 +768,8 @@ let content_of_xml ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = | Some (_, "xhtml") -> Xhtml(get_xml_content data data) | Some (_, mime) -> Mime(mime, get_leaf data)) -let content_of_xml' ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = +let content_of_xml' ~xmlbase + ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = let l = match find (fun a -> attr_is a "src") attr with | Some(_, src) -> [`SRC src] | None -> [] in @@ -751,9 +782,9 @@ let content_of_xml' ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = type summary = text_construct (* atomSummary = element atom:summary { atomTextConstruct } *) -let summary_of_xml a = `Summary(text_construct_of_xml a) +let summary_of_xml ~xmlbase a = `Summary(text_construct_of_xml a) -let summary_of_xml' ((pos, (tag, attr), data) : Xmlm.pos * Xmlm.tag * t list) = +let summary_of_xml' ~xmlbase ((_, (_, _), data): Xmlm.pos * Xmlm.tag * t list) = `Summary data type entry = @@ -872,38 +903,30 @@ type feed' = [ ] -let make_entry ~(feed_authors: author list) ~pos l = +let make_entry ~pos l = let authors = List.fold_left (fun acc -> function `Author x -> x :: acc | _ -> acc) [] l in - let authors = match authors with - (* default author is feed/author, see RFC 4287 § 4.1.2 *) - | [] -> feed_authors - | _ -> authors in (* atomSource? (pass the authors known so far) *) let sources = List.fold_left (fun acc -> function `Source x -> x :: acc | _ -> acc) [] l in let source = match sources with | [] -> None - | [s] -> Some(source_of_xml ~entry_authors:authors s) + | [s] -> Some s | _ -> (* RFC 4287 § 4.1.2 *) let msg = " elements MUST NOT contain more than one \ element." in raise(Error.Error(pos, msg)) in let authors = match authors, source with | a0 :: a, _ -> a0, a - | [], Some s -> + | [], Some(s: source) -> (match s.authors with | a0 :: a -> a0, a | [] -> let msg = " does not contain an and its \ neither does" in raise (Error.Error (pos, msg))) - | [], None -> - raise (Error.Error (pos, - " elements MUST contains one or more \ - elements or elements MUST \ - contains one or more elements")) + | [], None -> dummy_author, [] (* unacceptable value *) (* atomCategory* *) in let categories = List.fold_left (fun acc -> function `Category x -> x :: acc | _ -> acc) [] l @@ -952,18 +975,18 @@ let make_entry ~(feed_authors: author list) ~pos l = " elements MUST contains exactly one \ elements")) in - ({ authors; - categories; - content; - contributors; - id; - links = uniq_link_alternate ~pos links; - published; - rights; - source; - summary; - title; - updated; } : entry) + `Entry (pos, ({ authors; + categories; + content; + contributors; + id; + links = uniq_link_alternate ~pos links; + published; + rights; + source; + summary; + title; + updated; } : entry)) (* atomEntry = element atom:entry { @@ -992,17 +1015,16 @@ let entry_of_xml = ("link", link_of_xml); ("published", published_of_xml); ("rights", rights_of_xml); - ("source", (fun a -> `Source a)); + ("source", source_of_xml); ("content", content_of_xml); ("summary", summary_of_xml); ("title", title_of_xml); ("updated", updated_of_xml); ] in - fun ~feed_authors -> generate_catcher ~namespaces ~data_producer - (make_entry ~feed_authors) + make_entry let entry_of_xml' = let data_producer = [ @@ -1107,11 +1129,21 @@ let make_feed ~pos (l : _ list) = elements")) in (* atomEntry* *) + let fix_author pos (e: entry) = + match e.authors with + | (a, []) when a.name = "" -> + (match authors with + | a0 :: a -> { e with authors = (a0, a) } + | [] -> + let msg = " elements MUST contains at least an \ + element or element MUST \ + contains one or more elements" in + raise (Error.Error (pos, msg))) + | _ -> e in let entries = List.fold_left - (fun acc -> function `Entry x -> - entry_of_xml ~feed_authors:authors x :: acc - | _ -> acc) [] l in + (fun acc -> function `Entry(pos, e) -> fix_author pos e :: acc + | _ -> acc) [] l in ({ authors; categories; contributors; @@ -1145,6 +1177,7 @@ let make_feed ~pos (l : _ list) = atomEntry* } *) + let feed_of_xml = let data_producer = [ ("author", author_of_xml); @@ -1159,7 +1192,7 @@ let feed_of_xml = ("subtitle", subtitle_of_xml); ("title", title_of_xml); ("updated", updated_of_xml); - ("entry", (fun a -> `Entry a)); + ("entry", entry_of_xml); ] in generate_catcher ~namespaces ~data_producer make_feed @@ -1181,19 +1214,21 @@ let feed_of_xml' = ] in generate_catcher ~namespaces ~data_producer (fun ~pos x -> x) -let parse input = + + +let parse ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, datas) when tag_is tag "feed" -> - feed_of_xml (pos, tag, datas) + feed_of_xml ~xmlbase (pos, tag, datas) | _ -> raise (Error.Error ((0, 0), "document MUST contains exactly one \ element")) (* FIXME: the spec says that an entry can appear as the top-level element *) -let unsafe input = +let unsafe ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, datas) when tag_is tag "feed" -> - `Feed (feed_of_xml' (pos, tag, datas)) + `Feed (feed_of_xml' ~xmlbase (pos, tag, datas)) | _ -> `Feed [] diff --git a/lib/syndic_atom.mli b/lib/syndic_atom.mli index 589627d..c93d86d 100644 --- a/lib/syndic_atom.mli +++ b/lib/syndic_atom.mli @@ -475,13 +475,19 @@ val feed : ?subtitle:subtitle -> id:id -> title:title -> updated:updated -> entry list -> feed -val parse : Xmlm.input -> feed +val parse : ?xmlbase: Uri.t -> Xmlm.input -> feed (** [parse xml] returns the feed corresponding to [xml]. Beware that - [xml] is mutable, so when the parsing fails, one have to create a - new copy of [xml] to use it with another function. + [xml] is mutable, so when the parsing fails, one has to create a + new copy of [xml] to use it with another function. If you + retrieve [xml] from a URL, you should use that URL as [~xmlbase]. Raise [Error.Expected], [Expected_Data] or [Error.Duplicate_Link] - if [xml] is not a valid Atom document. *) + if [xml] is not a valid Atom document. + + @param xmlbase default xml:base to resolve relative URLs (of + course xml:base attributes in the XML Atom document take + precedence over this). + See {{:http://www.w3.org/TR/xmlbase/}XML Base}. *) val to_xml : feed -> Syndic_xml.t (** [to_xml f] converts the feed [f] to an XML tree. *) @@ -502,10 +508,15 @@ val aggregate : ?id:id -> ?updated:updated -> ?subtitle:subtitle -> (**/**) -type person = [ `Email of string | `Name of string | `URI of string ] list +type uri = Uri.t option * string +(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not + [None], gives the base URI against which [uri] must be resolved if + it is relative. *) + +type person = [ `Email of string | `Name of string | `URI of uri ] list (** Analysis without verification, enjoy ! *) -val unsafe : Xmlm.input -> +val unsafe : ?xmlbase: Uri.t -> Xmlm.input -> [> `Feed of [> `Author of person | `Category of diff --git a/lib/syndic_common.ml b/lib/syndic_common.ml index 7daa6cb..5c21200 100644 --- a/lib/syndic_common.ml +++ b/lib/syndic_common.ml @@ -5,6 +5,8 @@ module XML = struct exception Ignore_namespace + type node = Xmlm.pos * Xmlm.tag * t list + let generate_catcher ?(namespaces=[""]) ?(attr_producer=[]) @@ -19,33 +21,39 @@ module XML = struct try Some (List.assoc name map) with _ -> None in - let rec catch_attr acc pos = function + let rec catch_attr ~xmlbase acc pos = function + | (("http://www.w3.org/XML/1998/namespace", "base"), new_base) :: r -> + let xmlbase = base ~parent:xmlbase new_base in + catch_attr ~xmlbase acc pos r | attr :: r -> begin match get_producer (get_attr_name attr) attr_producer with | Some f when in_namespaces attr -> - catch_attr ((f (get_attr_value attr)) :: acc) pos r - | _ -> catch_attr acc pos r end - | [] -> acc + let acc = (f ~xmlbase (get_attr_value attr)) :: acc in + catch_attr ~xmlbase acc pos r + | _ -> catch_attr ~xmlbase acc pos r end + | [] -> xmlbase, acc in - let rec catch_datas acc = function + let rec catch_datas ~xmlbase acc = function | Node (pos, tag, datas) :: r -> begin match get_producer (get_tag_name tag) data_producer with | Some f when in_namespaces tag -> - catch_datas ((f (pos, tag, datas)) :: acc) r - | _ -> catch_datas acc r end + let acc = (f ~xmlbase (pos, tag, datas)) :: acc in + catch_datas ~xmlbase acc r + | _ -> catch_datas ~xmlbase acc r end | Data (pos, str) :: r -> begin match leaf_producer with - | Some f -> catch_datas ((f pos str) :: acc) r - | None -> catch_datas acc r end + | Some f -> catch_datas ~xmlbase ((f ~xmlbase pos str) :: acc) r + | None -> catch_datas ~xmlbase acc r end | [] -> acc in - let generate (pos, tag, datas) = - maker ~pos (catch_attr (catch_datas [] datas) pos (get_attrs tag)) + let generate ~xmlbase ((pos, tag, datas): node) = + let xmlbase, acc = catch_attr ~xmlbase [] pos (get_attrs tag) in + maker ~pos (catch_datas ~xmlbase acc datas) in generate let dummy_of_xml ~ctor = - let leaf_producer pos data = ctor data in - let head ~pos = function [] -> ctor "" + let leaf_producer ~xmlbase pos data = ctor ~xmlbase data in + let head ~pos = function [] -> ctor ~xmlbase:None "" | x :: r -> x in generate_catcher ~leaf_producer head end diff --git a/lib/syndic_common.mli b/lib/syndic_common.mli index 70e16d3..bd551bc 100644 --- a/lib/syndic_common.mli +++ b/lib/syndic_common.mli @@ -1,15 +1,18 @@ module XML : sig type t = Syndic_xml.t + type node = Xmlm.pos * Xmlm.tag * t list + val generate_catcher : ?namespaces:string list -> - ?attr_producer:(string * (string -> 'a)) list -> - ?data_producer:(string * (Xmlm.pos * Xmlm.tag * t list -> 'a)) list -> - ?leaf_producer:(Xmlm.pos -> string -> 'a) -> - (pos: Xmlm.pos -> 'a list -> 'b) -> Xmlm.pos * Xmlm.tag * t list -> 'b - - val dummy_of_xml : ctor:(string -> 'a) -> - Xmlm.pos * Xmlm.tag * t list -> 'a + ?attr_producer: (string * (xmlbase: Uri.t option -> string -> 'a)) list -> + ?data_producer: (string * (xmlbase: Uri.t option -> node -> 'a)) list -> + ?leaf_producer: (xmlbase: Uri.t option -> Xmlm.pos -> string -> 'a) -> + (pos: Xmlm.pos -> 'a list -> 'b) -> + xmlbase: Uri.t option -> node -> 'b + + val dummy_of_xml : ctor:(xmlbase: Uri.t option -> string -> 'a) -> + xmlbase: Uri.t option -> node -> 'a end module Util : sig diff --git a/lib/syndic_opml1.ml b/lib/syndic_opml1.ml index 3f72598..a19f2f0 100644 --- a/lib/syndic_opml1.ml +++ b/lib/syndic_opml1.ml @@ -32,13 +32,15 @@ let string_of_xml name (pos, _, datas) = try get_leaf datas with Not_found -> raise (Error.Error (pos, name ^ " must not be empty")) -let title_of_xml a = `Title(string_of_xml "" a) +let title_of_xml ~xmlbase a = `Title(string_of_xml "<title>" a) -let owner_name_of_xml a = `OwnerName(string_of_xml "<ownerName>" a) +let owner_name_of_xml ~xmlbase a = + `OwnerName(string_of_xml "<ownerName>" a) -let owner_email_of_xml a = `OwnerEmail(string_of_xml "<ownerEmail>" a) +let owner_email_of_xml ~xmlbase a = + `OwnerEmail(string_of_xml "<ownerEmail>" a) -let expansion_state_of_xml (pos, _, datas) = +let expansion_state_of_xml ~xmlbase (pos, _, datas) = let explode s = let rec aux acc i = if i = String.length s then acc @@ -73,16 +75,20 @@ let int_of_xml name (pos, _, datas) = with Not_found -> raise (Error.Error (pos, name ^ " must not be empty")) | Failure _ -> raise (Error.Error (pos, name ^ " must be an integer")) -let vert_scroll_state_of_xml a = +let vert_scroll_state_of_xml ~xmlbase a = `VertScrollState(int_of_xml "<vertScrollState>" a) -let window_top_of_xml a = `WindowTop(int_of_xml "<windowTop>" a) +let window_top_of_xml ~xmlbase a = + `WindowTop(int_of_xml "<windowTop>" a) -let window_left_of_xml a = `WindowLeft(int_of_xml "<windowLeft>" a) +let window_left_of_xml ~xmlbase a = + `WindowLeft(int_of_xml "<windowLeft>" a) -let window_bottom_of_xml a = `WindowBottom(int_of_xml "<windowBotton>" a) +let window_bottom_of_xml ~xmlbase a = + `WindowBottom(int_of_xml "<windowBotton>" a) -let window_right_of_xml a = `WindowRight(int_of_xml "<windowRight>" a) +let window_right_of_xml ~xmlbase a = + `WindowRight(int_of_xml "<windowRight>" a) type head' = [ | `Title of string @@ -178,8 +184,11 @@ let date_of_xml name (pos, _, datas) = try Date.of_rfc822 d with _ -> raise (Error.Error (pos, sprintf "Date %S incorrect" d)) -let date_created_of_xml a = `DateCreated (date_of_xml "<dateCreated>" a) -let date_modified_of_xml a = `DateModified (date_of_xml "<dateModified>" a) +let date_created_of_xml ~xmlbase a = + `DateCreated (date_of_xml "<dateCreated>" a) + +let date_modified_of_xml ~xmlbase a = + `DateModified (date_of_xml "<dateModified>" a) let head_of_xml = let data_producer = [ @@ -201,17 +210,17 @@ let head_of_xml = let head_of_xml' = let data_producer = [ - "title", dummy_of_xml ~ctor:(fun a -> `Title a); - "dateCreated", dummy_of_xml ~ctor:(fun a -> `DateCreated a); - "dateModified", dummy_of_xml ~ctor:(fun a -> `DateModified a); - "ownerName", dummy_of_xml ~ctor:(fun a -> `OwnerName a); - "ownerEmail", dummy_of_xml ~ctor:(fun a -> `OwnerEmail a); - "expansionState", dummy_of_xml ~ctor:(fun a -> `ExpansionSate a); - "vertScrollState", dummy_of_xml ~ctor:(fun a -> `VertScrollState a); - "windowTop", dummy_of_xml ~ctor:(fun a -> `WindowTop a); - "windowLeft", dummy_of_xml ~ctor:(fun a -> `WindowLeft a); - "windowBottom", dummy_of_xml ~ctor:(fun a -> `WindowBottom a); - "windowRight", dummy_of_xml ~ctor:(fun a -> `WindowRight a) + "title", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Title a); + "dateCreated", dummy_of_xml ~ctor:(fun ~xmlbase a -> `DateCreated a); + "dateModified", dummy_of_xml ~ctor:(fun ~xmlbase a -> `DateModified a); + "ownerName", dummy_of_xml ~ctor:(fun ~xmlbase a -> `OwnerName a); + "ownerEmail", dummy_of_xml ~ctor:(fun ~xmlbase a -> `OwnerEmail a); + "expansionState", dummy_of_xml ~ctor:(fun ~xmlbase a -> `ExpansionSate a); + "vertScrollState", dummy_of_xml ~ctor:(fun ~xmlbase a -> `VertScrollState a); + "windowTop", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowTop a); + "windowLeft", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowLeft a); + "windowBottom", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowBottom a); + "windowRight", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WindowRight a) ] in generate_catcher ~data_producer @@ -234,7 +243,8 @@ let outline ?typ ?(is_comment=false) ?(is_breakpoint=false) ?xml_url ?html_url { text; typ; is_comment; is_breakpoint; xml_url; html_url; attrs; outlines } -let rec outline_of_node (pos, ((_outline, attributes): Xmlm.tag), datas) = +let rec outline_of_node ~xmlbase ((pos, ((_, attributes)), datas): node) = + let new_xmlbase = ref xmlbase in let text = ref "" and typ = ref None and is_comment = ref false @@ -243,30 +253,33 @@ let rec outline_of_node (pos, ((_outline, attributes): Xmlm.tag), datas) = and html_url = ref None and attrs = ref [] and outlines = ref [] in - let process_attrs ((_, name), v) = match name with - | "text" -> text := v - | "type" -> typ := Some v - | "isComment" -> + let process_attrs (name, v) = match name with + | ("http://www.w3.org/XML/1998/namespace", "base") -> + new_xmlbase := XML.base ~parent:xmlbase v + | (_, "text") -> text := v + | (_, "type") -> typ := Some v + | (_, "isComment") -> (try is_comment := bool_of_string v with _ -> raise(Error.Error (pos, "<isComment> must have true or \ false value."))) - | "isBreakpoint" -> + | (_, "isBreakpoint") -> (try is_breakpoint := bool_of_string v with _ -> raise (Error.Error (pos, "<isBreakpoint> must have true \ or false value."))) - | "xmlUrl" -> - (try xml_url := Some(Uri.of_string v) + | (_, "xmlUrl") -> + (try xml_url := Some( XML.resolve ~xmlbase (Uri.of_string v)) with _ -> raise(Error.Error(pos, "<xmlUrl> content must be an URL"))) - | "htmlUrl" -> - (try html_url := Some(Uri.of_string v) + | (_, "htmlUrl") -> + (try html_url := Some( XML.resolve ~xmlbase (Uri.of_string v)) with _ -> raise(Error.Error(pos, "<htmlUrl> content must be an URL"))) - | _ -> + | (_, name) -> attrs := (name, v) :: !attrs in List.iter process_attrs attributes; let process_outlines = function | XML.Node (p, (((ns, name), _) as t), d) -> if ns = "" && name = "outline" then - outlines := outline_of_node (p, t, d) :: !outlines + outlines := outline_of_node ~xmlbase:!new_xmlbase (p, t, d) + :: !outlines | XML.Data _ -> () in List.iter process_outlines datas; { text = !text; @@ -279,27 +292,32 @@ let rec outline_of_node (pos, ((_outline, attributes): Xmlm.tag), datas) = outlines = !outlines; } -let outline_of_xml a = `Outline(outline_of_node a) - -let rec outline_of_node' (pos, ((_outline, attributes): Xmlm.tag), datas) = - let el_of_attrs ((_, name), v) = match name with - | "text" -> `Text v - | "type" -> `Type v - | "isComment" -> `IsComment v - | "isBreakpoint" -> `IsBreakpoint v - | "xmlUrl" -> `XML_url v - | "htmlUrl" -> `HTML_url v - | _ -> `Attr (name, v) in - let el = ref (List.map el_of_attrs attributes) in +let outline_of_xml ~xmlbase a = `Outline(outline_of_node ~xmlbase a) + +let rec outline_of_node' ~xmlbase ((pos, ((_, attributes)), datas): node) = + let new_xmlbase = ref xmlbase in + let el = ref [] in + let el_of_attrs (name, v) = match name with + | ("http://www.w3.org/XML/1998/namespace", "base") -> + new_xmlbase := XML.base ~parent:xmlbase v + | (_, "text") -> el := `Text v :: !el + | (_, "type") -> el := `Type v :: !el + | (_, "isComment") -> el := `IsComment v :: !el + | (_, "isBreakpoint") -> el := `IsBreakpoint v :: !el + | (_, "xmlUrl") -> el := `XML_url(xmlbase, v) :: !el + | (_, "htmlUrl") -> el := `HTML_url(xmlbase, v) :: !el + | (_, name) -> el := `Attr (name, v) :: !el in + List.iter el_of_attrs attributes; let process_outlines = function | XML.Node (p, (((ns, name), _) as t), d) -> if ns = "" && name = "outline" then - el := `Outline(outline_of_node' (p, t, d)) :: !el + el := `Outline(outline_of_node' ~xmlbase:!new_xmlbase (p, t, d)) + :: !el | XML.Data _ -> () in List.iter process_outlines datas; !el -let outline_of_xml' a = `Outline(outline_of_node' a) +let outline_of_xml' ~xmlbase a = `Outline(outline_of_node' ~xmlbase a) type body = outline list @@ -358,7 +376,7 @@ let make_opml ~pos (l : [< opml'] list) = let opml_of_xml = let attr_producer = [ - "version", (fun a -> `Version a) + "version", (fun ~xmlbase a -> `Version a) ] in let data_producer = [ "head", head_of_xml; @@ -371,7 +389,7 @@ let opml_of_xml = let opml_of_xml' = let attr_producer = [ - "version", (fun a -> `Version a) + "version", (fun ~xmlbase a -> `Version a) ] in let data_producer = [ "head", head_of_xml'; @@ -385,14 +403,14 @@ let opml_of_xml' = let find_opml l = find (function XML.Node (_, t, _) -> tag_is t "opml" | _ -> false) l -let parse input = +let parse ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, data) -> if tag_is tag "opml" then - opml_of_xml (pos, tag, data) + opml_of_xml ~xmlbase (pos, tag, data) else begin match find_opml data with - | Some (XML.Node (p, t, d)) -> opml_of_xml (p, t, d) + | Some (XML.Node (p, t, d)) -> opml_of_xml ~xmlbase (p, t, d) | _ -> raise (Error.Error ((0, 0), "document MUST contains exactly one <opml> \ element")) @@ -401,14 +419,16 @@ let parse input = "document MUST contains exactly one <opml> \ element")) -let unsafe input = +type uri = Uri.t option * string + +let unsafe ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, data) -> if tag_is tag "opml" then - `Opml (opml_of_xml' (pos, tag, data)) + `Opml (opml_of_xml' ~xmlbase (pos, tag, data)) else begin match find_opml data with - | Some (XML.Node (p, t, d)) -> `Opml (opml_of_xml' (p, t, d)) + | Some (XML.Node (p, t, d)) -> `Opml (opml_of_xml' ~xmlbase (p, t, d)) | _ -> `Opml [] end | _ -> `Opml [] diff --git a/lib/syndic_opml1.mli b/lib/syndic_opml1.mli index c8b155e..6529d95 100644 --- a/lib/syndic_opml1.mli +++ b/lib/syndic_opml1.mli @@ -102,7 +102,7 @@ type opml = body : body; } -val parse : Xmlm.input -> opml +val parse : ?xmlbase: Uri.t -> Xmlm.input -> opml (** [parse i] takes [i] and returns an opml record which is the OCaml representation of the OPML document. *) @@ -116,7 +116,12 @@ val output : opml -> Xmlm.dest -> unit (**/**) -val unsafe : Xmlm.input -> +type uri = Uri.t option * string +(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not + [None], gives the base URI against which [uri] must be resolved if + it is relative. *) + +val unsafe : ?xmlbase: Uri.t -> Xmlm.input -> [> `Opml of [> `Body of [> `Outline of @@ -125,8 +130,8 @@ val unsafe : Xmlm.input -> | `IsBreakpoint of string | `IsComment of string | `Outline of 'a - | `XML_url of string - | `HTML_url of string + | `XML_url of uri + | `HTML_url of uri | `Attr of string * string ] list as 'a ] list diff --git a/lib/syndic_rss1.ml b/lib/syndic_rss1.ml index 593d2e2..f6dc92b 100644 --- a/lib/syndic_rss1.ml +++ b/lib/syndic_rss1.ml @@ -19,7 +19,7 @@ let make_title ~pos (l : string list) = in `Title title let title_of_xml, title_of_xml' = - let leaf_producer pos data = data in + let leaf_producer ~xmlbase pos data = data in generate_catcher ~namespaces ~leaf_producer make_title, generate_catcher ~namespaces ~leaf_producer (fun ~pos x -> `Title x) @@ -35,7 +35,7 @@ let make_name ~pos (l : string list) = in `Name name let name_of_xml, name_of_xml' = - let leaf_producer pos data = data in + let leaf_producer ~xmlbase pos data = data in generate_catcher ~namespaces ~leaf_producer make_name, generate_catcher ~namespaces ~leaf_producer (fun ~pos x -> `Name x) @@ -51,16 +51,16 @@ let make_description ~pos (l : string list) = in `Description description let description_of_xml, description_of_xml' = - let leaf_producer pos data = data in + let leaf_producer ~xmlbase pos data = data in generate_catcher ~namespaces ~leaf_producer make_description, generate_catcher ~namespaces ~leaf_producer (fun ~pos x -> `Description x) type channel_image = Uri.t -type channel_image' = [ `URI of string ] +type channel_image' = [ `URI of Uri.t option * string ] let make_channel_image ~pos (l : [< channel_image' ] list) = let image = match find (function `URI _ -> true) l with - | Some (`URI u) -> (Uri.of_string u) + | Some (`URI(xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) | _ -> raise (Error.Error (pos, "The content of <image> MUST be \ @@ -69,17 +69,17 @@ let make_channel_image ~pos (l : [< channel_image' ] list) = let channel_image_of_xml, channel_image_of_xml' = let attr_producer = [ - ("resource", (fun a -> `URI a)); + ("resource", (fun ~xmlbase a -> `URI(xmlbase, a))); ] in generate_catcher ~namespaces ~attr_producer make_channel_image, generate_catcher ~namespaces ~attr_producer (fun ~pos x -> `Image x) type link = Uri.t -type link' = [ `URI of string ] +type link' = [ `URI of Uri.t option * string ] let make_link ~pos (l : [< link' ] list) = let link = match find (function `URI _ -> true) l with - | Some (`URI u) -> (Uri.of_string u) + | Some (`URI(xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) | _ -> raise (Error.Error (pos, "The content of <link> MUST be \ @@ -87,16 +87,16 @@ let make_link ~pos (l : [< link' ] list) = in `Link link let link_of_xml, link_of_xml' = - let leaf_producer pos data = `URI data in + let leaf_producer ~xmlbase pos data = `URI(xmlbase, data) in generate_catcher ~namespaces ~leaf_producer make_link, generate_catcher ~namespaces ~leaf_producer (fun ~pos x -> `Link x) type url = Uri.t -type url' = [ `URI of string ] +type url' = [ `URI of Uri.t option * string ] let make_url ~pos (l : [< url' ] list) = let url = match find (function `URI _ -> true) l with - | Some (`URI u) -> Uri.of_string u + | Some (`URI(xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) | _ -> raise (Error.Error (pos, "The content of <url> MUST be \ @@ -104,16 +104,16 @@ let make_url ~pos (l : [< url' ] list) = in `URL url let url_of_xml, url_of_xml' = - let leaf_producer pos data = `URI data in + let leaf_producer ~xmlbase pos data = `URI(xmlbase, data) in generate_catcher ~namespaces ~leaf_producer make_url, generate_catcher ~namespaces ~leaf_producer (fun ~pos x -> `URL x) type li = Uri.t -type li' = [ `URI of string ] +type li' = [ `URI of Uri.t option * string ] let make_li ~pos (l : [< li' ] list) = let url = match find (function `URI _ -> true) l with - | Some (`URI u) -> (Uri.of_string u) + | Some (`URI(xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) | _ -> raise (Error.Error (pos, "Li elements MUST have a 'resource' \ @@ -123,7 +123,7 @@ let make_li ~pos (l : [< li' ] list) = let li_of_xml, li_of_xml' = let attr_producer = [ - ("resource", (fun a -> `URI a)); + ("resource", (fun ~xmlbase a -> `URI(xmlbase, a))); ] in generate_catcher ~namespaces @@ -184,11 +184,11 @@ let items_of_xml' = ~data_producer (fun ~pos x -> `Items x) type channel_textinput = Uri.t -type channel_textinput' = [ `URI of string ] +type channel_textinput' = [ `URI of Uri.t option * string ] let make_textinput ~pos (l : [< channel_textinput' ] list) = let url = match find (function `URI _ -> true) l with - | Some (`URI u) -> (Uri.of_string u) + | Some (`URI(xmlbase, u)) -> XML.resolve ~xmlbase (Uri.of_string u) | _ -> raise (Error.Error (pos, "Textinput elements MUST have a 'resource' \ @@ -197,7 +197,7 @@ let make_textinput ~pos (l : [< channel_textinput' ] list) = let channel_textinput_of_xml, channel_textinput_of_xml' = let attr_producer = [ - ("resource", (fun a -> `URI a)); + ("resource", (fun ~xmlbase a -> `URI(xmlbase, a))); ] in generate_catcher ~namespaces ~attr_producer make_textinput, generate_catcher ~namespaces ~attr_producer (fun ~pos x -> `TextInput x) @@ -220,12 +220,12 @@ type channel' = [ | `Image of channel_image | `Items of items | `TextInput of channel_textinput - | `About of string + | `About of Uri.t ] let make_channel ~pos (l : [< channel' ] list) = let about = match find (function `About _ -> true | _ -> false) l with - | Some (`About u) -> (Uri.of_string u) + | Some (`About u) -> u | _ -> raise (Error.Error (pos, "Channel elements MUST have a 'about' \ @@ -262,6 +262,12 @@ let make_channel ~pos (l : [< channel' ] list) = `Channel({ about; title; link; description; image; items; textinput } : channel) +let about_of_xml ~xmlbase a = + `About(XML.resolve ~xmlbase (Uri.of_string a)) + +let about_of_xml' ~xmlbase a = `About(xmlbase, a) + + let channel_of_xml = let data_producer = [ ("title", title_of_xml); @@ -272,7 +278,7 @@ let channel_of_xml = ("textinput", channel_textinput_of_xml); ] in let attr_producer = [ - ("about", (fun a -> `About a)); + ("about", about_of_xml); ] in generate_catcher ~namespaces @@ -290,7 +296,7 @@ let channel_of_xml' = ("textinput", channel_textinput_of_xml'); ] in let attr_producer = [ - ("about", (fun a -> `About a)); + ("about", about_of_xml'); ] in generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos x -> `Channel x) @@ -307,7 +313,7 @@ type image' = [ | `Title of title | `Link of link | `URL of url - | `About of string + | `About of Uri.t ] let make_image ~pos (l : [< image' ] list) = @@ -327,7 +333,7 @@ let make_image ~pos (l : [< image' ] list) = "<image> elements MUST contains exactly one \ <url> element")) in let about = match find (function `About _ -> true | _ -> false) l with - | Some (`About a) -> (Uri.of_string a) + | Some (`About a) -> a | _ -> raise (Error.Error (pos, "Image elements MUST have a 'about' \ @@ -341,7 +347,7 @@ let image_of_xml = ("url", url_of_xml); ] in let attr_producer = [ - ("about", (fun a -> `About a)); + ("about", about_of_xml); ] in generate_catcher ~namespaces @@ -356,7 +362,7 @@ let image_of_xml' = ("url", url_of_xml'); ] in let attr_producer = [ - ("about", (fun a -> `About a)); + ("about", about_of_xml'); ] in generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos x -> `Image x) @@ -373,7 +379,7 @@ type item' = [ | `Title of title | `Link of link | `Description of description - | `About of string + | `About of Uri.t ] let make_item ~pos (l : [< item' ] list) = @@ -392,7 +398,7 @@ let make_item ~pos (l : [< item' ] list) = | Some (`Description d) -> Some d | _ -> None in let about = match find (function `About _ -> true | _ -> false) l with - | Some (`About u) -> (Uri.of_string u) + | Some (`About u) -> u | _ -> raise (Error.Error (pos, "Item elements MUST have a 'about' \ @@ -406,7 +412,7 @@ let item_of_xml = ("description", description_of_xml); ] in let attr_producer = [ - ("about", (fun a -> `About a)); + ("about", about_of_xml); ] in generate_catcher ~namespaces @@ -421,7 +427,7 @@ let item_of_xml' = ("description", description_of_xml'); ] in let attr_producer = [ - ("about", (fun a -> `About a)); + ("about", about_of_xml'); ] in generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos x -> `Item x) @@ -436,7 +442,7 @@ type textinput = } type textinput' = [ - | `About of string + | `About of Uri.t | `Title of title | `Description of description | `Name of name @@ -466,7 +472,7 @@ let make_textinput ~pos (l : [< textinput' ] list) = "<textinput> elements MUST contains exactly one \ <link> element")) in let about = match find (function `About _ -> true | _ -> false) l with - | Some (`About u) -> (Uri.of_string u) + | Some (`About u) -> u | _ -> raise (Error.Error (pos, "Textinput elements MUST have a 'about' \ @@ -481,7 +487,7 @@ let textinput_of_xml = ("link", link_of_xml); ] in let attr_producer = [ - ("about", (fun a -> `About a)) + ("about", about_of_xml) ] in generate_catcher ~namespaces @@ -497,7 +503,7 @@ let textinput_of_xml' = ("link", link_of_xml'); ] in let attr_producer = [ - ("about", (fun a -> `About a)) + ("about", about_of_xml') ] in generate_catcher ~namespaces ~attr_producer ~data_producer (fun ~pos x -> `TextInput x) @@ -555,16 +561,18 @@ let rdf_of_xml' = ] in generate_catcher ~namespaces ~data_producer (fun ~pos x -> x) -let parse input = +let parse ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, datas) when tag_is tag "RDF" -> - rdf_of_xml (pos, tag, datas) + rdf_of_xml ~xmlbase (pos, tag, datas) | _ -> raise (Error.Error ((0, 0), "document MUST contains exactly one \ <rdf> element")) -let unsafe input = +type uri = Uri.t option * string + +let unsafe ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, datas) when tag_is tag "RDF" -> - `RDF(rdf_of_xml' (pos, tag, datas)) + `RDF(rdf_of_xml' xmlbase (pos, tag, datas)) | _ -> `RDF [] diff --git a/lib/syndic_rss1.mli b/lib/syndic_rss1.mli index be99ef4..20a6573 100644 --- a/lib/syndic_rss1.mli +++ b/lib/syndic_rss1.mli @@ -282,45 +282,54 @@ Model: (channel, image?, item+, textinput?) ]} *) -val parse : Xmlm.input -> rdf +val parse : ?xmlbase: Uri.t -> Xmlm.input -> rdf (** [parse xml] returns the RDF corresponding to [xml]. @raise Error.raise_expectation if [xml] is not a valid RSS1 - document. *) + document. + + @param xmlbase the base URI against which relative URIs in the XML + RSS1 document are resolved. It is superseded by xml:base present + in the document (if any). *) (**/**) +type uri = Uri.t option * string +(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not + [None], gives the base URI against which [uri] must be resolved if + it is relative. *) + (** Analysis without verification, enjoy ! *) -val unsafe : Xmlm.input -> +val unsafe : ?xmlbase: Uri.t -> Xmlm.input -> [> `RDF of [> `Channel of - [> `About of string + [> `About of uri | `Description of string list - | `Image of [> `URI of string ] list + | `Image of [> `URI of uri ] list | `Items of - [> `Seq of [> `Li of [> `URI of string ] list ] list ] + [> `Seq of [> `Li of [> `URI of uri ] list ] list ] list - | `Link of [> `URI of string ] list - | `TextInput of [> `URI of string ] list + | `Link of [> `URI of uri ] list + | `TextInput of [> `URI of uri ] list | `Title of string list ] list | `Image of - [> `About of string - | `Link of [> `URI of string ] list + [> `About of uri + | `Link of [> `URI of uri ] list | `Title of string list - | `URL of [> `URI of string ] list ] + | `URL of [> `URI of uri ] list ] list | `Item of - [> `About of string + [> `About of uri | `Description of string list - | `Link of [> `URI of string ] list + | `Link of [> `URI of uri ] list | `Title of string list ] list | `TextInput of - [> `About of string + [> `About of uri | `Description of string list - | `Link of [> `URI of string ] list + | `Link of [> `URI of uri ] list | `Name of string list | `Title of string list ] list ] diff --git a/lib/syndic_rss2.ml b/lib/syndic_rss2.ml index f8716a6..1ed779f 100644 --- a/lib/syndic_rss2.ml +++ b/lib/syndic_rss2.ml @@ -62,23 +62,28 @@ let make_image ~pos (l : [< image' ] list) = in `Image ({ url; title; link; width; height; description } : image) -let image_url_of_xml (pos, tag, datas) = - try `URL(Uri.of_string (get_leaf datas)) +let url_of_xml ~xmlbase a = + `URL(XML.resolve ~xmlbase (Uri.of_string a)) + +let url_of_xml' ~xmlbase a = `URL(xmlbase, a) + +let image_url_of_xml ~xmlbase (pos, tag, datas) = + try url_of_xml ~xmlbase (get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <uri> MUST be \ a non-empty string")) -let image_title_of_xml (pos, tag, datas) = +let image_title_of_xml ~xmlbase (pos, tag, datas) = `Title(try get_leaf datas with Not_found -> "") -let image_link_of_xml (pos, tag, datas) = - try `Link(Uri.of_string (get_leaf datas)) +let image_link_of_xml ~xmlbase (pos, tag, datas) = + try `Link(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> raise (Error.Error (pos, "The content of <link> MUST be \ a non-empty string")) -let image_size_of_xml ~max (pos, tag, datas) = +let image_size_of_xml ~max ~xmlbase (pos, tag, datas) = try let size = int_of_string (get_leaf datas) in if size > max then raise (Error.Error @@ -92,10 +97,12 @@ let image_size_of_xml ~max (pos, tag, datas) = ("The content of <"^(get_tag_name tag)^"> MUST be \ an integer"))) -let image_width_of_xml a = `Width(image_size_of_xml ~max:144 a) -let image_height_of_xml a = `Height(image_size_of_xml ~max:400 a) +let image_width_of_xml ~xmlbase a = + `Width(image_size_of_xml ~max:144 ~xmlbase a) +let image_height_of_xml ~xmlbase a = + `Height(image_size_of_xml ~max:400 ~xmlbase a) -let image_description_of_xml (pos, tag, datas) = +let image_description_of_xml ~xmlbase (pos, tag, datas) = try `Description(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <description> MUST be \ @@ -114,12 +121,12 @@ let image_of_xml = let image_of_xml' = let data_producer = [ - ("url", dummy_of_xml ~ctor:(fun a -> `URL a)); - ("title", dummy_of_xml ~ctor:(fun a -> `Title a)); - ("link", dummy_of_xml ~ctor:(fun a -> `Link a)); - ("width", dummy_of_xml ~ctor:(fun a -> `Width a)); - ("height", dummy_of_xml ~ctor:(fun a -> `Height a)); - ("description", dummy_of_xml ~ctor:(fun a -> `Description a)); + ("url", dummy_of_xml ~ctor:url_of_xml'); + ("title", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Title a)); + ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link(xmlbase, a))); + ("width", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Width a)); + ("height", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Height a)); + ("description", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Description a)); ] in generate_catcher ~data_producer (fun ~pos x -> `Image x) @@ -132,7 +139,7 @@ type cloud = { } type cloud' = [ - | `Domain of string + | `Domain of Uri.t | `Port of string | `Path of string | `RegisterProcedure of string @@ -141,7 +148,7 @@ type cloud' = [ let make_cloud ~pos (l : [< cloud' ] list) = let domain = match find (function `Domain _ -> true | _ -> false) l with - | Some (`Domain u) -> (Uri.of_string u) + | Some (`Domain u) -> u | _ -> raise (Error.Error (pos, "Cloud elements MUST have a 'domain' \ @@ -178,17 +185,28 @@ let make_cloud ~pos (l : [< cloud' ] list) = in `Cloud ({ domain; port; path; registerProcedure; protocol; } : cloud) -let cloud_of_xml, cloud_of_xml' = - let attr_producer = [ - ("domain", (fun a -> `Domain a)); - ("port", (fun a -> `Port a)); - ("path", (fun a -> `Path a)); (* XXX: it's RFC compliant ? *) - ("registerProcedure", (fun a -> `RegisterProcedure a)); - ("protocol", (fun a -> `Protocol a)); - ] in - generate_catcher ~attr_producer make_cloud, +let domain_of_xml ~xmlbase a = + `Domain(XML.resolve ~xmlbase (Uri.of_string a)) + +let domain_of_xml' ~xmlbase a = + `Domain(xmlbase, a) + +let cloud_attr_producer = [ + ("port", (fun ~xmlbase a -> `Port a)); + ("path", (fun ~xmlbase a -> `Path a)); (* XXX: it's RFC compliant ? *) + ("registerProcedure", (fun ~xmlbase a -> `RegisterProcedure a)); + ("protocol", (fun ~xmlbase a -> `Protocol a)); + ] + +let cloud_of_xml = + let attr_producer = ("domain", domain_of_xml) :: cloud_attr_producer in + generate_catcher ~attr_producer make_cloud + +let cloud_of_xml' = + let attr_producer = ("domain", domain_of_xml') :: cloud_attr_producer in generate_catcher ~attr_producer (fun ~pos x -> `Cloud x) + type textinput = { title: string; @@ -236,26 +254,26 @@ let make_textinput ~pos (l : [< textinput'] list) = in `TextInput ({ title; description; name; link; } : textinput) -let textinput_title_of_xml (pos, tag, datas) = +let textinput_title_of_xml ~xmlbase (pos, tag, datas) = try `Title(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <title> MUST be \ a non-empty string")) -let textinput_description_of_xml (pos, tag, datas) = +let textinput_description_of_xml ~xmlbase (pos, tag, datas) = try `Description(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <description> MUST be \ a non-empty string")) -let textinput_name_of_xml (pos, tag, datas) = +let textinput_name_of_xml ~xmlbase (pos, tag, datas) = try `Name(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <name> MUST be \ a non-empty string")) -let textinput_link_of_xml (pos, tag, datas) = - try `Link(Uri.of_string (get_leaf datas)) +let textinput_link_of_xml ~xmlbase (pos, tag, datas) = + try `Link(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> raise (Error.Error (pos, "The content of <link> MUST be \ a non-empty string")) @@ -271,10 +289,10 @@ let textinput_of_xml = let textinput_of_xml' = let data_producer = [ - ("title", dummy_of_xml ~ctor:(fun a -> `Title a)); - ("description", dummy_of_xml ~ctor:(fun a -> `Description a)); - ("name", dummy_of_xml ~ctor:(fun a -> `Name a)); - ("link", dummy_of_xml ~ctor:(fun a -> `Link a)); + ("title", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Title a)); + ("description", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Description a)); + ("name", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Name a)); + ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link(xmlbase, a))); ] in generate_catcher ~data_producer (fun ~pos x -> `TextInput x) @@ -286,7 +304,7 @@ type category = type category' = [ | `Data of string - | `Domain of string + | `Domain of Uri.t ] let make_category ~pos (l : [< category' ] list) = @@ -294,15 +312,19 @@ let make_category ~pos (l : [< category' ] list) = | Some (`Data s)-> s | _ -> "" in let domain = match find (function `Domain _ -> true | _ -> false) l with - | Some (`Domain d) -> Some (Uri.of_string d) + | Some (`Domain d) -> Some d | _ -> None in `Category({ data; domain; } : category ) -let category_of_xml, category_of_xml' = - let attr_producer = [ ("domain", (fun a -> `Domain a)); ] in - let leaf_producer pos data = `Data data in - generate_catcher ~attr_producer ~leaf_producer make_category, +let category_of_xml = + let attr_producer = [ ("domain", domain_of_xml) ] in + let leaf_producer ~xmlbase pos data = `Data data in + generate_catcher ~attr_producer ~leaf_producer make_category + +let category_of_xml' = + let attr_producer = [ ("domain", domain_of_xml') ] in + let leaf_producer ~xmlbase pos data = `Data data in generate_catcher ~attr_producer ~leaf_producer (fun ~pos x -> `Category x) type enclosure = @@ -313,14 +335,14 @@ type enclosure = } type enclosure' = [ - | `URL of string + | `URL of Uri.t | `Length of string | `Mime of string ] let make_enclosure ~pos (l : [< enclosure' ] list) = let url = match find (function `URL _ -> true | _ -> false) l with - | Some (`URL u) -> Uri.of_string u + | Some (`URL u) -> u | _ -> raise (Error.Error (pos, "Enclosure elements MUST have a 'url' \ @@ -342,13 +364,20 @@ let make_enclosure ~pos (l : [< enclosure' ] list) = in `Enclosure ({ url; length; mime; } : enclosure) -let enclosure_of_xml, enclosure_of_xml' = +let enclosure_of_xml = + let attr_producer = [ + ("url", url_of_xml); + ("length", (fun ~xmlbase a -> `Length a)); + ("type", (fun ~xmlbase a -> `Mime a)); + ] in + generate_catcher ~attr_producer make_enclosure + +let enclosure_of_xml' = let attr_producer = [ - ("url", (fun a -> `URL a)); - ("length", (fun a -> `Length a)); - ("type", (fun a -> `Mime a)); + ("url", url_of_xml'); + ("length", (fun ~xmlbase a -> `Length a)); + ("type", (fun ~xmlbase a -> `Mime a)); ] in - generate_catcher ~attr_producer make_enclosure, generate_catcher ~attr_producer (fun ~pos x -> `Enclosure x) type guid = @@ -372,12 +401,14 @@ let make_guid ~pos (l : [< guid' ] list) = | Some (`Permalink b) -> bool_of_string b | _ -> true (* cf. RFC *) in + (* FIXME: It is not clear that the GUID should be interpreted as an + URI to be resolved using xml:base. *) `Guid(if data = "" then None else Some({ data = Uri.of_string data; permalink } : guid)) let guid_of_xml, guid_of_xml' = - let attr_producer = [ ("isPermalink", (fun a -> `Permalink a)); ] in - let leaf_producer pos data = `Data data in + let attr_producer = [ ("isPermalink", (fun ~xmlbase a -> `Permalink a)); ] in + let leaf_producer ~xmlbase pos data = `Data data in generate_catcher ~attr_producer ~leaf_producer make_guid, generate_catcher ~attr_producer ~leaf_producer (fun ~pos x -> `Guid x) @@ -389,7 +420,7 @@ type source = type source' = [ | `Data of string - | `URL of string + | `URL of Uri.t ] let make_source ~pos (l : [< source' ] list) = @@ -400,7 +431,7 @@ let make_source ~pos (l : [< source' ] list) = a non-empty string")) in let url = match find (function `URL _ -> true | _ -> false) l with - | Some (`URL u) -> Uri.of_string u + | Some (`URL u) -> u | _ -> raise (Error.Error (pos, "Source elements MUST have a 'url' \ @@ -408,10 +439,14 @@ let make_source ~pos (l : [< source' ] list) = in `Source ({ data; url; } : source) -let source_of_xml, source_of_xml' = - let attr_producer = [ ("url", (fun a -> `URL a)); ] in - let leaf_producer pos data = `Data data in - generate_catcher ~attr_producer ~leaf_producer make_source, +let source_of_xml = + let attr_producer = [ ("url", url_of_xml) ] in + let leaf_producer ~xmlbase pos data = `Data data in + generate_catcher ~attr_producer ~leaf_producer make_source + +let source_of_xml' = + let attr_producer = [ ("url", url_of_xml') ] in + let leaf_producer ~xmlbase pos data = `Data data in generate_catcher ~attr_producer ~leaf_producer (fun ~pos x -> `Source x) type story = @@ -498,33 +533,33 @@ let make_item ~pos (l : _ list) = pubDate; source; } : item) -let item_title_of_xml (pos, tag, datas) = +let item_title_of_xml ~xmlbase (pos, tag, datas) = try `Title(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <title> MUST be \ a non-empty string")) -let item_description_of_xml (pos, tag, datas) = +let item_description_of_xml ~xmlbase (pos, tag, datas) = `Description(try get_leaf datas with Not_found -> "") -let item_link_of_xml (pos, tag, datas) = - `Link(try Some(Uri.of_string (get_leaf datas)) +let item_link_of_xml ~xmlbase (pos, tag, datas) = + `Link(try Some(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> None) -let item_author_of_xml (pos, tag, datas) = +let item_author_of_xml ~xmlbase (pos, tag, datas) = try `Author(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <author> MUST be \ a non-empty string")) -let item_comments_of_xml (pos, tag, datas) = - try `Comments(Uri.of_string (get_leaf datas)) +let item_comments_of_xml ~xmlbase (pos, tag, datas) = + try `Comments(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> raise (Error.Error (pos, "The content of <comments> MUST be \ a non-empty string")) -let item_pubdate_of_xml (pos, tag, datas) = +let item_pubdate_of_xml ~xmlbase (pos, tag, datas) = try `PubDate(Date.of_rfc822 (get_leaf datas)) with Not_found -> raise (Error.Error (pos, "The content of <pubDate> MUST be \ @@ -547,15 +582,15 @@ let item_of_xml = let item_of_xml' = let data_producer = [ - ("title", dummy_of_xml ~ctor:(fun a -> `Title a)); - ("description", dummy_of_xml ~ctor:(fun a -> `Description a)); - ("link", dummy_of_xml ~ctor:(fun a -> `Link a)); - ("author", dummy_of_xml ~ctor:(fun a -> `Author a)); + ("title", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Title a)); + ("description", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Description a)); + ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link(xmlbase, a))); + ("author", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Author a)); ("category", category_of_xml'); - ("comments", dummy_of_xml ~ctor:(fun a -> `Comments a)); + ("comments", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Comments a)); ("enclosure", enclosure_of_xml'); ("guid", guid_of_xml'); - ("pubdate", dummy_of_xml ~ctor:(fun a -> `PubDate a)); + ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase a -> `PubDate a)); ("source", source_of_xml'); ] in generate_catcher ~data_producer (fun ~pos x -> `Item x) @@ -719,95 +754,95 @@ let make_channel ~pos (l : [< channel' ] list) = skipDays; items; } : channel) -let channel_title_of_xml (pos, tag, datas) = +let channel_title_of_xml ~xmlbase (pos, tag, datas) = try `Title(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <title> MUST be \ a non-empty string")) -let channel_description_of_xml (pos, tag, datas) = +let channel_description_of_xml ~xmlbase (pos, tag, datas) = `Description(try get_leaf datas with Not_found -> "") -let channel_link_of_xml (pos, tag, datas) = - try `Link(Uri.of_string (get_leaf datas)) +let channel_link_of_xml ~xmlbase (pos, tag, datas) = + try `Link(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> raise (Error.Error (pos, "The content of <link> MUST be \ a non-empty string")) -let channel_language_of_xml (pos, tag, datas) = +let channel_language_of_xml ~xmlbase (pos, tag, datas) = try `Language(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <language> MUST be \ a non-empty string")) -let channel_copyright_of_xml (pos, tag, datas) = +let channel_copyright_of_xml ~xmlbase (pos, tag, datas) = try `Copyright(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <copyright> MUST be \ a non-empty string")) -let channel_managingeditor_of_xml (pos, tag, datas) = +let channel_managingeditor_of_xml ~xmlbase (pos, tag, datas) = try `ManagingEditor(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <managingEditor> MUST be \ a non-empty string")) -let channel_webmaster_of_xml (pos, tag, datas) = +let channel_webmaster_of_xml ~xmlbase (pos, tag, datas) = try `WebMaster(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <webMaster> MUST be \ a non-empty string")) -let channel_pubdate_of_xml (pos, tag, datas) = +let channel_pubdate_of_xml ~xmlbase (pos, tag, datas) = try `PubDate(Date.of_rfc822 (get_leaf datas)) with Not_found -> raise (Error.Error (pos, "The content of <pubDate> MUST be \ a non-empty string")) -let channel_lastbuilddate_of_xml (pos, tag, datas) = +let channel_lastbuilddate_of_xml ~xmlbase (pos, tag, datas) = try `LastBuildDate(Date.of_rfc822 (get_leaf datas)) with Not_found -> raise (Error.Error (pos, "The content of <lastBuildDate> MUST be \ a non-empty string")) -let channel_category_of_xml (pos, tag, datas) = +let channel_category_of_xml ~xmlbase (pos, tag, datas) = try `Category(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <category> MUST be \ a non-empty string")) -let channel_generator_of_xml (pos, tag, datas) = +let channel_generator_of_xml ~xmlbase (pos, tag, datas) = try `Generator(get_leaf datas) with Not_found -> raise (Error.Error (pos, "The content of <generator> MUST be \ a non-empty string")) -let channel_docs_of_xml (pos, tag, datas) = - try `Docs(Uri.of_string (get_leaf datas)) +let channel_docs_of_xml ~xmlbase (pos, tag, datas) = + try `Docs(XML.resolve ~xmlbase (Uri.of_string (get_leaf datas))) with Not_found -> raise (Error.Error (pos, "The content of <docs> MUST be \ a non-empty string")) -let channel_ttl_of_xml (pos, tag, datas) = +let channel_ttl_of_xml ~xmlbase (pos, tag, datas) = try `TTL(int_of_string (get_leaf datas)) with _ -> raise (Error.Error (pos, "The content of <ttl> MUST be \ a non-empty string representing an integer")) -let channel_rating_of_xml (pos, tag, datas) = +let channel_rating_of_xml ~xmlbase (pos, tag, datas) = try `Rating(int_of_string (get_leaf datas)) with _ -> raise (Error.Error (pos, "The content of <rating> MUST be \ a non-empty string representing an integer")) -let channel_skipHours_of_xml (pos, tag, datas) = +let channel_skipHours_of_xml ~xmlbase (pos, tag, datas) = try `SkipHours(int_of_string (get_leaf datas)) with _ -> raise (Error.Error (pos, "The content of <skipHours> MUST be \ a non-empty string representing an integer")) -let channel_skipDays_of_xml (pos, tag, datas) = +let channel_skipDays_of_xml ~xmlbase (pos, tag, datas) = try `SkipDays(int_of_string (get_leaf datas)) with _ -> raise (Error.Error (pos, "The content of <skipDays> MUST be \ @@ -840,25 +875,25 @@ let channel_of_xml = let channel_of_xml' = let data_producer = [ - ("title", dummy_of_xml ~ctor:(fun a -> `Title a)); - ("link", dummy_of_xml ~ctor:(fun a -> `Link a)); - ("description", dummy_of_xml ~ctor:(fun a -> `Description a)); - ("Language", dummy_of_xml ~ctor:(fun a -> `Language a)); - ("copyright", dummy_of_xml ~ctor:(fun a -> `Copyright a)); - ("managingeditor", dummy_of_xml ~ctor:(fun a -> `ManagingEditor a)); - ("webmaster", dummy_of_xml ~ctor:(fun a -> `WebMaster a)); - ("pubdate", dummy_of_xml ~ctor:(fun a -> `PubDate a)); - ("lastbuilddate", dummy_of_xml ~ctor:(fun a -> `LastBuildDate a)); - ("category", dummy_of_xml ~ctor:(fun a -> `Category a)); - ("generator", dummy_of_xml ~ctor:(fun a -> `Generator a)); - ("docs", dummy_of_xml ~ctor:(fun a -> `Docs a)); + ("title", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Title a)); + ("link", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Link(xmlbase, a))); + ("description", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Description a)); + ("Language", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Language a)); + ("copyright", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Copyright a)); + ("managingeditor", dummy_of_xml ~ctor:(fun ~xmlbase a -> `ManagingEditor a)); + ("webmaster", dummy_of_xml ~ctor:(fun ~xmlbase a -> `WebMaster a)); + ("pubdate", dummy_of_xml ~ctor:(fun ~xmlbase a -> `PubDate a)); + ("lastbuilddate", dummy_of_xml ~ctor:(fun ~xmlbase a -> `LastBuildDate a)); + ("category", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Category a)); + ("generator", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Generator a)); + ("docs", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Docs a)); ("cloud", cloud_of_xml'); - ("ttl", dummy_of_xml ~ctor:(fun a -> `TTL a)); + ("ttl", dummy_of_xml ~ctor:(fun ~xmlbase a -> `TTL a)); ("image", image_of_xml'); - ("rating", dummy_of_xml ~ctor:(fun a -> `Rating a)); + ("rating", dummy_of_xml ~ctor:(fun ~xmlbase a -> `Rating a)); ("textinput", textinput_of_xml'); - ("skiphours", dummy_of_xml ~ctor:(fun a -> `SkipHours a)); - ("skipdays", dummy_of_xml ~ctor:(fun a -> `SkipDays a)); + ("skiphours", dummy_of_xml ~ctor:(fun ~xmlbase a -> `SkipHours a)); + ("skipdays", dummy_of_xml ~ctor:(fun ~xmlbase a -> `SkipDays a)); ("item", item_of_xml'); ] in generate_catcher ~data_producer (fun ~pos x -> x) @@ -867,14 +902,14 @@ let find_channel l = find (function XML.Node(pos, tag, data) -> tag_is tag "channel" | XML.Data _ -> false) l -let parse input = +let parse ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, data) -> if tag_is tag "channel" then - channel_of_xml (pos, tag, data) + channel_of_xml ~xmlbase (pos, tag, data) else ( match find_channel data with - | Some(XML.Node(p, t, d)) -> channel_of_xml (p, t, d) + | Some(XML.Node(p, t, d)) -> channel_of_xml ~xmlbase (p, t, d) | Some(XML.Data _) | _ -> raise (Error.Error ((0, 0), "document MUST contains exactly one \ @@ -883,12 +918,16 @@ let parse input = "document MUST contains exactly one \ <channel> element")) -let unsafe input = +type uri = Uri.t option * string + +let unsafe ?xmlbase input = match XML.of_xmlm input |> snd with | XML.Node (pos, tag, data) -> - if tag_is tag "channel" then `Channel (channel_of_xml' (pos, tag, data)) + if tag_is tag "channel" then + `Channel (channel_of_xml' ~xmlbase (pos, tag, data)) else (match find_channel data with - | Some(XML.Node(p, t, d)) -> `Channel (channel_of_xml' (p, t, d)) + | Some(XML.Node(p, t, d)) -> + `Channel (channel_of_xml' ~xmlbase (p, t, d)) | Some(XML.Data _) | None -> `Channel []) | _ -> `Channel [] diff --git a/lib/syndic_rss2.mli b/lib/syndic_rss2.mli index 43ba228..c6879b4 100644 --- a/lib/syndic_rss2.mli +++ b/lib/syndic_rss2.mli @@ -1,5 +1,5 @@ (** [Syndic.Rss2]: compliant with - {{: http://www.rssboard.org/rss-specification#ltcloudgtSubelementOfLtchannelgt} RSS 2.0}. *) + {{: http://www.rssboard.org/rss-specification} RSS 2.0}. *) module Error : module type of Syndic_error @@ -305,7 +305,7 @@ type channel = *) -val parse : Xmlm.input -> channel +val parse : ?xmlbase: Uri.t -> Xmlm.input -> channel (** [parse xml] returns the channel corresponding to [xml]. Raise [Error.Expected], [Error.Size_Exceeded] or @@ -319,12 +319,17 @@ val to_atom : channel -> Syndic_atom.feed (**/**) +type uri = Uri.t option * string +(** An URI is given by (xmlbase, uri). The value of [xmlbase], if not + [None], gives the base URI against which [uri] must be resolved if + it is relative. *) + (** Analysis without verification, enjoy ! *) -val unsafe : Xmlm.input -> +val unsafe : ?xmlbase: Uri.t -> Xmlm.input -> [> `Channel of [> `Category of string | `Cloud of - [> `Domain of string + [> `Domain of uri | `Path of string | `Port of string | `Protocol of string @@ -337,27 +342,27 @@ val unsafe : Xmlm.input -> | `Image of [> `Description of string | `Height of string - | `Link of string + | `Link of uri | `Title of string - | `URL of string + | `URL of uri | `Width of string ] list | `Item of [> `Author of string - | `Category of [> `Data of string | `Domain of string ] list + | `Category of [> `Data of string | `Domain of uri ] list | `Comments of string | `Description of string | `Enclosure of - [> `Length of string | `Mime of string | `URL of string ] list + [> `Length of string | `Mime of string | `URL of uri ] list | `Guid of [> `Data of string | `Permalink of string ] list - | `Link of string + | `Link of uri | `PubDate of string - | `Source of [> `Data of string | `URL of string ] list + | `Source of [> `Data of string | `URL of uri ] list | `Title of string ] list | `Language of string | `LastBuildDate of string - | `Link of string + | `Link of uri | `ManagingEditor of string | `PubDate of string | `Rating of string @@ -366,7 +371,7 @@ val unsafe : Xmlm.input -> | `TTL of string | `TextInput of [> `Description of string - | `Link of string + | `Link of uri | `Name of string | `Title of string ] list diff --git a/lib/syndic_xml.ml b/lib/syndic_xml.ml index e3e0428..2058548 100644 --- a/lib/syndic_xml.ml +++ b/lib/syndic_xml.ml @@ -4,6 +4,15 @@ type t = | Node of Xmlm.pos * Xmlm.tag * t list | Data of Xmlm.pos * string +let resolve ~xmlbase uri = match xmlbase with + | None -> uri + | Some b -> Uri.resolve "" b uri + +let base ~parent base = + let base = Uri.of_string base in + Some(match parent with Some p -> Uri.resolve "" p base + | None -> base) + let of_xmlm input = let el tag datas = Node (Xmlm.pos input, tag, datas) in let data data = Data (Xmlm.pos input, data) in diff --git a/lib/syndic_xml.mli b/lib/syndic_xml.mli index 77f194d..737b179 100644 --- a/lib/syndic_xml.mli +++ b/lib/syndic_xml.mli @@ -9,6 +9,11 @@ type t = | Node of Xmlm.pos * Xmlm.tag * t list | Data of Xmlm.pos * string +val resolve : xmlbase: Uri.t option -> Uri.t -> Uri.t +(** [resolve base uri] resolve the [uri] against the possible base. *) + +val base : parent: Uri.t option -> string -> Uri.t option + val get_position : t -> Xmlm.pos val of_xmlm : Xmlm.input -> (dtd * t)