Skip to content

Commit

Permalink
add parts function
Browse files Browse the repository at this point in the history
  • Loading branch information
bikallem committed Aug 21, 2021
1 parent 5354d50 commit 5d443eb
Show file tree
Hide file tree
Showing 8 changed files with 270 additions and 16 deletions.
3 changes: 2 additions & 1 deletion .ocamlformat
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
profile = ocamlformat
exp-grouping=preserve
module-item-spacing=compact
sequence-blank-line=preserve-one
single-case=compact
break-cases = fit
break-infix=fit-or-vertical
parse-docstrings = true

4 changes: 4 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## v3.1.0 2021-08-21

- Add `parts` function to allow non-streaming parsing of multiparts.

## v3.0.1 2021-07-24

- Improve documentation, fix some typos in code. Rename `read_result` to `read` and `read_part` to `read
Expand Down
2 changes: 1 addition & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

(generate_opam_files true)

(version 3.0.1)
(version 3.1.0)

(source
(github lemaetech/http-mutlipart-formdata))
Expand Down
2 changes: 1 addition & 1 deletion http-multipart-formdata.opam
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# This file is generated by dune, edit dune-project instead
opam-version: "2.0"
version: "3.0.1"
version: "3.1.0"
synopsis: "Http multipart/formdata parser"
description:
"OCaml implementation of RFC 7578 (Returning Values from Forms: multipart/form-data)- https://tools.ietf.org/html/rfc7578"
Expand Down
87 changes: 87 additions & 0 deletions http-multipart-formdata.opam.locked
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
opam-version: "2.0"
name: "http-multipart-formdata"
version: "3.1.0"
synopsis: "Http multipart/formdata parser"
description:
"OCaml implementation of RFC 7578 (Returning Values from Forms: multipart/form-data)- https://tools.ietf.org/html/rfc7578"
maintainer: "Bikal Lem"
authors: "Bikal Lem, <[email protected]>"
license: "MPL-2.0"
tags: ["http" "multipart" "formadata" "form" "web"]
homepage: "https://github.com/lemaetech/http-mutlipart-formdata"
bug-reports: "https://github.com/lemaetech/http-mutlipart-formdata/issues"
depends: [
"angstrom" {= "0.15.0"}
"astring" {= "0.8.5" & with-doc}
"base" {= "v0.14.1" & with-test}
"base-bigarray" {= "base"}
"base-threads" {= "base"}
"base-unix" {= "base"}
"bigarray-compat" {= "1.0.0"}
"bigstringaf" {= "0.8.0"}
"cmdliner" {= "1.0.4"}
"conf-pkg-config" {= "2"}
"cppo" {= "1.6.7" & with-test}
"csexp" {= "1.5.1" & with-test}
"cstruct" {= "6.0.1"}
"dune" {= "2.9.0"}
"dune-configurator" {= "2.9.0" & with-test}
"fmt" {= "0.8.9"}
"fpath" {= "0.7.3" & with-doc}
"jane-street-headers" {= "v0.14.0" & with-test}
"jst-config" {= "v0.14.0" & with-test}
"logs" {= "0.7.0" & with-doc}
"ocaml" {= "4.12.0"}
"ocaml-base-compiler" {= "4.12.0"}
"ocaml-compiler-libs" {= "v0.12.3" & with-test}
"ocaml-config" {= "2"}
"ocaml-migrate-parsetree" {= "2.2.0" & with-test}
"ocaml-options-vanilla" {= "1"}
"ocaml-syntax-shims" {= "1.0.0"}
"ocamlbuild" {= "0.14.0"}
"ocamlfind" {= "1.9.1"}
"octavius" {= "1.2.2" & with-test}
"odoc" {= "dev" & with-doc}
"odoc-parser" {= "0.9.0" & with-doc}
"ppx_assert" {= "v0.14.0" & with-test}
"ppx_base" {= "v0.14.0" & with-test}
"ppx_cold" {= "v0.14.0" & with-test}
"ppx_compare" {= "v0.14.0" & with-test}
"ppx_derivers" {= "1.2.1" & with-test}
"ppx_deriving" {= "5.2.1" & with-test}
"ppx_enumerate" {= "v0.14.0" & with-test}
"ppx_expect" {= "v0.14.1" & with-test}
"ppx_hash" {= "v0.14.0" & with-test}
"ppx_here" {= "v0.14.0" & with-test}
"ppx_inline_test" {= "v0.14.1" & with-test}
"ppx_js_style" {= "v0.14.1" & with-test}
"ppx_optcomp" {= "v0.14.3" & with-test}
"ppx_sexp_conv" {= "v0.14.3" & with-test}
"ppxlib" {= "0.22.2" & with-test}
"re" {= "1.9.0" & with-test}
"result" {= "1.5"}
"seq" {= "base"}
"sexplib0" {= "v0.14.0" & with-test}
"stdio" {= "v0.14.0" & with-test}
"stdlib-shims" {= "0.3.0"}
"time_now" {= "v0.14.0" & with-test}
"topkg" {= "1.0.3"}
"tyxml" {= "4.5.0" & with-doc}
"uchar" {= "0.0.2" & with-doc}
"uutf" {= "1.0.2" & with-doc}
]
build: [
["dune" "subst"] {dev}
[
"dune"
"build"
"-p"
name
"-j"
jobs
"@install"
"@runtest" {with-test}
"@doc" {with-doc}
]
]
dev-repo: "git+https://github.com/lemaetech/http-mutlipart-formdata.git"
59 changes: 51 additions & 8 deletions lib/http_multipart_formdata.ml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ and part_header =
; filename: string option
; parameters: string Map.t }

and field_name = string

and part_body = string

type part_body_header =
| Content_type of {ty: string; subtype: string; parameters: string Map.t}
| Content_disposition of string Map.t
Expand Down Expand Up @@ -162,7 +166,8 @@ let boundary content_type =
let* params =
skip_many ws
*> (string_ci "multipart/form-data" <?> "Not multipart formdata header")
*> skip_many ws *> many param
*> skip_many ws
*> many param
in
match List.assoc_opt "boundary" params with
| Some boundary -> return (Boundary boundary)
Expand All @@ -173,7 +178,9 @@ let boundary content_type =
let content_disposition =
let+ params =
string_ci "Content-Disposition:"
*> skip_many ws *> string_ci "form-data" *> many param
*> skip_many ws
*> string_ci "form-data"
*> many param
in
let params = List.to_seq params |> Map.of_seq in
Content_disposition params
Expand All @@ -186,7 +193,8 @@ let preamble dash_boundary =
many
(let* dash_boundary' = peek_string len in
if String.equal dash_boundary dash_boundary' then fail "" else any_char )
*> advance len *> commit
*> advance len
*> commit

let crlf = string_ci "\r\n" <?> "[crlf]"

Expand Down Expand Up @@ -294,7 +302,7 @@ let of_bigarray = Cstruct.of_bigarray

let rec read (reader : reader) =
match reader.parser_state with
| Buffered.Partial k -> (
| Buffered.Partial k -> begin
match reader.input with
| `Incremental ->
let continue (input : [`Cstruct of Cstruct.t | `Eof]) =
Expand All @@ -310,11 +318,13 @@ let rec read (reader : reader) =
`Awaiting_input continue
| `Cstruct i ->
let input' =
if Cstruct.len i = 0 then `Eof else `Bigstring (Cstruct.to_bigarray i)
if Cstruct.length i = 0 then `Eof
else `Bigstring (Cstruct.to_bigarray i)
in
reader.parser_state <- k input' ;
read reader )
| Buffered.Done (buf, x) -> (
read reader
end
| Buffered.Done (buf, x) -> begin
match x with
| `End ->
reader.unconsumed <- of_bigarray ~off:buf.off ~len:buf.len buf.buf ;
Expand All @@ -328,13 +338,46 @@ let rec read (reader : reader) =
x
| `Incremental ->
reader.unconsumed <- of_bigarray ~off:buf.off ~len:buf.len buf.buf ;
x ) )
x )
end
| Buffered.Fail (buf, marks, err) ->
reader.unconsumed <- of_bigarray ~off:buf.off ~len:buf.len buf.buf ;
`Error (String.concat " > " marks ^ ": " ^ err)

let unconsumed reader = reader.unconsumed

(* Non streaming *)

(** [parts boundary body] parses the complete multipart [body] in one go and
    returns every part as [(field_name, (part_header, part_body))], in the
    order the parts appear in [body].

    Any parse error reported by [read] - whether it occurs while reading a part
    header or while reading a part body - is returned as [Error msg]; no
    exception is raised for malformed input. *)
let parts boundary body =
  (* Drains the chunks of the current part body into [buf] until [`Body_end].
     A [Buffer] is used so each chunk is copied once, instead of re-copying
     the whole accumulated body on every chunk. *)
  let rec read_body rd buf =
    match read rd with
    | `Body_end -> Ok (Buffer.contents buf)
    | `Body chunk ->
      Buffer.add_string buf (Cstruct.to_string chunk) ;
      read_body rd buf
    | `Error e -> Error e
    (* Any other state is not expected between a header and [`Body_end] when
       the whole input is supplied up front. *)
    | _ -> assert false
  in
  (* Reads parts until [`End]; [acc] holds the parts most recent first. *)
  let rec read_parts rd acc =
    match read rd with
    | `End ->
      (* [List.rev_map] restores the original order while keying each part by
         its form field name. *)
      Ok
        (List.rev_map
           (fun (header, part_body) -> (name header, (header, part_body)))
           acc )
    | `Header header -> (
      match read_body rd (Buffer.create 256) with
      | Ok part_body -> read_parts rd ((header, part_body) :: acc)
      | Error e -> Error e )
    | `Error e -> Error e
    (* Body states are consumed by [read_body]; nothing else is expected here
       for non-incremental input. *)
    | _ -> assert false
  in
  (* NOTE(review): a 10 byte read buffer looks very small - confirm it is
     intentional before changing it. *)
  let rd =
    reader ~read_buffer_size:10 boundary (`Cstruct (Cstruct.of_string body))
  in
  read_parts rd []

(* Pretty Printers *)

let pp_boundary fmt (Boundary boundary) = Fmt.string fmt boundary
Expand Down
41 changes: 36 additions & 5 deletions lib/http_multipart_formdata.mli
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
The parser implements HTTP [multipart/form-data] standard as defined in
{{:https://tools.ietf.org/html/rfc7578} RFC 7578}. *)

(** {2 Types} *)
(** {1 Types} *)

(** [reader] represents a HTTP multipart formdata reader. *)
type reader
Expand Down Expand Up @@ -46,7 +46,13 @@ and part_header
(** Represents the multipart boundary value. *)
and boundary

(** {2 Mulipart Boundary parser} *)
(** A form field name *)
and field_name = string

(** A Multipart body *)
and part_body = string

(** {1 Multipart Boundary parser} *)

val boundary : string -> (boundary, string) result
(** [boundary content_type] parses [content_type] to extract {!type:boundary}
Expand All @@ -60,7 +66,11 @@ val boundary : string -> (boundary, string) result
Http_multipart_formdata.boundary content_type
]} *)

(** {2 Multipart Reader} *)
(** {1 Streaming Multipart}
API to stream multipart parts. Use these functions when you have to handle
HTTP form submissions that contain large file uploads while remaining
memory efficient. *)

val reader : ?read_buffer_size:int -> boundary -> input -> reader
(** [reader ?read_buffer_size boundary input] creates reader. The default value
Expand All @@ -73,7 +83,28 @@ val unconsumed : reader -> Cstruct.t
(** [unconsumed reader] returns any leftover data still remaining after
{!type:reader} returns [`End]. *)

(** {2 Part header} *)
(** {1 Non-Streaming Multipart}
Use these functions if the HTTP form submission is of a relatively small
size. *)

val parts :
boundary
-> string
-> ((field_name * (part_header * part_body)) list, string) result
(** [parts boundary http_body] returns a list of HTTP multipart parts parsed in
[http_body].
The returned parts list is keyed to a form field name so that one can do:
{[
match parts boundary http_body with
| Ok parts_kv -> (
  match List.assoc_opt "field1" parts_kv with
  | Some (header, body) -> ...
  | None -> ... )
| Error e -> ...
]} *)

(** {1 Part header} *)

val name : part_header -> string
(** [name t] returns the form field name. *)
Expand All @@ -87,7 +118,7 @@ val filename : part_header -> string option
val find : string -> part_header -> string option
(** [find name t] returns the multipart parameter value associated with [name]. *)

(** {2 Pretty Printers} *)
(** {3 Pretty Printers} *)

val pp_part_header : Format.formatter -> part_header -> unit
val pp_read_result : Format.formatter -> read -> unit
Expand Down
Loading

0 comments on commit 5d443eb

Please sign in to comment.