Skip to content

Commit

Permalink
fix #306. use pcre-ocaml as regexp backend
Browse files Browse the repository at this point in the history
  • Loading branch information
leque committed Dec 29, 2021
1 parent 1919b0f commit 7e60feb
Show file tree
Hide file tree
Showing 8 changed files with 24 additions and 29 deletions.
2 changes: 1 addition & 1 deletion doc/doc-primitives.saty
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ document (|
}
+command (`regexp-of-string`) (tS --> tRE) {
\code{regexp-of-string ${s}}で文字列から正規表現型を生成する.
使える正規表現の構文はOCamlの\code{Str.regexp}で使えるものと等しい
使える正規表現の構文はPCREと等しい.
正規表現として不適当な文字列が与えられた場合は実行時エラーが出る.
}
+command (`string-scan`) (tRE --> (tS --> (tOPT (tPROD [tS; tS])))) {
Expand Down
2 changes: 1 addition & 1 deletion satysfi.opam
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ depends: [
"ocamlfind" {build}
"otfm" {= "0.3.7+satysfi"}
"ppx_deriving"
"re" {build}
"pcre" {>= "7.4.0" & < "7.5.0"}
"uutf"
"yojson-with-position" {= "1.4.2+satysfi"}
"omd" {< "2.0.0~"}
Expand Down
1 change: 1 addition & 0 deletions src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
uutf
yojson-with-position
omd
pcre
)
(preprocess (pps
ppx_deriving.show
Expand Down
2 changes: 1 addition & 1 deletion src/frontend/evalUtil.ml
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ let get_float value : float =
| _ -> report_bug_value "get_float" value


(* [get_regexp value] unwraps the compiled regular expression carried by a
   [BaseConstant(BCRegExp(_))] value and returns it unchanged.  Any other
   shape of [value] is handed to [report_bug_value] together with the name
   of this function.

   NOTE(review): this is a diff hunk, so both sides of the change appear.
   The first signature line (returning [Str.regexp]) is the removed side and
   the second (returning [Pcre.regexp]) is the added side; the body below is
   shared by both. *)
let get_regexp (value : syntactic_value) : Str.regexp =
let get_regexp (value : syntactic_value) : Pcre.regexp =
match value with
| BaseConstant(BCRegExp(regexp)) -> regexp
| _ -> report_bug_value "get_regexp" value
Expand Down
2 changes: 1 addition & 1 deletion src/frontend/types.cppo.ml
Original file line number Diff line number Diff line change
Expand Up @@ -641,7 +641,7 @@ type base_constant =
| BCFloat of float
| BCLength of length
| BCString of string
| BCRegExp of Str.regexp
| BCRegExp of Pcre.regexp
[@printer (fun fmt _ -> Format.fprintf fmt "<regexp>")]
| BCPath of path list
[@printer (fun fmt _ -> Format.fprintf fmt "<path>")]
Expand Down
2 changes: 1 addition & 1 deletion tools/gencode/dune
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
(executable
(name gencode)
(libraries core_kernel re)
(libraries core_kernel pcre)
;; (preprocess (pps (ppx_driver.runner)))
)
19 changes: 4 additions & 15 deletions tools/gencode/u.ml
Original file line number Diff line number Diff line change
Expand Up @@ -5,24 +5,13 @@ let default v = function


(* [trim_re] is the compiled pattern used by [trim]: optional leading
   blanks, one line break, a captured body (group 1) that ends with a line
   break, optional trailing blanks, then end of input.

   NOTE(review): this is a diff hunk.  The [let open Re in ... |> compile]
   lines are the removed implementation and the final [Pcre.regexp] line is
   its replacement. *)
let trim_re =
(* Removed side: built with Re combinators.  Here a blank is a space or a
   tab, and a line break is CR LF, a lone CR, or a lone LF. *)
let open Re in
let sp = alt [ char ' '; char '\t' ] in
let nl = alt [ str "\r\n"; char '\r'; char '\n' ] in
(* -- /\A *\n(.*\n) *\z/ -- *)
seq [
bos;
rep sp;
nl;
group @@ seq [ rep any; nl ];
rep sp;
eos;
] |> compile
(* Added side: the same shape written as a PCRE pattern string, compiled
   with the UTF8 and DOTALL flags.
   NOTE(review): this pattern spells blanks as a literal space only and the
   line break as LF only, so it is narrower than the removed builder, which
   also accepted tabs and CR LF / CR line breaks -- confirm that dropping
   those cases is intended. *)
Pcre.regexp ~flags:[`UTF8; `DOTALL] "\\A *\n(.*\n) *\\z"


(* [trim s] runs [trim_re] against [s].  When the pattern matches, the text
   captured by group 1 is returned; when it does not match, [s] is returned
   unchanged.

   NOTE(review): this is a diff hunk, so two match expressions appear. *)
let trim s =
(* Removed side: Re reports a failed match as [None]. *)
match Re.exec_opt trim_re s with
| Some(gr) -> Re.Group.get gr 1
| None -> s
(* Added side: a failed [Pcre.exec] is handled through the [Not_found]
   exception case, which plays the role of the [None] branch above. *)
match Pcre.exec ~rex:trim_re s with
| exception Not_found -> s
| substrings -> Pcre.get_substring substrings 1


let opt_map f = function
Expand Down
23 changes: 14 additions & 9 deletions tools/gencode/vminst.ml
Original file line number Diff line number Diff line change
Expand Up @@ -1965,12 +1965,13 @@ make_int (String.length str)
~is_pdf_mode_primitive:true
~is_text_mode_primitive:true
~code:{|
if Str.string_match pat str 0 then
let matched = Str.matched_string str in
let start = String.length matched in
match Pcre.exec ~flags:[`ANCHORED] ~rex:pat str with
| substrings ->
let _, start = Pcre.get_substring_ofs substrings 0 in
let matched = String.sub str 0 start in
let rest = String.sub str start (String.length str - start) in
Constructor("Some", Tuple([make_string matched; make_string rest]))
else
| exception Not_found ->
Constructor("None", const_unit)
|}
; inst "PrimitiveStringUnexplode"
Expand Down Expand Up @@ -2019,8 +2020,12 @@ make_list make_int ilst
~is_text_mode_primitive:true
~code:{|
let regexp =
try Str.regexp str with
| Failure(msg) -> report_dynamic_error ("regexp-of-string: " ^ msg)
match Pcre.regexp ~flags:[`UTF8] str with
| re -> re
| exception Pcre.Error(Pcre.BadPattern (msg, pos)) ->
report_dynamic_error ("regexp-of-string: " ^ msg)
| exception Pcre.Error(Pcre.InternalError msg) ->
report_dynamic_error ("regexp-of-string: " ^ msg)
in
make_regexp regexp
|}
Expand All @@ -2030,13 +2035,13 @@ make_regexp regexp
~fields:[
]
~params:[
param "pat" ~type_:"regexp";
param "rex" ~type_:"regexp";
param "s" ~type_:"string";
]
~is_pdf_mode_primitive:true
~is_text_mode_primitive:true
~code:{|
make_bool (Str.string_match pat s 0)
make_bool (Pcre.pmatch ~flags:[`ANCHORED] ~rex s)
|}
; inst "PrimitiveSplitIntoLines"
~name:"split-into-lines"
Expand Down Expand Up @@ -2065,7 +2070,7 @@ pairlst |> make_list (fun (i, s) -> Tuple([make_int i; make_string s]))
~is_pdf_mode_primitive:true
~is_text_mode_primitive:true
~code:{|
let slst = Str.split sep str in
let slst = Pcre.split ~rex:sep str in
let pairlst = slst |> List.map chop_space_indent in
pairlst |> make_list (fun (i, s) -> Tuple([make_int i; make_string s]))
|}
Expand Down

0 comments on commit 7e60feb

Please sign in to comment.