Switch the regular-expression backend from Str / Re to PCRE (pcre-ocaml).

Summary of the hunks below:
  * doc/doc-primitives.saty: `regexp-of-string` is documented as accepting
    PCRE syntax instead of OCaml `Str.regexp` syntax.
  * satysfi.opam, tools/gencode/dune: the `re` dependency is replaced by
    `pcre`; src/dune gains `pcre` as a library.
  * src/frontend/evalUtil.ml, src/frontend/types.cppo.ml: `BCRegExp` and
    `get_regexp` use `Pcre.regexp` instead of `Str.regexp`.
  * tools/gencode/u.ml: `trim_re` becomes a PCRE pattern. Like the Re
    combinator it replaces, it accepts space or tab as padding and
    "\r\n", "\r" or "\n" as the line terminator.
  * tools/gencode/vminst.ml: the regexp primitives call Pcre. The
    `ANCHORED flag is passed where the old code called
    `Str.string_match ... 0`, i.e. matched at offset 0 only.
    The position carried by `Pcre.BadPattern` is not reported, so it is
    bound to `_pos`.

diff --git a/doc/doc-primitives.saty b/doc/doc-primitives.saty
index 360f77f89..d6be2d014 100644
--- a/doc/doc-primitives.saty
+++ b/doc/doc-primitives.saty
@@ -299,7 +299,7 @@ document (|
     }
     +command (`regexp-of-string`) (tS --> tRE) {
       \code{regexp-of-string ${s}}で文字列から正規表現型を生成する.
-      使える正規表現の構文はOCamlの\code{Str.regexp}で使えるものと等しい.
+      使える正規表現の構文はPCREと等しい.
       正規表現として不適当な文字列が与えられた場合は実行時エラーが出る.
     }
     +command (`string-scan`) (tRE --> (tS --> (tOPT (tPROD [tS; tS])))) {
diff --git a/satysfi.opam b/satysfi.opam
index 34dbf90e3..9fc8bac26 100644
--- a/satysfi.opam
+++ b/satysfi.opam
@@ -31,7 +31,7 @@ depends: [
   "ocamlfind" {build}
   "otfm" {= "0.3.7+satysfi"}
   "ppx_deriving"
-  "re" {build}
+  "pcre" {>= "7.4.0" & < "7.5.0"}
   "uutf"
   "yojson-with-position" {= "1.4.2+satysfi"}
   "omd" {< "2.0.0~"}
diff --git a/src/dune b/src/dune
index 520ef49dd..975828ed6 100644
--- a/src/dune
+++ b/src/dune
@@ -14,6 +14,7 @@
     uutf
     yojson-with-position
     omd
+    pcre
   )
   (preprocess
     (pps ppx_deriving.show
diff --git a/src/frontend/evalUtil.ml b/src/frontend/evalUtil.ml
index 74de415f4..95f447bd8 100644
--- a/src/frontend/evalUtil.ml
+++ b/src/frontend/evalUtil.ml
@@ -420,7 +420,7 @@ let get_float value : float =
   | _ -> report_bug_value "get_float" value
 
 
-let get_regexp (value : syntactic_value) : Str.regexp =
+let get_regexp (value : syntactic_value) : Pcre.regexp =
   match value with
   | BaseConstant(BCRegExp(regexp)) -> regexp
   | _ -> report_bug_value "get_regexp" value
diff --git a/src/frontend/types.cppo.ml b/src/frontend/types.cppo.ml
index 0a916d49e..bfcd16880 100644
--- a/src/frontend/types.cppo.ml
+++ b/src/frontend/types.cppo.ml
@@ -641,7 +641,7 @@ type base_constant =
   | BCFloat of float
   | BCLength of length
   | BCString of string
-  | BCRegExp of Str.regexp
+  | BCRegExp of Pcre.regexp
       [@printer (fun fmt _ -> Format.fprintf fmt "")]
   | BCPath of path list
       [@printer (fun fmt _ -> Format.fprintf fmt "")]
diff --git a/tools/gencode/dune b/tools/gencode/dune
index af6c3c16e..f1775488b 100644
--- a/tools/gencode/dune
+++ b/tools/gencode/dune
@@ -1,5 +1,5 @@
 (executable
   (name gencode)
-  (libraries core_kernel re)
+  (libraries core_kernel pcre)
   ;; (preprocess (pps (ppx_driver.runner)))
 )
diff --git a/tools/gencode/u.ml b/tools/gencode/u.ml
index f5daa1122..36dec13b6 100644
--- a/tools/gencode/u.ml
+++ b/tools/gencode/u.ml
@@ -5,24 +5,13 @@ let default v = function
 
 
 let trim_re =
-  let open Re in
-  let sp = alt [ char ' '; char '\t' ] in
-  let nl = alt [ str "\r\n"; char '\r'; char '\n' ] in
-  (* -- /\A *\n(.*\n) *\z/ -- *)
-  seq [
-    bos;
-    rep sp;
-    nl;
-    group @@ seq [ rep any; nl ];
-    rep sp;
-    eos;
-  ] |> compile
+  Pcre.regexp ~flags:[`UTF8; `DOTALL] "\\A[ \\t]*(?:\\r\\n|\\r|\\n)(.*(?:\\r\\n|\\r|\\n))[ \\t]*\\z"
 
 
 let trim s =
-  match Re.exec_opt trim_re s with
-  | Some(gr) -> Re.Group.get gr 1
-  | None -> s
+  match Pcre.exec ~rex:trim_re s with
+  | exception Not_found -> s
+  | substrings -> Pcre.get_substring substrings 1
 
 
 let opt_map f = function
diff --git a/tools/gencode/vminst.ml b/tools/gencode/vminst.ml
index f8bfe008b..938930cb4 100644
--- a/tools/gencode/vminst.ml
+++ b/tools/gencode/vminst.ml
@@ -1965,12 +1965,13 @@ make_int (String.length str)
         ~is_pdf_mode_primitive:true
         ~is_text_mode_primitive:true
         ~code:{|
-if Str.string_match pat str 0 then
-  let matched = Str.matched_string str in
-  let start = String.length matched in
+match Pcre.exec ~flags:[`ANCHORED] ~rex:pat str with
+| substrings ->
+  let _, start = Pcre.get_substring_ofs substrings 0 in
+  let matched = String.sub str 0 start in
   let rest = String.sub str start (String.length str - start) in
   Constructor("Some", Tuple([make_string matched; make_string rest]))
-else
+| exception Not_found ->
   Constructor("None", const_unit)
 |}
     ; inst "PrimitiveStringUnexplode"
@@ -2019,8 +2020,12 @@ make_list make_int ilst
         ~is_text_mode_primitive:true
         ~code:{|
 let regexp =
-  try Str.regexp str with
-  | Failure(msg) -> report_dynamic_error ("regexp-of-string: " ^ msg)
+  match Pcre.regexp ~flags:[`UTF8] str with
+  | re -> re
+  | exception Pcre.Error(Pcre.BadPattern (msg, _pos)) ->
+    report_dynamic_error ("regexp-of-string: " ^ msg)
+  | exception Pcre.Error(Pcre.InternalError msg) ->
+    report_dynamic_error ("regexp-of-string: " ^ msg)
 in
 make_regexp regexp
 |}
@@ -2030,13 +2035,13 @@ make_regexp regexp
         ~fields:[
         ]
         ~params:[
-          param "pat" ~type_:"regexp";
+          param "rex" ~type_:"regexp";
           param "s" ~type_:"string";
         ]
         ~is_pdf_mode_primitive:true
         ~is_text_mode_primitive:true
         ~code:{|
-make_bool (Str.string_match pat s 0)
+make_bool (Pcre.pmatch ~flags:[`ANCHORED] ~rex s)
 |}
     ; inst "PrimitiveSplitIntoLines"
         ~name:"split-into-lines"
@@ -2065,7 +2070,7 @@ pairlst |> make_list (fun (i, s) -> Tuple([make_int i; make_string s]))
         ~is_pdf_mode_primitive:true
         ~is_text_mode_primitive:true
         ~code:{|
-let slst = Str.split sep str in
+let slst = Pcre.split ~rex:sep str in
 let pairlst = slst |> List.map chop_space_indent in
 pairlst |> make_list (fun (i, s) -> Tuple([make_int i; make_string s]))
 |}