Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

doc(engine): ppx_functor_application #744

Merged
merged 1 commit into from
Jul 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 71 additions & 54 deletions engine/utils/ppx_functor_application/README.md
Original file line number Diff line number Diff line change
@@ -1,65 +1,82 @@
# `ppx_inline`
# `ppx_functor_application`

Inlines chunks of OCaml AST in place.
## Motivation
The engine consists of numerous phases, implemented as OCaml functors
parametrized over "AST features" (see the book). Two phases can be
binded (sequenced) via `Phase_utils.BindPhase` functor.

Rewrite `[%%inline_defs L]`, `[%%inline_arms L]`, `[%%inline_body PATH]` inside nodes `[%%inlined_contents NODE]`, where:
- `L` is a (`+`/`-`-separated) list of path specifying which chunk of AST we should inline;
- `PATH` is a `.`-separated list of strings, possibly containing the `*` glob.
Since OCaml define (or let users define) infix notations for functor
application, combining many phases (functors) results in the following
christmas tree looking kinds of code:

## Example:
File `some_module.ml`:
```ocaml
let f x = x + 1
let g x = x + 2
let f' x = x + 3

module M = struct
let w = 0
let x = 1
let y = 2
let z = 3
struct
module ARG0 = (Phases.Reject.RawOrMutPointer)(Features.Rust)
module ARG1 = (Phases.Transform_hax_lib_inline)(ARG0.FB)
module ARG2 = (Phases.Specialize)(ARG1.FB)
module ARG3 = (Phases.Drop_sized_trait)(ARG2.FB)
module ARG4 = (Phases.Simplify_question_marks)(ARG3.FB)
module ARG5 = (Phases.And_mut_defsite)(ARG4.FB)
module ARG6 = (Phases.Reconstruct_for_loops)(ARG5.FB)
module ARG7 = (Phases.Reconstruct_while_loops)(ARG6.FB)
module ARG8 = (Phases.Direct_and_mut)(ARG7.FB)
module ARG9 = (Phases.Reject.Arbitrary_lhs)(ARG8.FB)
module ARG10 = (Phases.Drop_blocks)(ARG9.FB)
module ARG11 = (Phases.Drop_references)(ARG10.FB)
module ARG12 = (Phases.Trivialize_assign_lhs)(ARG11.FB)
module ARG13 = (Side_effect_utils.Hoist)(ARG12.FB)
module ARG14 = (Phases.Simplify_match_return)(ARG13.FB)
module ARG15 = (Phases.Drop_needless_returns)(ARG14.FB)
module ARG16 = (Phases.Local_mutation)(ARG15.FB)
module ARG17 = (Phases.Reject.Continue)(ARG16.FB)
module ARG18 = (Phases.Cf_into_monads)(ARG17.FB)
module ARG19 = (Phases.Reject.EarlyExit)(ARG18.FB)
module ARG20 = (Phases.Functionalize_loops)(ARG19.FB)
module ARG21 = (Phases.Reject.As_pattern)(ARG20.FB)
module ARG22 = (Phases.Traits_specs)(ARG21.FB)
module ARG23 = (Phases.Simplify_hoisting)(ARG22.FB)
module ARG24 = (Phases.Newtype_as_refinement)(ARG23.FB)
module ARG25 = (SubtypeToInputLanguage)(ARG24.FB)
module ARG26 = (Identity)(ARG25.FB)
include
((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(((BindPhase)(ARG0))(ARG1)))(ARG2)))(ARG3)))(ARG4)))(ARG5)))(ARG6)))(ARG7)))(ARG8)))(ARG9)))(ARG10)))(ARG11)))(ARG12)))(ARG13)))(ARG14)))(ARG15)))(ARG16)))(ARG17)))(ARG18)))(ARG19)))(ARG20)))(ARG21)))(ARG22)))(ARG23)))(ARG24)))(ARG25)))(ARG26)
end

let h x = ""
type foo = | A | B
let i (x: foo) =
match x with
| A -> 0
| B -> 1
```

The module:
```ocaml
module%inlined_contents [@@add "some_module.ml"] Test = struct
[%%inline_defs f + g + foo]
[%%inline_defs "M.*" - z - y]

let h: int -> string = [%%inline_body h]
let i: foo -> int =
match i with
| [%%inline_arms "i.*" - B] -> dummy
| B -> 123
end
```
The system of phases is supposed to let backends opt-in or out easily
for phases. This syntactic limitation was a major issue for that.

## Solution
This PPX defines a small DSL that embeds in the OCaml syntax of
expressions to provide a nice way of binding phases functors via a
`|>` infix operator.

Will be rewritten into:
Example:
```ocaml
module%inlined_contents [@@add "some_module.ml"] Test = struct

(* [%%inline_defs f + g + foo] *)
let f x = x + 1
let g x = x + 2
type foo = | A | B

(* [%%inline_defs "M.*" - z - y] *)
let w = 0
let x = 1

let h: int -> string = (fun x -> "")
let i: foo -> int =
match i with
| A -> 0
| B -> 123
end
module TransformToInputLanguage =
[%functor_application
Phases.Reject.RawOrMutPointer(Features.Rust)
|> Phases.Transform_hax_lib_inline
|> Phases.Specialize
|> Phases.Drop_sized_trait
|> Phases.Simplify_question_marks
|> Phases.And_mut_defsite
|> Phases.Reconstruct_for_loops
|> Phases.Reconstruct_while_loops
|> SubtypeToInputLanguage
|> Identity
]
[@ocamlformat "disable"]
```

Note: the `[@ocamlformat "disable"]` annotation is important,
otherwise `ocamlformat` tries to format those PPX invokations with its
rules for expressions, yielding rather ugly looking code...

### Syntax
- `Name`: a module `Name`
- `Name(X, Y, Z)`: the application of the functor `Name` with three arguments `X`, `Y` and `Z`
- `(module <M>)`: the arbitary OCaml module expression `<M>`
- `<a> <b>`: the application of the module described by `<a>` and the module described by `<b>`
- `(fun X -> <a>)`: a "functor" from `X` to `<a>`
- `<a> |> <b>`: `<a>` binded with `<b>`
23 changes: 19 additions & 4 deletions engine/utils/ppx_functor_application/ppx_functor_application.ml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ let show_module_expr = string_of_module_expr
let pp_module_expr (fmt : Format.formatter) (s : module_expr) : unit =
Format.pp_print_string fmt @@ string_of_module_expr s

(** Defines a DSL for functor application. *)
type module_dsl =
| Var of longident
| App of module_dsl * module_dsl
Expand All @@ -41,6 +42,7 @@ type module_dsl =

let var_of_string s = Var (Longident.Lident s)

(** Elaborate a OCaml module expression from a `module_dsl` *)
let rec elab ~loc (t : module_dsl) : module_expr =
let (module E) = Ast_builder.make loc in
let h = elab ~loc in
Expand Down Expand Up @@ -92,6 +94,7 @@ let rec collect_pipes (t : module_dsl) : module_dsl list =
| Meta (Pipe l, _) | Pipe l -> List.concat_map ~f:collect_pipes l
| _ -> [ t ]

(** Get rid of extra `Pipe` nodes *)
let rec normalize (t : module_dsl) : module_dsl =
match t with
| App (f, x) -> App (normalize f, normalize x)
Expand All @@ -104,16 +107,28 @@ let rec normalize (t : module_dsl) : module_dsl =
| [ t ] -> t
| l -> Pipe l)

(** Recognize a small language embedded in OCaml syntax for applying
functors in chain. *)
let rec parse expr =
let r =
match expr with
| { pexp_desc = Pexp_construct ({ txt; _ }, None); _ } -> Var txt
| { pexp_desc = Pexp_construct ({ txt; _ }, None); _ } ->
(* Parses variables (module names are uppercase, since we are looking at OCaml expressions, so we match on constructors) *)
Var txt
| { pexp_desc = Pexp_construct ({ txt; _ }, Some arg); _ } ->
(* Parses module applcations (same as above: in expressions, module applications are parsed as constructor applications) *)
App (Var txt, parse arg)
| [%expr [%e? m1] |> [%e? m2]] -> Pipe [ parse m1; parse m2 ]
| [%expr (module [%m? m])] -> ModExpr m
| [%expr [%e? f] [%e? x]] -> App (parse f, parse x)
| [%expr [%e? m1] |> [%e? m2]] ->
(* Parses `... |> ...` infix module application *)
Pipe [ parse m1; parse m2 ]
| [%expr (module [%m? m])] ->
(* Parses module expressions (in this case, that corresponds to OCaml module expression) *)
ModExpr m
| [%expr [%e? f] [%e? x]] ->
(* Parses module applications (e.g. `(fun x -> ...) (module YXZ)`) *)
App (parse f, parse x)
| [%expr fun [%p? x] -> [%e? body]] -> (
(* Parses module abstractions (e.g. `fun X -> Z(X)`) *)
match x with
| { ppat_desc = Ppat_construct ({ txt = Lident x; _ }, None); _ } ->
Abs (x, parse body)
Expand Down
Loading