Skip to content

Lark Grammar for M2 language

Frédéric Chapoton edited this page Oct 17, 2023 · 10 revisions

This is related to issue #2889.

It would be convenient to have a formal model for the M2 language. This would in particular allow syntax checks of all files in the common library and packages.

Here is the Lark documentation:

https://lark-parser.readthedocs.io/en/stable/

A first attempt was started in #2893, but it stalled at a preliminary stage.

This is loosely related to the question of syntax highlighting, as provided by

https://github.com/mahrud/language-macaulay2

How to use

Here is a sample IPython session:

In [26]: from lark import Lark

In [27]: with open("macaulay2.lark") as f:
...:     w = f.read()

In [28]: json_parser = Lark(w, start='start', debug=True)

In [29]: json_parser.parse(open("M2/M2/Macaulay2/packages/DiffAlg.m2").read())

And here is the tentative grammar:

// Entry point used when parsing a whole file: any sequence (possibly empty)
// of _NEWLINE tokens and expressions, in any order.
start: (_NEWLINE | expr)*

// Identifiers: a letter followed by any number of letters, digits or
// apostrophes, so that names such as x1 or f' are lexed as one symbol
// (WORD only accepts letters).  The underscore is deliberately excluded:
// it is used as the subscript operator elsewhere in this grammar.
SYMBOL: /[A-Za-z][A-Za-z0-9']*/

// Comments:
//   -- ...       line comment, runs up to (not including) the end of line
//   -* ... *-    block comment; [\s\S] lets it span several lines and the
//                lazy quantifier stops at the first closing "*-", so two
//                block comments in one file are not merged into one
// ";" is listed here as well, so statement separators are skipped together
// with comments by the %ignore COMMENT directive of this grammar.
COMMENT: /--[^\n]*/
       | /-\*[\s\S]*?\*-/
       | ";"

// Names of built-in numeric functions that function_call accepts in
// addition to an arbitrary SYMBOL (listed alphabetically).
FUNCTION: "abs" | "acos" | "agm" | "asin" | "atan" | "atan2"
    | "cos" | "coth" | "csc"
    | "eint" | "erf" | "erfc"
    | "sec" | "sech" | "sin" | "sqrt"
    | "tan" | "tanh"
    | "zeta"

// Any expression.  Every expression-like rule of the grammar must be listed
// here to be reachable from `start`; `exception` and `hash_table_operation`
// are defined further down and are included so that error/try/catch/throw
// forms and the applyValues/scanKeys/... forms can actually be parsed.
expr: SYMBOL
    | string
    | SIGNED_NUMBER
    | operator_exp
    | command
    | timing
    | assignment
    | lists_and_sequences
    | hash_table
    | branching
    | bool
    | symbols
    | make_function
    | function_call
    | function_def
    | table_access
    | exception
    | hash_table_operation

// String-valued expressions: a quoted literal (STRING), the keyword
// toString applied to an expression, or two or more strings joined by "|".
string: STRING
    | "toString" expr
    | (string "|")+ string

// A single "key => value" pair; used by typing, by the argument of
// "method(...)" in command, and as the entries of hash_table.
mapping: expr "=>" expr

// functions
// Optional ":=" part that function_def and make_function accept before
// the "->" arrow; it consists of ":=" followed by one mapping.
typing: ":=" mapping
// Function application, written either with a parenthesised argument list
// that may be empty -- f(), f(x), f(x, y) -- or by juxtaposition -- f x.
function_call: (SYMBOL | FUNCTION) "(" (expr ("," expr)*)? ")"
             | SYMBOL expr
// Definition whose left-hand side has the shape of a function call,
// optionally followed by a typing part, then "->" and the body expression.
function_def: function_call typing? "->" expr
// Anonymous function: the parameters are either a bracketed list/sequence
// or a single SYMBOL, optionally followed by a typing part, then "->" and
// the body expression.
make_function: (lists_and_sequences|SYMBOL) typing? "->" expr

// Built-in commands.  Keywords and their opening parenthesis are separate
// literals throughout, so whitespace between them (e.g. "position (x, f)")
// is accepted, just as it is for "method" and "instance".
// "needs" followed by a string is reported under the alias needs_package.
command: "newPackage"
       | "select"
       | "position" "(" expr "," expr ")"
       | "needs" string -> needs_package
       | "method" "(" mapping? ")"
       | "instance" "(" expr "," SYMBOL ")"
       | "class" expr

// operators

// Prefix operators.  The leading "!" tells Lark to keep the operator token
// in the tree; unnamed literals are otherwise filtered out, which would
// make -x and +x produce identical trees.  "#" is reported under the
// alias cardinality.
!unary_pre_op: "-"
             | "+"
             | "#" -> cardinality

// Postfix operators.  The leading "!" tells Lark to keep the operator token
// in the tree; unnamed literals are otherwise filtered out, so the tree
// would not record which postfix operator was applied.
!unary_post_op: "(*)"
              | "^*" | "^!"
              | "_*" | "_!"
              | "~"
              | "!"

// Infix operators.  The leading "!" tells Lark to keep the operator token
// in the tree; unnamed literals are otherwise filtered out, leaving an
// empty binary_op node from which "a + b" and "a * b" cannot be told apart.
!binary_op: "+" | "-"
          | "*" | "/" | "//"
          | "**" | "++"
          | "^" | "^^" | "^**"
          | "<<" | ">>"
          | "<==>"
          | "<==" | "<==="
          | "==>" | "===>"
          | "@" | "@@"
          | "&" | "%"
          | "|" | "|-" | "||"
          | ".." | "..<" | ":" | "_"
          | "."

// Operator expressions, in three shapes:
//   prefix operator followed by its operand,
//   operand followed by a postfix operator,
//   two operands separated by an infix operator.
// No precedence or associativity is encoded here.
operator_exp: unary_pre_op expr
    | expr unary_post_op
    | expr binary_op expr

// boolean and comparison tests

// The two boolean literals.
BOOLEAN: "true" | "false"

// Comparisons between two expressions.  Every alternative carries its own
// alias: the operator literal itself is dropped from the tree, so the alias
// is the only thing that tells e.g. ">" from ">=" after parsing.
comparison: expr "==" expr -> equal
          | expr "!=" expr -> unequal
          | expr "===" expr -> strict_equal
          | expr "=!=" expr -> strict_unequal
          | expr "<" expr -> less
          | expr "<=" expr -> less_or_equal
          | expr ">" expr -> greater
          | expr ">=" expr -> greater_or_equal

// Boolean expressions: combinations with and / or / xor, negation with
// not, the literals in BOOLEAN, a comparison, or the two-argument form
// all(expr, expr).
bool: bool "and" bool
    | bool "or" bool
    | bool "xor" bool
    | "not" bool
    | BOOLEAN
    | comparison
    | "all" "(" expr "," expr ")"

// new and symbols

// new: "new" hash_table ["of" hash_table] "from" hash_table ":=" (A,B,c) "->" expr

// Keyword forms that take exactly one SYMBOL as operand:
// global, local, symbol, protect and threadVariable.
symbols: "global" SYMBOL
    | "local" SYMBOL
    | "symbol" SYMBOL
    | "protect" SYMBOL
    | "threadVariable" SYMBOL

// assignment

// The three assignment forms; each has an expression on both sides of
// the operator ("=", ":=" or "<-").
assignment: expr "=" expr
    | expr ":=" expr
    | expr "<-" expr

// The "?" operator on its own.
// NOTE(review): no rule in the grammar shown here refers to
// comparison_operator, so it is currently unreachable from `start` --
// confirm whether it was meant to be part of binary_op or comparison.
comparison_operator: "?"


// subscripting and object access

// Suffixes that table_access may attach to an expression.  Each form is
// reported under its alias:
//   _ expr     subscript
//   . expr     access_via_key
//   # [expr]   length_or_access   (the operand is optional)
//   .? expr    check_for_key
//   #?         check_value        (no operand in this grammar)
accessing: "_" expr -> subscript
    | "." expr -> access_via_key
    | "#" expr? -> length_or_access
    | ".?" expr -> check_for_key
    | "#?" -> check_value
 
// An expression followed by one of the `accessing` suffixes.
table_access: expr accessing

// branching

// Control flow.  Clauses in square brackets are optional:
//   if ... then ... [else ...]
//   while ... [list ...] [do ...]
//   for ... [from ...] [to ...] [when ...] [list ...] [do ...]
//   break / continue / return, each with an optional operand
// The conditions after if, while and when must be `bool`; the other
// positions take an arbitrary `expr`.
branching: "if" bool "then" expr ["else" expr]
    | "while" bool ["list" expr] ["do" expr]
    | "for" expr ["from" expr] ["to" expr] ["when" bool] ["list" expr] ["do" expr]
    | "break" expr?
    | "continue" expr?
    | "return" expr?

// exceptions and errors

// Error handling forms:
//   error followed by a string,
//   try expr with optional "then" and "else" branches,
//   catch / throw / shield, each followed by one expression.
exception: "error" string
    | "try" expr ["then" expr] ["else" expr]
    | "catch" expr
    | "throw" expr
    | "shield" expr

// timing and alarms

// Timing-related forms.  alarm, sleep and nanosleep take an unsigned
// integer literal (NN, imported from common.INT); time, timing,
// elapsedTime and elapsedTiming take an arbitrary expression.
timing: "alarm" NN
    | "time" expr
    | "timing" expr
    | "sleep" NN
    | "nanosleep" NN
    | "elapsedTime" expr
    | "elapsedTiming" expr

// lists, sets, sequences, arrays

// Brace-delimited, comma-separated expressions; the braces may be empty.
set_core: "{" (expr ("," expr)*)? "}"

// Bracketed collections of comma-separated expressions:
//   { ... }     via set_core (may be empty)
//   ( ... )     may be empty, i.e. "()" is accepted
//   [ ... ]     may be empty, i.e. "[]" is accepted
//   <| ... |>   at least one element
//   set { ... } the keyword "set" applied to a set_core
lists_and_sequences: set_core
                   | "(" (expr ("," expr)*)? ")"
                   | "[" (expr ("," expr)*)? "]"
                   | "<|" expr ("," expr)* "|>"
                   | "set" set_core

// mapping over hash tables


// Brace-delimited list of one or more "key => value" mappings,
// separated by commas.
hash_table: "{" mapping ("," mapping)* "}"

// Two-argument operations whose first argument is a literal hash_table and
// whose second is an arbitrary expression.  The keyword and the opening
// parenthesis are separate literals, so whitespace between them
// (e.g. "applyValues (...)") is accepted.
hash_table_operation: "applyValues" "(" hash_table "," expr ")"
        | "applyKeys" "(" hash_table "," expr ")"
        | "applyPairs" "(" hash_table "," expr ")"
        | "scanValues" "(" hash_table "," expr ")"
        | "scanKeys" "(" hash_table "," expr ")"
        | "scanPairs" "(" hash_table "," expr ")"
        | "scan" "(" hash_table "," expr ")"

// "merge(" A B C ")"
// combine: "combine(" A B C D E")"

// One or more line breaks (each optionally followed by tabs or spaces)
// and/or COMMENT matches, taken together as a single token.  The leading
// underscore in the name makes Lark leave this token out of the parse tree.
_NEWLINE: ( /\r?\n[\t ]*/ | COMMENT )+

%import common.ESCAPED_STRING -> STRING
%import common.SIGNED_NUMBER
%import common.WORD
%import common.LETTER
%import common.NEWLINE
%import common.INT -> NN
%import common.SIGNED_INT -> ZZ
%import common.SIGNED_FLOAT -> RR
%import common.WS
%ignore WS
%ignore COMMENT
Clone this wiki locally