Skip to content

Commit

Permalink
feat: add antlr grammar for SubstraitTypes and test file format
Browse files Browse the repository at this point in the history
Add parser changes for testcase files
Add function test coverage changes
  • Loading branch information
scgkiran committed Oct 28, 2024
1 parent 683f417 commit c97a231
Show file tree
Hide file tree
Showing 28 changed files with 26,658 additions and 0 deletions.
8 changes: 8 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,11 @@ repos:
rev: 6.1.0
hooks:
- id: flake8
# Repository-local hook that invokes a single pytest test from tests/test_extensions.py.
- repo: local
hooks:
- id: check-substrait-extensions
name: Check Substrait extensions
entry: pytest tests/test_extensions.py::test_read_substrait_extensions
# NOTE(review): with `language: python` pre-commit runs the entry in a hook-specific
# environment; confirm pytest is installed there (e.g. additional_dependencies) - TODO confirm.
language: python
# Staged file names are not appended to the entry command.
pass_filenames: false

109 changes: 109 additions & 0 deletions grammar/FuncTestCaseLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
lexer grammar FuncTestCaseLexer;

import SubstraitLexer;

// Literal marker that introduces the format version (consumed by the parser's `version` rule).
SUBSTRAIT_SCALAR_TEST
: '### SUBSTRAIT_SCALAR_TEST:'
;

// 'v' followed by digits, optionally followed by '.' and more digits (e.g. v1 or v1.0).
// DIGIT is not defined in this file; presumably it comes from the imported SubstraitLexer - verify.
FORMAT_VERSION
: 'v' DIGIT+ ('.' DIGIT+)?
;

// Literal marker that introduces the list of included files (consumed by the parser's `include` rule).
SUBSTRAIT_INCLUDE
: '### SUBSTRAIT_INCLUDE:'
;

// A whole line starting with "# " (hash + space); the token includes the trailing line break.
DESCRIPTION_LINE
: '# ' ~[\r\n]* '\r'? '\n'
;

// Sentinel results a test case may use instead of a typed value (see parser rule `substraitError`).
ERROR_RESULT
: '<!ERROR>'
;

UNDEFINED_RESULT
: '<!UNDEFINED>'
;


// Option names that the parser rule `option_name` accepts as dedicated keywords.
// The OVERFLOW literal was misspelled 'overlfow', so the keyword never matched the
// intended text 'overflow'.
OVERFLOW: 'overflow';
ROUNDING: 'rounding';

// Option values accepted by the parser rule `option_value`.
ERROR: 'ERROR';
SATURATE: 'SATURATE';
SILENT: 'SILENT';
TIE_TO_EVEN: 'TIE_TO_EVEN';
NAN: 'NAN';


// Optional sign followed by INTEGER.
// INTEGER is not defined in this file; presumably it comes from the imported SubstraitLexer - verify.
INTEGER_LITERAL
: [+-]? INTEGER
;

// Optional sign, one or more digits, and an optional fractional part that needs at least one digit.
DECIMAL_LITERAL
: [+-]? [0-9]+ ('.' [0-9]+)?
;

// Optional sign, digits, optional '.' (digits after it are optional), optional exponent;
// or the special spellings: signed 'inf', 'nan' / 'NaN', and 'snan'.
// These three numeric rules overlap on some inputs; the parser's `numericLiteral` rule
// accepts any of them, so keep the rule order unchanged when editing.
FLOAT_LITERAL
: [+-]? [0-9]+ ('.' [0-9]*)? ( [eE] [+-]? [0-9]+ )?
| [+-]? 'inf'
| 'nan' | 'NaN'
| 'snan'
;

// Lower-case boolean spellings only.
BOOLEAN_LITERAL
: 'true' | 'false'
;

// Building blocks for the quoted date/time literals below. Fragments never
// produce tokens of their own; they only name shared sub-patterns.
fragment FourDigits: [0-9] [0-9] [0-9] [0-9];
fragment TwoDigits: [0-9] [0-9];
// dddd-dd-dd
fragment CalendarDate: FourDigits '-' TwoDigits '-' TwoDigits;
// dd:dd:dd with an optional fractional part after the seconds
fragment ClockTime: TwoDigits ':' TwoDigits ':' TwoDigits ('.' [0-9]+)?;
// +dd:dd or -dd:dd
fragment ZoneOffset: [+-] TwoDigits ':' TwoDigits;

// 'dddd-dd-ddTdd:dd:dd[.d+](+|-)dd:dd'
TIMESTAMP_TZ_LITERAL: '\'' CalendarDate 'T' ClockTime ZoneOffset '\'';

// 'dddd-dd-ddTdd:dd:dd[.d+]'
TIMESTAMP_LITERAL: '\'' CalendarDate 'T' ClockTime '\'';

// 'dd:dd:dd[.d+]'
TIME_LITERAL: '\'' ClockTime '\'';

// 'dddd-dd-dd'
DATE_LITERAL: '\'' CalendarDate '\'';

// Single-letter designators used inside interval literals. They are regular
// (non-fragment) tokens, and the parser grammar's intervalYearLiteral /
// intervalDayLiteral / timeInterval rules reference them by name.
PERIOD_PREFIX: 'P';
TIME_PREFIX: 'T';
YEAR_SUFFIX: 'Y';
M_SUFFIX: 'M'; // used for both months and minutes
DAY_SUFFIX: 'D';
HOUR_SUFFIX: 'H';
SECOND_SUFFIX: 'S';
FRACTIONAL_SECOND_SUFFIX: 'F';

// Quoted year/month interval: 'P<n>Y', 'P<n>Y<n>M' or 'P<n>M'.
// Each <n> is an INTEGER_LITERAL, which allows an optional sign.
INTERVAL_YEAR_LITERAL
: '\'' PERIOD_PREFIX INTEGER_LITERAL YEAR_SUFFIX (INTEGER_LITERAL M_SUFFIX)? '\''
| '\'' PERIOD_PREFIX INTEGER_LITERAL M_SUFFIX '\''
;

// Quoted day/time interval: 'P<n>D' optionally followed by 'T' and a time part,
// or 'PT' followed by a time part alone.
INTERVAL_DAY_LITERAL
: '\'' PERIOD_PREFIX INTEGER_LITERAL DAY_SUFFIX (TIME_PREFIX TIME_INTERVAL)? '\''
| '\'' PERIOD_PREFIX TIME_PREFIX TIME_INTERVAL '\''
;

// Time part of a day interval. Components appear in the fixed order H, M, S, F;
// each alternative starts at a different leading component and makes the rest optional.
fragment TIME_INTERVAL
: INTEGER_LITERAL HOUR_SUFFIX (INTEGER_LITERAL M_SUFFIX)? (INTEGER_LITERAL SECOND_SUFFIX)?
(INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| INTEGER_LITERAL M_SUFFIX (INTEGER_LITERAL SECOND_SUFFIX)? (INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| INTEGER_LITERAL SECOND_SUFFIX (INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX
;

// Lower-case null keyword; the parser pairs it with an explicit datatype (rule `nullArg`).
NULL_LITERAL: 'null';

// Single-quoted string. Inside the quotes the rule accepts a backslash followed by
// any character, a doubled single quote (''), or any character other than ' and \.
STRING_LITERAL
: '\'' ('\\' . | '\'\'' | ~['\\])* '\''
;
222 changes: 222 additions & 0 deletions grammar/FuncTestCaseParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
parser grammar FuncTestCaseParser;

// Only one tokenVocab can be in effect. The previous block assigned it twice, which
// left the SubstraitLexer assignment as a dead, misleading line. FuncTestCaseLexer
// imports SubstraitLexer, so its vocabulary is the one this parser needs.
options {
    tokenVocab=FuncTestCaseLexer;
}

// Entry rule: a header followed by one or more test groups, then end of input.
doc
: header testGroup+ EOF
;

// The header is the version line followed by the include line, in that order.
header
: version include
;

// '### SUBSTRAIT_SCALAR_TEST:' marker followed by a FORMAT_VERSION token.
version
: SUBSTRAIT_SCALAR_TEST FORMAT_VERSION
;

// '### SUBSTRAIT_INCLUDE:' marker followed by one or more comma-separated quoted strings.
include
: SUBSTRAIT_INCLUDE STRING_LITERAL (COMMA STRING_LITERAL)*
;

// A single "# ..." line that introduces a test group.
testGroupDescription
: DESCRIPTION_LINE
;

// One test: function name, parenthesised argument list, an optional bracketed
// option list, then EQ and the expected result.
// OPAREN/CPAREN/OBRACKET/CBRACKET/EQ/IDENTIFIER are not defined in FuncTestCaseLexer;
// presumably they come from SubstraitLexer - verify.
testCase
: functionName=IDENTIFIER OPAREN arguments CPAREN ( OBRACKET func_options CBRACKET )? EQ result
;

// A description line followed by one or more test cases.
testGroup
: testGroupDescription (testCase)+
;

// One or more comma-separated arguments (an empty argument list is not accepted).
arguments
: argument (COMMA argument)*
;

// The expected outcome: either a typed value or one of the error sentinels.
result
: argument
| substraitError
;

// A typed literal. Every alternative has the shape <literal> DOUBLE_COLON <type>
// (see the individual *Arg rules).
argument
: nullArg
| i8Arg | i16Arg | i32Arg | i64Arg
| fp32Arg | fp64Arg
| booleanArg
| stringArg
| decimalArg
| dateArg
| timeArg
| timestampArg
| timestampTzArg
| intervalYearArg
| intervalDayArg
;

// Any of the three numeric token kinds; used by the fp32, fp64 and decimal arguments
// so that they do not depend on which lexer rule claimed a given numeral.
numericLiteral
: DECIMAL_LITERAL | INTEGER_LITERAL | FLOAT_LITERAL
;

// Typed-literal rules. Each one pairs a literal token with DOUBLE_COLON and a type
// keyword. The type keyword tokens (I8, FP32, Bool, Str, Date, ...) and DOUBLE_COLON
// are not defined in FuncTestCaseLexer; presumably they come from SubstraitLexer - verify.

// null may carry any datatype, scalar or parameterized.
nullArg: NULL_LITERAL DOUBLE_COLON datatype;

// Integer arguments accept only INTEGER_LITERAL tokens.
i8Arg: INTEGER_LITERAL DOUBLE_COLON I8;

i16Arg: INTEGER_LITERAL DOUBLE_COLON I16;

i32Arg: INTEGER_LITERAL DOUBLE_COLON I32;

i64Arg: INTEGER_LITERAL DOUBLE_COLON I64;

// Floating-point and decimal arguments accept any numeric token kind.
fp32Arg
: numericLiteral DOUBLE_COLON FP32
;

fp64Arg
: numericLiteral DOUBLE_COLON FP64
;

decimalArg
: numericLiteral DOUBLE_COLON decimalType
;

booleanArg
: BOOLEAN_LITERAL DOUBLE_COLON Bool
;

stringArg
: STRING_LITERAL DOUBLE_COLON Str
;

// Date/time arguments each require the matching quoted literal token from the lexer.
dateArg
: DATE_LITERAL DOUBLE_COLON Date
;

timeArg
: TIME_LITERAL DOUBLE_COLON Time
;

timestampArg
: TIMESTAMP_LITERAL DOUBLE_COLON Ts
;

timestampTzArg
: TIMESTAMP_TZ_LITERAL DOUBLE_COLON TsTZ
;

// Interval arguments use the single-token quoted interval literals.
intervalYearArg
: INTERVAL_YEAR_LITERAL DOUBLE_COLON IYear
;

intervalDayArg
: INTERVAL_DAY_LITERAL DOUBLE_COLON IDay
;

// Parser-level decomposition of interval text into labelled components.
// NOTE(review): no other rule in this grammar references intervalYearLiteral,
// intervalDayLiteral or timeInterval; `argument` goes through the single-token
// INTERVAL_YEAR_LITERAL / INTERVAL_DAY_LITERAL forms instead. Confirm whether these
// rules are invoked directly from code or are unused.

// P<years>Y with optional <months>M, or P<months>M alone.
intervalYearLiteral
: PERIOD_PREFIX (years=INTEGER_LITERAL YEAR_SUFFIX) (months=INTEGER_LITERAL M_SUFFIX)?
| PERIOD_PREFIX (months=INTEGER_LITERAL M_SUFFIX)
;

// P<days>D with an optional T<time part>, or PT<time part> alone.
intervalDayLiteral
: PERIOD_PREFIX (days=INTEGER_LITERAL DAY_SUFFIX) (TIME_PREFIX timeInterval)?
| PERIOD_PREFIX TIME_PREFIX timeInterval
;

// Time components in the fixed order hours, minutes, seconds, fractional seconds;
// each alternative starts at a different leading component.
timeInterval
: hours=INTEGER_LITERAL HOUR_SUFFIX (minutes=INTEGER_LITERAL M_SUFFIX)? (seconds=INTEGER_LITERAL SECOND_SUFFIX)?
(fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| minutes=INTEGER_LITERAL M_SUFFIX (seconds=INTEGER_LITERAL SECOND_SUFFIX)? (fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| seconds=INTEGER_LITERAL SECOND_SUFFIX (fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX)?
| fractionalSeconds=INTEGER_LITERAL FRACTIONAL_SECOND_SUFFIX
;

// A type is either a plain scalar keyword or a parameterized type.
// In this grammar `datatype` is referenced only by `nullArg`.
datatype
: scalarType
| parameterizedType
;

// Scalar type keywords. Each alternative carries a label (#name), which ANTLR uses
// to generate a separate context class and visitor method per alternative, so
// renaming a label changes the generated API.
scalarType
: Bool #Boolean
| I8 #i8
| I16 #i16
| I32 #i32
| I64 #i64
| FP32 #fp32
| FP64 #fp64
| Str #string
| Binary #binary
| Ts #timestamp
| TsTZ #timestampTz
| Date #date
| Time #time
| IDay #intervalDay
| IYear #intervalYear
| UUID #uuid
| UserDefined IDENTIFIER #userDefined
;

// Parameterized types. Each takes an optional QMARK (labelled `isnull`) after the
// type keyword and its parameters between angle brackets.

// FChar with a mandatory length parameter.
fixedCharType
: FChar isnull=QMARK? O_ANGLE_BRACKET len=numericParameter C_ANGLE_BRACKET #fixedChar
;

// VChar with a mandatory length parameter.
varCharType
: VChar isnull=QMARK? O_ANGLE_BRACKET len=numericParameter C_ANGLE_BRACKET #varChar
;

// FBin with a mandatory length parameter.
fixedBinaryType
: FBin isnull=QMARK? O_ANGLE_BRACKET len=numericParameter C_ANGLE_BRACKET #fixedBinary
;

// Dec is the only one whose parameter list (precision, scale) is optional as a whole.
decimalType
: Dec isnull=QMARK? (O_ANGLE_BRACKET precision=numericParameter COMMA scale=numericParameter C_ANGLE_BRACKET)? #decimal
;

// PTs with a mandatory precision parameter.
precisionTimestampType
: PTs isnull=QMARK? O_ANGLE_BRACKET precision=numericParameter C_ANGLE_BRACKET #precisionTimestamp
;

// PTsTZ with a mandatory precision parameter.
precisionTimestampTZType
: PTsTZ isnull=QMARK? O_ANGLE_BRACKET precision=numericParameter C_ANGLE_BRACKET #precisionTimestampTZ
;

// Union of the parameterized types implemented so far.
parameterizedType
: fixedCharType
| varCharType
| fixedBinaryType
| decimalType
| precisionTimestampType
| precisionTimestampTZType
// TODO implement the rest of the parameterized types
// | Struct isnull='?'? Lt expr (Comma expr)* Gt #struct
// | NStruct isnull='?'? Lt Identifier expr (Comma Identifier expr)* Gt #nStruct
// | List isnull='?'? Lt expr Gt #list
// | Map isnull='?'? Lt key=expr Comma value=expr Gt #map
;

// A type parameter value. INTEGER_LITERAL permits a leading sign in the lexer, so
// the grammar itself does not reject signed lengths or precisions.
numericParameter
: INTEGER_LITERAL #integerLiteral
;

// Error sentinel usable as a test result: '<!ERROR>' or '<!UNDEFINED>'.
substraitError
: ERROR_RESULT | UNDEFINED_RESULT
;

// One option in the form name COLON value.
func_option
: option_name COLON option_value
;

// Either one of the dedicated option keywords or any IDENTIFIER.
option_name
: OVERFLOW | ROUNDING
| IDENTIFIER
;

// Only these keyword tokens are accepted as option values.
option_value
: ERROR | SATURATE | SILENT | TIE_TO_EVEN | NAN
;

// One or more comma-separated options (used between brackets in `testCase`).
func_options
: func_option (COMMA func_option)*
;
9 changes: 9 additions & 0 deletions grammar/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Generates the Python 3 ANTLR lexer/parser (with visitor) from the grammars in
# this directory, and removes the generated Python files again.
ANTLR_JAR=antlr-4.13.2-complete.jar
GRAMMARS=SubstraitLexer.g4 FuncTestCaseLexer.g4 FuncTestCaseParser.g4
OUTPUT_DIR=../tests/coverage/antlr_parser

# Neither target creates a file named after itself; declaring them phony keeps
# them runnable even if a file called "generate" or "clean" appears here.
.PHONY: generate clean

# Run the ANTLR tool over all grammars, writing the output into OUTPUT_DIR.
generate:
	java -jar $(ANTLR_JAR) -visitor -Dlanguage=Python3 -o $(OUTPUT_DIR) $(GRAMMARS)

# Delete the Python files in OUTPUT_DIR (other generated artifacts are left alone).
clean:
	rm -rf $(OUTPUT_DIR)/*.py
Loading

0 comments on commit c97a231

Please sign in to comment.