Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add interpunct (·) as a keyword for dot product, etc. #3584

Merged
merged 7 commits into from
Nov 26, 2024
Merged
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Expand unicode math operators recognized when lexing
We need to look at two bytes to figure out whether certain UTF-8 encoded
Unicode characters are math symbols, so we add a "peek2" function that looks
ahead at the next two bytes of the file and concatenates them into an int for
easy bitwise comparison with the Unicode math operators:
https://en.wikipedia.org/wiki/Mathematical_operators_and_symbols_in_Unicode

We also no longer exclude byte 226 (0xE2, the first byte of many of these
characters) from the ALPHA chartype, but instead add an
"ismathoperator" check where appropriate.
  • Loading branch information
d-torrance committed Nov 22, 2024
commit bc44c987f7a09d3d245349e39125bfec37387d4f
27 changes: 22 additions & 5 deletions M2/Macaulay2/d/ctype.d
Original file line number Diff line number Diff line change
@@ -25,8 +25,7 @@ foreach c in " \t\r" do setchartype(c,WHITE);
foreach c in "\n" do setchartype(c,NEWLINE);
foreach c in "$'" do setchartype(c,ALNUMEXTRA);

for c from 128 to 225 do setchartype(char(c),ALPHA); -- 226 is unicode math symbols
for c from 227 to 255 do setchartype(char(c),ALPHA);
for c from 128 to 255 do setchartype(char(c),ALPHA);
setchartype('\"',QUOTE);

-- Look up the type bits recorded in chartypes for c; any value with bits set
-- outside the low 8 (i.e. not in 0..255) has no type bits and yields 0.
chartype(c:int):int := if (c & ~255) == 0 then int(chartypes.c) else 0;
@@ -52,10 +51,28 @@ export isspace (c:char):bool := (chartype(c) & SPACE ) != 0;
-- true when the type bits of c include NEWLINE
export isnewline (c:char):bool := (chartype(c) & NEWLINE ) != 0;
-- true when the type bits of c include QUOTE
export isquote (c:char):bool := (chartype(c) & QUOTE ) != 0;

export isvalidsymbol (s:string):bool := (
if int(uchar(s.0)) == 226 && length(s) == 3 then return true; -- ugly unicode math symbol hack
-- Test whether c holds the leading two bytes of a character that is treated
-- as a math operator.  c is two bytes concatenated into one int: the first
-- byte in bits 8-15 and the second byte in bits 0-7.  Only these two bytes
-- are examined; any remaining bytes of a longer character are not checked.
-- Each mask below selects a contiguous range of two-byte prefixes.
export ismathoperator(c:int):bool := (
(c & 0xffe0) == 0xc2a0 || -- latin-1 punctuation/symbols (0xc2a0-0xc2bf)
c == 0xc397 || -- multiplication sign
c == 0xc3b7 || -- division sign
(c & 0xfffc) == 0xe288 || -- mathematical operators (0xe288-0xe28b)
(c & 0xfffc) == 0xe2a8 || -- supplemental mathematical operators (0xe2a8-0xe2ab)
c == 0xe29f || -- misc. mathematical symbols A
(c & 0xfffe) == 0xe2a6 || -- misc. mathematical symbols B (0xe2a6-0xe2a7)
(c & 0xfffc) == 0xe28c -- misc. technical (0xe28c-0xe28f)
);

-- Two-byte form: convert c1 and c2 through uchar, pack them into one int
-- with c1 as the high byte and c2 as the low byte, and apply the int test.
ismathoperator(c1:char, c2:char):bool := (
ismathoperator((int(uchar(c1)) << 8) | int(uchar(c2))));

-- Decide whether s may be used as the name of a symbol.  s is accepted when
-- it is exactly one character whose first two bytes pass ismathoperator, or
-- when it starts with an alpha character, consists only of alphanumeric
-- characters, and has no math operator starting at any position.
-- The empty string is rejected.
export isvalidsymbol(s:string):bool := (
    n := length(s);
    -- nothing to inspect; this also keeps the s.0 accesses below in bounds
    if n == 0 then return false;
    if !isalpha(s.0) then return false;
    foreach c in s do if !isalnum(c) then return false;
    -- ismathoperator needs two bytes, so only read s.1 when it exists
    if n > 1 && ismathoperator(s.0, s.1) && n == utf8charlength(s.0)
    then return true;
    for i from 0 to n - 1 do (
        if !isalnum(s.i) then return false;
        -- the last byte has no successor, so no operator can start there
        if i + 1 < n && ismathoperator(s.i, s.(i + 1)) then return false);
    true);

-- Local Variables:
19 changes: 10 additions & 9 deletions M2/Macaulay2/d/lex.d
Original file line number Diff line number Diff line change
@@ -315,9 +315,15 @@ gettoken1(file:PosFile,sawNewline:bool):Token := (
return Token(
if file.file.fulllines then wordEOC else NewlineW,
newPosition(file, line, column), globalDictionary, dummySymbol, sawNewline))
else if isalpha(ch) then ( -- valid symbols are an alpha (letters, any unicode except 226) followed by any number of alphanum (alpha, digit, dollar, prime)
else if ismathoperator(peek2(file)) then (
for i from 1 to utf8charlength(char(ch))
do tokenbuf << char(getc(file));
return Token(makeUniqueWord(takestring(tokenbuf), parseWORD),
newPosition(file, line, column), globalDictionary, dummySymbol, sawNewline))
else if isalpha(ch) then ( -- valid symbols are an alpha (letters, any unicode) followed by any number of alphanum (alpha, digit, dollar, prime)
tokenbuf << char(getc(file));
while isalnum(peek(file)) do tokenbuf << char(getc(file));
while isalnum(peek(file)) && !ismathoperator(peek2(file))
do tokenbuf << char(getc(file));
return Token(makeUniqueWord(takestring(tokenbuf), parseWORD),
newPosition(file, line, column), globalDictionary, dummySymbol, sawNewline))
else if isdigit(ch) || ch==int('.') && isdigit(peek(file,1)) then (
@@ -390,7 +396,8 @@ gettoken1(file:PosFile,sawNewline:bool):Token := (
if int('.') == c then printWarningMessage(position(file),"character '"+char(c)+"' immediately following floating point number");
);
c = peek(file);
if isalpha(c) then printWarningMessage(position(file),"character '"+char(c)+"' immediately following number");
if isalpha(c) && !ismathoperator(peek2(file))
then printWarningMessage(position(file),"character '"+char(c)+"' immediately following number");
s := takestring(tokenbuf);
return Token(Word(s,typecode,hash_t(0), parseWORD),
newPosition(file, line, column), globalDictionary, dummySymbol, sawNewline))
@@ -410,12 +417,6 @@ gettoken1(file:PosFile,sawNewline:bool):Token := (
)
is word:Word do return Token(word,
newPosition(file, line, column), globalDictionary, dummySymbol, sawNewline))
else if ch == 226 then ( -- unicode math symbols
tokenbuf << char(getc(file));
tokenbuf << char(getc(file));
tokenbuf << char(getc(file));
return Token(makeUniqueWord(takestring(tokenbuf), parseWORD),
newPosition(file, line, column), globalDictionary, dummySymbol, sawNewline))
else (
when recognize(file)
is null do (
2 changes: 2 additions & 0 deletions M2/Macaulay2/d/stdiop.d
Original file line number Diff line number Diff line change
@@ -179,6 +179,8 @@ export peek(o:PosFile, offset:int):int := (
);
c);
export peek(o:PosFile):int := peek(o,0);
-- Concatenate the next two bytes of o into a single int: the byte at offset 0
-- goes in the high position (shifted left by 8) and the byte at offset 1 in
-- the low position.  Both are obtained with peek, so presumably nothing is
-- consumed from the file.
-- NOTE(review): if peek can return a negative end-of-file or error code, the
-- result is not a plain two-byte value -- confirm callers tolerate that.
export peek2(o:PosFile):int := (peek(o) << 8) | peek(o, 1);
export isatty(o:PosFile):bool := o.file.inisatty;
export close(o:PosFile):int := (
when close(o.file) is errmsg do ERROR else 0
2 changes: 1 addition & 1 deletion M2/Macaulay2/d/strings.d
Original file line number Diff line number Diff line change
@@ -64,7 +64,7 @@ export index(s:string,offset:int,c:char,d:char):int := (
while i+1 < length(s) do if c == s.i && d==s.(i+1) then return i else i=i+1;
-1);

utf8charlength(c0:char):int := (
export utf8charlength(c0:char):int := (
c := int(uchar(c0));
if (c & 0x80) == 0 then 1
else if (c & 0xe0) == 0xc0 then 2