diff --git a/LICENSE b/LICENSE
index 660592c179..ba9f1fabad 100644
--- a/LICENSE
+++ b/LICENSE
@@ -143,6 +143,7 @@ Other contributors, listed alphabetically, are:
 * Jesper Noehr -- HTML formatter "anchorlinenos"
 * Mike Nolta -- Julia lexer
 * Jonas Obrist -- BBCode lexer
+* Antonio Ognio -- Gleam lexer
 * David Oliva -- Rebol lexer
 * Jon Parise -- Protocol buffers lexer
 * Ronny Pfannschmidt -- BBCode lexer
diff --git a/lib/rouge/lexers/gleam.rb b/lib/rouge/lexers/gleam.rb
new file mode 100644
index 0000000000..205bf4e973
--- /dev/null
+++ b/lib/rouge/lexers/gleam.rb
@@ -0,0 +1,176 @@
+# -*- coding: utf-8 -*- #
+# frozen_string_literal: true
+
+module Rouge
+  module Lexers
+    # Lexer for the Gleam programming language (https://gleam.run/)
+    #
+    # Rules are tried in order and the first match wins, so specific rules
+    # are listed ahead of the general ones that would otherwise shadow them.
+    class Gleam < RegexLexer
+      title 'Gleam'
+      desc 'The Gleam programming language (https://gleam.run/)'
+      tag 'gleam'
+      filenames '*.gleam'
+      mimetypes 'text/x-gleam'
+
+      # Character sets
+      ID = /[a-z_][a-zA-Z0-9_]*/.freeze
+      TYPE_ID = /[A-Z][a-zA-Z0-9_]*/.freeze
+      DISCARD_ID = /_[a-zA-Z0-9_]*/.freeze
+      MODULE_METHOD_CALL = %r{([a-z_][a-zA-Z0-9_]*)(\.)([a-zA-Z_][a-zA-Z0-9_]*)}.freeze
+      WHITESPACE = /\s+/.freeze
+
+      # Keywords, built-ins, constants
+      KEYWORDS_LIST = %w[
+        as assert case const external fn if import let opaque pub todo try type use
+        module else panic test
+      ].freeze
+
+      KEYWORDS = %r{\b(?:#{KEYWORDS_LIST.join('|')})\b}.freeze
+
+      BUILTINS_LIST = %w[
+        Int Float Bool String List Result Option Iterator
+      ].freeze
+
+      BUILTINS = %r{\b(?:#{BUILTINS_LIST.join('|')})\b}.freeze
+
+      CONSTANTS = %r{\b(?:Nil|Ok|Error|Stop|Continue|True|False)\b}.freeze
+
+      BIT_STRING_KEYWORDS_LIST = %w[
+        binary bits bytes int float bit_string bit_array utf8 utf16 utf32
+        utf8_codepoint utf16_codepoint utf32_codepoint signed unsigned big little
+        native unit size
+      ].freeze
+
+      BIT_STRING_KEYWORDS = %r{\b(?:#{BIT_STRING_KEYWORDS_LIST.join('|')})\b}.freeze
+
+      # Operators and punctuation. The float operators (`+.`, `<=.`, ...) come
+      # first so the trailing dot is not split off as punctuation.
+      OPERATORS = %r{<=\.|>=\.|[-+*\/<>]\.|>>|<<|\|\||&&|==|!=|<=|>=|->|=>|<-|<>|\|>|[-+\/*%=!<>&|^~]}.freeze
+      PUNCTUATION = /[()\[\]{}.,:;]/.freeze
+
+      # Numbers. Digit groups are separated by single underscores; the
+      # `X+(?:_X+)*` shape avoids nested-quantifier backtracking.
+      BINARY_NUMBER = /\b0b[01]+(?:_[01]+)*\b/.freeze
+      OCTAL_NUMBER = /\b0o[0-7]+(?:_[0-7]+)*\b/.freeze
+      HEX_NUMBER = /\b0x\h+(?:_\h+)*\b/.freeze
+      FLOAT_NUMBER = /\b\d[\d_]*\.\d[\d_]*(e[+-]?\d[\d_]*)?\b/.freeze
+      INTEGER_NUMBER = /\b\d[\d_]*\b/.freeze
+
+      # String escapes: \n \r \t \f \\ \" and \u{...}
+      ESCAPE_SEQUENCE = /\\(?:[nrtf\\"]|u\{\h{1,6}\})/.freeze
+
+      # Comments
+      LINE_COMMENT = %r{//.*?$}.freeze
+
+      state :root do
+        # Bit arrays. Must precede :simple_tokens, whose operator rule would
+        # otherwise consume `<<` and leave :bitarray unreachable.
+        rule %r{<<}, Operator, :bitarray
+
+        mixin :simple_tokens
+
+        # Raw strings (backticks)
+        rule %r{`}, Str::Backtick, :raw_string
+
+        # Triple-quoted strings
+        rule %r{"""}, Str::Double, :triple_string
+
+        # Double-quoted strings
+        rule %r{"}, Str::Double, :string
+
+        # Single-quoted strings (characters)
+        rule %r{'}, Str::Char, :char
+      end
+
+      state :simple_tokens do
+        rule WHITESPACE, Text::Whitespace
+
+        # Comments
+        rule LINE_COMMENT, Comment::Single
+
+        # Function definitions. Ahead of KEYWORDS so that `fn` is not consumed
+        # on its own; a preceding `pub` is picked up by the keyword rule.
+        rule %r{(fn)(\s+)(#{ID})} do
+          groups Keyword, Text::Whitespace, Name::Function
+        end
+
+        # Keywords, built-ins, constants
+        rule KEYWORDS, Keyword
+        rule BUILTINS, Name::Builtin
+        rule CONSTANTS, Name::Constant
+
+        # Type names (user-defined)
+        rule %r{\b#{TYPE_ID}\b}, Name::Class
+
+        # Module and method calls (e.g., list.map)
+        rule MODULE_METHOD_CALL do
+          groups Name::Namespace, Punctuation, Name::Function
+        end
+
+        # Discard names (e.g., _ or _var). Ahead of the identifier rules,
+        # which also accept a leading underscore.
+        rule DISCARD_ID, Name::Builtin::Pseudo
+
+        # Function calls. The lookahead leaves the argument list to the
+        # regular rules so arguments keep their own highlighting.
+        rule %r{#{ID}(?=\s*\()}, Name::Function
+
+        # Identifiers (variables, fields)
+        rule %r{\b#{ID}\b}, Name::Variable
+
+        # Operators and punctuation
+        rule OPERATORS, Operator
+        rule PUNCTUATION, Punctuation
+
+        # Numbers
+        rule BINARY_NUMBER, Num::Bin
+        rule OCTAL_NUMBER, Num::Oct
+        rule HEX_NUMBER, Num::Hex
+        rule FLOAT_NUMBER, Num::Float
+        rule INTEGER_NUMBER, Num::Integer
+
+        # Attributes
+        rule %r{@#{ID}}, Name::Decorator
+      end
+
+      # Raw strings
+      state :raw_string do
+        rule %r{[^`]+}, Str::Backtick
+        rule %r{`}, Str::Backtick, :pop!
+      end
+
+      # Triple-quoted strings
+      state :triple_string do
+        rule %r{"""}, Str::Double, :pop!
+        rule %r{[^"]+}, Str::Double
+        rule %r{"}, Str::Double
+      end
+
+      # Single-line strings
+      state :string do
+        rule %r{"}, Str::Double, :pop!
+        rule ESCAPE_SEQUENCE, Str::Escape
+        rule %r{[^"\\]+}, Str::Double
+        # Unrecognised backslash: keep it in the string rather than erroring
+        rule %r{\\}, Str::Double
+      end
+
+      # Character literals
+      state :char do
+        rule %r{[^'\\]+}, Str::Char
+        rule %r{\\[\\']}, Str::Escape
+        rule %r{'}, Str::Char, :pop!
+      end
+
+      # Bit arrays: segment options are keywords, everything else is lexed
+      # with the regular rules until the closing `>>`.
+      state :bitarray do
+        rule %r{>>}, Operator, :pop!
+        rule BIT_STRING_KEYWORDS, Keyword
+        mixin :root
+      end
+    end
+  end
+end
diff --git a/spec/lexers/gleam_spec.rb b/spec/lexers/gleam_spec.rb
new file mode 100644
index 0000000000..0475c0e338
--- /dev/null
+++ b/spec/lexers/gleam_spec.rb
@@ -0,0 +1,111 @@
+# -*- coding: utf-8 -*-
+# frozen_string_literal: true
+
+describe Rouge::Lexers::Gleam do
+  let(:subject) { Rouge::Lexers::Gleam.new }
+
+  # Lexes +source+ and returns [qualified token name, text] pairs.
+  def lexed(source)
+    subject.lex(source).map { |token, text| [token.qualname, text] }
+  end
+
+  describe 'guessing' do
+    include Support::Guessing
+
+    it 'guesses by filename' do
+      assert_guess(filename: 'example.gleam')
+    end
+
+    it 'guesses by mimetype' do
+      assert_guess(mimetype: 'text/x-gleam')
+    end
+  end
+
+  describe 'lexing keywords' do
+    it 'recognizes keywords' do
+      %w[let fn import pub case type as if else try opaque assert todo].each do |keyword|
+        assert_includes lexed(keyword), ['Keyword', keyword]
+      end
+    end
+  end
+
+  describe 'lexing built-in types' do
+    it 'recognizes built-in types' do
+      %w[Int Float Bool String List Result Option Iterator].each do |builtin|
+        assert_includes lexed(builtin), ['Name.Builtin', builtin]
+      end
+    end
+  end
+
+  describe 'lexing constants' do
+    it 'recognizes constants' do
+      %w[Nil Ok Error Stop Continue True False].each do |constant|
+        assert_includes lexed(constant), ['Name.Constant', constant]
+      end
+    end
+  end
+
+  describe 'lexing numbers' do
+    it 'recognizes integers' do
+      assert_includes lexed('42'), ['Literal.Number.Integer', '42']
+    end
+
+    it 'recognizes floating-point numbers' do
+      assert_includes lexed('3.14'), ['Literal.Number.Float', '3.14']
+    end
+
+    it 'recognizes hexadecimal numbers' do
+      assert_includes lexed('0x1A3F'), ['Literal.Number.Hex', '0x1A3F']
+    end
+  end
+
+  describe 'lexing strings' do
+    it 'recognizes double-quoted strings' do
+      source = '"Hello, Gleam!"'
+      tokens = lexed(source)
+
+      assert_equal ['Literal.String.Double'], tokens.map(&:first).uniq
+      assert_equal source, tokens.map(&:last).join
+    end
+
+    it 'recognizes escape sequences' do
+      tokens = lexed('"a\nb"')
+
+      assert_includes tokens, ['Literal.String.Escape', '\n']
+      refute_includes tokens.map(&:first), 'Error'
+    end
+  end
+
+  describe 'lexing functions' do
+    it 'recognizes function definitions' do
+      assert_equal [['Keyword', 'fn'], ['Text.Whitespace', ' '], ['Name.Function', 'main']],
+                   lexed('fn main')
+    end
+
+    it 'keeps highlighting call arguments' do
+      tokens = lexed('add(1, "two")')
+
+      assert_includes tokens, ['Name.Function', 'add']
+      assert_includes tokens, ['Literal.Number.Integer', '1']
+    end
+  end
+
+  describe 'lexing module and method calls' do
+    it 'recognizes module and method calls' do
+      assert_equal [['Name.Namespace', 'list'], ['Punctuation', '.'], ['Name.Function', 'map']],
+                   lexed('list.map')
+    end
+  end
+
+  describe 'lexing operators' do
+    it 'recognizes pipeline operator' do
+      assert_includes lexed('|>'), ['Operator', '|>']
+    end
+  end
+
+  describe 'lexing bit arrays' do
+    it 'recognizes segment options' do
+      assert_includes lexed('<<1:size(8)>>'), ['Keyword', 'size']
+    end
+  end
+end