From cb7704380294fc54399cbc91e4432d0d58b88ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A4rtel?= Date: Thu, 16 Nov 2023 08:59:21 +0200 Subject: [PATCH 1/2] Added 'gnuasm' lexer. This is based on 'nasm' and might retain some NASMisms. This uses the same file extension as 'armasm', so removed 'armasm_spec', which tested guessing by file extension. --- lib/rouge/demos/gnuasm | 39 +++++++++++++++++++++++ lib/rouge/lexers/armasm.rb | 3 +- lib/rouge/lexers/gnuasm.rb | 64 ++++++++++++++++++++++++++++++++++++++ spec/lexers/armasm_spec.rb | 14 --------- spec/visual/samples/gnuasm | 39 +++++++++++++++++++++++ 5 files changed, 144 insertions(+), 15 deletions(-) create mode 100644 lib/rouge/demos/gnuasm create mode 100644 lib/rouge/lexers/gnuasm.rb delete mode 100644 spec/lexers/armasm_spec.rb create mode 100644 spec/visual/samples/gnuasm diff --git a/lib/rouge/demos/gnuasm b/lib/rouge/demos/gnuasm new file mode 100644 index 0000000000..e512585a60 --- /dev/null +++ b/lib/rouge/demos/gnuasm @@ -0,0 +1,39 @@ + .global main + .type main, @function + .extern printf + + .text +/* Reads an integer, + doubles it, + and prints it. 
*/ +main: + pushq %rbp + movq %rsp, %rbp + subq $128, %rsp # Reserve 128 bytes for stack + + # Read an integer into -8(%rbp) + movq $scan_format, %rdi + leaq -8(%rbp), %rsi + call scanf + # If invalid, jump to end + cmpq $1, %rax + jne .Lend + + movq -8(%rbp), %rsi + imulq $2, %rsi # Double the input + + # Call 'printf("%ld\n", %rsi)' + movq $print_format, %rdi + call printf + +.Lend: + # Return with status 0 + movq $0, %rax + movq %rbp, %rsp + popq %rbp + ret + +scan_format: + .asciz "%ld" +print_format: + .asciz "%ld\n" diff --git a/lib/rouge/lexers/armasm.rb b/lib/rouge/lexers/armasm.rb index 51b4a3885f..90b569d60b 100644 --- a/lib/rouge/lexers/armasm.rb +++ b/lib/rouge/lexers/armasm.rb @@ -7,7 +7,8 @@ class ArmAsm < RegexLexer title "ArmAsm" desc "Arm assembly syntax" tag 'armasm' - filenames '*.s' + filenames '*.s', '*.S' + mimetypes 'text/x-asm' def self.preproc_keyword @preproc_keyword ||= %w( diff --git a/lib/rouge/lexers/gnuasm.rb b/lib/rouge/lexers/gnuasm.rb new file mode 100644 index 0000000000..dc9f13a50c --- /dev/null +++ b/lib/rouge/lexers/gnuasm.rb @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +# Derived from nasm.rb. Not perfect. 
+module Rouge + module Lexers + class GnuAsm < RegexLexer + title "GnuAsm" + desc "GNU Assembler" + + tag 'gnuasm' + filenames '*.s', '*.S' + mimetypes 'text/x-asm' + + state :root do + mixin :whitespace + + rule %r/[a-z$._?][\w$.?#@~]*:/i, Name::Label + + rule %r/([a-z$._?][\w$.?#@~]*)(\s+)(equ)/i do + groups Name::Constant, Keyword::Declaration, Keyword::Declaration + push :instruction_args + end + rule %r/\.[a-z0-9]+/i, Keyword, :instruction_args + rule %r/(?:res|d)[bwdqt]|times/i, Keyword::Declaration, :instruction_args + rule %r/[a-z$._?][\w$.?#@~]*/i, Name::Function, :instruction_args + + rule %r/[\r\n]+/, Text + end + + state :instruction_args do + rule %r/"(\\\\"|[^"\n])*"|'(\\\\'|[^'\n])*'|`(\\\\`|[^`\n])*`/, Str + rule %r/(?:0x[\da-f]+|$0[\da-f]*|\d+[\da-f]*h)/i, Num::Hex + rule %r/[0-7]+q/i, Num::Oct + rule %r/[01]+b/i, Num::Bin + rule %r/\d+\.e?\d+/i, Num::Float + rule %r/\d+/, Num::Integer + + rule %r/[@$][a-z$._][\w$.?#@~]*/i, Name::Constant + + mixin :punctuation + + rule %r/%[a-z0-9]+/i, Name::Builtin + rule %r/[a-z$._][\w$.?#@~]*/i, Name::Variable + rule %r/[\r\n]+/, Text, :pop! 
+ + mixin :whitespace + end + + state :whitespace do + rule %r/\n/, Text + rule %r/[ \t]+/, Text + rule %r/#.*/, Comment::Single + rule %r/\/\*(.|\n)*?\*\/*/, Comment::Multiline + end + + state :punctuation do + rule %r/[,():\[\]]+/, Punctuation + rule %r/[&|^<>+*\/~=-]+/, Operator + rule %r/\$+/, Keyword::Constant + end + end + end +end diff --git a/spec/lexers/armasm_spec.rb b/spec/lexers/armasm_spec.rb deleted file mode 100644 index 684a5bf242..0000000000 --- a/spec/lexers/armasm_spec.rb +++ /dev/null @@ -1,14 +0,0 @@ -# -*- coding: utf-8 -*- # -# frozen_string_literal: true - -describe Rouge::Lexers::ArmAsm do - let(:subject) { Rouge::Lexers::ArmAsm.new } - - describe 'guessing' do - include Support::Guessing - - it 'guesses by filename' do - assert_guess :filename => 'foo.s' - end - end -end diff --git a/spec/visual/samples/gnuasm b/spec/visual/samples/gnuasm new file mode 100644 index 0000000000..e512585a60 --- /dev/null +++ b/spec/visual/samples/gnuasm @@ -0,0 +1,39 @@ + .global main + .type main, @function + .extern printf + + .text +/* Reads an integer, + doubles it, + and prints it. 
*/ +main: + pushq %rbp + movq %rsp, %rbp + subq $128, %rsp # Reserve 128 bytes for stack + + # Read an integer into -8(%rbp) + movq $scan_format, %rdi + leaq -8(%rbp), %rsi + call scanf + # If invalid, jump to end + cmpq $1, %rax + jne .Lend + + movq -8(%rbp), %rsi + imulq $2, %rsi # Double the input + + # Call 'printf("%ld\n", %rsi)' + movq $print_format, %rdi + call printf + +.Lend: + # Return with status 0 + movq $0, %rax + movq %rbp, %rsp + popq %rbp + ret + +scan_format: + .asciz "%ld" +print_format: + .asciz "%ld\n" From d128a6aeee78d0a40a3d5ce7970615b019219700 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20P=C3=A4rtel?= Date: Thu, 16 Nov 2023 09:25:04 +0200 Subject: [PATCH 2/2] Added disambiguation rule and tests for '*.s', '*.S' --- lib/rouge/guessers/disambiguation.rb | 7 +++++++ spec/lexers/armasm_spec.rb | 14 ++++++++++++++ spec/lexers/gnuasm_spec.rb | 14 ++++++++++++++ 3 files changed, 35 insertions(+) create mode 100644 spec/lexers/armasm_spec.rb create mode 100644 spec/lexers/gnuasm_spec.rb diff --git a/lib/rouge/guessers/disambiguation.rb b/lib/rouge/guessers/disambiguation.rb index 17b064ef23..956777c21a 100644 --- a/lib/rouge/guessers/disambiguation.rb +++ b/lib/rouge/guessers/disambiguation.rb @@ -146,6 +146,13 @@ def match?(filename) next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./) next OpenEdge end + + disambiguate '*.s', '*.S' do + next GnuAsm if matches?(/\s*\.(global|extern|type|text)/) + next GnuAsm if matches?(/%(r|e)(ax|bx|cx|dx|si|di|bp|sp)/) + + ArmAsm + end end end end diff --git a/spec/lexers/armasm_spec.rb b/spec/lexers/armasm_spec.rb new file mode 100644 index 0000000000..a9a6d4b343 --- /dev/null +++ b/spec/lexers/armasm_spec.rb @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::ArmAsm do + let(:subject) { Rouge::Lexers::ArmAsm.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename and source hint' do + assert_guess :filename => 'foo.s', 
:source => 'Func MOVW r0, #RetVal' # no dot-directive and no %-register, so the '*.s' disambiguation falls through to ArmAsm + end + end +end diff --git a/spec/lexers/gnuasm_spec.rb b/spec/lexers/gnuasm_spec.rb new file mode 100644 index 0000000000..55d34079cd --- /dev/null +++ b/spec/lexers/gnuasm_spec.rb @@ -0,0 +1,14 @@ +# -*- coding: utf-8 -*- # +# frozen_string_literal: true + +describe Rouge::Lexers::GnuAsm do + let(:subject) { Rouge::Lexers::GnuAsm.new } + + describe 'guessing' do + include Support::Guessing + + it 'guesses by filename and source hint' do + assert_guess :filename => 'foo.s', :source => 'main: movq %rax, %rbx' # %rax / %rbx match the register pattern of the '*.s' disambiguation, selecting GnuAsm + end + end +end