From 2d96f9f13d66d3569dffd1f8f5f2eb911ab022db Mon Sep 17 00:00:00 2001 From: Ross Kaffenberger Date: Sat, 4 May 2024 14:48:50 -0400 Subject: [PATCH] Adopt commonmarker for markdown parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces markly with commonmarker Markly is a key piece in the original implementation of the ERB-enhanced markdown rendering in Joy of Rails—adapted from phlex-markdown and built on phlex-ruby. * https://github.com/ioquatix/markly * https://github.com/joeldrapper/phlex-markdown According to the markly README, it was originally created as a fork of commonmarker to get access to the parsed markdown Abstract Syntax Tree (AST). The fork is from before commonmarker switched from being a wrapper of cmark-gfm (implemented in C, gfm = GitHub-flavored markdown) to comrak (Rust port of cmark-gfm), also maintained by awesome maintainers. * https://github.com/github/cmark-gfm * https://github.com/kivikakk/comrak It's been nearly a year since markly was released. Just a few days ago, commonmarker introduced access to its AST meaning it supplies the key feature set to make it compatible for markdown rendering enhancements in Joy of rails. I checked it out this morning and reported an issue with code fence parsing early today that has already been fixed as of this afternoon—a Saturday no less—in commonmarker v1.1.2. * Issue: https://github.com/gjtorikian/commonmarker/issues/289 * Fix: https://github.com/gjtorikian/commonmarker/pull/290 * Release: https://github.com/gjtorikian/commonmarker/releases/tag/v1.1.2 Performance benchmarks are reported on commonmarker's README. According to these benchmarks, commonmarker outperforms Kramdown by over 25x. markly outperforms commonmarker by roughly 3x. * https://github.com/gjtorikian/commonmarker?tab=readme-ov-file#benchmarks That said, this changeset provides the minimum changes to switch from markly to commonmarker for markdown parsing. Why change? The libs have similar functionality with regards to what Joy of Rails needs at this time. It feels like with commonmarker it is a little easier for me to figure out how to customize the configuration options. Markly's performance outshines commonmarker but not so much for me to say the tradeoff is painful. The energy in the commonmarker and comrak projects is encouraging. I also believe in markly's maintainer, who is awesome, and a terrific advocate for improvements in the Ruby ecosystem. Ultimately, my choice to switch is more about my perception of the momentum of the respective projects. I have no issues with reconsidering this decision at a later time assuming switching back is comparably straightforward. --- Gemfile | 2 +- Gemfile.lock | 5 ++- app/views/components/markdown/application.rb | 34 ++++++++--------- app/views/components/markdown/base.rb | 39 +++++++++++++------- app/views/components/markdown/erb.rb | 26 ++++++------- app/views/components/markdown/toc.rb | 2 +- spec/views/components/markdown/erb_spec.rb | 26 +++++++++---- 7 files changed, 79 insertions(+), 55 deletions(-) diff --git a/Gemfile b/Gemfile index 1ed6cfa4..8894093d 100644 --- a/Gemfile +++ b/Gemfile @@ -32,7 +32,7 @@ gem "inline_svg" # Embed SVGs in Rails views and style them with CSS [https://gi gem "rouge", group: [:default, :wasm] # Pure Ruby syntaix highlighter [https://github.com/rouge-ruby/rouge gem "sitepress-rails", group: [:default, :wasm] # Static site generator for Rails [https://sitepress.cc/getting-started/rails] gem "phlex-rails", group: [:default, :wasm] # An object-oriented alternative to ActionView for Ruby on Rails. [https://github.com/phlex-ruby/phlex-rails] -gem "markly" +gem "commonmarker", require: false gem "bootsnap", require: false # Reduces boot times through caching; required in config/boot.rb [https://github.com/Shopify/bootsnap] diff --git a/Gemfile.lock b/Gemfile.lock index 9cbcaf32..6665885a 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -124,6 +124,8 @@ GEM childprocess (5.0.0) code_analyzer (0.5.5) sexp_processor + commonmarker (1.1.2-arm64-darwin) + commonmarker (1.1.2-x86_64-linux) concurrent-ruby (1.2.3) connection_pool (2.4.1) crack (1.0.0) @@ -264,7 +266,6 @@ GEM mail_interceptor (0.0.7) activesupport marcel (1.0.4) - markly (0.10.0) matrix (0.4.2) mime-types (3.5.2) mime-types-data (~> 3.2015) @@ -517,6 +518,7 @@ DEPENDENCIES brakeman bundle-audit capybara + commonmarker cuprite! debug device_detector @@ -534,7 +536,6 @@ DEPENDENCIES letter_opener litestream mail_interceptor - markly mission_control-jobs phlex-rails puma (>= 5.0) diff --git a/app/views/components/markdown/application.rb b/app/views/components/markdown/application.rb index e5368e47..c035fd2e 100644 --- a/app/views/components/markdown/application.rb +++ b/app/views/components/markdown/application.rb @@ -1,26 +1,19 @@ class Markdown::Application < Markdown::Base - class Handler - class << self - def call(template, content) - Markdown::Application.new(content).call - end - end + # Options for CommonMarker + def default_commonmarker_options + { + render: { + unsafe: true + } + } end def visit(node) return if node.nil? case node.type - in :header - header(node.header_level) do - visit_children(node) - end - in :link - link(node.url, node.title) { visit_children(node) } - in :inline_html - unsafe_raw(node.string_content) - in :html - unsafe_raw(node.string_content) + in :html_block + unsafe_raw(node.to_html(options: @options)) else super end @@ -40,7 +33,6 @@ def header(header_level, &) def code_block(source, metadata = "", **attributes) language, json_attributes = parse_code_block_metadata(metadata) - Rails.logger.debug("CODE_BLOCK: #{json_attributes.inspect}") render CodeBlock.new(source, language: language, **json_attributes, **attributes) end @@ -90,4 +82,12 @@ def anchor_svg SVG end + + class Handler + class << self + def call(template, content) + Markdown::Application.new(content).call + end + end + end end diff --git a/app/views/components/markdown/base.rb b/app/views/components/markdown/base.rb index 6bcca4c4..261cd8f4 100644 --- a/app/views/components/markdown/base.rb +++ b/app/views/components/markdown/base.rb @@ -1,22 +1,27 @@ # frozen_string_literal: true require "phlex" -require "markly" +require "commonmarker" class Markdown::Base < Phlex::HTML - def initialize(content, flags: Markly::DEFAULT) + def initialize(content, **options) @content = content - @flags = flags + @options = default_commonmarker_options.merge(options) end - def template + def view_template visit(doc) end - private + protected def doc - Markly.parse(@content, flags: @flags) + Commonmarker.parse(@content, options: @options) + end + + # Options for CommonMarker + def default_commonmarker_options + {} end def visit(node) @@ -30,7 +35,7 @@ def visit(node) visit_children(node) in :text plain(node.string_content) - in :header + in :heading case node.header_level in 1 then h1 { visit_children(node) } in 2 then h2 { visit_children(node) } @@ -48,7 +53,7 @@ def visit(node) p { visit_children(node) } end in :link - a(href: node.url, title: node.title) { visit_children(node) } + link(node.url, node.title) { visit_children(node) } in :image img( src: node.url, @@ -61,10 +66,10 @@ def visit(node) strong { visit_children(node) } in :list case node.list_type - in :ordered_list then ol { visit_children(node) } - in :bullet_list then ul { visit_children(node) } + in :ordered then ol { visit_children(node) } + in :bullet then ul { visit_children(node) } end - in :list_item + in :item li { visit_children(node) } in :code inline_code do |**attributes| @@ -78,10 +83,12 @@ def visit(node) end end end - in :hrule + in :thematic_break hr - in :blockquote + in :block_quote blockquote { visit_children(node) } + in :html_block + # This is a raw HTML block, so we skip here in safe mode end end @@ -93,6 +100,12 @@ def code_block(code, language, **attributes) yield(**attributes) end + def link(url, title, **attrs, &) + a(href: url, title: title, &) + end + + private + def visit_children(node) node.each { |c| visit(c) } end diff --git a/app/views/components/markdown/erb.rb b/app/views/components/markdown/erb.rb index 16a23ca0..1d93e4a2 100644 --- a/app/views/components/markdown/erb.rb +++ b/app/views/components/markdown/erb.rb @@ -2,19 +2,6 @@ # you trust the source of your markdown and that its not user input. class Markdown::Erb < Markdown::Application - class Handler - class << self - def call(template, content) - content = Markdown::Erb.new(content, flags: Markly::UNSAFE).call - erb.call(template, content) - end - - def erb - @erb ||= ActionView::Template.registered_template_handler(:erb) - end - end - end - ERB_TAGS = %r{s*<%.*?%>} ERB_TAGS_START = %r{\A<%.*?%>} @@ -42,4 +29,17 @@ def visit(node) super end end + + class Handler + class << self + def call(template, content) + content = Markdown::Erb.new(content).call + erb.call(template, content) + end + + def erb + @erb ||= ActionView::Template.registered_template_handler(:erb) + end + end + end end diff --git a/app/views/components/markdown/toc.rb b/app/views/components/markdown/toc.rb index 108ec0aa..58b00dc3 100644 --- a/app/views/components/markdown/toc.rb +++ b/app/views/components/markdown/toc.rb @@ -30,7 +30,7 @@ def visit(node) case node.type # collect header nodes in a 2-level hierarchy - in :header + in :heading if @tree.empty? || node.header_level <= 2 @tree << [node, []] elsif node.header_level > 2 diff --git a/spec/views/components/markdown/erb_spec.rb b/spec/views/components/markdown/erb_spec.rb index 4b628608..a35d5b2e 100644 --- a/spec/views/components/markdown/erb_spec.rb +++ b/spec/views/components/markdown/erb_spec.rb @@ -53,21 +53,21 @@ def render(content, &block) end it "handles multiline fenced with multiple erbs" do - html = <<~HTML + md = <<~MD ``` <%= 1 + 1 %> <%= 2 + 2 %> ``` - HTML - processed_html = <<~HTML + MD + html = <<~HTML <%= 1 + 1 %> <%= 2 + 2 %> HTML - expect(render(html)).to eq(processed_html) + expect(render(md)).to eq(html) end it "handles multiple fences and unfenced areas" do - given_html = <<~HTML + md = <<~MD <%= 1 + 1 %> ``` <%= 2 + 2 %> @@ -77,13 +77,23 @@ def render(content, &block) <%= 4 + 4 %> ``` <%= 5 + 5 %> - HTML - processed_html = <<~HTML.strip + MD + html = <<~HTML.strip <%= 1 + 1 %><%= 2 + 2 %> <%= 3 + 3 %><%= 4 + 4 %> <%= 5 + 5 %> HTML - expect(render(given_html)).to eq(processed_html) + expect(render(md)).to eq(html) + end + + it "renders arbitrary html" do + md = <<~MD +
Hello
+ MD + html = <<~HTML +
Hello
+ HTML + expect(render(md)).to eq(html) end end end