-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(front): use webworker for markdown parsing (#1733)
* feat(front): move markdown parsing to web worker
* feat: use webworker for markdown parsing
* feat(markdown-worker): implement message buffering and processing
* fix(markdown): import KaTeX CSS locally instead of via CDN
* fix(markdown): make sure messages are serializable
* feat(markdown): make sure links have target blank
* refactor(markdown): improve HTML escaping function formatting
- Loading branch information
Showing
4 changed files
with
313 additions
and
177 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,199 +1,71 @@ | ||
<script lang="ts"> | ||
import type { WebSearchSource } from "$lib/types/WebSearch"; | ||
import katex from "katex"; | ||
import "katex/dist/contrib/mhchem.mjs"; | ||
import DOMPurify from "isomorphic-dompurify"; | ||
import { Marked } from "marked"; | ||
import type { Tokens, TokenizerExtension, RendererExtension } from "marked"; | ||
import { processTokens, processTokensSync, type Token } from "$lib/utils/marked"; | ||
import MarkdownWorker from "$lib/workers/markdownWorker?worker"; | ||
import CodeBlock from "../CodeBlock.svelte"; | ||
import type { IncomingMessage, OutgoingMessage } from "$lib/workers/markdownWorker"; | ||
import { browser } from "$app/environment"; | ||
import DOMPurify from "isomorphic-dompurify"; | ||
/** Inputs accepted by this component. */
interface Props {
  /** Text that is tokenized and rendered by this component. */
  content: string;
  /** Optional web-search sources; defaults to an empty list when omitted. */
  sources?: WebSearchSource[];
}

// Read the component props, falling back to no sources.
let { content, sources = [] }: Props = $props();
/**
 * Token emitted by the block-level KaTeX tokenizer.
 * `raw` is the full matched source (delimiters included); `text` is the
 * trimmed content between the delimiters.
 */
interface katexBlockToken extends Tokens.Generic {
  type: "katexBlock";
  displayMode: true;
  raw: string;
  text: string;
}
/**
 * Token emitted by the inline KaTeX tokenizer.
 * `raw` is the full matched source (delimiters included); `text` is the
 * trimmed content between the delimiters.
 */
interface katexInlineToken extends Tokens.Generic {
  type: "katexInline";
  displayMode: false;
  raw: string;
  text: string;
}
/**
 * Marked extension for display math written as `$$ ... $$` or `\[ ... \]`.
 * The tokenizer captures the trimmed text between the delimiters and the
 * renderer passes it to KaTeX in display mode.
 */
export const katexBlockExtension: TokenizerExtension & RendererExtension = {
  name: "katexBlock",
  level: "block",

  /** Index of the first `$$` or `\[` in `src`, or -1 when there is none. */
  start(src: string): number | undefined {
    return src.search(/(\${2}|\\\[)/);
  },

  /** Builds a block token when `src` begins with one of the two delimiter pairs. */
  tokenizer(src: string): katexBlockToken | undefined {
    // Tried in order: 1) $$ ... $$   2) \[ ... \]
    const delimiterRules = [/^\${2}([\s\S]+?)\${2}/, /^\\\[([\s\S]+?)\\\]/];

    for (const rule of delimiterRules) {
      const found = rule.exec(src);
      if (found) {
        const token: katexBlockToken = {
          type: "katexBlock",
          raw: found[0],
          text: found[1].trim(),
          displayMode: true,
        };
        return token;
      }
    }

    return undefined;
  },

  /** Renders a `katexBlock` token to HTML; any other token type is left alone. */
  renderer(token) {
    if (token.type !== "katexBlock") {
      return undefined;
    }

    // throwOnError is disabled so rendering does not throw on bad input.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
// A worker is created only in the browser and only when the Worker API is
// available; in every other case this is `null`.
const worker = !(browser && window.Worker) ? null : new MarkdownWorker();
/**
 * Marked extension for inline math written as `$...$` or `\(...\)`.
 * The tokenizer captures the trimmed text between the delimiters and the
 * renderer passes it to KaTeX with display mode off.
 */
const katexInlineExtension: TokenizerExtension & RendererExtension = {
  name: "katexInline",
  level: "inline",

  /** Index of the first `$` or `\(` in `src`, or -1 when there is none. */
  start(src: string): number | undefined {
    return src.search(/(\$|\\\()/);
  },

  /** Builds an inline token when `src` begins with one of the two delimiter pairs. */
  tokenizer(src: string): katexInlineToken | undefined {
    // Tried in order: 1) $...$ (no `$` allowed inside)   2) \(...\)
    const delimiterRules = [/^\$([^$]+?)\$/, /^\\\(([\s\S]+?)\\\)/];

    for (const rule of delimiterRules) {
      const found = rule.exec(src);
      if (found) {
        const token: katexInlineToken = {
          type: "katexInline",
          raw: found[0],
          text: found[1].trim(),
          displayMode: false,
        };
        return token;
      }
    }

    return undefined;
  },

  /** Renders a `katexInline` token to HTML; any other token type is left alone. */
  renderer(token) {
    if (token.type !== "katexInline") {
      return undefined;
    }

    // throwOnError is disabled so rendering does not throw on bad input.
    return katex.renderToString(token.text, {
      throwOnError: false,
      displayMode: token.displayMode,
    });
  },
};
/** Replacement entity for each character that is significant in HTML. */
const HTML_ESCAPES: Record<string, string> = {
  "<": "&lt;",
  ">": "&gt;",
  "&": "&amp;",
  "'": "&#39;",
  '"': "&quot;",
};

/**
 * Escapes the characters `<`, `>`, `&`, `"` and `'` so that `content` can be
 * embedded in HTML as literal text.
 *
 * @param content - Text to escape.
 * @returns `content` with each special character replaced by its entity;
 *   all other characters are returned unchanged.
 */
function escapeHTML(content: string): string {
  return content.replace(/[<>&"']/g, (char) => HTML_ESCAPES[char] ?? char);
}
let { content, sources = [] }: Props = $props();

/**
 * Replaces numeric citation markers such as `[1]` with a superscript link to
 * the matching web-search source.
 *
 * Markers are 1-based: `[n]` refers to `webSearchSources[n - 1]`. A marker
 * whose index is 0 or has no matching source is left unchanged.
 *
 * @param md - Text in which to replace citation markers.
 * @param webSearchSources - Sources the markers refer to.
 * @returns `md` with every resolvable marker replaced by ` <sup><a …>n</a></sup>`.
 */
function addInlineCitations(md: string, webSearchSources: WebSearchSource[] = []): string {
  const linkStyle =
    "color: rgb(59, 130, 246); text-decoration: none; hover:text-decoration: underline;";

  return md.replace(/\[(\d+)\]/g, (match: string) => {
    const indices: number[] = (match.match(/\d+/g) || []).map(Number);
    const links: string = indices
      .map((index: number) => {
        if (index === 0) return false;
        const source = webSearchSources[index - 1];
        if (source) {
          return `<a href="${source.link}" target="_blank" rel="noreferrer" style="${linkStyle}">${index}</a>`;
        }
        return "";
      })
      // Drops both `false` (index 0) and "" (no such source).
      .filter(Boolean)
      .join(", ");

    return links ? ` <sup>${links}</sup>` : match;
  });
}

// Initial tokens are computed synchronously from the current props.
let tokens: Token[] = $state(processTokensSync(content, sources));

/**
 * Tokenizes `content`, using the markdown worker when one exists and
 * `processTokens` otherwise.
 *
 * @param content - Text to tokenize.
 * @param sources - Web-search sources forwarded with the content.
 * @returns The tokens produced for `content`.
 * @throws Rejects with an `Error` when the worker replies with an unexpected
 *   message type or reports an error.
 */
async function processContent(content: string, sources: WebSearchSource[]): Promise<Token[]> {
  if (!worker) {
    return processTokens(content, sources);
  }

  return new Promise<Token[]>((resolve, reject) => {
    // NOTE(review): handlers are assigned, not added, so a second call made
    // before the first reply replaces the first call's handlers. The message
    // shape visible here carries no request id to tell replies apart — confirm
    // the worker's behavior before relying on overlapping calls.
    worker.onmessage = (event: MessageEvent<OutgoingMessage>) => {
      if (event.data.type !== "processed") {
        // Reject rather than throw: an exception thrown inside an event
        // handler would leave this promise pending forever.
        reject(new Error("Invalid message type"));
        return;
      }
      resolve(event.data.tokens);
    };

    worker.onerror = (event: ErrorEvent) => {
      reject(new Error(event.message || "Markdown worker error"));
    };

    // The JSON round-trip yields a plain, structured-cloneable copy of the
    // payload before it is posted.
    worker.postMessage(
      JSON.parse(JSON.stringify({ content, sources, type: "process" })) as IncomingMessage
    );
  });
}
/**
 * Marked instance configured with the KaTeX extensions, GFM and line breaks.
 * Rendered HTML gets inline citations added and is then sanitized with
 * DOMPurify.
 */
const marked = new Marked({
  hooks: {
    postprocess: (html) => DOMPurify.sanitize(addInlineCitations(html, sources)),
  },
  extensions: [katexBlockExtension, katexInlineExtension],
  renderer: {
    // Links open in a new tab; a trailing `>` is stripped from the href.
    link: (href, title, text) =>
      `<a href="${href?.replace(/>$/, "")}" target="_blank" rel="noreferrer">${text}</a>`,
    // Raw HTML in the input is escaped instead of being passed through.
    html: (html) => escapeHTML(html),
  },
  gfm: true,
  breaks: true,
});

// Recompute `tokens` whenever `content` or `sources` change.
$effect(() => {
  if (!browser) {
    tokens = processTokensSync(content, sources);
    return;
  }

  // Set by the teardown below so that a result arriving after the effect has
  // re-run (or the component was destroyed) cannot overwrite newer tokens.
  let superseded = false;

  processContent(content, sources).then(
    (result) => {
      if (!superseded) {
        tokens = result;
      }
    },
    (error: unknown) => {
      // Keep the previously rendered tokens and surface the failure.
      console.error("Failed to process markdown content", error);
    }
  );

  return () => {
    superseded = true;
  };
});
// After DOMPurify has sanitized an element's attributes, force every anchor
// to open in a new tab with `rel="noreferrer"`.
// NOTE(review): this statement sits in the component script, so it presumably
// runs once per component instance and registers the hook again each time —
// confirm whether registration should happen once at module level.
DOMPurify.addHook("afterSanitizeAttributes", (node) => {
  if (node.tagName === "A") {
    node.setAttribute("target", "_blank");
    node.setAttribute("rel", "noreferrer");
  }
});
</script> | ||
|
||
{#each marked.lexer(content) as token} | ||
{#if token.type === "code"} | ||
<CodeBlock lang={token.lang} code={token.text} /> | ||
{:else} | ||
{#await marked.parse(token.raw) then parsed} | ||
{#each tokens as token} | ||
{#if token.type === "text"} | ||
{#await token.html then html} | ||
<!-- eslint-disable-next-line svelte/no-at-html-tags --> | ||
{@html parsed} | ||
{@html DOMPurify.sanitize(html)} | ||
{/await} | ||
{:else if token.type === "code"} | ||
<CodeBlock lang={token.lang} code={token.code} /> | ||
{/if} | ||
{/each} |
Oops, something went wrong.