Skip to content

Commit

Permalink
WIP try to escape <code> in render see #232
Browse files Browse the repository at this point in the history
  • Loading branch information
vincent-peugnet committed Dec 22, 2024
1 parent 81b028d commit ca15515
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 26 deletions.
49 changes: 28 additions & 21 deletions RENDER.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,25 @@ flowchart TD
0A(Head generation) -->
0rss(RSS feed declaration) --> 3B
1A[[Element]] -->
md?{markdown ?} -->|no| wi
md? --> |yes| md(Markdown to HTML) -->
ec[extract code tags] -->
wi(W inclusion) -->
el(every link*) -->
hi(header ID) -->
1F(URL linker) -->
1G(HTML tag*) --> 2C
2A[[Body]] -->
2B(W inclusion) ------->
2C((Element inclusion)) --> 2D
subgraph "post inclusion parser"
2D(Summary) -->
2rss(RSS detection) -->
2H(Wiki links) -->
2I(Link and media analysis) -->
2pp(check for post render actions)
end
2pp -->
2B(W inclusion) --------->
2C((Element inclusion)) -->
2pp(check for post render actions) -->
2D(Summary inclusion) -->
2rss(RSS detection) -->
2H(Wiki links) -->
lma(Link and media analysis) -->
ic(insert code tags) -->
3B((Head and Body gathering)) -->
3C[[Rendered HTML]] --> 4c
subgraph "post render actions"
Expand All @@ -30,18 +38,10 @@ flowchart TD
4j --> 5[\served web page/]
1A[[Element]] -->
1B(W inclusion) -->
1C(every link*) -->
1D(Markdown) --> 1E
subgraph "post MD parser"
1E(header ID) -->
1F(URL linker) -->
1G(HTML tag*)
end
1G --> 2C
1E -. "send TOC structure" .-> 2D
ec -. code tags content .-> ic
hi -. "send TOC structure" .-> 2D
2rss -. "send rss links" .-> 0rss
2pp -. trigger post render action .-> 4c
```
Expand Down Expand Up @@ -71,3 +71,10 @@ List of W inclusions
1. replace `%CONNECT%` code

The point of doing those inclusions early is to be before __Header ID__ parser. That way, when they are used inside HTML headings, they will generate nicer IDs.


Code tag extraction has to be done before W inclusions, in order to prevent those inclusions from occurring inside code tags.




66 changes: 64 additions & 2 deletions app/class/Servicerender.php
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ abstract class Servicerender
/** @var array<string, ?bool> $urls */
protected $urls = [];

/** @var array<int, string> $codetags */
protected $codetags = [];

protected $sum = [];

/** @var bool If true, internal links target a new tab */
Expand Down Expand Up @@ -130,8 +133,8 @@ protected function gethmtl()
{

$body = $this->bodyconstructor($this->readbody());
$this->postprocessaction = $this->checkpostprocessaction($body);
$parsebody = $this->bodyparser($body);
$this->postprocessaction = $this->checkpostprocessaction($parsebody);
$head = $this->gethead();

$lang = !empty($this->page->lang()) ? $this->page->lang() : Config::lang();
Expand Down Expand Up @@ -477,6 +480,21 @@ protected function htmlparser(string $html): string
$this->sourceparser($audios);
$videos = $dom->getElementsByTagName('video');
$this->sourceparser($videos);

# replace <code> tags contents
if (!empty($this->codetags)) {
$codes = $dom->getElementsByTagName('code');
foreach ($codes as $code) {
assert($code instanceof DOMElement);
$value = $code->nodeValue;
if ($value !== null) {
if (key_exists($value, $this->codetags)) {
$code->nodeValue = $this->codetags[$value];
}
}
}
}

// By passing the documentElement to saveHTML, special chars are not converted to entities
return $dom->saveHTML($dom->documentElement);
}
Expand Down Expand Up @@ -511,6 +529,35 @@ protected function sourceparser(DOMNodeList $sourcables): void
}
}

/**
 * Swap the text of every <code> element for a placeholder and remember the original text.
 *
 * The placeholder is the CRC32 checksum of the element text, written as a decimal string.
 * Each original text is stored in $this->codetags under its placeholder.
 *
 * @param string $html HTML to scan for <code> elements
 *
 * @return string The input, untouched, when it holds no <code> element;
 *                otherwise the HTML re-serialized with placeholders in place of code contents
 */
protected function extractcodetag(string $html): string
{
    $document = new DOMDocument('1.0', 'UTF-8');
    // An XML declaration is prepended so that loadHTML reads the content as UTF-8.
    // The node it produces is removed again right after loading.
    $prefixed = '<?xml encoding="utf-8" ?>' . $html;
    /** @phpstan-ignore-next-line Error supposed to be thrown here but isn't */
    $document->loadHTML($prefixed, LIBXML_NOERROR | LIBXML_HTML_NODEFDTD | LIBXML_HTML_NOIMPLIED);
    $document->removeChild($document->firstChild);

    $codeelements = $document->getElementsByTagName('code');
    if ($codeelements->count() === 0) {
        // Nothing to extract: hand back the original string rather than a re-serialized copy
        return $html;
    }

    foreach ($codeelements as $element) {
        assert($element instanceof DOMElement);
        $original = $element->nodeValue;
        if ($original === null) {
            continue;
        }
        $placeholder = (string) crc32($original);
        $this->codetags[$placeholder] = $original;
        $element->nodeValue = $placeholder;
    }

    // The document node itself (not documentElement) is handed to saveHTML
    return $document->saveHTML($document);
}

/**
* Replace wiki links [[page_id]] with HTML link
*/
Expand Down Expand Up @@ -596,7 +643,22 @@ protected function markdown($text)
// $fortin->header_id_func = function ($header) {
// return preg_replace('/[^\w]/', '', strtolower($header));
// };
$fortin->hard_wrap = Config::markdownhardwrap();
// $fortin->code_block_content_func = function ($code) {
// // $code = trim($code);
// $str = mb_convert_encoding($code , 'UTF-32', 'UTF-8');
// $t = unpack("N*", $str);
// $t = array_map(function($n) { return "&#$n;"; }, $t);
// $t = implode("", $t);
// return $t;
// };
// $fortin->code_span_content_func = function ($code) {
// $code = rtrim($code, ' ');
// $str = mb_convert_encoding($code , 'UTF-32', 'UTF-8');
// $t = unpack("N*", $str);
// $t = array_map(function($n) { return "&#$n;"; }, $t);
// return implode("", $t);
// };
// $fortin->hard_wrap = Config::markdownhardwrap();
$text = $fortin->transform($text);
return $text;
}
Expand Down
12 changes: 9 additions & 3 deletions app/class/Servicerenderv2.php
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,18 @@ protected function bodyconstructor(string $body): string
protected function elementparser(Elementv2 $element)
{
$content = $element->content();

if ($element->markdown()) {
$content = $this->markdown($content);
// $content = $this->extractcodetag($content);
}

$content = $this->winclusions($content);

if ($element->everylink() > 0) {
$content = $this->everylink($content, $element->everylink());
}
if ($element->markdown()) {
$content = $this->markdown($content);
}

Check failure on line 79 in app/class/Servicerenderv2.php

View workflow job for this annotation

GitHub Actions / lint php

Whitespace found at end of line
if ($element->headerid()) {
$content = $this->headerid(
$content,
Expand All @@ -80,6 +85,7 @@ protected function elementparser(Elementv2 $element)
$element->headeranchor(),
);
}

Check failure on line 88 in app/class/Servicerenderv2.php

View workflow job for this annotation

GitHub Actions / lint php

Whitespace found at end of line
if ($element->urllinker()) {
$content = $this->autourl($content);
}
Expand Down

0 comments on commit ca15515

Please sign in to comment.