fix: isolate styles of different dictionaries

Crissium · Mar 3, 2024 · 45aa053 · 45aa053
1 parent b45a03b
commit 45aa053
Show file tree

Hide file tree

Showing 5 changed files with 115 additions and 40 deletions.
diff --git a/README.md b/README.md
@@ -39,10 +39,7 @@ The dark theme is not built in, but rendered with the [Dark Reader Firefox exten
 ### Server-side
 
 - [ ] ~~Add support for Babylon BGL glossary format~~[^5]
-- [ ] Inline styles to prevent them from being applied to the whole page (The commented-out implementation in [`server/app/dicts/mdict/html_cleaner.py`](/server/app/dicts/mdict/html_cleaner.py) breaks richly-formatted dictionaries.)[^2]
 - [ ] Transliteration for the Cyrillic[^3], Greek, Arabic, Hebrew and Devanagari scripts (done: Greek, one-way Arabic, though only Arabic itself is supported at the moment; if you'd like to help with Farsi, Urdu, etc., please open an issue)
-- [X] Add the ability to set sources for automatic indexing, i.e. dictionaries put into the specified directories will be automatically added
-- [X] Recursive source scanning
 - [ ] Lock list operations to prepare for [no-GIL python](https://peps.python.org/pep-0703/)
 
 ### Client-side
@@ -134,13 +131,14 @@ This project uses or has adapted code from the following projects:
 
 | **Name** | **Developer** | **Licence** |
 |:---:|:---:|:---:|
+| [GoldenDict](https://github.com/goldendict/goldendict) | Konstantin Isakov | GPLv3 |
 | [mdict-analysis](https://bitbucket.org/xwang/mdict-analysis/src/master/) | Xiaoqiang Wang | GPLv3 |
 | [mdict-query](https://github.com/mmjang/mdict-query) | mmjang | No licence |
 | [python-stardict](https://github.com/pysuxing/python-stardict) | Su Xing | GPLv3 |
 | dictionary-db (together with the $n$-gram method) | Jean-François Dockes | GPL 2.1 |
 | [pyglossary](https://github.com/ilius/pyglossary) | Saeed Rasooli | GPLv3 |
 
-I would also express my gratitude to Jiang Qian for his suggestions, encouragement and great help.
+I would also like to express my gratitude to my long-time 'alpha-tester' Jiang Qian, without whom this project could never have become what it is today.
 
 ## Similar projects
 
@@ -152,9 +150,6 @@ I would also express my gratitude to Jiang Qian for his suggestions, encourageme
 
 ---
 
-
-[^2]: The use of a custom styling manager such as Dark Reader is recommended until I fix this, as styles for different dictionaries interfere with each other. Or better, if you know CSS, you could just edit the dictionaries' stylesheets to make them less intrusive and individualistic.
-
 [^3]: A Russian-speaking friend told me that it is unusual to type Russian on an American keyboard, so whether this feature is useful is open to doubt.
 
 [^4]: I have come up with a name: _Kilvert_ (yeah, after the Welsh priest for its close resemblance to _SilverDict_, and the initial letter, of course, stands for KDE). (I'm on Xfce by the way.)

diff --git a/server/app/dicts/mdict/html_cleaner.py b/server/app/dicts/mdict/html_cleaner.py
@@ -2,16 +2,20 @@
 import shutil
 from pathlib import Path
 import re
-# import css_inline
 
 
 class HTMLCleaner:
 	_re_non_printing_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]')
 	_re_compact_html_index = re.compile(r'`(\d+)`')
 	_re_single_quotes = re.compile(r"=\'([^']*)\'(?=[ >])")
+	_re_css_comments = re.compile(r'\/\*(?:.(?!\*\/))*.?\*\/', re.DOTALL)
+	_re_css_selectors = re.compile(r'[ \*\>\+\,;:\[\{\]]')
+	_re_css_separators = re.compile(r'[,;{]')
+	_ISOLATED_MARKER = '/* Isolated */\n'
 
 	def __init__(self, filename: str, dict_name: str, resources_dir: str, styles: str = '') -> None:
 		self._filename = filename
+		self._id = f'#{dict_name}'
 		self._resources_dir = resources_dir
 		self._href_root_dir = '/api/cache/' + dict_name + '/'
 		self._lookup_url_root = '/api/lookup/' + dict_name + '/'
@@ -68,33 +72,109 @@ def _fix_file_path(self, definition_html: str, file_extension: str) -> str:
 					self._href_root_dir + definition_html[filename_position:]
 			extension_position += len(file_extension)
 		return definition_html
+
+	def _isolate_css(self) -> None:
+		"""
+		Isolate different dictionaries' styles by prepending the article block's ID to each selector.
+		"""
+		for filename in os.listdir(self._resources_dir):
+			if filename.endswith('.css') or filename.endswith('.CSS'):
+				full_filename = os.path.join(self._resources_dir, filename)
+				with open(full_filename) as f:
+					css = f.read()
+
+				if css.startswith(self._ISOLATED_MARKER):
+					break
+
+				css = self._re_css_comments.sub('', css)
 
-	# def _inline_styles(self, html_content: str) -> str: # CSS path(s) is inside the HTML file
-	# 	# Find all CSS references
-	# 	# regex won't work. Maybe it's simply because that I haven't mastered the dark art.
-	# 	css_references = []
-	# 	css_extension_position = 0
-	# 	while (css_extension_position := html_content.find('.css"', css_extension_position)) != -1:
-	# 		css_filename_position = html_content.rfind('"', 0, css_extension_position) + 1
-	# 		css_filename = html_content[css_filename_position:css_extension_position] + '.css'
-	# 		css_references.append(css_filename)
-	# 		# Remove the CSS reference
-	# 		link_tag_start_position = html_content.rfind('<link', 0, css_filename_position)
-	# 		link_tag_end_position = html_content.find('>', link_tag_start_position) + 1
-	# 		html_content = html_content[:link_tag_start_position] + html_content[link_tag_end_position:]
-	# 		css_extension_position = link_tag_start_position
-
-	# 	for css in css_references:
-	# 		# Read the CSS file
-	# 		css_path = os.path.join(self._resources_dir, css.split('/')[-1])
-	# 		with open(css_path) as css_file:
-	# 			css_content = css_file.read()
-
-	# 		# Inline the CSS
-	# 		inliner = css_inline.CSSInliner(load_remote_stylesheets=False, extra_css=css_content)
-	# 		html_content = inliner.inline(html_content)
-
-	# 	return html_content
+				current_pos = 0
+				buf = []
+
+				while current_pos < len(css):
+					ch = css[current_pos]
+
+					if ch == '@':
+						n = current_pos
+						if css[current_pos:current_pos+7].lower() == '@import' or \
+							css[current_pos:current_pos+10].lower() == '@font-face' or \
+							css[current_pos:current_pos+10].lower() == '@namespace' or \
+							css[current_pos:current_pos+8].lower() == '@charset':
+							# Copy rule as is.
+							n = css.find(';', current_pos)
+							n2 = css.find('{', current_pos)
+							if n2 > 0 and n > n2:
+								n = n2 - 1
+						elif css[current_pos:current_pos+6].lower() == '@media':
+							# Copy up to '{' and continue parsing inside.
+							n = css.find('{', current_pos)
+						elif css[current_pos:current_pos+5].lower() == '@page':
+							# Discard
+							n = css.find('}', current_pos)
+							if n < 0:
+								break
+							current_pos = n + 1
+							continue
+						else:
+							# Copy rule as is.
+							n = css.find('}', current_pos)
+
+						if n < 0:
+							break
+
+						buf.append(css[current_pos:n+1])
+						current_pos = n + 1
+					elif ch == '{':
+						n = css.find('}', current_pos)
+						if n < 0:
+							break
+
+						buf.append(css[current_pos:n+1])
+						current_pos = n + 1
+					elif ch.isalpha() or ch in ('.', '#', '*', '\\', ':'):
+						if ch.isalpha() or ch == '*':
+							# Check for namespace prefix
+							for i in range(current_pos, len(css)):
+								ch1 = css[i]
+								if not ch1.isalnum() and \
+									not ch1 == '_' and \
+									not ch1 == '-' and \
+									not (ch1 == '*' and i == current_pos):
+									if ch1 == '|':
+										buf.append(css[current_pos:i+1])
+										current_pos = i + 1
+								break
+							if ch1 == '|':
+								continue
+
+						n = self._re_css_selectors.search(css, current_pos + 1)
+						if not n:
+							buf.append(css[current_pos:])
+							break
+						else:
+							n = n.start()
+							selector = css[current_pos:n]
+							trimmed = selector.strip().lower()
+							if trimmed == 'html' or trimmed == 'body':
+								buf.append(f'{selector} {self._id} ')
+								current_pos += 4
+							else:
+								buf.append(f'{self._id} ')
+
+						n = self._re_css_separators.search(css, current_pos)
+						if not n:
+							buf.append(css[current_pos:])
+							break
+						else:
+							buf.append(css[current_pos:n.start()])
+							current_pos = n.start()
+					else:
+						buf.append(ch)
+						current_pos += 1
+
+				new_css = f'{self._ISOLATED_MARKER}{"".join(buf)}'
+				with open(full_filename, 'w') as f:
+					f.write(new_css)
 
 	def _fix_internal_href(self, definition_html: str) -> str:
 		# That is, links like entry://#81305a5747ca42b28f2b50de9b762963_nav2
@@ -172,10 +252,10 @@ def clean(self, definition_html: str) -> str:
 			definition_html = self._expand_compact_html(definition_html)
 		definition_html = self._convert_single_quotes_to_double(definition_html)
 		definition_html = self._fix_file_path(definition_html, '.css')
+		self._isolate_css()
 		definition_html = self._fix_file_path(definition_html, '.js')
 		definition_html = self._fix_internal_href(definition_html)
 		definition_html = self._fix_entry_cross_ref(definition_html)
 		definition_html = self._fix_sound_link(definition_html)
 		definition_html = self._fix_img_src(definition_html)
-		# definition_html = self._inline_styles(definition_html)
 		return definition_html
diff --git a/server/app/templates/articles.html b/server/app/templates/articles.html
@@ -1,6 +1,6 @@
 {% for article in articles %}
-	<div class="article-block">
-		<h2 class="dictionary-headings" id="{{article[0]}}">
+	<div class="article-block" id="{{article[0]}}">
+		<h2 class="dictionary-headings">
 			{{article[1]}}
 		</h2>
 		{{article[2] | safe}}

diff --git a/server/app/templates/articles_standalone.html b/server/app/templates/articles_standalone.html
@@ -38,8 +38,8 @@
 
 <body>
 	{% for article in articles %}
-	<div class="article-block">
-		<h2 class="dictionary-headings" id="{{article[0]}}">
+	<div class="article-block" id="{{article[0]}}">
+		<h2 class="dictionary-headings">
 			{{article[1]}}
 		</h2>
 		{{article[2] | safe}}

diff --git a/server/updater.py b/server/updater.py
@@ -15,7 +15,7 @@
 project_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 windows_save_path = os.path.join(os.path.dirname(project_directory), 'SilverDict-windows.zip')
 unix_save_path = os.path.join(project_directory, 'SilverDict.zip')
-current_version = 'v1.1.1'
+current_version = 'v1.1.2'
 
 
 def _get_latest_version_and_release_note() -> tuple[str, str]: