Skip to content

Commit

Permalink
fix: isolate styles of different dictionaries
Browse files Browse the repository at this point in the history
  • Loading branch information
Crissium committed Mar 3, 2024
1 parent b45a03b commit 45aa053
Show file tree
Hide file tree
Showing 5 changed files with 115 additions and 40 deletions.
9 changes: 2 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,7 @@ The dark theme is not built in, but rendered with the [Dark Reader Firefox exten
### Server-side

- [ ] ~~Add support for Babylon BGL glossary format~~[^5]
- [ ] Inline styles to prevent them from being applied to the whole page (The commented-out implementation in [`server/app/dicts/mdict/html_cleaner.py`](/server/app/dicts/mdict/html_cleaner.py) breaks richly-formatted dictionaries.)[^2]
- [ ] Transliteration for the Cyrillic[^3], Greek, Arabic, Hebrew and Devanagari scripts (done: Greek and one-way Arabic; only Arabic itself is supported at the moment, so if you'd like to help with Farsi, Urdu, etc., please open an issue)
- [X] Add the ability to set sources for automatic indexing, i.e. dictionaries put into the specified directories will be automatically added
- [X] Recursive source scanning
- [ ] Lock list operations to prepare for [no-GIL python](https://peps.python.org/pep-0703/)

### Client-side
Expand Down Expand Up @@ -134,13 +131,14 @@ This project uses or has adapted code from the following projects:

| **Name** | **Developer** | **Licence** |
|:---:|:---:|:---:|
| [GoldenDict](https://github.com/goldendict/goldendict) | Konstantin Isakov | GPLv3 |
| [mdict-analysis](https://bitbucket.org/xwang/mdict-analysis/src/master/) | Xiaoqiang Wang | GPLv3 |
| [mdict-query](https://github.com/mmjang/mdict-query) | mmjang | No licence |
| [python-stardict](https://github.com/pysuxing/python-stardict) | Su Xing | GPLv3 |
| dictionary-db (together with the $n$-gram method) | Jean-François Dockes | GPL 2.1 |
| [pyglossary](https://github.com/ilius/pyglossary) | Saeed Rasooli | GPLv3 |

I would also express my gratitude to Jiang Qian for his suggestions, encouragement and great help.
I would also like to express my gratitude to my long-time 'alpha-tester' Jiang Qian, without whom this project could never have become what it is today.

## Similar projects

Expand All @@ -152,9 +150,6 @@ I would also express my gratitude to Jiang Qian for his suggestions, encourageme

---


[^2]: The use of a custom styling manager such as Dark Reader is recommended until I fix this, as styles for different dictionaries interfere with each other. Or better, if you know CSS, you could just edit the dictionaries' stylesheets to make them less intrusive and individualistic.

[^3]: A Russian-speaking friend told me that it is unusual to type Russian on an American keyboard, so whether this feature is useful is open to doubt.

[^4]: I have come up with a name: _Kilvert_ (yeah, after the Welsh priest for its close resemblance to _SilverDict_, and the initial letter, of course, stands for KDE). (I'm on Xfce by the way.)
Expand Down
136 changes: 108 additions & 28 deletions server/app/dicts/mdict/html_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,20 @@
import shutil
from pathlib import Path
import re
# import css_inline


class HTMLCleaner:
_re_non_printing_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]')
_re_compact_html_index = re.compile(r'`(\d+)`')
_re_single_quotes = re.compile(r"=\'([^']*)\'(?=[ >])")
_re_css_comments = re.compile(r'\/\*(?:.(?!\*\/))*.?\*\/', re.DOTALL)
_re_css_selectors = re.compile(r'[ \*\>\+\,;:\[\{\]]')
_re_css_separators = re.compile(r'[,;{]')
_ISOLATED_MARKER = '/* Isolated */\n'

def __init__(self, filename: str, dict_name: str, resources_dir: str, styles: str = '') -> None:
self._filename = filename
self._id = f'#{dict_name}'
self._resources_dir = resources_dir
self._href_root_dir = '/api/cache/' + dict_name + '/'
self._lookup_url_root = '/api/lookup/' + dict_name + '/'
Expand Down Expand Up @@ -68,33 +72,109 @@ def _fix_file_path(self, definition_html: str, file_extension: str) -> str:
self._href_root_dir + definition_html[filename_position:]
extension_position += len(file_extension)
return definition_html

def _isolate_css(self) -> None:
"""
Isolate different dictionaries' styles by prepending the article block's ID to each selector.
"""
for filename in os.listdir(self._resources_dir):
if filename.endswith('.css') or filename.endswith('.CSS'):
full_filename = os.path.join(self._resources_dir, filename)
with open(full_filename) as f:
css = f.read()

if css.startswith(self._ISOLATED_MARKER):
break

css = self._re_css_comments.sub('', css)

# def _inline_styles(self, html_content: str) -> str: # CSS path(s) is inside the HTML file
# # Find all CSS references
# # regex won't work. Maybe it's simply because that I haven't mastered the dark art.
# css_references = []
# css_extension_position = 0
# while (css_extension_position := html_content.find('.css"', css_extension_position)) != -1:
# css_filename_position = html_content.rfind('"', 0, css_extension_position) + 1
# css_filename = html_content[css_filename_position:css_extension_position] + '.css'
# css_references.append(css_filename)
# # Remove the CSS reference
# link_tag_start_position = html_content.rfind('<link', 0, css_filename_position)
# link_tag_end_position = html_content.find('>', link_tag_start_position) + 1
# html_content = html_content[:link_tag_start_position] + html_content[link_tag_end_position:]
# css_extension_position = link_tag_start_position

# for css in css_references:
# # Read the CSS file
# css_path = os.path.join(self._resources_dir, css.split('/')[-1])
# with open(css_path) as css_file:
# css_content = css_file.read()

# # Inline the CSS
# inliner = css_inline.CSSInliner(load_remote_stylesheets=False, extra_css=css_content)
# html_content = inliner.inline(html_content)

# return html_content
current_pos = 0
buf = []

while current_pos < len(css):
ch = css[current_pos]

if ch == '@':
n = current_pos
if css[current_pos:current_pos+7].lower() == '@import' or \
css[current_pos:current_pos+10].lower() == '@font-face' or \
css[current_pos:current_pos+10].lower() == '@namespace' or \
css[current_pos:current_pos+8].lower() == '@charset':
# Copy rule as is.
n = css.find(';', current_pos)
n2 = css.find('{', current_pos)
if n2 > 0 and n > n2:
n = n2 - 1
elif css[current_pos:current_pos+6].lower() == '@media':
# Copy up to '{' and continue parsing inside.
n = css.find('{', current_pos)
elif css[current_pos:current_pos+5].lower() == '@page':
# Discard
n = css.find('}', current_pos)
if n < 0:
break
current_pos = n + 1
continue
else:
# Copy rule as is.
n = css.find('}', current_pos)

if n < 0:
break

buf.append(css[current_pos:n+1])
current_pos = n + 1
elif ch == '{':
n = css.find('}', current_pos)
if n < 0:
break

buf.append(css[current_pos:n+1])
current_pos = n + 1
elif ch.isalpha() or ch in ('.', '#', '*', '\\', ':'):
if ch.isalpha() or ch == '*':
# Check for namespace prefix
for i in range(current_pos, len(css)):
ch1 = css[i]
if not ch1.isalnum() and \
not ch1 == '_' and \
not ch1 == '-' and \
not (ch1 == '*' and i == current_pos):
if ch1 == '|':
buf.append(css[current_pos:i+1])
current_pos = i + 1
break
if ch1 == '|':
continue

n = self._re_css_selectors.search(css, current_pos + 1)
if not n:
buf.append(css[current_pos:])
break
else:
n = n.start()
selector = css[current_pos:n]
trimmed = selector.strip().lower()
if trimmed == 'html' or trimmed == 'body':
buf.append(f'{selector} {self._id} ')
current_pos += 4
else:
buf.append(f'{self._id} ')

n = self._re_css_separators.search(css, current_pos)
if not n:
buf.append(css[current_pos:])
break
else:
buf.append(css[current_pos:n.start()])
current_pos = n.start()
else:
buf.append(ch)
current_pos += 1

new_css = f'{self._ISOLATED_MARKER}{"".join(buf)}'
with open(full_filename, 'w') as f:
f.write(new_css)

def _fix_internal_href(self, definition_html: str) -> str:
# That is, links like entry://#81305a5747ca42b28f2b50de9b762963_nav2
Expand Down Expand Up @@ -172,10 +252,10 @@ def clean(self, definition_html: str) -> str:
definition_html = self._expand_compact_html(definition_html)
definition_html = self._convert_single_quotes_to_double(definition_html)
definition_html = self._fix_file_path(definition_html, '.css')
self._isolate_css()
definition_html = self._fix_file_path(definition_html, '.js')
definition_html = self._fix_internal_href(definition_html)
definition_html = self._fix_entry_cross_ref(definition_html)
definition_html = self._fix_sound_link(definition_html)
definition_html = self._fix_img_src(definition_html)
# definition_html = self._inline_styles(definition_html)
return definition_html
4 changes: 2 additions & 2 deletions server/app/templates/articles.html
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{% for article in articles %}
<div class="article-block">
<h2 class="dictionary-headings" id="{{article[0]}}">
<div class="article-block" id="{{article[0]}}">
<h2 class="dictionary-headings">
{{article[1]}}
</h2>
{{article[2] | safe}}
Expand Down
4 changes: 2 additions & 2 deletions server/app/templates/articles_standalone.html
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@

<body>
{% for article in articles %}
<div class="article-block">
<h2 class="dictionary-headings" id="{{article[0]}}">
<div class="article-block" id="{{article[0]}}">
<h2 class="dictionary-headings">
{{article[1]}}
</h2>
{{article[2] | safe}}
Expand Down
2 changes: 1 addition & 1 deletion server/updater.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
project_directory = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
windows_save_path = os.path.join(os.path.dirname(project_directory), 'SilverDict-windows.zip')
unix_save_path = os.path.join(project_directory, 'SilverDict.zip')
current_version = 'v1.1.1'
current_version = 'v1.1.2'


def _get_latest_version_and_release_note() -> tuple[str, str]:
Expand Down

0 comments on commit 45aa053

Please sign in to comment.