Skip to content

Commit

Permalink
fix: HTML single quotes regex
Browse files Browse the repository at this point in the history
  • Loading branch information
Crissium committed Mar 2, 2024
1 parent 6e0c386 commit b45a03b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 4 deletions.
4 changes: 2 additions & 2 deletions server/app/dicts/mdict/html_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
class HTMLCleaner:
_re_non_printing_chars = re.compile(r'[\x00-\x08\x0b\x0c\x0e-\x1f\x7f-\x9f]')
_re_compact_html_index = re.compile(r'`(\d+)`')
- _re_single_quotes = re.compile(r"\'([^']*)\'")
+ _re_single_quotes = re.compile(r"=\'([^']*)\'(?=[ >])")

def __init__(self, filename: str, dict_name: str, resources_dir: str, styles: str = '') -> None:
self._filename = filename
Expand Down Expand Up @@ -45,7 +45,7 @@ def _expand_compact_html(self, compact_html: str) -> str:
return compact_html

def _convert_single_quotes_to_double(self, html: str) -> str:
- return self._re_single_quotes.sub("\"\\1\"", html)
+ return self._re_single_quotes.sub('="\\1"', html)

def _fix_file_path(self, definition_html: str, file_extension: str) -> str:
extension_position = 0
Expand Down
4 changes: 2 additions & 2 deletions server/app/dicts/stardict/html_cleaner.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class HtmlCleaner:
- remove outer <div class="article"></div> tag if present
"""
_non_printing_chars_pattern = re.compile(r'[\x00-\x1f\x7f-\x9f]')
- _single_quotes_pattern = re.compile(r"\'([^']*)\'")
+ _single_quotes_pattern = re.compile(r"=\'([^']*)\'(?=[ >])")
_cross_ref_pattern = re.compile(r'href="bword://([^"]+)"')

def __init__(self, dictionary_name: str, dictionary_path: str, resource_dir: str) -> None:
Expand Down Expand Up @@ -45,7 +45,7 @@ def _lower_html_tags(self, html: str) -> str:
return html.replace('<IMG', '<img').replace('</IMG', '</img').replace(' SRC=', ' src=').replace('<A HREF=', '<a href=').replace('</A>', '</a>').replace('<A href=', '<a href=')

def _convert_single_quotes_to_double(self, html: str) -> str:
- return self._single_quotes_pattern.sub("\"\\1\"", html)
+ return self._single_quotes_pattern.sub('="\\1"', html)

def _fix_cross_ref(self, html: str) -> str:
return self._cross_ref_pattern.sub(self._cross_ref_replacement, html)
Expand Down

0 comments on commit b45a03b

Please sign in to comment.