Skip to content

Commit

Permalink
improve dict
Browse files Browse the repository at this point in the history
  • Loading branch information
cdhigh committed Jun 19, 2024
1 parent b07e02e commit 907ae89
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 17 deletions.
13 changes: 5 additions & 8 deletions application/lib/dictionary/mdict/mdict.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,23 +139,20 @@ def post_process(self, content):
tag.name = 'div'

#删除多媒体资源和脚本
for tag in list(soup.find_all(['img', 'script', 'base', 'iframe', 'canvas', 'embed', 'source',
for tag in list(soup.find_all(['head', 'img', 'script', 'base', 'iframe', 'canvas', 'embed', 'source',
'command', 'datalist', 'video', 'audio', 'noscript', 'meta', 'button'])):
tag.extract()

self.adjust_css(soup)
self.inline_css(soup)
#self.remove_empty_tags(soup)

tag = soup.head
if tag:
tag.extract()

#mdict质量良莠不齐,有些词典在html/body外写释义
#所以不能直接提取body内容,直接修改为div简单粗暴也有效
for tag in (soup.html, soup.body):
#所以不能直接提取body内容
for name in ('html', 'body'):
tag = soup.find(name)
if tag:
tag.name = 'div'
tag.unwrap()

return str(soup)

Expand Down
2 changes: 1 addition & 1 deletion application/static/reader.css
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ body::-webkit-scrollbar-thumb {
margin-right: auto;
margin-top: -200px;
background-color: white;
border: 1px solid #ccc;
border: 1px solid #302e2e;;
border-radius: 20px;
box-shadow: 0px 0px 3px #aaa;
display: none;
Expand Down
4 changes: 2 additions & 2 deletions application/static/reader.js
Original file line number Diff line number Diff line change
Expand Up @@ -686,7 +686,7 @@ function closeDictDialog(event) {
event.stopPropagation();
event.preventDefault();
var href = target.getAttribute('href') || '';
if (href.startsWith('https://kindleear/entry/')) {
if (href.indexOf('https://kindleear/entry/') == 0) {
var word = href.substring(24);
if (word) {
translateWord(word);
Expand Down Expand Up @@ -991,7 +991,7 @@ function iframeLoadEvent(evt) {
}
g_dictMode = false;
document.getElementById('corner-dict-hint').style.display = 'none';
} else if (dictDialog && dictDialog.style.display != 'none') { //关闭查词窗口
} else if (dictDialog && dictDialog.style.display == 'block') { //关闭查词窗口
closeDictDialog();
} else if (!text) { //没有选择文本才翻页
clickEvent(event);
Expand Down
47 changes: 41 additions & 6 deletions application/view/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,20 @@ def ReaderDictPost(user: KeUser, userDir: str):
try:
definition = inst.definition(word, language)
if not definition and language: #如果查询不到,尝试使用构词法词典获取词根
stem = GetWordStem(word, language)
hObj = InitHunspell(language)
stem = GetWordStem(hObj, word)
if stem:
definition = inst.definition(stem, language) #再次查询

if not definition:
suggests = GetWordSuggestions(hObj, word)
if suggests:
sugTxt = ' '.join([f'<a href="https://kindleear/entry/{s}" style="font-size:1.2em;font-weight:bold;margin:10px 20px 5px 0px">{s}</a>'
for s in suggests])
definition = '<br/>'.join([_("No definitions found for '{}'.").format(word),
_("Did you mean?"), sugTxt])
else:
word = stem
definition = inst.definition(word, language) #再次查询
except Exception as e:
#import traceback
#traceback.print_exc()
Expand All @@ -243,9 +253,9 @@ def ReaderDictPost(user: KeUser, userDir: str):
return {'status': 'ok', 'word': word, 'definition': definition,
'dictname': str(inst), 'others': others}

#根据构词法获取词干
#构建Hunspell实例
#language: 语种代码,只有前两个字母
def GetWordStem(word, language):
def InitHunspell(language):
try:
import dictionary
import hunspell #type:ignore
Expand All @@ -268,9 +278,21 @@ def GetWordStem(word, language):
else:
return ''

try:
return hunspell.Hunspell(lang=dic, hunspell_data_dir=morphDir)
except Exception as e:
default_log.warning(f'Init hunspell failed: {e}')
return None

#根据构词法获取词干
#hObj: hunspell 实例
#word: 要查询的单词
def GetWordStem(hObj, word) -> str:
if not hObj:
return ''

stems = []
try:
hObj = hunspell.Hunspell(lang=dic, hunspell_data_dir=morphDir)
stems = [s for s in hObj.stem(word) if s != word]
default_log.debug(f'got stem tuple: {stems}')
except Exception as e:
Expand All @@ -280,7 +302,20 @@ def GetWordStem(word, language):
if isinstance(stem, bytes):
stem = stem.decode('utf-8')
return stem


# Get spelling suggestions for a word.
# hObj: hunspell instance; a falsy value (e.g. None) yields an empty list
# word: the word to look up
# Returns a list of suggestions; bytes items are decoded as UTF-8 to str.
def GetWordSuggestions(hObj, word) -> list:
    if not hObj:
        return []

    try:
        # Decoding stays inside the try block so that a malformed suggestion
        # degrades to "no suggestions" instead of raising to the caller.
        return [(s.decode('utf-8') if isinstance(s, bytes) else s) for s in hObj.suggest(word)]
    except Exception as e:
        # Report through the module logger (as the hunspell init code does)
        # rather than print(), so the failure is recorded in the app log.
        default_log.warning(f'Get word suggestions failed: {e}')
        return []

#将一个特定的文章制作成电子书推送
def PushSingleArticle(src: str, title: str, user: KeUser, userDir: str, language: str):
path = os.path.join(userDir, src).replace('\\', '/')
Expand Down

0 comments on commit 907ae89

Please sign in to comment.