Skip to content

Commit

Permalink
0105
Browse files Browse the repository at this point in the history
  • Loading branch information
osfans committed Jan 5, 2025
1 parent 2596058 commit 86dfad3
Show file tree
Hide file tree
Showing 102 changed files with 890,104 additions and 8,322 deletions.
46 changes: 41 additions & 5 deletions tools/tables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import tables._詳情
from pypinyin import pinyin, Style
from collections import defaultdict
from itertools import combinations
from opencc import OpenCC

SOURCE = "data"
Expand Down Expand Up @@ -154,6 +155,14 @@ def getLangs(dicts, 參數, 省=None):
推薦人 = defaultdict(int)
維護人 = defaultdict(int)
keys = None
同音字頻 = defaultdict(int)
同音字頻表 = os.path.exists("同音字頻.tsv")
if 同音字頻表:
t = open("同音字頻.tsv", "r", encoding="U8")
for line in t:
, = line.strip().split("\t")
同音字頻[] = int()
t.close()
t = open("warnings.txt", "w", encoding="U16")
for mod in mods:
if mod in 詳情:
Expand Down Expand Up @@ -214,12 +223,33 @@ def getLangs(dicts, 參數, 省=None):
if :
維護人[] += 1
+= 1
if .檢查同音字():
if 同音字頻表:
for , 字組 in .音典.items():
if "□" in 字組: 字組.remove("□")
for 字甲 in 字組:
字頻 = 0
字組乙 = set(字組)
字組乙.remove(字甲)
n = len(字組乙)
if n < 2: continue
for 字乙 in 字組乙:
字頻 += 同音字頻["".join(sorted((字甲, 字乙)))]
if 字頻 < n:
..append(f"{字甲}可能不讀{}")
else:
for 字組 in .音典.values():
if "□" in 字組: 字組.remove("□")
if len(字組) < 2: continue
for in combinations(字組, 2):
雙字 = "".join(sorted())
同音字頻[雙字] += 1
if .:
all_editors = ",".join(editor)
.全稱 = .info["語言"]
print(f"{.全稱}{})-{.文件名}-{all_editors}", file=t)
for 調 in .:
print(f"\t{調}", file=t)
for in .:
print(f"\t{}", file=t)
else:
= import_module(f"tables.{mod}").()
d = dict()
Expand All @@ -231,10 +261,10 @@ def getLangs(dicts, 參數, 省=None):
.加載(dicts)
.info["字數"] = .字數
.info["□數"] = .框數 if .框數 else None
聲韻調數 = .聲韻調數
音節數 = .音節數
聲韻數 = .聲韻數
.info["音節數"] = 聲韻調數 if 聲韻調數 else None
.info["不帶調音節數"] = 聲韻數 if 聲韻數 and 聲韻數 != 聲韻調數 else None
.info["音節數"] = 音節數 if 音節數 else None
.info["不帶調音節數"] = 聲韻數 if 聲韻數 and 聲韻數 != 音節數 else None
.info["網站"] = .網站
.info["網址"] = .網址
lang_t = .info["語言"]
Expand All @@ -249,6 +279,12 @@ def getLangs(dicts, 參數, 省=None):
if not keys: keys = .info.keys()
語組.append()
t.close()
if not 同音字頻表:
t = open("同音字頻.tsv", "w", encoding="U8")
for i, j in 同音字頻.items():
if j > 1:
t.write(f"{i}\t{j}\n")
t.close()
= 語組[0]
for in keys:
if not in .info: .info[] = None
Expand Down
42 changes: 21 additions & 21 deletions tools/tables/_表.py
Original file line number Diff line number Diff line change
Expand Up @@ -318,8 +318,11 @@ def 正音(自, 音, 檢查=False):
..append(f"{} 音節重複")
return

def 檢查同音字():
return .分區 and .簡稱 not in ("普通話",) and not .分區.startswith("歷史音") and not .分區.startswith("域外方音")

def 爲方言():
return . in ("老國音","党項") or (.爲語() and not .分區.startswith("歷史音"))
return .簡稱 in ("老國音","党項") or (.爲語() and not .分區.startswith("歷史音"))

def 分註(, ):
if not : return ""
Expand Down Expand Up @@ -394,15 +397,14 @@ def 框數(自):
return 1 if > 0 else 0

@property
def 聲韻調數():
def 音節數():
return len(.音典)

@property
def 聲韻數():
return len(set(map(lambda x:x.split("/")[0].rstrip("1234567890"), .音典.keys())))
return len(set(map(lambda x:x.rstrip("1234567890"), .音典.keys())))

def ():
start = time()
if .過時(): .更新()
.音典.clear()
.d.clear()
Expand All @@ -413,37 +415,35 @@ def 讀(自):
if "\t" not in : continue
, py = .split("\t", 1)
if .爲語():
js = ""
if "\t" in py: py, js = py.split("\t", 1)
if js and .爲語():
js = .分註(js)
= ""
if "\t" in py: py, = py.split("\t", 1)
if and .爲語():
= .分註()
try:
yd = getYD(py)
except:
print("\t\t\t", .簡稱, py, js)
print("\t\t\t", .簡稱, py, )
exit(1)
if yd and py.count("*") <= 1:
js = f"({yd}){js}"
= f"({yd}){}"
py = py[:-1]
if re.match(r"^\([^()]*?\)$", js):
js = js[1:-1]
syd = re.sub(r"\(.*?\)","",py).strip(" _`*")
if "-" not in syd:
.音典[syd].add()
if js:
py += "{%s}" % js
if re.match(r"^\([^()]*?\)$", ):
= [1:-1]
= re.sub(r"\(.*?\)","",py).strip(" _`*")
if "-" not in :
.音典[.split("/", 1)[0]].add()
if :
py += "{%s}" %
else:
if .字書:
sep = "▲" if . == "匯纂" else "\t"
py2, js = py.split(sep, 1)
py = ("\n\n" if .d[] else "") + py2 + sep + .分註(js)
py2, = py.split(sep, 1)
py = ("\n\n" if .d[] else "") + py2 + sep + .分註()
elif .簡稱 in ("部件檢索","字形描述"):
py = .正部件(py)
py = py.replace("\t", "\n")
if py not in .d[]:
.d[].append(py)
# passed = time() - start
# logging.info(f"({自.count:5d}({自.框數})-{自.聲韻調數:4d}-{自.聲韻數:4d}) {passed:6.3f} {自}")

def 加載(, dicts):
.()
Expand Down
7 changes: 7 additions & 0 deletions tools/tables/_跳跳老鼠.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,13 @@ def 析(自, 列):
elif in ("江華河路口", "江華粟米塘", "全州黃沙河", "安仁新洲", "1935長沙", "長沙黃花", "瀏陽鎭頭"):
聲韻, 調, = [:3]
= .normS()
elif in ("東海",):
= re.findall(r"^(.+?)\[(\d+)\][ ]*?(.+)$", "".join())
if not : return
聲韻, 調值, = [0]
調 = .僅轉調類(調值)
= .normG(, "〚\\1〛")
= .replace("*", "")
elif in ("孝昌小河",):
= re.findall(r"^(.+?)(\d+) ?(.+)$", [0])
if not : return
Expand Down
Loading

0 comments on commit 86dfad3

Please sign in to comment.