Skip to content

Commit

Permalink
0103
Browse files Browse the repository at this point in the history
  • Loading branch information
osfans committed Jan 3, 2025
1 parent 9d75d02 commit 38378e4
Show file tree
Hide file tree
Showing 122 changed files with 15,509 additions and 4,748 deletions.
2 changes: 1 addition & 1 deletion tools/make.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

dicts = defaultdict(dict)
langs = getLangs(dicts, argv, =args.)
keys = [f"{lang}" for lang in langs]
keys = [f"{lang.簡稱}" for lang in langs]
fields = [f"`{i}`" for i in keys]
CREATE = 'CREATE VIRTUAL TABLE mcpdict USING fts3 (%s)' % (",".join(fields))
INSERT = 'INSERT INTO mcpdict VALUES (%s)'% (','.join('?' * len(keys)))
Expand Down
130 changes: 65 additions & 65 deletions tools/tables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,18 @@ def hex2chr(uni):
def cjkorder(s):
return hzorders.get(s, [0x100, ord(s)])

def isCompatible(c):
def 爲兼容字(c):
n = ord(c)
return (0xF900 <= n < 0xFB00 and c not in '﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩' or 0x2F800 <= n < 0x2FA20)

def isHZ(c):
def 爲字(c):
c = c.strip()
if len(c) != 1: return False
n = ord(c)
return 0x3400<=n<0xA000 or n in (0x25A1, 0x3007) or 0xF900<=n<0xFB00 or 0x20000<=n<=0x323AF and not isCompatible(c)
return 0x3400<=n<0xA000 or n in (0x25A1, 0x3007) or 0xF900<=n<0xFB00 or 0x20000<=n<=0x323AF and not 爲兼容字(c)

def get_pinyin(word):
return pinyin(t2s(word), style=Style.TONE3, heteronym=False) if isHZ(word[0]) else [[word.lower()]]
def 普拼(word):
return pinyin(t2s(word), style=Style.TONE3, heteronym=False) if 爲字(word[0]) else [[word.lower()]]

def getSTVariants(level=2):
d = dict()
Expand All @@ -99,17 +99,17 @@ def s2t(字組, level=1):
t +=
return t

def addAllFq(d, fq, order,ignorePian = False):
def addAllFq(d, fq, order,不加片 = False):
if order is None or fq is None: return
fqs = fq.split(",")[0].split("-")
for i in range(len(fqs)):
name = "-".join(fqs[0:i+1])
if not name: continue
if ignorePian and name.endswith("片"): continue
= "-".join(fqs[0:i+1])
if not : continue
if 不加片 and .endswith("片"): continue
order = "-".join(order.split("-")[0:i+1])
if name in d:
if d[name] < order: continue
d[name] = order
if in d:
if d[] < order: continue
d[] = order

def addCfFq(d, fq, order):
if fq is None: return
Expand Down Expand Up @@ -138,16 +138,16 @@ def getLangsByArgv(infos, argv):
break
return l

def getLangs(dicts, argv, =None):
infos = tables._詳情.load()
langs = []
count = 0
if len(argv) == 1:
def getLangs(dicts, 參數, =None):
詳情 = tables._詳情.加載()
語組 = []
= 0
if len(參數) == 1:
mods = ["漢字"]
mods.extend(getLangsByArgv(infos, argv))
mods.extend(getLangsByArgv(詳情, 參數))
else:
mods = 辭典.copy()
mods.extend(getLangsByArgv(infos, argv) if argv else infos.keys())
mods.extend(getLangsByArgv(詳情, 參數) if 參數 else 詳情.keys())
mods.extend(形碼)
types = [dict(),dict(),dict()]
= defaultdict(int)
Expand All @@ -156,15 +156,15 @@ def getLangs(dicts, argv, 省=None):
keys = None
t = open("warnings.txt", "w", encoding="U16")
for mod in mods:
if mod in infos:
d = infos[mod]
if mod in 詳情:
d = 詳情[mod]
try:
if d["文件格式"]:
= import_module(f'tables._{d["文件格式"]}').()
.setmod(mod)
else:
= import_module(f"tables.{mod}").()
if not ._file: ._file = d["文件名"]
if not .文件名: .文件名 = d["文件名"]
except Exception as e:
print(f"\t\t\t{e} {mod}")
continue
Expand All @@ -177,58 +177,58 @@ def getLangs(dicts, argv, 省=None):
addAllFq(types[1], d["音典分區"], d["音典排序"])
addCfFq(types[2], d["陳邡分區"], d["陳邡排序"])
if d["聲調"]:
toneMaps = dict()
sds = json.loads(d["聲調"])
for i in sds:
dz = sds[i][0]
if dz in toneMaps and "入" in sds[i][3]:
dz += "0"
toneMaps[dz] = i
.toneMaps = toneMaps
調典 = dict()
調組 = json.loads(d["聲調"])
for 調 in 調組:
調值 = 調組[調][0]
if 調值 in 調典 and "入" in 調組[調][3]:
調值 += "0"
調典[調值] = 調
.調典 = 調典
.info = d
.load(dicts)
.加載(dicts)
if d["文件名"] != "mcpdict.db":
if .count == 0: continue
if .count < 900:
print(f"{} 字數太少: {.count}")
if .字數 == 0: continue
if .字數 < 900:
print(f"{} 字數太少: {.字數}")
elif .聲韻數 < 100:
print(f"{} 音節太少: {.聲韻數}")
if not d["無調"] and not toneMaps:
if not d["無調"] and not 調典:
print(f"{} 無調值")
.info["文件名"] = ._file
.info["文件名"] = .文件名
if d["省"]:
[d["省"]] += 1
if d["推薦人"]:
for i in d["推薦人"].split(","):
i = i.strip()
if i:
推薦人[i] += 1
for in d["推薦人"].split(","):
= .strip()
if :
推薦人[] += 1
editors = [set(d[i].split(",")) for i in ("作者", "錄入人", "維護人") if d[i]]
editor = set()
for i in editors:
editor.update(i)
for i in editor:
i = re.sub("(.*?)", "", i).strip()
if i:
維護人[i] += 1
count += 1
if .錯誤:
for in editors:
editor.update()
for in editor:
= re.sub("(.*?)", "", ).strip()
if :
維護人[] += 1
+= 1
if .:
all_editors = ",".join(editor)
.全稱 = .info["語言"]
print(f"{.全稱}{})-{._file}-{all_editors}", file=t)
for i in .錯誤:
print(f"\t{i}", file=t)
print(f"{.全稱}{})-{.文件名}-{all_editors}", file=t)
for 調 in .:
print(f"\t{調}", file=t)
else:
= import_module(f"tables.{mod}").()
d = dict()
d["語言"] = .全稱 if .全稱 else mod
d["簡稱"] = .簡稱 if .簡稱 else mod
d["地圖集二顏色"] = .顏色 if count == 0 else None
d["地圖集二顏色"] = .顏色 if == 0 else None
d["地圖集二分區"] = None
.info = d
.load(dicts)
.info["字數"] = .count
.info["□數"] = .unknownCount if .unknownCount else None
.加載(dicts)
.info["字數"] = .字數
.info["□數"] = .框數 if .框數 else None
聲韻調數 = .聲韻調數
聲韻數 = .聲韻數
.info["音節數"] = 聲韻調數 if 聲韻調數 else None
Expand All @@ -245,23 +245,23 @@ def getLangs(dicts, argv, 省=None):
.info["語言索引"] = lang_t
if .說明: .info["說明"] = .說明
if not keys: keys = .info.keys()
langs.append()
語組.append()
t.close()
= langs[0]
for i in keys:
if i not in .info: .info[i] = None
= 語組[0]
for in keys:
if not in .info: .info[] = None
.info["字數"] = len(dicts)
.info["說明"] = "語言數:%d<br><br>%s"%(count, .說明)
省表 = sorted(省_set, key=get_pinyin)
.info["說明"] = "語言數:%d<br><br>%s"%(, .說明)
省表 = sorted(省_set, key=普拼)
if "海外" in 省表:
省表.remove("海外")
省表.append("海外")
.info["省"] = ",".join([f"{i} ({[i]})" for i in 省表])
.info["維護人"] = ",".join([f"{i} ({維護人[i]})" for i in sorted(維護人.keys(), key=get_pinyin)])
.info["推薦人"] = ",".join([f"{i} ({推薦人[i]})" for i in sorted(推薦人.keys(), key=get_pinyin)])
.info["維護人"] = ",".join([f"{i} ({維護人[i]})" for i in sorted(維護人.keys(), key=普拼)])
.info["推薦人"] = ",".join([f"{i} ({推薦人[i]})" for i in sorted(推薦人.keys(), key=普拼)])
.info["地圖集二分區"] = ",".join(sorted(types[0].keys(),key=lambda x:types[0][x]))
.info["音典分區"] = ",".join(sorted(types[1].keys(),key=lambda x:types[1][x]))
.info["陳邡分區"] = ",".join(sorted(types[2].keys(),key=lambda x:types[2][x]))
.info["版本"] = datetime.datetime.now().strftime("%Y-%m-%d")
print("語言數", count)
return langs
print("語言數", )
return 語組
2 changes: 1 addition & 1 deletion tools/tables/_五筆.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from tables._表 import as _表

class (_表):
_file = "wb.csv"
文件名 = "wb.csv"
說明 = "來源:<a href=https://github.com/CNMan/UnicodeCJK-WuBi>五筆字型Unicode CJK超大字符集編碼數據庫</a>、<a href=https://github.com/yanhuacuo/98wubi-unicode>98五筆超大字符集碼表</a><br>說明:12345分別代表橫豎撇捺折,可以輸入“12345”查到“札”。也可以輸入五筆字型的編碼查詢漢字,比如輸入“snn”查詢“扎”。"
index = 5

Expand Down
104 changes: 104 additions & 0 deletions tools/tables/_化州.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#!/usr/bin/env python3

from tables._表 import as _表

class (_表):
sms = {"b#":"p",
"p#":"pʰ",
"bb#":"ɓ",
"m#":"m",
"f#":"f",
"d#":"t",
"t#":"tʰ",
"dd#":"ɗ",
"n#":"n",
"l#":"l",
"z#":"ts",
"c#":"tsʰ",
"s#":"s",
"sl#":"ɬ",
"g#":"k",
"k#":"kʰ",
"ng#":"ŋ",
"gw#":"kʋ",
"kw#":"kʰʋ",
"ngw#":"ŋʋ",
"h#":"h",
"w#":"ʋ",
"j#":"j",
"nj#":"ȵ",
"#":"ʔ"}
yms = {"aa":"aː",
"aai":"aːj",
"aau":"aːw",
"aan":"aːn",
"aang":"aːŋ",
"aam":"aːm",
"aat":"at̚",
"aak":"ak̚",
"aap":"ap̚",
"ai":"ɐj",
"au":"ɐw",
"an":"ɐn",
"ang":"ɐŋ",
"am":"ɐm",
"at":"ɐt̚",
"ak":"ɐk̚",
"ap":"ɐp̚",
"e":"ɛ",
"ei":"ɛj",
"eu":"e̯ɛw",
"en":"e̯ɛn",
"eng":"e̯ɛŋ",
"em":"e̯ɛm",
"et":"e̯ɛt̚",
"ek":"e̯ɛk̚",
"ep":"e̯ɛp̚",
"i":"i",
"iu":"iw",
"in":"in",
"ing":"iŋ",
"im":"im",
"it":"ɪt̚",
"ik":"ɪk̚",
"ip":"ɪp̚",
"o":"ɔ",
"oi":"u̯ɔj",
"eoi":"ɵj",
"ou":"u̯ɔw",
"ooau":"u̯ɔɒw",
"on":"u̯ɔn",
"ong":"u̯ɔŋʷ",
"ot":"u̯ɔt̚",
"ok":"u̯ɔk̚",
"u":"ʋ̩",
"ui":"ʋ̩j",
"un":"un",
"ung":"uŋʷ",
"ut":"ʊt̚",
"uk":"ʊk̚",
"ng":"ŋ̍",
"m":"m̩",
"n":"n̩",
"":""
}
列序 = {"化州下江":1, "化州上江":2}

def (, ):
, yb, js = [0], [.列序[.簡稱]], [3]
if not yb: return
yb = yb.lstrip("又").replace("gv", "gw")
yb, sd = .分音(yb)
if sd.isdigit():
if yb[-1] in "ptk":
if sd == "1": sd = "7"
elif sd == "3": sd = "8"
elif sd == "6": sd = "9"
else:
sd = ""
for ym in sorted(.yms.keys(), key=lambda x:-len(x)):
if yb.endswith(ym):
sm = yb[:-len(ym)]
break
yb = .sms[sm+"#"] + .yms[ym] + sd
return , yb, js
2 changes: 1 addition & 1 deletion tools/tables/_數據庫.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from tables._表 import as _表

class (_表):
_file = "mcpdict.db"
文件名 = "mcpdict.db"
爲音 = False

def (, ):
Expand Down
Loading

0 comments on commit 38378e4

Please sign in to comment.