0103

osfans · Jan 3, 2025 · 38378e4 · 38378e4
1 parent 9d75d02
commit 38378e4
Show file tree

Hide file tree

Showing 122 changed files with 15,509 additions and 4,748 deletions.
diff --git a/tools/make.py b/tools/make.py
@@ -13,7 +13,7 @@
 
 dicts = defaultdict(dict)
 langs = getLangs(dicts, argv, 省=args.省)
-keys = [f"{lang}" for lang in langs]
+keys = [f"{lang.簡稱}" for lang in langs]
 fields = [f"`{i}`" for i in keys]
 CREATE = 'CREATE VIRTUAL TABLE mcpdict USING fts3 (%s)' % (",".join(fields))
 INSERT = 'INSERT INTO mcpdict VALUES (%s)'% (','.join('?' * len(keys)))

diff --git a/tools/tables/__init__.py b/tools/tables/__init__.py
@@ -61,18 +61,18 @@ def hex2chr(uni):
 def cjkorder(s):
 	return hzorders.get(s, [0x100, ord(s)])
 
-def isCompatible(c):
+def 爲兼容字(c):
 	n = ord(c)
 	return (0xF900 <= n < 0xFB00 and c not in '﨎﨏﨑﨓﨔﨟﨡﨣﨤﨧﨨﨩' or 0x2F800 <= n < 0x2FA20)
 
-def isHZ(c):
+def 爲字(c):
 	c = c.strip()
 	if len(c) != 1: return False
 	n = ord(c)
-	return 0x3400<=n<0xA000 or n in (0x25A1, 0x3007) or 0xF900<=n<0xFB00 or 0x20000<=n<=0x323AF and not isCompatible(c)
+	return 0x3400<=n<0xA000 or n in (0x25A1, 0x3007) or 0xF900<=n<0xFB00 or 0x20000<=n<=0x323AF and not 爲兼容字(c)
 
-def get_pinyin(word):
-	return pinyin(t2s(word), style=Style.TONE3, heteronym=False) if isHZ(word[0]) else [[word.lower()]]
+def 普拼(word):
+	return pinyin(t2s(word), style=Style.TONE3, heteronym=False) if 爲字(word[0]) else [[word.lower()]]
 
 def getSTVariants(level=2):
 	d = dict()
@@ -99,17 +99,17 @@ def s2t(字組, level=1):
 		t += 字
 	return t
 
-def addAllFq(d, fq, order,ignorePian = False):
+def addAllFq(d, fq, order,不加片 = False):
 	if order is None or fq is None: return
 	fqs = fq.split(",")[0].split("-")
 	for i in range(len(fqs)):
-		name = "-".join(fqs[0:i+1])
-		if not name: continue
-		if ignorePian and name.endswith("片"): continue
+		名 = "-".join(fqs[0:i+1])
+		if not 名: continue
+		if 不加片 and 名.endswith("片"): continue
 		order = "-".join(order.split("-")[0:i+1])
-		if name in d:
-			if d[name] < order: continue
-		d[name] = order
+		if 名 in d:
+			if d[名] < order: continue
+		d[名] = order
 
 def addCfFq(d, fq, order):
 	if fq is None: return
@@ -138,16 +138,16 @@ def getLangsByArgv(infos, argv):
 					break
 	return l
 
-def getLangs(dicts, argv, 省=None):
-	infos = tables._詳情.load(省)
-	langs = []
-	count = 0
-	if len(argv) == 1:
+def getLangs(dicts, 參數, 省=None):
+	詳情 = tables._詳情.加載(省)
+	語組 = []
+	數 = 0
+	if len(參數) == 1:
 		mods = ["漢字"]
-		mods.extend(getLangsByArgv(infos, argv))
+		mods.extend(getLangsByArgv(詳情, 參數))
 	else:
 		mods = 辭典.copy()
-		mods.extend(getLangsByArgv(infos, argv) if argv else infos.keys())
+		mods.extend(getLangsByArgv(詳情, 參數) if 參數 else 詳情.keys())
 		mods.extend(形碼)
 	types = [dict(),dict(),dict()]
 	省 = defaultdict(int)
@@ -156,15 +156,15 @@ def getLangs(dicts, argv, 省=None):
 	keys = None
 	t = open("warnings.txt", "w", encoding="U16")
 	for mod in mods:
-		if mod in infos:
-			d = infos[mod]
+		if mod in 詳情:
+			d = 詳情[mod]
 			try:
 				if d["文件格式"]:
 					語 = import_module(f'tables._{d["文件格式"]}').表()
 					語.setmod(mod)
 				else:
 					語 = import_module(f"tables.{mod}").表()
-				if not 語._file: 語._file = d["文件名"]
+				if not 語.文件名: 語.文件名 = d["文件名"]
 			except Exception as e:
 				print(f"\t\t\t{e} {mod}")
 				continue
@@ -177,58 +177,58 @@ def getLangs(dicts, argv, 省=None):
 			addAllFq(types[1], d["音典分區"], d["音典排序"])
 			addCfFq(types[2], d["陳邡分區"], d["陳邡排序"])
 			if d["聲調"]:
-				toneMaps = dict()
-				sds = json.loads(d["聲調"])
-				for i in sds:
-					dz = sds[i][0]
-					if dz in toneMaps and "入" in sds[i][3]:
-						dz += "0"
-					toneMaps[dz] = i
-				語.toneMaps = toneMaps
+				調典 = dict()
+				調組 = json.loads(d["聲調"])
+				for 調 in 調組:
+					調值 = 調組[調][0]
+					if 調值 in 調典 and "入" in 調組[調][3]:
+						調值 += "0"
+					調典[調值] = 調
+				語.調典 = 調典
 			語.info = d
-			語.load(dicts)
+			語.加載(dicts)
 			if d["文件名"] != "mcpdict.db":
-				if 語.count == 0: continue
-				if 語.count < 900:
-					print(f"{語} 字數太少: {語.count}")
+				if 語.字數 == 0: continue
+				if 語.字數 < 900:
+					print(f"{語} 字數太少: {語.字數}")
 				elif 語.聲韻數 < 100:
 					print(f"{語} 音節太少: {語.聲韻數}")
-			if not d["無調"] and not toneMaps:
+			if not d["無調"] and not 調典:
 				print(f"{語} 無調值")
-			語.info["文件名"] = 語._file
+			語.info["文件名"] = 語.文件名
 			if d["省"]:
 				省[d["省"]] += 1
 			if d["推薦人"]:
-				for i in d["推薦人"].split(","):
-					i = i.strip()
-					if i:
-						推薦人[i] += 1
+				for 人 in d["推薦人"].split(","):
+					人 = 人.strip()
+					if 人:
+						推薦人[人] += 1
 			editors = [set(d[i].split(",")) for i in ("作者", "錄入人", "維護人") if d[i]]
 			editor = set()
-			for i in editors:
-				editor.update(i)
-			for i in editor:
-				i = re.sub("（.*?）", "", i).strip()
-				if i:
-					維護人[i] += 1
-			count += 1
-			if 語.錯誤:
+			for 人 in editors:
+				editor.update(人)
+			for 人 in editor:
+				人 = re.sub("（.*?）", "", 人).strip()
+				if 人:
+					維護人[人] += 1
+			數 += 1
+			if 語.誤:
 				all_editors = ",".join(editor)
 				語.全稱 = 語.info["語言"]
-				print(f"{語.全稱}（{語}）-{語._file}-{all_editors}", file=t)
-				for i in 語.錯誤:
-					print(f"\t{i}", file=t)
+				print(f"{語.全稱}（{語}）-{語.文件名}-{all_editors}", file=t)
+				for 調 in 語.誤:
+					print(f"\t{調}", file=t)
 		else:
 			語 = import_module(f"tables.{mod}").表()
 			d = dict()
 			d["語言"] = 語.全稱 if 語.全稱 else mod
 			d["簡稱"] = 語.簡稱 if 語.簡稱 else mod
-			d["地圖集二顏色"] = 語.顏色 if count == 0 else None
+			d["地圖集二顏色"] = 語.顏色 if 數 == 0 else None
 			d["地圖集二分區"] = None
 			語.info = d
-			語.load(dicts)
-		語.info["字數"] = 語.count
-		語.info["□數"] = 語.unknownCount if 語.unknownCount else None
+			語.加載(dicts)
+		語.info["字數"] = 語.字數
+		語.info["□數"] = 語.框數 if 語.框數 else None
 		聲韻調數 = 語.聲韻調數
 		聲韻數 = 語.聲韻數
 		語.info["音節數"] = 聲韻調數 if 聲韻調數 else None
@@ -245,23 +245,23 @@ def getLangs(dicts, argv, 省=None):
 		語.info["語言索引"] = lang_t
 		if 語.說明: 語.info["說明"] = 語.說明
 		if not keys: keys = 語.info.keys()
-		langs.append(語)
+		語組.append(語)
 	t.close()
-	字 = langs[0]
-	for i in keys:
-		if i not in 字.info: 字.info[i] = None
+	字 = 語組[0]
+	for 項 in keys:
+		if 項 not in 字.info: 字.info[項] = None
 	字.info["字數"] = len(dicts)
-	字.info["說明"] = "語言數：%d<br><br>%s"%(count, 字.說明)
-	省表 = sorted(省_set, key=get_pinyin)
+	字.info["說明"] = "語言數：%d<br><br>%s"%(數, 字.說明)
+	省表 = sorted(省_set, key=普拼)
 	if "海外" in 省表:
 		省表.remove("海外")
 		省表.append("海外")
 	字.info["省"] = ",".join([f"{i} ({省[i]})" for i in 省表])
-	字.info["維護人"] = ",".join([f"{i} ({維護人[i]})" for i in sorted(維護人.keys(), key=get_pinyin)])
-	字.info["推薦人"] = ",".join([f"{i} ({推薦人[i]})" for i in sorted(推薦人.keys(), key=get_pinyin)])
+	字.info["維護人"] = ",".join([f"{i} ({維護人[i]})" for i in sorted(維護人.keys(), key=普拼)])
+	字.info["推薦人"] = ",".join([f"{i} ({推薦人[i]})" for i in sorted(推薦人.keys(), key=普拼)])
 	字.info["地圖集二分區"] = ",".join(sorted(types[0].keys(),key=lambda x:types[0][x]))
 	字.info["音典分區"] = ",".join(sorted(types[1].keys(),key=lambda x:types[1][x]))
 	字.info["陳邡分區"] = ",".join(sorted(types[2].keys(),key=lambda x:types[2][x]))
 	字.info["版本"] = datetime.datetime.now().strftime("%Y-%m-%d")
-	print("語言數", count)
-	return langs
+	print("語言數", 數)
+	return 語組
diff --git a/tools/tables/_五筆.py b/tools/tables/_五筆.py
@@ -3,7 +3,7 @@
 from tables._表 import 表 as _表
 
 class 表(_表):
-	_file = "wb.csv"
+	文件名 = "wb.csv"
 	說明 = "來源：<a href=https://github.com/CNMan/UnicodeCJK-WuBi>五筆字型Unicode CJK超大字符集編碼數據庫</a>、<a href=https://github.com/yanhuacuo/98wubi-unicode>98五筆超大字符集碼表</a><br>說明：12345分別代表橫豎撇捺折，可以輸入“12345”查到“札”。也可以輸入五筆字型的編碼查詢漢字，比如輸入“snn”查詢“扎”。"
 	index = 5
 

diff --git a/tools/tables/_化州.py b/tools/tables/_化州.py
@@ -0,0 +1,130 @@
+#!/usr/bin/env python3
+
+from tables._表 import 表 as _表
+
+class 表(_表):
+	"""Table for the two 化州 varieties listed in ``列序``.
+
+	Readings arrive in a romanisation and are rewritten as IPA: the initial
+	is looked up in ``sms``, the final in ``yms``, and the tone digit is kept
+	as is apart from the renumbering that ``析`` applies to stop-final syllables.
+	"""
+
+	# Romanised initial -> IPA. Every key ends in "#", which lets the zero
+	# initial be written as the bare key "#".
+	sms = {"b#":"p",
+"p#":"pʰ",
+"bb#":"ɓ",
+"m#":"m",
+"f#":"f",
+"d#":"t",
+"t#":"tʰ",
+"dd#":"ɗ",
+"n#":"n",
+"l#":"l",
+"z#":"ts",
+"c#":"tsʰ",
+"s#":"s",
+"sl#":"ɬ",
+"g#":"k",
+"k#":"kʰ",
+"ng#":"ŋ",
+"gw#":"kʋ",
+"kw#":"kʰʋ",
+"ngw#":"ŋʋ",
+"h#":"h",
+"w#":"ʋ",
+"j#":"j",
+"nj#":"ȵ",
+"#":"ʔ"}
+	# Romanised final -> IPA.
+	yms = {"aa":"aː",
+"aai":"aːj",
+"aau":"aːw",
+"aan":"aːn",
+"aang":"aːŋ",
+"aam":"aːm",
+"aat":"at̚",
+"aak":"ak̚",
+"aap":"ap̚",
+"ai":"ɐj",
+"au":"ɐw",
+"an":"ɐn",
+"ang":"ɐŋ",
+"am":"ɐm",
+"at":"ɐt̚",
+"ak":"ɐk̚",
+"ap":"ɐp̚",
+"e":"ɛ",
+"ei":"ɛj",
+"eu":"e̯ɛw",
+"en":"e̯ɛn",
+"eng":"e̯ɛŋ",
+"em":"e̯ɛm",
+"et":"e̯ɛt̚",
+"ek":"e̯ɛk̚",
+"ep":"e̯ɛp̚",
+"i":"i",
+"iu":"iw",
+"in":"in",
+"ing":"iŋ",
+"im":"im",
+"it":"ɪt̚",
+"ik":"ɪk̚",
+"ip":"ɪp̚",
+"o":"ɔ",
+"oi":"u̯ɔj",
+"eoi":"ɵj",
+"ou":"u̯ɔw",
+"ooau":"u̯ɔɒw",
+"on":"u̯ɔn",
+"ong":"u̯ɔŋʷ",
+"ot":"u̯ɔt̚",
+"ok":"u̯ɔk̚",
+"u":"ʋ̩",
+"ui":"ʋ̩j",
+"un":"un",
+"ung":"uŋʷ",
+"ut":"ʊt̚",
+"uk":"ʊk̚",
+"ng":"ŋ̍",
+"m":"m̩",
+"n":"n̩",
+"":""
+}
+	# Column holding the reading for each variety, keyed by 簡稱.
+	列序 = {"化州下江":1, "化州上江":2}
+	# Tone renumbering for syllables ending in -p/-t/-k.
+	_入調 = {"1":"7", "3":"8", "6":"9"}
+	# Non-empty finals, longest first, so that e.g. "aang" wins over "ng".
+	_韻序 = tuple(sorted(filter(None, yms), key=len, reverse=True))
+
+	def 析(自, 列):
+		"""Turn one source row into ``(字, IPA reading, 列[3])``.
+
+		Returns ``None`` (row skipped) when the reading cell is empty, when
+		nothing is left of it after ``分音``, or when the syllable does not end
+		in a final known to ``yms``. An initial missing from ``sms`` still
+		raises ``KeyError``.
+		"""
+		字, yb, js = 列[0], 列[自.列序[自.簡稱]], 列[3]
+		if not yb: return
+		yb = yb.lstrip("又").replace("gv", "gw")
+		# presumably splits off the trailing tone mark -- confirm against _表.分音
+		yb, sd = 自.分音(yb)
+		# Nothing left to convert; also keeps yb[-1] below from raising.
+		if not yb: return
+		if sd.isdigit():
+			if yb[-1] in "ptk":
+				sd = 自._入調.get(sd, sd)
+		else:
+			sd = ""
+		for ym in 自._韻序:
+			if yb.endswith(ym):
+				sm = yb[:-len(ym)]
+				break
+		else:
+			# The "" entry of yms is deliberately not used as a catch-all: with
+			# it, yb[:-0] == "" would turn any unknown syllable into a bare "ʔ".
+			return
+		yb = 自.sms[sm+"#"] + 自.yms[ym] + sd
+		return 字, yb, js
diff --git a/tools/tables/_數據庫.py b/tools/tables/_數據庫.py
@@ -5,7 +5,7 @@
 from tables._表 import 表 as _表
 
 class 表(_表):
-	_file = "mcpdict.db"
+	文件名 = "mcpdict.db"
 	爲音 = False
 
 	def 統(自, 行):