Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
osfans committed Dec 31, 2024
1 parent cff6dfc commit c574314
Show file tree
Hide file tree
Showing 71 changed files with 10,030 additions and 5,083 deletions.
24 changes: 12 additions & 12 deletions tools/tables/1796建甌.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,20 @@ class 表(_表):
sms = None
ym = None

def format(self, line):
return line.lstrip("#")
def (, ):
return .lstrip("#")

def parse(self, fs):
if not self.sms:
self.sms = fs
def (, ):
if not .sms:
.sms =
return
if fs[0]:
self.ym = fs[0]
sd = fs[1]
if [0]:
.ym = [0]
sd = [1]
l = list()
for i,sm in enumerate(self.sms):
for i,sm in enumerate(.sms):
if not sm: continue
yb = sm + self.ym + sd
for hz in fs[i]:
l.append((hz, yb))
yb = sm + .ym + sd
for in [i]:
l.append((, yb))
return l
156 changes: 78 additions & 78 deletions tools/tables/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,12 @@
n2o_dict = {}
o2n_dict = {}

for line in open("tables/data/mulcodechar.dt", encoding="U8"):
if not line or line[0] == "#": continue
fs = line.strip().split("-")
if len(fs) < 2: continue
n2o_dict[fs[0]] = fs[1]
o2n_dict[fs[1]] = fs[0]
for in open("tables/data/mulcodechar.dt", encoding="U8"):
if not or [0] == "#": continue
= .strip().split("-")
if len() < 2: continue
n2o_dict[[0]] = [1]
o2n_dict[[1]] = [0]

opencc_t2s = OpenCC("t2s.json")

Expand Down Expand Up @@ -76,27 +76,27 @@ def get_pinyin(word):

def getSTVariants(level=2):
d = dict()
for line in open(VARIANT_FILE,encoding="U8"):
if line.startswith("#"): continue
fs = line.strip().split("\t")
if level == 1 and "#" in line:
for in open(VARIANT_FILE,encoding="U8"):
if .startswith("#"): continue
= .strip().split("\t")
if level == 1 and "#" in :
continue
fs[1] = fs[1].split("#")[0].strip()
if " " not in fs[1]:
d[fs[0]] = fs[1]
[1] = [1].split("#")[0].strip()
if " " not in [1]:
d[[0]] = [1]
return d

normVariants = getSTVariants(1)
stVariants = getSTVariants(2)

def s2t(hzs, level=1):
def s2t(字組, level=1):
t = ""
for hz in hzs:
for in 字組:
if level == 1:
hz = normVariants.get(hz, hz)
= normVariants.get(, )
else:
hz = stVariants.get(hz, hz)
t += hz
= stVariants.get(, )
t +=
return t

def addAllFq(d, fq, order,ignorePian = False):
Expand All @@ -113,17 +113,17 @@ def addAllFq(d, fq, order,ignorePian = False):

def addCfFq(d, fq, order):
if fq is None: return
fs = fq.split(",")
fqs = fs[0].split("-")
= fq.split(",")
fqs = [0].split("-")
for i in range(len(fqs)):
name = "-".join(fqs[0:i+1])
if not name: continue
order = "-".join(order.split("-")[0:i+1])
if name in d:
if d[name] < order: continue
d[name] = order
if len(fs) >= 2:
d[fs[1]] = ""
if len() >= 2:
d[[1]] = ""

def getLangsByArgv(infos, argv):
l = []
Expand Down Expand Up @@ -160,42 +160,42 @@ def getLangs(dicts, argv, 省=None):
d = infos[mod]
try:
if d["文件格式"]:
lang = import_module(f'tables._{d["文件格式"]}').()
lang.setmod(mod)
= import_module(f'tables._{d["文件格式"]}').()
.setmod(mod)
else:
lang = import_module(f"tables.{mod}").()
if not lang._file: lang._file = d["文件名"]
= import_module(f"tables.{mod}").()
if not ._file: ._file = d["文件名"]
except Exception as e:
print(f"\t\t\t{e} {mod}")
continue
if "繁" not in d["繁簡"]: lang.simplified = 2
if "繁" not in d["繁簡"]: .simplified = 2
if d["地圖集二分區"] == None: d["地圖集二分區"] = ""
if "聯表列名" in d:
a = d["聯表列名"].upper()
lang.ybIndex = sum([26**(len(a)-1-i)*(ord(j)-ord('A')+1) for i,j in enumerate(a)]) - 1
.音列 = sum([26**(len(a)-1-i)*(ord(j)-ord('A')+1) for i,j in enumerate(a)]) - 1
addAllFq(types[0], d["地圖集二分區"], d["地圖集二排序"])
addAllFq(types[1], d["音典分區"], d["音典排序"])
addCfFq(types[2], d["陳邡分區"], d["陳邡排序"])
if d["聲調"]:
toneMaps = dict()
sds = json.loads(d["聲調"])
for i in sds:
tv = sds[i][0]
if tv in toneMaps and "入" in sds[i][3]:
tv += "0"
toneMaps[tv] = i
lang.toneMaps = toneMaps
lang.info = d
lang.load(dicts)
dz = sds[i][0]
if dz in toneMaps and "入" in sds[i][3]:
dz += "0"
toneMaps[dz] = i
.toneMaps = toneMaps
.info = d
.load(dicts)
if d["文件名"] != "mcpdict.db":
if lang.count == 0: continue
if lang.count < 900:
print(f"{lang} 字數太少: {lang.count}")
elif lang.syCount < 100:
print(f"{lang} 音節太少: {lang.syCount}")
if .count == 0: continue
if .count < 900:
print(f"{} 字數太少: {.count}")
elif .聲韻數 < 100:
print(f"{} 音節太少: {.聲韻數}")
if not d["無調"] and not toneMaps:
print(f"{lang} 無調值")
lang.info["文件名"] = lang._file
print(f"{} 無調值")
.info["文件名"] = ._file
if d["省"]:
[d["省"]] += 1
if d["推薦人"]:
Expand All @@ -212,56 +212,56 @@ def getLangs(dicts, argv, 省=None):
if i:
維護人[i] += 1
count += 1
if lang.errors:
if .錯誤:
all_editors = ",".join(editor)
lang.full = lang.info["語言"]
print(f"{lang.full}{lang})-{lang._file}-{all_editors}", file=t)
for i in lang.errors:
.全稱 = .info["語言"]
print(f"{.全稱}{})-{._file}-{all_editors}", file=t)
for i in .錯誤:
print(f"\t{i}", file=t)
else:
lang = import_module(f"tables.{mod}").()
= import_module(f"tables.{mod}").()
d = dict()
d["語言"] = lang.full if lang.full else mod
d["簡稱"] = lang.short if lang.short else mod
d["地圖集二顏色"] = lang.color if count == 0 else None
d["語言"] = .全稱 if .全稱 else mod
d["簡稱"] = .簡稱 if .簡稱 else mod
d["地圖集二顏色"] = .顏色 if count == 0 else None
d["地圖集二分區"] = None
lang.info = d
lang.load(dicts)
lang.info["字數"] = lang.count
lang.info["□數"] = lang.unknownCount if lang.unknownCount else None
sydCount = lang.sydCount
syCount = lang.syCount
lang.info["音節數"] = sydCount if sydCount else None
lang.info["不帶調音節數"] = syCount if syCount and syCount != sydCount else None
lang.info["網站"] = lang.site
lang.info["網址"] = lang.url
lang_t = lang.info["語言"]
lang_s = t2s(lang.info["語言"], 2)
.info = d
.load(dicts)
.info["字數"] = .count
.info["□數"] = .unknownCount if .unknownCount else None
聲韻調數 = .聲韻調數
聲韻數 = .聲韻數
.info["音節數"] = 聲韻調數 if 聲韻調數 else None
.info["不帶調音節數"] = 聲韻數 if 聲韻數 and 聲韻數 != 聲韻調數 else None
.info["網站"] = .網站
.info["網址"] = .網址
lang_t = .info["語言"]
lang_s = t2s(.info["語言"], 2)
if lang_s not in lang_t:
lang_t += f",{lang_s}"
lang_s = t2s(lang.info["語言"], 1)
lang_s = t2s(.info["語言"], 1)
if lang_s not in lang_t:
lang_t += f",{lang_s}"
lang.info["語言索引"] = lang_t
if lang.note: lang.info["說明"] = lang.note
if not keys: keys = lang.info.keys()
langs.append(lang)
.info["語言索引"] = lang_t
if .說明: .info["說明"] = .說明
if not keys: keys = .info.keys()
langs.append()
t.close()
hz = langs[0]
= langs[0]
for i in keys:
if i not in hz.info: hz.info[i] = None
hz.info["字數"] = len(dicts)
hz.info["說明"] = "語言數:%d<br><br>%s"%(count, hz.note)
if i not in .info: .info[i] = None
.info["字數"] = len(dicts)
.info["說明"] = "語言數:%d<br><br>%s"%(count, .說明)
省表 = sorted(省_set, key=get_pinyin)
if "海外" in 省表:
省表.remove("海外")
省表.append("海外")
hz.info["省"] = ",".join([f"{i} ({[i]})" for i in 省表])
hz.info["維護人"] = ",".join([f"{i} ({維護人[i]})" for i in sorted(維護人.keys(), key=get_pinyin)])
hz.info["推薦人"] = ",".join([f"{i} ({推薦人[i]})" for i in sorted(推薦人.keys(), key=get_pinyin)])
hz.info["地圖集二分區"] = ",".join(sorted(types[0].keys(),key=lambda x:types[0][x]))
hz.info["音典分區"] = ",".join(sorted(types[1].keys(),key=lambda x:types[1][x]))
hz.info["陳邡分區"] = ",".join(sorted(types[2].keys(),key=lambda x:types[2][x]))
hz.info["版本"] = datetime.datetime.now().strftime("%Y-%m-%d")
.info["省"] = ",".join([f"{i} ({[i]})" for i in 省表])
.info["維護人"] = ",".join([f"{i} ({維護人[i]})" for i in sorted(維護人.keys(), key=get_pinyin)])
.info["推薦人"] = ",".join([f"{i} ({推薦人[i]})" for i in sorted(推薦人.keys(), key=get_pinyin)])
.info["地圖集二分區"] = ",".join(sorted(types[0].keys(),key=lambda x:types[0][x]))
.info["音典分區"] = ",".join(sorted(types[1].keys(),key=lambda x:types[1][x]))
.info["陳邡分區"] = ",".join(sorted(types[2].keys(),key=lambda x:types[2][x]))
.info["版本"] = datetime.datetime.now().strftime("%Y-%m-%d")
print("語言數", count)
return langs
10 changes: 5 additions & 5 deletions tools/tables/_五筆.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@

class (_表):
_file = "wb.csv"
note = "來源:<a href=https://github.com/CNMan/UnicodeCJK-WuBi>五筆字型Unicode CJK超大字符集編碼數據庫</a>、<a href=https://github.com/yanhuacuo/98wubi-unicode>98五筆超大字符集碼表</a><br>說明:12345分別代表橫豎撇捺折,可以輸入“12345”查到“札”。也可以輸入五筆字型的編碼查詢漢字,比如輸入“snn”查詢“扎”。"
說明 = "來源:<a href=https://github.com/CNMan/UnicodeCJK-WuBi>五筆字型Unicode CJK超大字符集編碼數據庫</a>、<a href=https://github.com/yanhuacuo/98wubi-unicode>98五筆超大字符集碼表</a><br>說明:12345分別代表橫豎撇捺折,可以輸入“12345”查到“札”。也可以輸入五筆字型的編碼查詢漢字,比如輸入“snn”查詢“扎”。"
index = 5

def parse(self, fs):
hz = fs[1]
wb = fs[self.index]
return hz, wb
def (, ):
= [1]
wb = [.index]
return , wb
22 changes: 11 additions & 11 deletions tools/tables/_數據庫.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,26 @@

class (_表):
_file = "mcpdict.db"
isYb = False
爲音 = False

def format(self, line):
line = line.replace("|", "`").replace("*", "**")
return line
def (, ):
= .replace("|", "`").replace("*", "**")
return

def update(self):
def 更新():
d = defaultdict(list)
conn = sqlite3.connect(self.spath)
conn = sqlite3.connect(.spath)
conn.row_factory = sqlite3.Row
c = conn.cursor()
for r in c.execute('SELECT * FROM mcpdict'):
hz = chr(int(r["unicode"],16))
pys = r[self.dbkey]
= chr(int(r["unicode"],16))
pys = r[.]
if not pys: continue
pys = re.sub(r"\[\d\]", ",",pys).strip(",")
for py in pys.split(","):
py = py.strip()
if not py: continue
yb = self.format(py)
d[hz].append(yb)
yb = .(py)
d[].append(yb)
conn.close()
self.write(d)
.(d)
Loading

0 comments on commit c574314

Please sign in to comment.