forked from szcf-weiya/ESL-CN
-
Notifications
You must be signed in to change notification settings - Fork 0
/
gentag.py
61 lines (56 loc) · 1.99 KB
/
gentag.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import re
import glob
# Matches glossary terms written in the Markdown sources as
# `**<translation> (<original>)**`:
#   group 1 - translated term: CJK characters, ASCII letters and hyphens
#   group 2 - original term: ASCII letters, spaces, commas and hyphens
pat = re.compile(r"\*\*([\u4e00-\u9fa5\-a-zA-Z]+)\s?\((\b[a-zA-Z ,\-]+\b)\)\*\*")

# Chapter numbers probed under the docs root. A chapter directory is
# recognised by its zero-padded two-digit number (e.g. "01").
_FIRST_CHAPTER = 1
_LAST_CHAPTER = 26


def _find_chapter_dir(dirnames, chapter):
    """Return the directory name carrying *chapter*'s number, or None.

    Names that start with the two-digit number win; otherwise the first
    name that merely contains it is used. None means no directory matches.
    """
    number = f'{chapter:02}'
    containing = [name for name in dirnames if number in name]
    for name in containing:
        if name.startswith(number):
            return name
    return containing[0] if containing else None


def _collect_tags(docs_root):
    """Scan every chapter and map "original: translation" to its link list."""
    # Sorted so that the result does not depend on the order in which the
    # operating system happens to list the directory.
    dirnames = sorted(
        name for name in os.listdir(docs_root)
        if os.path.isdir(os.path.join(docs_root, name))
    )
    tagdict = {}
    for chapter in range(_FIRST_CHAPTER, _LAST_CHAPTER + 1):
        chdir = _find_chapter_dir(dirnames, chapter)
        if chdir is None:
            # Without this guard a missing chapter would fall through to an
            # unrelated directory and record its links a second time.
            continue
        # Escape the directory part so that glob metacharacters in a
        # directory name are taken literally.
        pattern = os.path.join(glob.escape(os.path.join(docs_root, chdir)), "*.md")
        for file in sorted(glob.glob(pattern)):
            print(f"processing {file}...")
            with open(file, "rt", encoding="utf-8") as fl:
                contents = fl.read()
            # Unique matches, kept in order of first appearance so the
            # generated page is identical from run to run.
            mat = list(dict.fromkeys(pat.findall(contents)))
            print(mat)
            if not mat:
                continue
            basename = os.path.basename(file)
            # File names look like "<section id>-<title>.md".
            secid = basename.split('-')[0]
            url = f'{chdir}/{os.path.splitext(basename)[0]}/index.html'
            if secid == "Bibliographic":
                # The bibliographic-notes page is labelled by chapter number.
                val = f'[第 {chapter} 章文献笔记]({url})'
            else:
                val = f'[第 {secid} 节]({url})'
            for m in mat:
                key = f'{m[1]}: {m[0]}'
                tagdict.setdefault(key, []).append(val)
    return tagdict


def _group_by_letter(tagdict):
    """Turn the tag mapping into 26 lists of Markdown bullets, one per letter."""
    tags = [[] for _ in range(26)]
    for key, links in tagdict.items():
        entry = "- " + key + " (" + ', '.join(links) + ")"
        # `pat` only accepts an original term that begins with an ASCII
        # letter, so the computed index is always within 0..25.
        tags[ord(key[0].upper()) - ord('A')].append(entry)
    return tags


def _write_tag_page(path, tags):
    """Write one "## <letter>" section for every letter that has entries."""
    with open(path, "w", encoding="utf-8") as tagpage:
        for offset, entries in enumerate(tags):
            # Letters without tags get no section at all.
            if not entries:
                continue
            tagpage.write(f"\n## {chr(offset + ord('A'))}\n")
            # `writelines` adds no line terminators of its own.
            tagpage.writelines(entry + '\n' for entry in entries)


def main(docs_root="docs"):
    """Generate ``<docs_root>/tag.md``, an alphabetical index of glossary terms.

    Every ``*.md`` file in each chapter directory under *docs_root* is
    searched with `pat`. Each distinct term is listed once under the first
    letter of its original form, followed by links to every page that
    mentions it.

    Args:
        docs_root: Directory holding the chapter directories; the index is
            written to ``tag.md`` inside it.

    Raises:
        OSError: If *docs_root* cannot be listed, or a page or the index
            file cannot be opened.
        UnicodeDecodeError: If a page is not valid UTF-8.
    """
    tagdict = _collect_tags(docs_root)
    _write_tag_page(os.path.join(docs_root, "tag.md"), _group_by_letter(tagdict))


if __name__ == "__main__":
    main()