-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathabbreviated_bopomo.schema.yaml
185 lines (172 loc) · 4.96 KB
/
abbreviated_bopomo.schema.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
# Rime schema
# encoding: utf-8
schema:
schema_id: abbreviated_bopomo
name: "簡拼注音"
version: "1.1.2"
author:
- imper0502 <[email protected]>
description: |
簡拼注音,也叫縮寫注音
switches:
- name: ascii_mode
reset: 0
states: [ "中", "En" ]
- name: full_shape
reset: 0
states: [ "半形", "全形" ]
- name: ascii_punct
reset: 0
states: [ "。,", ".," ]
- name: s2tw
reset: 1
# 啓用 opencc 字形轉換,但不在選單中顯示。
# states: [ "简化字", "教標字" ]
- name: t2tw
reset: 1
# 啓用 opencc 字形轉換,但不在選單中顯示。正 → 臺
# states: [ "傳承字", "教標字" ]
- name: tw2s
reset: 0
# 啓用 opencc 字形轉換。
states: [ "教標字", "简化字" ]
engine:
processors:
- ascii_composer
- recognizer
- key_binder
- speller
- punctuator
- selector
- navigator
- express_editor
segmentors:
- ascii_segmentor
- matcher
- abc_segmentor
- punct_segmentor
- fallback_segmentor
translators:
- echo_translator
- punct_translator
- script_translator
filters:
- simplifier@s2tw # 簡繁轉換
- simplifier@t2tw # 正繁轉換
- simplifier@tw2s # 繁簡轉換
- uniquifier # 過濾重複的候選字,有可能來自簡正繁轉換
menu: # 修改候選單標籤、修改候選單大小
alternative_select_labels: [ 6, 7, 8, 9, 0 ]
alternative_select_keys: 67890
page_size: 5
style: # 修改候選單標籤格式
label_format: ' %s '
speller:
alphabet: "bpmfdtnlgkhjqxavorzcsiuyewIUYAOEQHWSRKDJFL12345"
finals: "12345"
delimiter: " '"
algebra:
# 預處理
- erase/^xx$/ # 移除【xx】編碼
- xform/^m(\d)$/mu$1/ # ㄇ調 → ㄇㄨ調
- xform/^r5$/er5/ # 〜兒
- xform/v/ü/ # v → ü/
- xform/iu/iou/ # 補上漏音
- xform/ui/uei/ # 補上漏音
- xform/iong/üng/ # [jqx]iong → [jqx]üng
- xform/ong/ung/ # yong, -ong → yung, -ung
- xform/^yu/ü/ # yue, yuan, yun, yung → üe, üan, ün, üng
- xform/^yi?/i/ # yi, y- → i, i-
- xform/^wu?/u/ # wu, w- → u, u-
- xform/^([jqx])u/$1ü/ # 取消省略的寫法
- xform/([iuü])n/$1en/ # in, ing, un, ung, ün, üng → ien, ieng, uen, ueng, üen, üeng
# 開始轉換輸入鍵
# 兩個「三合韻母」ㄤㄥ
- xform/ang/J/
- xform/eng/F/
# 六個「二合韻母」ㄞㄟㄠㄡㄢㄣ
- xform/ai/H/
- xform/ei/W/
- xform/ao/S/
- xform/ou/R/
- xform/an/K/
- xform/en/D/
# ㄦ及ㄝ
- xform/er/L/
- xform/eh/Q/
- xform/(?<=[iü])e/Q/
# 三個單韻母ㄚㄛㄜ
- xlit/aoe/AOE/
# 三個介音ㄧㄨㄩ
- xlit/iuy/IUY/
# 到此,介音、韻母全部大寫。
# 三個「二合聲母」ㄓㄔㄕ
- xform/^zh/a/
- xform/^ch/v/
- xform/^sh/o/
# 到此,【terra pinyin】→【bopomofo】
# 輸入鍵轉換
- xform/ü/y/ # `Y鍵`輸入ü
- xlit/IUYAOEQHWSRKDJFL/iuyaoeqhwsrkdjfl/ # 介音、韻母一律小寫
# 不使用零聲母,而是用大寫表示韻母獨立音節
- xform/^i(\d)/I$1/
- xform/^u(\d)/U$1/
- xform/^y(\d)/Y$1/
- xform/^a(\d)/A$1/
- xform/^o(\d)/O$1/
- xform/^e(\d)/E$1/
- xform/^q(\d)/Q$1/
- xform/^h(\d)/H$1/
- xform/^w(\d)/W$1/
- xform/^s(\d)/S$1/
- xform/^r(\d)/R$1/
- xform/^k(\d)/K$1/
- xform/^d(\d)/D$1/
- xform/^j(\d)/J$1/
- xform/^f(\d)/F$1/
- xform/^l(\d)/L$1/
# ㄓㄔㄕㄖㄗㄘㄙ的韻母省略,才是注音
- xform/^([avorzcs])i(\d)/$1$2/
# 到此,基本注音拼寫規則完成
# 設定簡拼(abbrev)、略拼(fuzz)
- fuzz/([A-Z])\d/$1/
- fuzz/([a-z])[a-z]*\d/$1/
- fuzz/([a-z])[a-z]*(\d)/$1$2/
translator:
strict_spelling: true
initial_quality: 0.99
dictionary: terra_pinyin
prism: abbreviated_bopomo
preedit_format:
- xlit/bpmfdtnlgkhjqxavorzwcseiuy/ㄅㄆㄇㄈㄉㄊㄋㄌㄍㄎㄏㄐㄑㄒㄓㄔㄕㄖㄗㄗㄘ厶◌ㄧㄨㄩ/
- xlit/IUYAOEQHWSRKDJFL12345/ㄧㄨㄩㄚㄛㄜㄝㄞㄟㄠㄡㄢㄣㄤㄥㄦˉˊˇˋ˙/
# 設定OpenCC字體轉換
s2tw: # 簡繁轉換
option_name: s2tw
opencc_config: s2tw.json
t2tw: # 正繁轉換
option_name: t2tw
opencc_config: t2tw.json
tips: char
tw2s: # 繁簡轉換
option_name: tw2s
opencc_config: tw2s.json
tips: all
punctuator:
import_preset: symbols
full_shape:
"<" : [ "<", "《", "〈", "«", "‹" ]
">" : [ ">", "》", "〉", "»", "›" ]
"$" : [ "NT$", "$", "€", "£", "¥", "¢", "¤" ]
half_shape:
"<" : [ "<", "《", "〈", "«", "‹" ]
">" : [ ">", "》", "〉", "»", "›" ]
"$" : [ "NT$", "$", "€", "£", "¥", "¢", "¤" ]
key_binder:
import_preset: default
recognizer:
import_preset: default
patterns:
email: "^[a-z][-_.0-9a-z]*@.*$"
punct: "^/([0-9]0?|[A-Za-z]+)$"
url: "^(www[.]|https?:|ftp:|mailto:).*$"