-
Notifications
You must be signed in to change notification settings - Fork 6
/
convert-metadix-dix.py
114 lines (102 loc) · 3.13 KB
/
convert-metadix-dix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
#!/usr/bin/env python3
#
# Copyright (C) 2020 Jaume Ortolà <[email protected]>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, see <http://www.gnu.org/licenses/>.
#
import sys, re
import xml.etree.ElementTree as ET
def XMLtoString(x):
    """Serialize the element *x* as XML and return the result as a ``str``.

    ElementTree renders the element to UTF-8 encoded bytes, which are then
    decoded back into text.
    """
    return ET.tostring(x, encoding="UTF-8", method="xml").decode("UTF-8")
def word(e):
    """Return the word text carried by the entry element *e*.

    The text of the last direct ``<i>`` child is preferred.  When no ``<i>``
    child supplies text, the text of the ``<l>`` element inside the first
    ``<p>`` child is used instead.

    Returns:
        The text found, or an empty string when neither source yields any
        (including when ``<p>`` has no ``<l>`` child).
    """
    text = None
    # The original author noted that e.find("i") did not work here
    # (possible bug?), so the direct children are scanned explicitly.
    for part in e:
        if part.tag == "i":
            text = part.text
    if text is None:
        pair = e.find("p")
        if pair is not None:
            left = pair.find("l")
            # A <p> without an <l> child must not raise AttributeError.
            if left is not None:
                text = left.text
    return "" if text is None else text
def isMultiword(e):
    """Tell whether the entry element *e* is treated as a multiword entry.

    An entry counts as multiword when a direct ``<i>`` child contains a
    ``<b>`` element, or when the ``<l>`` or ``<r>`` child of its first
    ``<p>`` child contains a ``<g>`` or a ``<b>`` element.
    """
    # Any <i> child holding a <b> element marks the entry as multiword.
    if any(child.tag == "i" and child.find("b") is not None for child in e):
        return True

    pair = e.find("p")
    if pair is None:
        return False

    # Both sides of the pair are checked the same way: <g> or <b> inside.
    for side_tag in ("l", "r"):
        side = pair.find(side_tag)
        if side is None:
            continue
        if side.find("g") is not None or side.find("b") is not None:
            return True
    return False
# Script body: read the dictionary given as the first argument, add a
# reference to the matching "prefixes_*" paradigm to every non-multiword
# entry of the main section, and write the result to the second argument.
if len(sys.argv) < 3:
    sys.exit("Usage: {} SOURCE TARGET".format(sys.argv[0]))
source = sys.argv[1]
target = sys.argv[2]

tree = ET.ElementTree()
tree.parse(source)

# Collect the paradigms whose name starts with "prefixes_", keyed by the
# part of the name that follows that prefix (names with further underscores
# are left untouched by the substitution and are keyed by their full name).
pardefs = tree.find('pardefs')
if pardefs is None:
    sys.exit("No <pardefs> element found in {}".format(source))
prefixes = {}
for pardef in pardefs.iter(tag='pardef'):
    namepardef = pardef.get("n")
    # A <pardef> without an "n" attribute cannot name a prefix paradigm.
    if namepardef is not None and namepardef.startswith("prefixes_"):
        grammarclass = re.sub(r"prefixes_([^_]+)$", r"\1", namepardef)
        prefixes[grammarclass] = namepardef

mainsection = tree.find('.//section[@id="main"]')
if mainsection is None:
    sys.exit('No <section id="main"> element found in {}'.format(source))
for e in mainsection.iter(tag='e'):
    if isMultiword(e):
        continue

    # Locate the element whose "n" attribute identifies the entry: a direct
    # <par>, else the <s> inside <p>/<r>, else the <s> inside <i>.
    par = e.find('par')
    if par is None:
        p = e.find('p')
        if p is not None:
            right = p.find('r')
            # A <p> without an <r> child must not raise AttributeError.
            if right is not None:
                par = right.find('s')
    if par is None:
        i = e.find('i')
        if i is not None:
            par = i.find('s')
    if par is None:
        continue

    parname = par.get("n")
    if parname is None:
        # Nothing to compare against without an "n" attribute.
        continue

    # Exact matches are inserted first, then "__<class>" suffix matches;
    # each new <par> goes to the front of the entry, as in the original
    # two-pass order.
    for prefix, prefixtoadd in prefixes.items():
        if parname == prefix:
            new = ET.Element('par')
            new.set('n', prefixtoadd)
            e.insert(0, new)
    for prefix, prefixtoadd in prefixes.items():
        if parname.endswith("__" + prefix):
            new = ET.Element('par')
            new.set('n', prefixtoadd)
            e.insert(0, new)

# NOTE(review): write() is called with its default encoding, which the
# ElementTree documentation gives as US-ASCII, so non-ASCII characters are
# emitted as character references - confirm this is the intended output.
tree.write(target)