-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSPContentTables.py
278 lines (250 loc) · 11.1 KB
/
SPContentTables.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# -*- coding: utf-8 -*-
# Copyright (c) 2010 Stas Zykiewicz <[email protected]>
#
# SPContentTables
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 3 of the GNU General Public License
# as published by the Free Software Foundation. A copy of this license should
# be included in the file GPL-3.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# This module parses the content xml files and turns them into sqlite tables.
# The xml files must be named like this: content_<name of the quiz>.xml
# For example the content file for the picture quiz, content_picture.xml
import os
import logging
import glob
import zlib
from .SPConstants import *
if NoGtk:
try:
from pysqlite2 import dbapi2 as sqlite3
except ImportError:
import sqlite3
else:
import sqlite3
try:
from xml.etree.ElementTree import ElementTree
except ImportError:
# try the python2.4 way
from elementtree.ElementTree import ElementTree
class ParserError(Exception):
pass
def check_tables(dbase):
"""Called by the core when it starts.
It checks if all the quiz content tables are in sync with the content xml files.
It checks for differences in the xml files by checking their adler32 checksums
against the sum in the adler32 sql table.
When the sums differ the xml is parsed and a new sql table is created.
When there's a xml file with no table it will be parsed and stored into a sql table.
"""
# first check the quizcontent dbase and add any missing table.
logger = logging.getLogger("childsplay.SPContentTables.check_tables")
logger.debug("Sync quiz content dbase: %s" % dbase)
contentdir = os.path.join(ACTIVITYDATADIR,'CPData','Quizcontent', '*', 'content_*.xml')
con = sqlite3.connect(dbase)
cursor = con.cursor()
cursor.execute(''' SELECT name FROM sqlite_master
WHERE type='table'
ORDER BY name;
''')
tl = [tn[0] for tn in cursor.fetchall()]
contentfiles = glob.glob(contentdir)
path_types = {}
for f in contentfiles:
lang = f.rsplit('/',2)[-2]
tp = os.path.basename(f).split('_')[1][:-4]
path_types[f] = tp + '_%s' % lang
for t in list(path_types.values()):
if t not in tl:
logger.debug("Creating table '%s'" % t)
# group is named _group as group is a reserved word in sqlite3
cursor.execute(''' CREATE TABLE %s
( id INTEGER PRIMARY KEY, content_id TEXT, content_type TEXT,
difficulty TEXT, question_text TEXT,
question_audio TEXT, answer TEXT, answer_audio TEXT,
wrong_answer1 TEXT, wrong_answer1_audio TEXT,
wrong_answer2 TEXT, wrong_answer2_audio TEXT,
wrong_answer3 TEXT, wrong_answer3_audio TEXT,
data TEXT, year TEXT, _group TEXT)
''' % t)
con.commit()
for p, pt in list(path_types.items()):
if pt == t:
break
if parse_xml(p, cursor, t):
con.commit()
if 'adler32' not in tl:
logger.debug("Creating table '%s'" % 'adler32')
cursor.execute(''' CREATE TABLE %s
( id INTEGER PRIMARY KEY, path TEXT, sum INTEGER, type TEXT)
''' % 'adler32')
con.commit()
# Now we know we have all the tables we gonna check the checksums
cursor.execute(''' SELECT * FROM adler32 ''')
rowlist = cursor.fetchall()
# check if we have rows
if not rowlist:
logger.debug("no rows found in adler32")
for p in contentfiles:
logger.debug("creating row for %s" % p)
data = open(p, 'r').read()
cs = zlib.adler32(data)
cursor.execute(''' INSERT INTO adler32 VALUES (NULL,?,?,?)''', (p, cs, path_types[p]))
logger.debug("created row for %s with adler %s" % (p, cs))
# As we don't had adler rows we assume we don't have correct content tables as well.
# remove content table
logger.debug("replacing table %s" % path_types[p])
cursor.execute('''DELETE FROM %s''' % path_types[p])
con.commit()
# parse content xml and stote it into the new content table
if parse_xml(p, cursor, path_types[p]):
con.commit()
# just to make sure
close_dbase(con)
return
else:
# if we have rows check that we have rows for each content file.
# The actual files present on disk are leading for this.
tp = [t[1] for t in rowlist]
for p in contentfiles:
if p not in tp:
data = open(p, 'r').read()
cs = zlib.adler32(data)
cursor.execute(''' INSERT INTO adler32 VALUES (NULL,?,?,?)''', (p, cs, path_types[p]))
logger.debug("created row for %s with adler %s" % (p, cs))
# renew rowlist
cursor.execute(''' SELECT * FROM adler32 ''')
rowlist = cursor.fetchall()
for row in rowlist:
p = row[1]
try:
data = open(p, 'r').read()
except IOError:
logger.warning("Quizcontent file not found: %s" % p)
logger.warning("replacing table %s" % p)
cursor.execute(''' DELETE FROM adler32 WHERE id=? ''', (row[0], ))
lang = p.rsplit('/',2)[-2]
tp = os.path.basename(p).split('_')[1][:-4]
ptype = tp + '_%s' % lang
cursor.execute('''DELETE FROM %s''' % ptype)
con.commit()
continue
cs = zlib.adler32(data)
logger.debug("checking data from %s with adler %s" % (p, cs))
#print type(cs), cs, type(row[2]), row[2]
if cs != row[2]:
logger.debug("checksum differ for %s" % p)
# first remove the row from adler32 table
cursor.execute(''' DELETE FROM adler32 WHERE id=? ''', (row[0], ))
# add a new one with correct sum
logger.debug("Added new row for %s" % row[1])
cursor.execute(''' INSERT INTO adler32 VALUES (NULL,?,?,?)''', (p, cs, path_types[p]))
con.commit()
# remove content table
logger.debug("replacing table %s" % path_types[p])
cursor.execute('''DELETE FROM %s''' % path_types[p])
con.commit()
# and parse content xml and stote it into the new content table
if parse_xml(p, cursor, path_types[p]):
con.commit()
# just to make sure
close_dbase(con)
def close_dbase(con):
logger = logging.getLogger("childsplay.SPContentTables.close_dbase")
logger.debug("Commiting transactions and closing dbase.")
con.commit()
con.close()
def parse_xml(xml, cursor, tname):
"""Parses the whole xml tree into a hash with lists. Each list contains hashes
with the elelements from a 'question' element.
When year is set only the entries that have a year element and whos value
is between 'year' and 'year'+9 is put into the hash.
This is used by the history activity."""
logger = logging.getLogger("childsplay.SPContentTables.parse_xml")
logger.debug("Starting to parse: %s" % xml)
# xml file:
# <?xml version="1.0" encoding="utf-8"?>
# <questions>
# <question id="2" type="text">
# <difficulty>1</difficulty>
# <question_text audio="">Wat is een Vlaamse reus?</question_text>
# <answer audio="">Een konijnensoort</answer>
# <data/><misc></misc>
# <wrong_answer1 audio="">Een trekpaard</wrong_answer1>
# <wrong_answer2 audio="">De bijnaam voor een opstandeling</wrong_
# answer2>
# <wrong_answer3 audio="">Een grote Belg</wrong_answer3>
# </question>
# .....
tree = ElementTree()
tree.parse(xml)
xml = {}
# here we start the parsing, the check if we have all the content is done
# in get_exercise.
# we don't do it here because of speed considerations.
index = 0
questions = tree.findall('question')
logger.debug("found %s questions in total" % len(questions))
for q in questions:
hash = {}
try:
hash['content_id'] = q.get('id')
hash['content_type'] = q.get('type')
e = q.find('difficulty')
hash['difficulty'] = e.text
e = q.find('data')
hash['data'] = e.text
e = q.find('group')
hash['_group'] = e.text
e = q.find('question_text')
hash['question_text'] = e.text
hash['question_audio'] = e.get('audio')
e = q.find('answer')
hash['answer'] = e.text
hash['answer_audio'] = e.get('audio')
e = q.find('wrong_answer1')
hash['wrong_answer1'] = e.text
hash['wrong_answer1_audio'] = e.get('audio')
e = q.find('wrong_answer2')
hash['wrong_answer2'] = e.text
hash['wrong_answer2_audio'] = e.get('audio')
e = q.find('wrong_answer3')
hash['wrong_answer3'] = e.text
hash['wrong_answer3_audio'] = e.get('audio')
try:
# year is optional
e = q.find('year')
hash['year'] = e.text
except:
hash['year'] = ''
except AttributeError as info:
logger.error("The content.xml is badly formed, missing element(s):%s,%s" % (info, e))
logger.error("question '%s' will be removed from the collection" % q.get('id'))
else:
index += 1
cursor.execute(''' INSERT INTO %s VALUES (\
NULL, :content_id, :content_type, :difficulty,\
:question_text, :question_audio, :answer,\
:answer_audio, :wrong_answer1,\
:wrong_answer1_audio, :wrong_answer2,\
:wrong_answer2_audio, :wrong_answer3,\
:wrong_answer3_audio, :data, :year, :_group\
) ''' % tname , hash)
logger.debug("Done parsing %s questions" % index)
return True
if __name__ == '__main__':
from . import SPLogging
SPLogging.set_level('debug')
SPLogging.start()
lang = 'nl'
dbase = os.path.join(HOMEDIR, 'quizcontent.db')
contentdir = os.path.join(ACTIVITYDATADIR,'CPData','Quizcontent', lang, 'content_*.xml')
check_tables(dbase, contentdir, lang)