-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPPTools.py
642 lines (360 loc) · 17.8 KB
/
PPTools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
#!/usr/bin/python3
# The support routines for pretty printing Fritzing svg files (and possibly
# other xml as well.)
# Enable detail pretty printing of svg files. If you suspect the detail pretty
# printing is causing problems, set this to 'n' to disable detail pretty
# printing (and if that fixes it, please report the bug).
# Only the exact value 'Y' turns detail printing on: OutputTree compares this
# constant against 'Y', so any other value disables it.
DetailPP = 'Y'
Version = '0.0.2' # Version number of this file.
# Import os and sys to get file rename and the argv stuff, re for regex and
# logging to get logging support.
import os, sys, re, logging
# This library lets me write the lxml output to a string (which apparently
# can't be done from lxml) to pretty print it further than lxml does.
from io import BytesIO
# and the lxml library for the xml parsing.
from lxml import etree
# Try to suppress debug messages from PP, which is debugged already.
logging.basicConfig(stream=sys.stderr, level=logging.WARNING)
# Define a XML pretty printer function because the system supplied one doesn't
# pretty print the svg stuff correctly and this does (at least so far).
# Obtained from:
# http://effbot.org/zone/element-lib.htm#prettyprint
def Indent(Elem, Debug, Level=0):
    """Re-indent an element tree in place, one nesting level per call.

    Missing or whitespace-only ``text`` and ``tail`` values are replaced by
    a newline followed by padding proportional to ``Level``; text and tails
    that hold real content are left untouched.

    Elem  -- element to indent; it must support len(), iteration over its
             children and the ``text`` / ``tail`` attributes.
    Debug -- not used here, only handed on to the recursive calls.
    Level -- nesting depth of Elem (0 for the root).

    Returns None; the tree is modified in place.
    """
    logging.info (' Entering indent level %s\n', Level)

    def IsBlank(Value):
        # True for None, '' and whitespace-only strings.
        return not Value or not Value.strip()

    Pad = "\n" + Level*" "
    if len(Elem):
        # An element with children: its first child starts one step deeper
        # than the element itself.
        if IsBlank(Elem.text):
            Elem.text = Pad + " "
        if IsBlank(Elem.tail):
            Elem.tail = Pad
        Last = Elem
        for Child in Elem:
            Indent(Child, Debug, Level+1)
            Last = Child
        # The last child's tail sits directly in front of this element's
        # closing tag, so it is pulled back out to this element's depth.
        if IsBlank(Last.tail):
            Last.tail = Pad
    elif Level and IsBlank(Elem.tail):
        # A leaf below the root; a childless root (Level 0) is left alone.
        Elem.tail = Pad
    logging.info (' Exiting Indent level %s\n', Level)
# End of def Indent(Elem, Debug, Level=0):
# A splitting subroutine that will split only on blanks not in a quoted
# string (the only one in the thread that will do so without either eating
# the quotes or putting them in a separate item neither of which will do here)
# from:
#
# http://stackoverflow.com/questions/79968/split-a-string-by-spaces-preserving-quoted-substrings-in-python
#
# modified to also ignore spaces in text by matching '>.+?</text' as well as
# '".+?"' so it doesn't break text strings with blanks in them. As well it
# substitutes a blank in front of the '>.+?</text>' so that it will be split
# on to a new line by splitter, making it more readable. This works by
# first substituting '\x00' for the blanks we want to ignore (i.e. those in
# quoted strings and text), splitting on blanks, then substituting the '\x00'
# for a blank again after the split. Very clever!
def Splitter(S, Debug):
    """Split one line of serialized xml on the blanks that are safe to break at.

    Blanks inside double quoted strings, inside the content of text, tspan
    and referenceFile elements, and inside comments are kept.  This works by
    temporarily replacing those blanks with '\\x00', splitting on whitespace,
    then turning every '\\x00' back in to a blank.

    Before splitting, a blank is inserted in front of '>content</text>' and
    '>content</tspan>' and after a comment, so that the element content and
    whatever follows a comment become items of their own.

    S     -- the line to split (leading whitespace already removed by the
             caller in this file).
    Debug -- accepted for symmetry with the other routines; not used here.

    Returns the list of items; an empty or all-blank line gives [].
    """
    logging.info (' Entering Splitter\n')

    def Replacer(M):
        # Hide the blanks of a protected span from str.split().
        return M.group(0).replace(" ", "\x00")

    # Each substitution must work on the result of the previous one.  They
    # used to all read S, which threw the text and tspan results away.
    #
    # The lookbehind keeps the inserted blank out of '-->', '/>' and '?>':
    # a blank placed there would corrupt the markup when a comment, an empty
    # element or a processing instruction precedes the text on the same line.
    # The replacement strings are raw because '\g' is not a valid escape in
    # an ordinary string literal.
    T = re.sub(r'((?<![-/?])>.+?<\/text>)', r' \g<1>', S)
    # Do the same for tspan
    T = re.sub(r'((?<![-/?])>.+?<\/tspan>)', r' \g<1>', T)
    # Do this with a trailing blank for comments too, so the following
    # element ends up as a separate item.
    T = re.sub(r'(<!--.+?-->)', r'\g<1> ', T)
    # Now replace the blanks in quoted strings, text, comments and
    # referenceFile (all of which contain blanks we need to keep) with \x00
    # then split the string on blanks.  '".*?"' (rather than '".+?"') lets an
    # empty attribute value '""' match as a complete string; otherwise its
    # closing quote is taken as the opening quote of the next string and the
    # wrong blanks get protected.
    Parts = re.sub(r'".*?"|>.+?<\/text>|>.+?<\/tspan>|<!--.+?-->|referenceFile\s*>.+?</\s*referenceFile', Replacer, T).split()
    # Then substitute the \x00 for blanks again in all the resulting strings.
    Parts = [P.replace("\x00", " ") for P in Parts]
    logging.info (' Exiting Splitter\n')
    return Parts
# End of def Splitter(S, Debug):
def PrettyPrintElements(XmlIn, Errors, Debug):
    """Split the items of each element on to one line apiece.

    XmlIn is xml text that has already been pretty printed to the element
    level.  Each line is split with Splitter(); the first item (the opening
    tag) is written at the line's own indentation, every following item is
    written on its own line two spaces deeper, and a trailing '/>' is written
    last on a line of its own at the original indentation.  A line starting
    with '<?xml' is passed through unchanged.

    XmlIn  -- the xml text to reformat.
    Errors -- accepted for symmetry with the other routines; not used here.
    Debug  -- debug level; values above 2 enable the per line debug logging.

    Returns the reformatted text; every output line ends with a newline.
    """
    logging.info (' Entering PrettyPrintElements\n')
    # Since we are processing in a loop, compile our regexes for efficiency.
    # A regex to match the initial xml def if present
    XmlRegex = re.compile(r'^<\?xml')
    # A regex to match and save the leading whitespace
    LeadingWhiteSpaceRegex = re.compile(r'^(\s*)')
    # A regex to match the most common end tag.
    EndTagRegex = re.compile(r'\/>$')
    # Collect the output lines in a list and join them once at the end;
    # growing one big string line by line copies it over and over.
    OutLines = []
    for Line in XmlIn.split('\n'):
        # if this is the xml definitions line (the first line), pass it
        # through to the output unchanged.
        if XmlRegex.search(Line):
            if Debug > 2:
                logging.debug (' PrettyPrintElements: Pass xml def through\n')
            # End of if Debug > 2:
            OutLines.append(Line)
            continue
        # End of if XmlRegex.search(Line):
        # First get the leading whitespace (if any) and save it for later
        # use, then remove it from the line to leave only the elements.
        LeadingWhiteSpace = LeadingWhiteSpaceRegex.search(Line).group(1)
        Line = LeadingWhiteSpaceRegex.sub('', Line)
        # Use splitter to split only on blanks not inside a quoted string.
        Items = Splitter(Line, Debug)
        if Debug > 2:
            logging.debug (' PrettyPrintElements: Split line\n%s\n', Items)
        # End of if Debug > 2:
        if not Items:
            # Blank line: nothing to write (this is also what the empty
            # string after the final newline of XmlIn produces).
            if Debug > 2:
                logging.debug (' PrettyPrintElements: Error, blank field in line\n')
            # End of if Debug > 2:
            continue
        # End of if not Items:
        # The first item is the opening tag (or the only item on the line);
        # it keeps the line's indentation with nothing added.
        OutLines.append(LeadingWhiteSpace + Items[0])
        if len(Items) == 1:
            continue
        # End of if len(Items) == 1:
        Rest = Items[1:]
        # Look for a closing '/>' on the last item.  End tags of the form
        # </text> are deliberately left attached to their item, because
        # removing them breaks the text formatting.
        EndTag = None
        RegexMatch = EndTagRegex.search(Rest[-1])
        if RegexMatch is not None:
            EndTag = RegexMatch.group(0)
            if Debug > 2:
                logging.debug (' Last element Endtag\n%s\n', EndTag)
            # End of if Debug > 2:
            # Then remove the end tag from the list element.
            Rest[-1] = EndTagRegex.sub('', Rest[-1])
        else:
            if Debug > 2:
                logging.debug (' PrettyPrintElements: regex no match\n%s\n', Rest[-1])
            # End of if Debug > 2:
        # End of if RegexMatch is not None:
        if Debug > 2:
            logging.debug (' PrettyPrintElements: Modified last element\n%s\n', Rest[-1])
        # End of if Debug > 2:
        if not Rest[-1]:
            # The last item was a bare '/>'; don't emit a whitespace only
            # line for it.
            Rest.pop()
        # End of if not Rest[-1]:
        # Then write the remaining items with a 2 space indent, one to a
        # line.
        for Item in Rest:
            OutLines.append(LeadingWhiteSpace + '  ' + Item)
        # End of for Item in Rest:
        # Then, assuming we have an end tag (which we usually should), write
        # it without the 2 space indent.
        if EndTag is not None:
            OutLines.append(LeadingWhiteSpace + EndTag)
        # End of if EndTag is not None:
    # End of for Line in XmlIn.split('\n'):
    logging.info (' Exiting PrettyPrintElements\n')
    return ''.join(OutLine + '\n' for OutLine in OutLines)
# End of def PrettyPrintElements(XmlIn, Errors, Debug):
def _FormatOsError(Label, Err):
    # Build '<Label> <filename> <reason> (<errno>)' for the Errors list.
    # Every field goes through str() because filename, strerror and errno
    # can each be None on an OSError.
    return '{0:s} {1:s} {2:s} ({3:s})\n'.format(Label, str(Err.filename), str(Err.strerror), str(Err.errno))
# End of def _FormatOsError(Label, Err):
def OutputTree(Doc, Root, FileType, InFile, OutFile, Errors, Warnings, Info, Debug):
    """Pretty print the document and write it to a file or to the console.

    The tree under Root is indented in place, Doc is serialized with its xml
    declaration, and for FileType 'SVG' (when DetailPP is 'Y') the result is
    further split by PrettyPrintElements().

    When Debug is non zero the text is printed to the console, preceded by
    the name of the file it would have been written to, and no file is
    touched.  Otherwise the text is written to OutFile; if OutFile is None
    the input file is first renamed to InFile + '.bak' and the text is
    written under the original InFile name.

    Doc      -- the parsed document (needs write() and docinfo).
    Root     -- root element of Doc.
    FileType -- 'SVG' selects the detailed pretty print.
    InFile   -- name of the input file.
    OutFile  -- name of the output file, or None to replace InFile.
    Errors   -- list that receives 'Error 1' .. 'Error 4' messages when the
                rename, open, write or close fails.
    Warnings, Info -- accepted for symmetry; not used here.
    Debug    -- debug level, see above.

    Returns None.  File system failures are reported through Errors, not
    raised.
    """
    logging.info (' Entering OutputTree Debug %s\n', Debug)
    # Do a rough (to the element level) pretty print of the document.
    # (this is all we will do for an fpz file).  Indent works in place and
    # returns nothing, so there is no result to keep.
    Indent(Root, Debug)
    # lxml writes the properly formatted xml declaration only when writing
    # to a file like object, so serialize to a BytesIO and decode that.
    Encoding = Doc.docinfo.encoding
    Xml = BytesIO()
    Doc.write(Xml, xml_declaration=True, encoding=Encoding, standalone=Doc.docinfo.standalone)
    XmlIn = Xml.getvalue().decode(Encoding)
    if FileType == 'SVG' and DetailPP == 'Y':
        # If this is an svg file (as opposed to an fpz file) then do a
        # finer grained pretty print inside the elements as well.
        logging.debug(' OutputTree doing element level pretty print\n')
        XmlPP = PrettyPrintElements(XmlIn, Errors, Debug)
    else:
        # We aren't detail pretty printing so just copy input to output.
        XmlPP = XmlIn
    # End of if FileType == 'SVG' and DetailPP == 'Y':
    logging.debug(' OutputTree Start OutFile %s Debug %s\n', OutFile, Debug)
    if Debug != 0:
        # We are debugging so print the output to sysout rather than a file
        # and do not rename (or change) the input file but do print the
        # filename we would have used if sending it to a file (OutFile None
        # means the input filename).
        Target = InFile if OutFile is None else OutFile
        print ('Output would go to file:\n\n{0:s}\n\n'.format(Target))
        print (XmlPP)
        logging.info (' Exiting OutputTree\n')
        return
    # End of if Debug != 0:
    # Normal operation, if OutFile is None rename the input file to
    # filename.bak and write the xml to a new file with the original
    # filename.
    logging.debug(' OutputTree Normal operation OutFile %s Debug %s\n', OutFile, Debug)
    if OutFile is None:
        # rename the input file to .bak to preserve it, freeing the input
        # filename to accept our modified xml.
        try:
            os.rename (InFile, InFile + '.bak')
        except OSError as e:
            # Couldn't rename the file, can't proceed so record the error
            # and return.
            Errors.append(_FormatOsError('Error 1: Can not rename', e))
            return
        # End of try:
        # Set the output file to the Input file name (now renamed)
        OutFile = InFile
    # End of if OutFile is None:
    # now open the output file.  It is opened with the encoding named in the
    # xml declaration we just generated, so the bytes on disk agree with the
    # declaration whatever the platform's default encoding is.
    logging.debug(' OutputTree opening OutFile %s\n', OutFile)
    try:
        f = open(OutFile, 'w', encoding=Encoding)
    except OSError as e:
        logging.debug(' OutputTree open error %s\n', e.strerror)
        Errors.append(_FormatOsError('Error 2: Can not open', e))
    else:
        try:
            logging.debug(' OutputTree write\n')
            # and write the xml to it.
            f.write(XmlPP)
        except OSError as e:
            logging.debug(' OutputTree write error %s\n', e.strerror)
            Errors.append(_FormatOsError('Error 3: Can not write', e))
        finally:
            # Always close, even after a failed write, so the handle is not
            # leaked.
            try:
                logging.debug(' OutputTree close\n')
                f.close()
            except OSError as e:
                logging.debug(' OutputTree close error %s\n', e.strerror)
                Errors.append(_FormatOsError('Error 4: Can not close', e))
            # End of try f.close()
        # End of try f.write(XmlPP)
    # End of try open(OutFile, 'w')
    logging.info (' Exiting OutputTree\n')
# End of def OutputTree(Doc, Root, FileType, InFile, OutFile, Errors, Warnings, Info, Debug):
def ParseFile (File, Errors):
    """Parse an xml file and hand back the document and its root element.

    File   -- what to parse; passed straight to etree.parse().
    Errors -- list that receives 'Error 5' when the file can't be read, or
              'Error 6' followed by one entry per parser log record when the
              xml is malformed.

    Returns (Doc, Root) on success and (None, None) on either failure.
    """
    logging.info (' Entering ParseFile\n')
    try:
        # remove_blank_text drops ignorable whitespace while parsing.
        parser = etree.XMLParser(remove_blank_text=True)
        Doc = etree.parse(File, parser)
    except IOError:
        Errors.append("Error 5: ParseFile can't read file {0:s}\n".format(str(File)))
        logging.info (' Exiting ParseFile on no file error\n')
        return None, None
    except etree.XMLSyntaxError:
        Errors.append('Error 6: ParseFile error parsing the input xml file {0:s}\n'.format(str(File)))
        logging.debug (' Parse error, parser.error_log %s\n', parser.error_log)
        # Pass on everything the parser itself reported (nothing is added
        # when its log is empty).
        Errors.extend('{0:s}\n'.format(str(Problem)) for Problem in parser.error_log)
        logging.info (' Exiting ParseFile on parser error\n')
        # Doc is returned as None to indicate an error occurred.
        return None, None
    else:
        # The document parsed cleanly, so get its root.
        Root = Doc.getroot()
        logging.info (' Exiting ParseFile with doc parsed\n')
        return Doc, Root
    # End of try:
# End of def ParseFile (File, Errors):
def PrintInfo(Info):
    """Print each informational message in Info to the console, one per line.

    Nothing is printed when Info is empty.  Returns None.
    """
    logging.info (' Entering PrintInfo\n')
    if len(Info):
        # One message per line, in list order.
        print (*Info, sep='\n')
    # End of if len(Info):
    logging.info (' Exiting PrintInfo\n')
# end of def PrintInfo(Info):
def PrintWarnings(Warnings):
    """Print each warning message in Warnings to the console, one per line.

    Nothing is printed when Warnings is empty.  Returns None.
    """
    logging.info (' Entering PrintWarnings\n')
    Count = len(Warnings)
    Index = 0
    while Index < Count:
        # There are warnings so print them to the console.
        print (Warnings[Index])
        Index += 1
    # End of while Index < Count:
    logging.info (' Exiting PrintWarnings\n')
# End of def PrintWarnings(Warnings):
def PrintErrors(Errors):
    """Print each error message in Errors to the console, one per line.

    When there is at least one message, a spacer (print of a newline) is
    written first.  Nothing is printed when Errors is empty.  Returns None.
    """
    logging.info (' Entering PrintErrors\n')
    if len(Errors) > 0:
        # The spacer goes first, then the messages in list order.
        for Message in ['\n'] + list(Errors):
            print (Message)
        # End of for Message in ['\n'] + list(Errors):
    # End of if len(Errors) > 0:
    logging.info (' Exiting PrintErrors\n')
# End of def PrintErrors(Errors):
def ProcessArgs (Argv, Errors):
    """Collect the input files named on the command line.

    Every entry of sys.argv after the program name is checked with
    os.path.isfile(); entries that are files are returned in command line
    order, the others are reported in Errors ('Error 8') and skipped.  With
    no arguments at all a usage message ('Usage 7') is added to Errors.

    Argv   -- NOTE(review): accepted but never consulted; the command line
              is read from sys.argv directly.  Left that way so existing
              callers see no change.
    Errors -- list that receives the usage / not-a-file messages.

    Returns the (possibly empty) list of input file names.
    """
    logging.info (' Entering ProcessArgs\n')
    # Create an empty InFile list (in case of more than one input file)
    InFile = []
    if len(sys.argv) < 2:
        # No input file so record a usage message and return.
        Errors.append('Usage 7: {0:s} filename (filename ...)\n'.format(str(sys.argv[0])))
        return InFile
    # End of if len(sys.argv) < 2:
    # Skip the program name by position.  Comparing each entry's value with
    # sys.argv[0] also dropped any real argument that happened to be the
    # same string as the program name.
    for File in sys.argv[1:]:
        if os.path.isfile(File):
            InFile.append(File)
        else:
            # Input file isn't valid, note that, ignore it and proceed.
            Errors.append('Error 8: {0:s} isn\'t a file: ignored\n'.format(str(File)))
        # End of if os.path.isfile(File):
    # End of for File in sys.argv[1:]:
    logging.debug (' End of ProcessArgs return InFile %s\n', InFile)
    logging.info (' Exiting ProcessArgs\n')
    return InFile
# End of def ProcessArgs (Argv, Errors):