forked from MilowSa/pbdl-book
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjson-cleanup-for-pdf.py
128 lines (102 loc) · 3.98 KB
/
json-cleanup-for-pdf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import sys, json, re, os
# usage: json-cleanup-for-pdf.py <int>
# if int>0, disable PDF mode (only do WWW cleanup, note metadata.name still needs to be cleaned up manually)


def parse_pdf_mode(argv):
    """Decide from the command line whether PDF mode is active.

    argv is the full argument list, with the program name at index 0.
    Returns True (PDF mode, the default) when no argument is given or when
    the first argument is an integer <= 0. Returns False (WWW-only cleanup)
    and prints "WWW mode on" when the first argument is an integer > 0.

    Raises SystemExit with a usage message when the first argument is not
    an integer, instead of failing with a bare ValueError traceback.
    """
    if len(argv) <= 1:
        return True
    try:
        www_only = int(argv[1]) > 0
    except ValueError:
        # A string passed to sys.exit is printed to stderr and yields exit status 1.
        sys.exit(
            "usage: json-cleanup-for-pdf.py <int>  (expected an integer, got "
            + repr(argv[1]) + ")"
        )
    if www_only:
        print("WWW mode on")
        return False
    return True


# PDF mode unless the command line says otherwise. The command line is only
# consulted when the file is executed as a script.
pdfMode = True
if __name__ == "__main__":
    print(format(sys.argv))
    pdfMode = parse_pdf_mode(sys.argv)
# Notebooks to clean up, in processing order; the group comments name the
# framework label each entry carried in the original list.
fileList = [
    # TF
    "diffphys-code-burgers.ipynb",
    "diffphys-code-ns.ipynb",
    "diffphys-code-sol.ipynb",
    "physicalloss-code.ipynb",
    # pytorch
    "bayesian-code.ipynb",
    "supervised-airfoils.ipynb",
    # phiflow
    "reinflearn-code.ipynb",
]
# Uncomment one of the following to process a single notebook while debugging:
#fileList = [ "diffphys-code-burgers.ipynb"] # debug, only 1 file
#fileList = [ "diffphys-code-ns.ipynb"] # debug, only 1 file
# main

# remove TF / pytorch warnings: a stream-output line matching any of these
# patterns is dropped. Compiled once here instead of once per notebook.
NOISE_PATTERNS = [
    re.compile(r"WARNING:tensorflow:"),
    re.compile(r"UserWarning:"),
    re.compile(r"DeprecationWarning:"),
    re.compile(r"InsecureRequestWarning"),  # for https download
    re.compile(r"Building wheel"),  # phiflow install, also gives weird unicode characters
    re.compile(r"warnings.warn"),  # phiflow warnings
]

# shorten data line: "0.008612174447657694, 0.02584669669548606, 0.043136357266407785"
LONG_DATA_RE = re.compile(r"\[0.008612174447657694, 0.02584669669548606, 0.043136357266407785.+\]")
LONG_DATA_SHORT = "[0.008612174447657694, 0.02584669669548606, 0.043136357266407785 ... ]"


def next_backup_name(fnOut):
    """Return the first unused backup file name for notebook fnOut.

    The last five characters of fnOut (the "ipynb" of the extension) are
    replaced by "bak" plus a counter: "x.ipynb" -> "x.bak0", "x.bak1", ...
    A warning is printed once for every candidate that already exists.
    """
    fn0 = fnOut[:-5] + "bak"
    cnt = 0
    fn = fn0 + format(cnt)
    while os.path.isfile(fn):
        print("Warning: "+fn+" already exists!")
        # Advance before building the next candidate so no name is tested twice.
        cnt += 1
        fn = fn0 + format(cnt)
    return fn


def clean_notebook(d, pdf_mode):
    """Strip noise from the code cells of a loaded notebook dict, in place.

    For every cell whose "cell_type" is "code":
      * if pdf_mode is true, the long number list matched by LONG_DATA_RE is
        shortened in the cell source (only relevant for the burgers notebook);
      * in every output of type "stream", text lines matching one of
        NOISE_PATTERNS are removed.

    "source" and "text" are accepted both as a list of lines and as a single
    string (the notebook format permits either for multi-line fields).

    Returns a tuple (okay, deletes): the number of stream lines kept, and the
    number of changes made (stream lines removed plus source substitutions).
    Raises KeyError if a cell or output lacks one of the keys read here.
    """
    okay = 0
    deletes = 0
    for cell in d["cells"]:
        if cell["cell_type"] != "code":
            continue

        if pdf_mode:
            source = cell["source"]
            if isinstance(source, str):
                # ".+" does not cross newlines, so this still works line by line.
                cell["source"], hits = LONG_DATA_RE.subn(LONG_DATA_SHORT, source)
                deletes += hits
            else:
                for j, line in enumerate(source):
                    source[j], hits = LONG_DATA_RE.subn(LONG_DATA_SHORT, line)
                    # Count real substitutions only, so an untouched notebook
                    # is still reported as "nothing found" by the caller.
                    deletes += hits

        for out in cell["outputs"]:
            if out["output_type"] != "stream":
                continue
            text = out["text"]
            single_string = isinstance(text, str)
            lines = text.splitlines(keepends=True) if single_string else text
            kept = [
                line for line in lines
                if not any(pattern.search(line) for pattern in NOISE_PATTERNS)
            ]
            okay += len(kept)
            deletes += len(lines) - len(kept)
            if single_string:
                out["text"] = "".join(kept)
            else:
                text[:] = kept  # keep the same list object, as before
    return okay, deletes


def process_notebook(fnOut, pdf_mode):
    """Back up notebook fnOut, clean it, and write the result back to fnOut.

    The notebook is first renamed to a fresh backup name. If cleaning changed
    nothing, the backup is renamed back to fnOut; otherwise the cleaned
    notebook is written to fnOut and the backup is kept.

    Exits with status 1 if the backup is missing after the rename step, or if
    both the backup and fnOut exist when the backup should be restored.
    """
    # create backups
    fn = next_backup_name(fnOut)
    print("renaming "+fnOut+ " to "+fn )
    if os.path.isfile(fnOut):
        os.rename(fnOut, fn)
    if not os.path.isfile(fn):
        print("Error: "+fn+" missing!")
        sys.exit(1)

    # Read as UTF-8 explicitly so the platform default encoding is not used.
    with open(fn, encoding="utf-8") as file:
        d = json.load(file)

    _, deletes = clean_notebook(d, pdf_mode)

    if deletes == 0:
        print("Warning: Nothing found in "+fn+"!")
        if not os.path.isfile(fnOut):
            os.rename(fn, fnOut)
        else:
            print("Error, both files exist!?")
            sys.exit(1)
    else:
        print(" ... writing "+fnOut )
        with open(fnOut, 'w', encoding="utf-8") as fileOut:
            json.dump(d, fileOut, indent=1, sort_keys=True)


def main():
    """Clean every notebook in fileList using the module-level pdfMode flag."""
    for fnOut in fileList:
        process_notebook(fnOut, pdfMode)


if __name__ == "__main__":
    main()