#!/usr/bin/env python
# encoding: utf-8
"""
extractDocs.py

Originally created by Kent Quirk on 02/06/14.
Copyright (c) 2014 Tidepool.org.
"""

import os
import re
import sys
import glob
import json
import fnmatch
import argparse
import datetime

# we use this global variable to maintain the current order value
g_cur_order = 1000


# Generates the documentation blocks found in a single file;
# textbody is the contents of the file.
def blockgen(textbody):
    blockpat = re.compile(r"/\*\*\*+(.*?)\s*\*+/", re.DOTALL)
    blocks = blockpat.findall(textbody)
    for block in blocks:
        yield block
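
# Illustrative input (a hypothetical source snippet, not taken from any real
# file): given text containing
#
#     /***
#      * Function: addUser(name, id)
#      * Adds a user to the store. ([order=1500])
#      */
#
# blockgen yields the text between the opening /*** and the closing */; the
# ([order=1500]) directive and the "Function:" label are handled by chunkgen
# below.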


def chunkgen(block, formatters):
    chunkpat = re.compile(r"[ *\t]* (?P<name>[A-Z][A-Za-z0-9_-]+):\s*(?P<body>.*)")
    directivepat = re.compile(r"\(\[(?P<directive>[a-z]+)\s*[=:]\s*(?P<value>.*)\]\)")
    id = ''
    body = []
    cur_formatter = None
    global g_cur_order
    for line in block.split('\n'):
        # first see if there's a directive on the line; if so, process it
        # print('order: %s line: "%s"' % (g_cur_order, line))
        directive = directivepat.search(line)
        if directive:
            if directive.groupdict()['directive'] == 'order':
                g_cur_order = int(directive.groupdict()['value'])
            # remove the directive from the output
            line = line[:directive.start()] + line[directive.end():]
        # now look for chunks
        m = chunkpat.match(line)
        if m:
            # if there's an id, we're already processing a chunk, so end it
            if id:
                yield id, cur_formatter, body
            # and start a new one
            id = m.groupdict()['name']
            body = [m.groupdict()['body']]
            g_cur_order = int(m.groupdict().get('order', g_cur_order + 1))
            cur_formatter = formatters.get(id, formatters['_default'])
        else:
            # this is a line in a block that is not the start of a chunk
            body.append(line)
            # if we have no id, we're at the start of a block with no chunk,
            # so set one up
            if not id:
                id = '_'
                cur_formatter = formatters.get(id, formatters['_default'])
                g_cur_order += 1
    # make sure we yield the last chunk in the block
    yield id, cur_formatter, body
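
# For the hypothetical block above, chunkgen would strip the ([order=1500])
# directive (setting g_cur_order to 1500) and yield roughly
#     ('Function', <format list for "Function">,
#      ['addUser(name, id)', ' * Adds a user to the store. '])
# preceded by a '_' chunk for any preamble text before the first label; the
# exact whitespace depends on the comment style of the source file.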


# Processes a single file: extract its blocks, split them into chunks, and
# render each chunk with its formatter.
def process(infile, formatters):
    global g_cur_order
    textbody = open(infile).read()
    result = {}
    print("************" + infile)
    for block in blockgen(textbody):
        for id, formatter, body in chunkgen(block, formatters):
            if body:
                text = ''
                for line in body:
                    newtext = None
                    # print('id: %s, line: "%s"' % (id, line))
                    found = False
                    for item in formatter:
                        m = re.search(item['pattern'], line)
                        if m:
                            d = dict(blockid=id)
                            d.update(m.groupdict())
                            newtext = item['formatter'] % d
                            # print("Formatter found: newtext='%s'" % newtext)
                            found = True
                            break
                    if newtext is None and line:
                        # print("No formatter found.")
                        newtext = '%s\n' % line
                    # if not found:
                    #     print("NOT FOUND '%s'" % newtext)
                    if newtext is not None:
                        text += newtext
                result[g_cur_order] = text
    return result
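
# With the hypothetical block above, process would return roughly
# {1500: <rendered "Function" chunk>}: a dict mapping order values to rendered
# text. main() later sorts the keys so chunks land in the output in order.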


# Look for .gitignore; this only looks in the current directory.
# Properly handling .gitignore in a parent directory is painful, so
# we'll leave that for some future time.
def find_ignorefile(p, ignorefile='.gitignore'):
    f = os.path.join(p, ignorefile)
    if os.path.exists(f):
        return f
    else:
        # sys.stderr.write('No %s found.\n' % ignorefile)
        return None


# Does the equivalent of a glob, except it walks directory trees and
# returns all the files that match.
def deep_glob(start, pat):
    matches = []
    for path, dirnames, filenames in os.walk(start):
        # print path, dirnames, filenames
        dirnames[:] = [d for d in dirnames if not d.startswith('.')]
        # print dirnames
        for filename in fnmatch.filter(filenames, pat):
            matches.append(os.path.join(path, filename))
    return matches
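
# Usage sketch (hypothetical paths): deep_glob('.', '*.js') returns every .js
# file anywhere under the current directory, skipping hidden directories such
# as .git because of the dirnames[:] filter above.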


# Reads .gitignore and .docignore if they exist and returns the set of
# patterns they contain, to be excluded from processing.
def get_exclude_patterns():
    ignorepats = set()
    for fname in ['.gitignore', '.docignore']:
        ignorepath = find_ignorefile(os.path.realpath('.'), ignorefile=fname)
        if ignorepath:
            ignorepats.update(open(ignorepath).read().split('\n'))
    return ignorepats


# Generates a set of files according to a bunch of patterns like the ones you
# see in .gitignore. It does not support the full syntax of .gitignore, but
# filenames, directory names, and file patterns with or without
# subdirectories are supported. ! lines are ignored, which in this
# use case won't matter. Every file returned has the full path.
def process_pats(pats):
    files = set()
    root = os.path.realpath('.')
    for i in pats:
        if (not i) or i.startswith('!'):
            continue
        if i.startswith('/'):
            i = os.path.join(root, i[1:])
        if '/' in i:
            p, f = os.path.split(i)
            files.update(deep_glob(os.path.join(root, p), f))
        elif os.path.isdir(i):
            files.update(deep_glob(os.path.join(root, i), '*'))
        else:
            files.update(deep_glob(root, i))
    return files
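
# Illustrative patterns (hypothetical): given {'build', 'lib/vendor.js',
# '*.min.js'}, process_pats returns every file under ./build, any vendor.js
# under ./lib, and every *.min.js anywhere under the current directory.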


# Does the real work -- generate a set of excludes and includes, and
# subtract one from the other.
def get_filelist(pats):
    excludes = process_pats(get_exclude_patterns())
    includes = process_pats(pats)
    result = includes.difference(excludes)
    # print result
    return result


def main(argv=None):
    parser = argparse.ArgumentParser(description='''
Reads a set of source files and extracts documentation.

The files explored are specified either on the command line with the --files
switch or in an options file. OS-style wildcards are supported.
Files listed in .gitignore or .docignore are always ignored (even if they
appear in the include list).

Files are searched for blocks between documentation markers. Documentation
blocks start with /*** (3 or more *s) and end with */ (1 or more stars).
All lines between these markers are extracted.

The text in blocks is searched for lines that start with a single word that
begins with a capital letter and ends with a colon (:). These lines define
chunks that begin after the colon and end with the last line before the next
chunk.
The starting word is a label that is used to look up a formatter; the chunk
is then passed through the formatter to produce a piece of output text, along
with sequence information.

The output text is then sent to the output file. Before being written to, the
output file is scanned for a special tag (by default, "# Docs"). If the tag is
found, everything that comes before it is preserved (and everything after it
is overwritten).

Formatters are defined in an options file, called "docoptions.json" by
default. The options file is a dictionary with several keys. Valid keys
include:
    output: The name of the output file
    files: An array containing filenames or filename patterns to process
    tag: The string used to mark the header in the output file
    formats: A dictionary defining formats (see below)

-- The formats dictionary --
Each format has as its key a label (the name of the label being matched,
without the colon), and as its value an array containing one or more
dictionaries, each with the keys pattern, formatter, and possibly order.
If the label specified by the chunk is not found, the format named "_default"
is used.
After the format for a chunk is found, each entry's pattern (a regular
expression) is tried in sequence against each line of the chunk; when the
first match is found, the corresponding formatter is used to render the
matched pattern.
The order property, if it exists, must be an integer, and is used to establish
the relative ordering of the output. If it does not exist, the default value
is one more than the previous entry's.
If the value corresponding to a label is a string instead of an array, it is
treated as an alias for that label.

Example:
    "Function": [{
        "pattern": "(?P<funcname>[A-Za-z0-9$_]+)\s*\((?P<args>[^\)]*)\)",
        "formatter": "* ```**%(funcname)s** (%(args)s)```"
    }],
    "Method": "Function"

Directives to the system can be embedded in comment blocks and have the form:
([keyword=value])
So far, the only directive is 'order'. If specified, the current value of
order is set to value at the point where it is evaluated. This can be used to
sequence a set of multiple files or to reorder items within a file. Order
starts at a value of 1000.

Any files or folders listed in .gitignore or in .docignore (same syntax)
will be ignored, even if specified on the command line or in the options file.

The output file defaults to docs.md, but can be overridden by an option or by
a command line switch.
''')
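
    # A minimal docoptions.json might look like this (a hypothetical sketch,
    # not shipped with the tool; note that backslashes in regex patterns must
    # be doubled for JSON):
    #
    #     {
    #         "output": "docs.md",
    #         "tag": "# Docs",
    #         "files": ["lib/*.js"],
    #         "formats": {
    #             "_default": [{"pattern": "(?P<text>.*)",
    #                           "formatter": "%(text)s\n"}],
    #             "Function": [{"pattern": "(?P<funcname>[A-Za-z0-9$_]+)\\s*\\((?P<args>[^)]*)\\)",
    #                           "formatter": "* `%(funcname)s(%(args)s)`\n"}],
    #             "Method": "Function"
    #         }
    #     }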

    parser.add_argument(dest='files', metavar='FILE', nargs='*',
                        help='A list of files to be processed. OS-style wildcards are allowed.')
    parser.add_argument("--output", "-O", dest="output", default="docs.md",
                        help="define the output file", metavar="OUTPUT")
    parser.add_argument("--options", "-o", dest="options", default='docoptions.json',
                        help="Specify the JSON file containing option definitions.",
                        metavar="OPTIONS")
    parser.add_argument("--tag", "-t", dest="tag", default='# Docs',
                        help="The output file is searched for this tag; if found, it and everything"
                             " before it is preserved.", metavar="TAG")
    parser.add_argument("--debug", "-d", dest="debug", default=False, action="store_true",
                        help="Set this if you're trying to debug your formatters; extra info"
                             " will be written to the output file.")
    args = parser.parse_args()

    app_path = os.path.split(sys.argv[0])[0]
    # look for the options file either in the current folder or, if that
    # fails, in the folder containing the .py file
    try:
        optfile = open(args.options)
    except IOError:
        try:
            optfile = open(os.path.join(app_path, args.options))
        except IOError:
            print("Couldn't find %s to load options." % args.options)
            return 1

    options = json.loads(optfile.read())
    formatters = options['formats']
    # if there are indirect formats (aliases), make them direct
    for f in formatters:
        if not isinstance(formatters[f], list):
            formatters[f] = formatters[formatters[f]]

    # load the files specified:
    # command line wins, then the options file -- or, if all else fails, just
    # read all the .js files
    if not args.files:
        if 'files' in options:
            args.files = options['files']
        else:
            args.files = ['*.js']
    files = get_filelist(args.files)

    # iterate over all the files and compile all the output data
    output = {}
    for f in files:
        if os.path.isfile(f):
            output.update(process(f, formatters=formatters))
    # import pprint
    # pprint.pprint(output)

    if 'tag' in options:
        args.tag = options['tag']
    # search for the tag in the output file; if it exists, write the prefix
    # to the output file before we do anything else
    try:
        outfilename = options['output']
    except KeyError:
        outfilename = args.output
    contents = ''
    try:
        filedata = open(outfilename).read()
        tagloc = filedata.index(args.tag)
        contents = filedata[:tagloc + len(args.tag)]
    except IOError:
        pass  # output hasn't been created yet
    except ValueError:
        pass  # tag wasn't found in the file
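
    # For example (hypothetical file contents): if docs.md already holds a
    # hand-written introduction followed by a "# Docs" line, contents now ends
    # at that line, and the generated chunks are rewritten below it each run.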
    outfile = open(outfilename, 'w')
    outfile.write(contents)
    for name in sorted(output.keys()):
        if args.debug:
            outfile.write('%s: %s' % (name, output[name]))
        else:
            outfile.write('%s' % output[name])
    outfile.close()
    return 0


if __name__ == "__main__":
    rv = main()
    if rv:
        sys.stderr.write("Failed. Use --help for full instructions.\n")
        sys.exit(rv)
    else:
        sys.exit(0)