forked from rhizomedotorg/classic.rhizome.org
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path__init__.py
executable file
·769 lines (658 loc) · 24.6 KB
/
__init__.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
"""
BB Code parser by Jonas 'Ojii' Obrist (c) 2009
USAGE:
Parsing:
parsed, errors = bbcode.parse(content, strict=True)
This might raise a bbcode.ParserError if strict is True. Otherwise (strict=False,
the default) on a ParserError the content is returned unparsed and errors
contains the reason.
Validation:
errors = bbcode.validate(content)
Returns errors caused by parsing the code or an empty sequence.
Extending:
Subclassing bbcode.TagNode and registering the class with bbcode.register adds
new BB Code Tags. Each node must have an opening and closing pattern
(open_pattern, close_pattern) and push, pushed, pull and close methods. For
further information read the docstrings of the TagNode class.
"""
import re
import cgi
try:
from django.utils.translation import ugettext as _
except ImportError:
_ = lambda x: x
# Module-level flag: autodiscover() returns early when this is True and sets
# it to True once it has scanned the installed apps.
AUTODISCOVERED = False
# Matches a "blank line" gap: two newlines separated only by optional
# whitespace. Written as a raw string so that ``\s`` is passed to the regex
# engine instead of being an invalid string escape.
LINEFEED_PATTERN = re.compile(r'\n\s*\n', re.MULTILINE)


def convert_linefeeds(content):
    """
    Convert the newlines in ``content`` to HTML line breaks.

    Every span matched by LINEFEED_PATTERN becomes ``<br /><br />``; each
    newline that is left afterwards becomes a single ``<br />``.

    Returns the converted string.
    """
    content = LINEFEED_PATTERN.sub('<br /><br />', content)
    return content.replace('\n', '<br />')
class UnmatchablePseudoPattern(object):
    """
    A class which looks like a compiled regular expression but never matches.

    ``match`` and ``search`` return ``None`` (as a real compiled pattern does
    when nothing matches), so callers may test the result either by
    truthiness or with ``is None``.
    """

    def match(self, content):
        """Never matches: always returns None."""
        return None

    def search(self, content):
        """Never finds anything: always returns None."""
        return None

    def finditer(self, content):
        """Return an iterator that yields no matches."""
        return iter([])

    def sub(self, replacement, content):
        """Nothing can match, so ``content`` is returned unchanged."""
        return content
class patterns:
    """
    Namespace holding the regular expression building blocks for BBCode tags.

    The string templates contain a ``%s`` placeholder for the tag name;
    ``unmatchable`` is a ready-made pattern object that never matches.
    """
    # Opening tags
    no_argument = r'\[%s\]'
    single_argument = r'\[%s(\]|="?(?P<argument>[^\]]+)"?\])'
    self_closing_tag = r'\[%s\s*/\]'

    # One optional ' key=value' pair
    argument = r'( (\w+)=([^\] ]+))?'

    # Closing tags
    closing = r'\[/%s\]'
    unmatchable = UnmatchablePseudoPattern()
def get_tag_name(klass):
    """
    Return the tag name for a tag class.

    An explicit ``tagname`` attribute wins; otherwise the lower-cased class
    name is used.
    """
    if hasattr(klass, 'tagname'):
        return klass.tagname
    return klass.__name__.lower()
class NeedsSubclassingError(Exception):
    """Raised by placeholder methods that a subclass is expected to override."""
class ParserError(Exception):
    """Raised when a BBCode parse tree cannot be built from the content."""
class SoftException(object):
    """
    A recorded, non-fatal parser problem: a line number plus a message.
    """

    def __init__(self, lineno, message):
        self.message = message
        self.lineno = lineno

    def __str__(self):
        """Render the line number and message as two HTML spans."""
        template = ('<span class="bbcode-error lineno">Line %s:</span> '
                    '<span class="bbcode-error message">%s</span>')
        return template % (self.lineno, self.message)

    __unicode__ = __str__
class SoftExceptionManager(object):
    """
    Collects 'soft exceptions': problems that do not interrupt the flow of
    the code but are stored in a list so they can be reported to the user
    later.
    """

    def __init__(self):
        self.line_number = 1
        self.exceptions = []

    def set_line_number(self, number):
        """
        Remember ``number`` as the line that subsequently raised soft
        exceptions are attributed to.
        """
        self.line_number = number

    def soft_raise(self, exception):
        """
        Record a soft exception for the current line number.

        The message is passed through ``_`` first (Django's ugettext when it
        is importable, otherwise an identity function).
        """
        translated = _(exception)
        self.exceptions.append(SoftException(self.line_number, translated))

    def pull(self):
        """
        Return every exception recorded since initialization or the last
        pull, and reset the internal list.
        """
        collected, self.exceptions = self.exceptions, []
        return collected
# Module-wide manager that collects the soft exceptions raised while parsing.
sem = SoftExceptionManager()
# Module-level shortcut bound to the shared manager's soft_raise method.
soft_raise = sem.soft_raise
class VariableScope(dict):
    """
    Mapping of variable names to values that can be substituted into text
    as ``$name$`` placeholders.
    """

    def add(self, name, value):
        """Store ``value`` under ``name``; both are converted with str()."""
        key = str(name)
        dict.__setitem__(self, key, str(value))

    def resolve(self, context):
        """
        Strip surrounding double quotes from ``context`` and replace every
        ``$name$`` placeholder with the value stored for ``name``.
        """
        resolved = context.strip('"')
        for var, value in dict.items(self):
            placeholder = '$%s$' % var
            resolved = resolved.replace(placeholder, value)
        return resolved

    def lazy_resolve(self, context):
        """
        Return a proxy for ``context`` that is only resolved when it is
        converted with int() or when an attribute is looked up on it.
        """
        # NOTE(review): Lazy is kept without an explicit base class. Under
        # Python 2 that makes it an old-style class, whose implicit
        # special-method lookups (e.g. for str()) also fall through to
        # __getattr__ -- confirm nothing relies on that before adding
        # ``(object)``.
        class Lazy:
            def __init__(self, resolver, context):
                self.resolver = resolver
                self.context = context

            def __int__(self):
                # Resolve (again) and convert the resolved text.
                self.context = self.resolver(self.context)
                return int(self.context)

            def __getattr__(self, attr):
                # Only reached for attributes Lazy itself does not have:
                # resolve, then delegate to the resolved string.
                self.context = self.resolver(self.context)
                return getattr(self.context, attr)

        return Lazy(self.resolve, context)
class Node(object):
    """
    This is the baseclass for all objects in a BBCode Parse Tree.

    To understand Nodes it is important to understand the Tree.
    Each Parse Tree has one, and only one, head node. This node has child
    nodes and those children have child nodes themselves. This continues
    until there are no more child nodes. In a standard Parse Tree the last
    leaves of a branch are instances of TextNode, however since empty
    TextNodes are not kept in the Tree, they might also be missing.

    When the Parse Tree is generated the nodes get 'pushed', 'appended',
    'pulled' and 'closed'. Only TextNodes can be appended to a node's
    nodelist. When a new child node is found it is 'pushed' and becomes the
    current node. When a node cannot be closed correctly it is 'pulled' and
    its parent becomes the current node again. When a node is finished
    parsing it is 'closed', which normally returns the parent.
    """
    name = 'node'
    is_text_node = False

    def __init__(self, parent, match, fullcontent, context=None):
        """
        Normal nodes take their parent node as first argument, the regular
        expression match of their opening tag as second argument and the
        full content being parsed as third argument. ``context`` is an
        optional, opaque value handed down to child nodes.
        """
        self.start = match.start()
        self.fullcontent = fullcontent
        self.raw_content = ''
        self.parent = parent
        self.match = match
        self.nodes = []
        self.context = context  # for django only
        # Share the parent's variable scope (the same object, not a copy).
        self.variables = parent.variables

    def soft_raise(self, errmsg):
        """
        Record ``errmsg`` as a soft exception and return this node's raw
        content.
        """
        soft_raise(errmsg)
        return self.raw_content

    def append(self, text):
        """
        Adds a text node to the node
        """
        self.nodes.append(TextNode(self, text))

    def push(self, nodeklass, match, fullcontent):
        """
        Adds a nested tag node and returns whatever that node's ``pushed``
        method returns (normally the new node itself).
        """
        node = nodeklass(self, match, fullcontent, self.context)
        self.nodes.append(node)
        return node.pushed()

    def pushed(self):
        """
        Normal Nodes return themselves when being pushed. Self closing nodes
        can overwrite this method to handle this in another fashion.
        """
        return self

    def pull(self, end):
        """
        Give up on this node and return the parent.

        ``end`` is the position up to which the unparsed content would be
        handed to the parent.
        """
        # NOTE(review): TextNode's constructor takes (parent, text), but only
        # the text is passed here, so this call raises TypeError and nothing
        # is appended to the parent; the handler below then swallows it. The
        # call is left as it was because the pulled node already sits in the
        # parent's node list; confirm the intended behavior before changing
        # it.
        #
        # The handler is best-effort (it was added so that errors can be
        # returned to the form), but it no longer swallows KeyboardInterrupt
        # or SystemExit.
        try:
            self.parent.nodes.append(TextNode(self.fullcontent[self.start:end]))
        except Exception:
            pass
        return self.parent

    def close(self, end):
        """
        Close the node: remember ``end``, store the raw content between the
        node's start and ``end``, and return the parent.
        """
        self.end = end
        self.raw_content = self.fullcontent[self.start:end]
        return self.parent

    def parse(self):
        """
        Parses the node. This is also responsible to parse child nodes. Should
        return a string and fail silently.

        Must be implemented by subclasses; the base implementation raises
        NeedsSubclassingError.
        """
        raise NeedsSubclassingError
class HeadNode(Node):
    """
    The head node of the BBCode parse tree.

    It has no parent and no match: it only holds the raw content, the
    top-level child nodes, the optional context and a fresh VariableScope
    that the nodes below it share.
    """
    name = 'head'

    def __init__(self, raw_content, context=None):
        self.raw_content = raw_content
        self.nodes = []
        self.context = context
        self.variables = VariableScope()

    def pull(self, end):
        """The head node cannot be pulled; always raises ParserError."""
        raise ParserError("Cannot pull from headnode, invalid BBCode Tree")

    def close(self, end):
        """The head node cannot be closed; always raises ParserError."""
        raise ParserError("Cannot close headnode, invalid BBCode Tree")

    def parse(self):
        """Parse every child node and return the concatenated result."""
        return ''.join(node.parse() for node in self.nodes)
class TextNode(Node):
    """
    A leaf of the parse tree holding plain text.

    Text nodes cannot have children and cannot be pushed to, pulled or
    closed; each of those operations raises TypeError.
    """
    # Raw string so that ``\w`` reaches the regex engine unchanged.
    smilie_pattern = re.compile(r':(?P<name>\w+):')
    is_text_node = True

    def __init__(self, parent, text):
        self.text = text
        self.variables = parent.variables
        self.parent = parent
        self.raw_content = text
        self.nodes = []

    def append(self, text):
        raise TypeError("TextNode does not support appending")

    def push(self, node, match=None, fullcontent=None):
        # Accepts the same arguments as Node.push so that a call written
        # against the base-class signature reaches this explicit message
        # instead of failing on the argument count.
        raise TypeError("TextNode does not support pushing")

    def pull(self, end):
        raise TypeError("TextNode does not support pulling")

    def close(self, end):
        raise TypeError("TextNode does not support closing")

    def __repr__(self):
        return '<TextNode instance "%s">' % self.text

    def parse(self):
        """
        Resolve ``$name$`` variables in the text and return the result
        cgi-escaped.
        """
        return cgi.escape(self.variables.resolve(self.text))

    def __str__(self):
        return 'TextNode: %r' % self.text
class TagNode(Node):
    """
    Base class for BBCode tags.

    Subclasses provide ``open_pattern`` and ``close_pattern``; the
    placeholders defined here raise NeedsSubclassingError when called.
    """

    @staticmethod
    def open_pattern():
        raise NeedsSubclassingError

    @staticmethod
    def close_pattern():
        raise NeedsSubclassingError

    def parse_inner(self):
        """
        Shortcut: parse all child nodes and return their combined output.
        """
        return ''.join(child.parse() for child in self.nodes)

    def __str__(self):
        return type(self).__name__
class ReplaceTagNode(TagNode):
    """
    A specialized TagNode subclass with a predefined parse method. It allows
    easy creation of simple bbcode - html replacement tags. [tag] becomes <tag>
    and [/tag] becomes </tag>. These tags do not take any arguments and parse
    all inner content.

    If the class has no explicit 'tagname' attribute, the lowered class name
    is used as tagname.
    """

    def __init__(self, parent, match, content, context):
        """
        Set the tag name from the class name when none was declared, then
        initialize as a regular TagNode.
        """
        if not hasattr(self, 'tagname'):
            derived = self.__class__.__name__
            self.tagname = derived.lower()
        TagNode.__init__(self, parent, match, content, context)

    def parse(self):
        """Wrap the parsed inner content in ``<tagname>...</tagname>``."""
        tag = self.tagname
        inner = self.parse_inner()
        return '<%s>%s</%s>' % (tag, inner, tag)

    def __str__(self):
        return 'ReplaceTagNode: %s' % self.__class__.__name__
class ArgumentTagNode(TagNode):
    """
    TagNode which takes one (or no) argument. Open pattern must have a named
    group 'argument'.
    """

    def __init__(self, parent, match, content, context):
        TagNode.__init__(self, parent, match, content, context)
        raw = match.group('argument')
        if raw:
            # Drop surrounding double quotes from the captured argument.
            raw = raw.strip('"')
        else:
            raw = ''
        # Variable substitution is deferred until the argument is used.
        self.argument = self.variables.lazy_resolve(raw)

    def __str__(self):
        label = self.__class__.__name__
        return '%s (%s)' % (label, self.argument)
class _MultiArgs(dict):
"""
Dictionary-like class which allows items to be accessed via attributes.
"""
def __getattr__(self, attr):
return dict.__getitem__(self, attr)
class MultiArgumentTagNode(TagNode):
    """
    TagNode which takes multiple (or no) arguments. Must have an attribute
    _arguments which holds key, value pairs of the arguments and their defaults.
    Open pattern should use bbcode.patterns.argument as argument matching
    expression.
    """
    _arguments = []

    def __init__(self, parent, match, content, context):
        """
        Collect the arguments given in the opening tag on top of the
        defaults from ``_arguments`` and expose them as ``self.arguments``.
        """
        TagNode.__init__(self, parent, match, content, context)
        kwargs = dict(self._arguments)
        # Groups of arguments that were not given are empty/None; drop them.
        # What is left is expected to come in triples of
        # (' key=value', 'key', 'value').
        present = [group for group in match.groups() if group]
        # The key is read from the same filtered list the value came from.
        # Reading it from the unfiltered groups picks the wrong (or a None)
        # key as soon as an earlier optional argument is omitted.
        for index in range(2, len(present), 3):
            key = present[index - 1]
            kwargs[key] = self.variables.lazy_resolve(present[index])
        self.arguments = _MultiArgs(kwargs)

    def __str__(self):
        args = ['%s: %s' % (key, value)
                for key, value in self.arguments.items()]
        return '%s (%s)' % (self.__class__.__name__, ', '.join(args))
class SelfClosingTagNode(TagNode):
    """
    A tag which closes itself: it has no closing tag and no children.
    """
    # There is no closing tag, so the close pattern never matches.
    close_pattern = patterns.unmatchable

    def __init__(self, parent, match, content, context):
        begin = match.start()
        self.parent = parent
        self.match = match
        self.context = context
        self.start = begin
        self.fullcontent = content
        # The raw content is exactly the text of the tag itself.
        self.raw_content = content[begin:match.end()]
        self.nodes = []
        self.variables = parent.variables

    def pushed(self):
        """
        A self closing node returns its parent. Thus it will never have child
        nodes!
        """
        return self.parent

    def __str__(self):
        return 'SelfClosingTag: %s' % self.__class__.__name__
class AutoDict(dict):
    """
    Dictionary that fills in a default on item access.

    Looking up a missing key with ``d[key]`` stores a default under that key
    and returns it. If ``default_thing`` is callable it is called to produce
    the default, otherwise it is used as the default itself.
    """

    def __init__(self, default_thing=set, *args, **kwargs):
        self.__default_thing = default_thing
        dict.__init__(self, *args, **kwargs)

    def __getitem__(self, item):
        if not dict.__contains__(self, item):
            default = self.__default_thing
            if callable(default):
                default = default()
            dict.__setitem__(self, item, default)
        return dict.__getitem__(self, item)
class Library(object):
    """
    The core of the BBCode parser. Keeps track of all registered bbcode
    tags, builds BBCode Parse Trees and generates the automated help.
    """
    name_pat1 = re.compile('([a-z0-9])([A-Z])')
    name_pat2 = re.compile('(.)([A-Z][a-z]+)')

    def __init__(self):
        # tagname -> {'docs': ..., 'name': ..., 'class': ...}
        self.names = AutoDict(None)
        # class __name__ -> tag class
        self.raw_names = {}
        # namespace -> set of tag classes
        self.tags = AutoDict(set)
        # tag class -> the same dict that is stored in self.names
        self.klasses = AutoDict(None)

    def convert(self, name):
        """
        Convert a class name to something a bit more readable by inserting
        spaces at the word boundaries of a CamelCase name.
        """
        spaced = self.name_pat2.sub(r'\1 \2', name)
        return self.name_pat1.sub(r'\1 \2', spaced)

    def dsparse(self, docs):
        """
        Parse docstrings: run ``docs`` through the non-strict BBCode parser
        and return the parsed content (the errors are discarded).
        """
        content, errors = parse(docs, strict=False, auto_discover=True)
        return content

    def get_default_namespaces(self, klass):
        """
        Return the namespaces a tag class gets automatically.

        These are the last part of the class' module path, the third-last
        part of it and the lowered class name. When the module path has
        fewer than three parts the third-last part is left out instead of
        raising IndexError.
        """
        bits = klass.__module__.split('.')
        if len(bits) < 3:
            return (bits[-1], klass.__name__.lower())
        return (bits[-1], bits[-3], klass.__name__.lower())

    def register(self, klass):
        """
        Register a BBCode Tag Node.

        The class is added to the namespaces it declares, to '__all__'
        (unless it sets a true ``not_in_all``) and to its default
        namespaces. If the class has a docstring, its help entry is
        recorded as well.
        """
        # Add the class to its declared namespaces.
        declared = list(getattr(klass, 'namespaces', ()))
        for ns in declared:
            self.tags[ns].add(klass)
        if not getattr(klass, 'not_in_all', False):
            self.tags['__all__'].add(klass)
        d_namespaces = self.get_default_namespaces(klass)
        for default in d_namespaces:
            self.tags[default].add(klass)
        # Store "defaults first, then declared" as a new list on the class
        # itself. Inserting into an existing list in place would also
        # change the list of a parent class when ``namespaces`` is
        # inherited.
        klass.namespaces = list(d_namespaces) + declared
        # Register documentation
        docstrings = klass.__doc__
        tagname = get_tag_name(klass)
        if docstrings:
            if hasattr(klass, 'verbose_name'):
                verbose_name = klass.verbose_name
            else:
                verbose_name = self.convert(klass.__name__)
            self.names[tagname] = {'docs': docstrings.strip(),
                                   'name': verbose_name,
                                   'class': klass}
            self.klasses[klass] = self.names[tagname]
        self.raw_names[klass.__name__] = klass

    def add_namespace(self, klass, *namespaces):
        """
        Add a tag to a namespace or several namespaces.

        ``klass`` is either a TagNode subclass, or a string naming a
        registered tag by class name or by tag name. Anything else is
        ignored.
        """
        # Tags are registered as classes, so this has to be a subclass
        # check; an isinstance() check never matches a class.
        if isinstance(klass, type) and issubclass(klass, TagNode):
            for namespace in namespaces:
                self.tags[namespace].add(klass)
        elif isinstance(klass, basestring):
            if klass in self.raw_names:
                self.add_namespace(self.raw_names[klass], *namespaces)
            else:
                # .get() avoids inserting a default for unknown names.
                entry = self.names.get(klass)
                if entry is not None:
                    self.add_namespace(entry['class'], *namespaces)

    def remove_namespace(self, klass, *namespaces):
        """
        Remove a tag from a namespace or several namespaces.

        ``klass`` is either a TagNode subclass, or a string naming a
        registered tag by class name or by tag name. Anything else is
        ignored.
        """
        if isinstance(klass, type) and issubclass(klass, TagNode):
            for namespace in namespaces:
                # No-op when the class is not part of the namespace.
                self.tags[namespace].discard(klass)
        elif isinstance(klass, basestring):
            if klass in self.raw_names:
                self.remove_namespace(self.raw_names[klass], *namespaces)
            else:
                entry = self.names.get(klass)
                if entry is not None:
                    self.remove_namespace(entry['class'], *namespaces)

    def set_not_in_all(self, klass, flag=True):
        """
        Set 'not_in_all' for a tag: a true ``flag`` removes the tag from the
        '__all__' namespace, a false one adds it.
        """
        if flag:
            self.remove_namespace(klass, '__all__')
        else:
            self.add_namespace(klass, '__all__')

    def get_help(self, *tags):
        """
        Get help for the given tags, or for all default tags if none are
        given.

        Each tag is either a tag class or a tag name. Returns a list of
        dictionaries with the keys 'name', 'docstring' (the parsed
        docstring) and 'obj' (the tag class). Tags without a registered
        help entry are skipped.
        """
        if not tags:
            tags = self.get_tags()
        help_objects = []
        for tag in tags:
            # issubclass() raises TypeError for non-classes (tag names),
            # so make sure it is only asked about classes.
            if isinstance(tag, type) and issubclass(tag, Node):
                obj = self.klasses.get(tag)
            else:
                obj = self.names.get(tag)
            if obj is None:
                continue
            help_objects.append({
                'name': obj['name'],
                'docstring': parse(obj['docs'], strict=False,
                                   auto_discover=True)[0],
                'obj': obj['class'],
            })
        return help_objects

    def get_tags(self, namespaces=None):
        """
        Get the set of tag classes for the namespaces.

        A namespace prefixed with 'no-' excludes the tags of that namespace.
        Unknown namespaces are ignored. Without any known include namespace,
        or when '__all__' is included, the '__all__' tags are used; the
        namespace 'base' also pulls in the '__all__' tags. If ``namespaces``
        is None the module-level get_default_namespaces() is used.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        exclude = []
        include = []
        # Split the 'namespaces' into exclude and include namespaces
        for ns in namespaces:
            if ns.startswith('no-'):
                stripped = ns[3:]
                if stripped in self.tags:
                    exclude.append(stripped)
            elif ns in self.tags:
                include.append(ns)
        # Include first
        if not include or '__all__' in include:
            tags = set(self.tags['__all__'])
        else:
            tags = set(self.tags['__all__']) if 'base' in include else set()
            for ns in include:
                tags = tags.union(self.tags[ns])
        # Then exclude
        for ns in exclude:
            tags = tags.difference(self.tags[ns])
        return tags

    def get_taglist(self, content, namespaces=None):
        """
        Get the tag-match list of a content for given namespaces.

        Returns a list of ``(position, match, tag class, is_opener)`` tuples
        ordered by position.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        tags = self.get_tags(namespaces)
        # Build tag list
        taglist = []
        for tagklass in tags:
            op = tagklass.open_pattern
            if callable(op):
                op = op()
            for match in op.finditer(content):
                taglist.append((match.start(), match, tagklass, True))
            cp = tagklass.close_pattern
            if callable(cp):
                cp = cp()
            for match in cp.finditer(content):
                taglist.append((match.start(), match, tagklass, False))
        # Sort by position only. Comparing whole tuples would compare the
        # match objects whenever two tags start at the same position, which
        # has no meaningful order.
        return sorted(taglist, key=lambda item: item[0])

    def get_parse_tree(self, content, namespaces=None, context=None):
        """
        Prepare content for parsing.

        Returns a HeadNode instance. Raises ParserError when a closing tag
        has no matching opening tag.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        taglist = self.get_taglist(content, namespaces)
        # Get headnode
        headnode = HeadNode(content, context)
        lastpos = 0
        currentnode = headnode
        # Loop over tag matches
        for pos, match, tagklass, opener in taglist:
            start, end = match.span()
            # Prevent tags matching within other tags (eg AutoDetectURL)
            if start < lastpos:
                continue
            # Append text between last tag and this one
            text = content[lastpos:start]
            if text:
                currentnode.append(text)
            # Set new position
            lastpos = end
            # Get line number for soft exceptions
            lineno = content[:start].count('\n') + 1
            sem.set_line_number(lineno)
            if opener:
                # if opener, push new node
                currentnode = currentnode.push(tagklass, match, content)
            else:
                # else close the tag:
                # pull all unclosed child tags of the current node
                while tagklass != currentnode.__class__:
                    try:
                        currentnode = currentnode.pull(end)
                    except ParserError:
                        # Pulling reached the head node: there is no
                        # matching opener for this closing tag.
                        sem.soft_raise("Error: BBCode could not be parsed. There are probably unclosed or uneven tags!")
                        raise ParserError("Failed to find matching opening tag for closing tag '%s' in line %s." % (get_tag_name(tagklass), lineno))
                # close the node
                currentnode = currentnode.close(end)
        text = content[lastpos:]
        if text:
            headnode.append(text)
        # Return the head node
        return headnode

    def get_visual_parse_tree(self, content, namespaces=None, indent=4):
        """
        Return a text rendering of the parse tree of ``content``, one node
        per line and indented by ``indent`` spaces per level, or
        '-Parse Error' if the content cannot be parsed.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()

        def recurse(nodes, level, indent):
            sindent = ' ' * (level * indent)
            lines = []
            for node in nodes:
                lines.append('%s-%s' % (sindent, str(node)))
                lines += recurse(node.nodes, level + 1, indent)
            return lines

        try:
            head = self.get_parse_tree(content, namespaces)
        except ParserError:
            return '-Parse Error'
        visuals = ['-HeadNode']
        visuals += recurse(head.nodes, 1, indent)
        return '\n'.join(visuals)

    def validate(self, content, namespaces=None, auto_discover=False):
        """
        Validates a given content and returns the errors or an empty sequence.
        """
        if namespaces is None:
            namespaces = get_default_namespaces()
        if auto_discover:
            autodiscover()
        try:
            headnode = self.get_parse_tree(content, namespaces)
        except ParserError:
            return sem.pull()
        # Only the soft exceptions recorded while parsing are of interest;
        # the parsed output itself is discarded.
        headnode.parse()
        return sem.pull()
# Module-wide Library instance; the names below expose its bound methods as
# module-level functions.
lib = Library()
register = lib.register
validate = lib.validate
get_help = lib.get_help
get_visual = lib.get_visual_parse_tree
def get_default_namespaces():
    """
    Return the namespaces to use when the caller does not pass any.

    This is the Django setting BBCODE_DEFAULT_NAMESPACES if it is defined,
    otherwise ``['__all__']``. The module's own imports treat Django as
    optional, so when Django cannot be imported the fallback is returned
    as well.
    """
    try:
        from django.conf import settings
    except ImportError:
        return ['__all__']
    if hasattr(settings, 'BBCODE_DEFAULT_NAMESPACES'):
        return settings.BBCODE_DEFAULT_NAMESPACES
    return ['__all__']
def parse(content, namespaces=None, strict=False, auto_discover=False,
          context=None):
    """
    Parse a content with the BBCodes.

    ``namespaces`` selects the tags to use (default: the result of
    get_default_namespaces()), ``auto_discover`` runs autodiscover() first
    and ``context`` is handed to the nodes of the parse tree.

    Returns a ``(html, errors)`` tuple, where ``errors`` are the soft
    exceptions collected while parsing.

    If the parse tree cannot be built, a ParserError is raised when
    ``strict`` is true; otherwise the escaped, unparsed content (with
    linefeeds converted) is returned together with the errors.
    """
    if auto_discover:
        autodiscover()
    if namespaces is None:
        namespaces = get_default_namespaces()
    # Fix windows linefeeds
    content = content.replace('\r', '')
    # Get head node
    try:
        head = lib.get_parse_tree(content, namespaces, context)
    except ParserError:
        if strict:
            raise
        # On the success path all text is escaped by TextNode.parse(); the
        # fallback must not hand the raw input back as HTML either.
        return convert_linefeeds(cgi.escape(content)), sem.pull()
    # parse BB Codes
    content = head.parse()
    # fix urls
    from utils.helpers import rhizome_urlize
    content = rhizome_urlize(content)
    # Replace linefeeds
    content = convert_linefeeds(content)
    return content, sem.pull()
def autodiscover():
    """
    Automatically register all bbcode tags by importing every ``.py`` file
    found in the 'bbtags' package of each app in INSTALLED_APPS that has
    one. Runs only once per process; later calls return immediately.
    """
    global AUTODISCOVERED
    if AUTODISCOVERED:
        return
    import imp
    from django.conf import settings
    import os
    for app in settings.INSTALLED_APPS:
        try:
            module = __import__(app, {}, {}, [app.split('.')[-1]])
            app_path = module.__path__
        except AttributeError:
            # Not a package (no __path__), so it cannot contain 'bbtags'.
            continue
        try:
            imp.find_module('bbtags', app_path)
        except ImportError:
            # This app has no 'bbtags' module.
            continue
        package_dir = os.path.dirname(os.path.abspath(module.__file__))
        bbtags_dir = os.path.join(package_dir, 'bbtags')
        for filename in os.listdir(bbtags_dir):
            mod_name, ext = os.path.splitext(filename)
            if ext != '.py':
                continue
            __import__("%s.bbtags.%s" % (app, mod_name))
    AUTODISCOVERED = True