comparison core/html.py @ 581:ee87ea74d46b

For Django 1.4, rearranged project structure for new manage.py.
author Brian Neal <bgneal@gmail.com>
date Sat, 05 May 2012 17:10:48 -0500
parents gpp/core/html.py@b3b11edf91d8
children ff645a692791
comparison
equal deleted inserted replaced
580:c525f3e0b5d0 581:ee87ea74d46b
1 import html5lib
2 from html5lib import sanitizer, treebuilders, treewalkers, serializer
3
def sanitizer_factory(*args, **kwargs):
    """Create an html5lib ``HTMLSanitizer``, forwarding every argument.

    All positional and keyword arguments are passed to
    ``sanitizer.HTMLSanitizer`` unchanged, and the new instance is returned.
    """
    # NOTE: setting ``strip_tokens = True`` on the sanitizer was intended
    # here, but that option isn't available yet, so the instance is returned
    # exactly as constructed.
    return sanitizer.HTMLSanitizer(*args, **kwargs)
9
def clean_html(buf):
    """Clean an HTML fragment of dangerous tags and content.

    Leading and trailing whitespace is stripped from ``buf`` first. If
    nothing remains, the (empty) stripped string is returned without any
    parsing. Otherwise the text is parsed as a fragment into a DOM tree with
    ``sanitizer_factory`` supplied as the parser's tokenizer, and the tree
    is serialized back to markup.

    Returns the rendered markup produced by the html5lib serializer.
    """
    text = buf.strip()
    if text:
        # Parse into a DOM tree, tokenizing through the sanitizer.
        dom_builder = treebuilders.getTreeBuilder("dom")
        parser = html5lib.HTMLParser(tree=dom_builder,
                                     tokenizer=sanitizer_factory)
        fragment = parser.parseFragment(text)

        # Turn the parsed tree into a token stream for the serializer.
        make_walker = treewalkers.getTreeWalker("dom")
        tokens = make_walker(fragment)

        # Keep optional tags and always quote attribute values in the output.
        renderer = serializer.htmlserializer.HTMLSerializer(
            omit_optional_tags=False,
            quote_attr_values=True)
        return renderer.render(tokens)

    # Nothing but whitespace was supplied: hand back the stripped string.
    return text
27
28 # vim: ts=4 sw=4