Mercurial > public > sg101
comparison core/html.py @ 581:ee87ea74d46b
For Django 1.4, rearranged project structure for new manage.py.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 May 2012 17:10:48 -0500 |
parents | gpp/core/html.py@b3b11edf91d8 |
children | ff645a692791 |
comparison
equal
deleted
inserted
replaced
580:c525f3e0b5d0 | 581:ee87ea74d46b |
---|---|
1 import html5lib | |
2 from html5lib import sanitizer, treebuilders, treewalkers, serializer | |
3 | |
def sanitizer_factory(*args, **kwargs):
    """Build and return an html5lib ``HTMLSanitizer``.

    All positional and keyword arguments are forwarded unchanged to the
    ``sanitizer.HTMLSanitizer`` constructor. ``clean_html`` hands this
    callable to ``html5lib.HTMLParser`` as its ``tokenizer``.
    """
    html_sanitizer = sanitizer.HTMLSanitizer(*args, **kwargs)
    # NOTE: setting ``strip_tokens = True`` on the sanitizer isn't available
    # yet, so the instance is returned exactly as constructed.
    return html_sanitizer
9 | |
def clean_html(buf):
    """Cleans HTML of dangerous tags and content.

    Leading and trailing whitespace is stripped from ``buf`` first; if
    nothing is left, the (empty) stripped value is returned as-is without
    invoking the parser. Otherwise the markup is parsed as a fragment into
    a DOM tree using the sanitizing tokenizer from ``sanitizer_factory``
    and then serialized back to an HTML string.
    """
    markup = buf.strip()
    if markup:
        # Parse with the sanitizer acting as the tokenizer.
        dom_builder = treebuilders.getTreeBuilder("dom")
        parser = html5lib.HTMLParser(tree=dom_builder,
                                     tokenizer=sanitizer_factory)
        fragment = parser.parseFragment(markup)

        # Walk the resulting DOM tree to produce a token stream.
        tree_walker = treewalkers.getTreeWalker("dom")

        # Serialize keeping optional tags and quoting attribute values.
        html_serializer = serializer.htmlserializer.HTMLSerializer(
            omit_optional_tags=False,
            quote_attr_values=True)
        return html_serializer.render(tree_walker(fragment))

    return markup
27 | |
28 # vim: ts=4 sw=4 |