Mercurial > public > sg101
diff core/html.py @ 581:ee87ea74d46b
For Django 1.4, rearranged project structure for new manage.py.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 May 2012 17:10:48 -0500 |
parents | gpp/core/html.py@b3b11edf91d8 |
children | ff645a692791 |
line wrap: on
line diff
# core/html.py -- added at this path by this changeset (diff against /dev/null).
import html5lib
from html5lib import sanitizer, treebuilders, treewalkers, serializer


def sanitizer_factory(*args, **kwargs):
    """Build the sanitizing tokenizer handed to the html5lib parser.

    All positional and keyword arguments are forwarded unchanged to
    ``sanitizer.HTMLSanitizer``; the new sanitizer instance is returned.
    """
    # NOTE: setting ``strip_tokens = True`` on the sanitizer is the intended
    # configuration, but that attribute isn't available yet, so the
    # sanitizer is returned with its default settings.
    return sanitizer.HTMLSanitizer(*args, **kwargs)


def clean_html(buf):
    """Cleans HTML of dangerous tags and content.

    ``buf`` is stripped of leading/trailing whitespace first. If nothing
    is left, the (empty) stripped value is returned as-is. Otherwise the
    text is parsed as an HTML fragment through the sanitizing tokenizer
    and the resulting DOM tree is serialized back to markup, which is
    returned.
    """
    text = buf.strip()
    if not text:
        # Nothing to sanitize; skip the parser entirely.
        return text

    # Parse into a DOM tree, letting sanitizer_factory supply the tokenizer
    # so the sanitizer sees every token during parsing.
    dom_builder = treebuilders.getTreeBuilder("dom")
    parser = html5lib.HTMLParser(tree=dom_builder,
                                 tokenizer=sanitizer_factory)
    fragment = parser.parseFragment(text)

    # Walk the sanitized tree to produce the token stream for serializing.
    walk = treewalkers.getTreeWalker("dom")
    tokens = walk(fragment)

    # Keep optional tags and always quote attribute values in the output.
    html_serializer = serializer.htmlserializer.HTMLSerializer(
        quote_attr_values=True,
        omit_optional_tags=False)
    return html_serializer.render(tokens)

# vim: ts=4 sw=4