Mercurial > public > sg101
view core/html.py @ 697:67f8d49a9377
Cleaned up the code a bit.
Separated the S3 stuff out into its own class.
This class maybe should be in core.
Still want to do some kind of context manager around the temporary file we are
creating to ensure it gets deleted.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sun, 08 Sep 2013 21:02:58 -0500 |
parents | ee87ea74d46b |
children | ff645a692791 |
line wrap: on
line source
import html5lib from html5lib import sanitizer, treebuilders, treewalkers, serializer def sanitizer_factory(*args, **kwargs): san = sanitizer.HTMLSanitizer(*args, **kwargs) # This isn't available yet # san.strip_tokens = True return san def clean_html(buf): """Cleans HTML of dangerous tags and content.""" buf = buf.strip() if not buf: return buf p = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"), tokenizer=sanitizer_factory) dom_tree = p.parseFragment(buf) walker = treewalkers.getTreeWalker("dom") stream = walker(dom_tree) s = serializer.htmlserializer.HTMLSerializer( omit_optional_tags=False, quote_attr_values=True) return s.render(stream) # vim: ts=4 sw=4