Mercurial > public > sg101
view gpp/core/html.py @ 339:b871892264f2
Adding the sg101 IRC bot code to SVN. This code is pretty rough and needs love, but it gets the job done (one of my first Python apps). This fixes #150.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 26 Feb 2011 21:27:49 +0000 |
parents | b3b11edf91d8 |
children |
line wrap: on
line source
import html5lib from html5lib import sanitizer, treebuilders, treewalkers, serializer def sanitizer_factory(*args, **kwargs): san = sanitizer.HTMLSanitizer(*args, **kwargs) # This isn't available yet # san.strip_tokens = True return san def clean_html(buf): """Cleans HTML of dangerous tags and content.""" buf = buf.strip() if not buf: return buf p = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"), tokenizer=sanitizer_factory) dom_tree = p.parseFragment(buf) walker = treewalkers.getTreeWalker("dom") stream = walker(dom_tree) s = serializer.htmlserializer.HTMLSerializer( omit_optional_tags=False, quote_attr_values=True) return s.render(stream) # vim: ts=4 sw=4