view gpp/core/html.py @ 552:9e42e6618168

For bitbucket issue #2, tweak the admin settings for the Post model to reduce slow queries. Define our own queryset() method so we can control the select_related(), and not have it cascade from post to topics to forums to categories. Removed 'topic' from list_display because MySQL still sucked with 2 inner joins. Now it seems to be tolerable with only one join to User.
author Brian Neal <bgneal@gmail.com>
date Wed, 25 Jan 2012 20:07:03 -0600
parents b3b11edf91d8
children
line wrap: on
line source
import html5lib
from html5lib import sanitizer, treebuilders, treewalkers, serializer

def sanitizer_factory(*args, **kwargs):
    san = sanitizer.HTMLSanitizer(*args, **kwargs)
    # This isn't available yet
    # san.strip_tokens = True
    return san

def clean_html(buf):
    """Cleans HTML of dangerous tags and content."""
    buf = buf.strip()
    if not buf:
        return buf

    p = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"),
            tokenizer=sanitizer_factory)
    dom_tree = p.parseFragment(buf)

    walker = treewalkers.getTreeWalker("dom")
    stream = walker(dom_tree)

    s = serializer.htmlserializer.HTMLSerializer(
            omit_optional_tags=False,
            quote_attr_values=True)
    return s.render(stream) 

# vim: ts=4 sw=4