annotate core/html.py @ 850:202e0828aafe

Update requirements.txt for latest html5lib.
author Brian Neal <bgneal@gmail.com>
date Thu, 30 Oct 2014 19:40:26 -0500
parents ff645a692791
children 928b97ec55a7
rev   line source
bgneal@849 1 """Common HTML related functions"""
bgneal@849 2 import bleach
bgneal@9 3
bgneal@9 4
bgneal@849 5 # Each entry in the _CLEAN_PROFILES dict is a profile name -> 3-tuple pair. The
bgneal@849 6 # tuple consists of (allowed_tags_list, allowed_attributes_dict,
bgneal@849 7 # allowed_styles_list)
bgneal@849 8 #
bgneal@849 9 _CLEAN_PROFILES = {
bgneal@849 10 'comments': (
bgneal@849 11 [
bgneal@849 12 'a', 'b', 'blockquote', 'br', 'code', 'del', 'em',
bgneal@849 13 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
bgneal@849 14 'i', 'img', 'li', 'ol', 'p', 'pre', 'strong', 'ul',
bgneal@849 15 ],
bgneal@849 16 {
bgneal@849 17 'a': ['href'],
bgneal@849 18 'img': ['src', 'alt', 'title'],
bgneal@849 19 },
bgneal@849 20 [],
bgneal@849 21 ),
bgneal@849 22 'news': (
bgneal@849 23 [
bgneal@849 24 'a', 'b', 'blockquote', 'br', 'caption', 'center', 'code', 'col',
bgneal@849 25 'colgroup', 'dd', 'del', 'div', 'dl', 'dt', 'em',
bgneal@849 26 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
bgneal@849 27 'i', 'img', 'ins', 'li', 'ol', 'p', 'pre', 'small', 'strike',
bgneal@849 28 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th',
bgneal@849 29 'thead', 'tr', 'tt', 'u', 'ul',
bgneal@849 30 ],
bgneal@849 31 {
bgneal@849 32 'a': ['href'],
bgneal@849 33 'img': ['src', 'alt', 'title', 'width', 'height'],
bgneal@849 34 },
bgneal@849 35 [],
bgneal@849 36 ),
bgneal@849 37 }
bgneal@849 38
bgneal@849 39
def clean_html(text, profile='comments'):
    """Return `text` sanitized according to the named cleaning profile.

    Leading and trailing whitespace is stripped first. If nothing is left,
    that empty result is returned immediately and neither the profile table
    nor bleach is consulted.

    Otherwise the (tags, attributes, styles) tuple stored under `profile`
    in _CLEAN_PROFILES is passed to bleach.clean() together with strip=True
    and strip_comments=True, and bleach's result is returned.

    A `profile` that is not a key of _CLEAN_PROFILES raises KeyError (only
    reached when the stripped text is non-empty).
    """
    stripped = text.strip()
    if stripped:
        allowed_tags, allowed_attrs, allowed_styles = _CLEAN_PROFILES[profile]
        return bleach.clean(
            stripped,
            tags=allowed_tags,
            attributes=allowed_attrs,
            styles=allowed_styles,
            strip=True,
            strip_comments=True,
        )

    # Nothing but whitespace was supplied; hand back the empty string.
    return stripped