annotate core/html.py @ 861:e4f8d87c3d30

Configure the Markdown logger to reduce noise in the logs. Markdown logs at the INFO level whenever it loads an extension. This appears to have been fixed on the master branch on GitHub, but until that fix is released we explicitly configure the MARKDOWN logger to log at WARNING or higher.
author Brian Neal <bgneal@gmail.com>
date Mon, 01 Dec 2014 18:36:27 -0600
parents ff645a692791
children 928b97ec55a7
rev   line source
bgneal@849 1 """Common HTML related functions"""
bgneal@849 2 import bleach
bgneal@9 3
bgneal@9 4
bgneal@849 5 # Each entry in the _CLEAN_PROFILES dict is a profile name -> 3-tuple pair. The
bgneal@849 6 # tuple consists of (allowed_tags_list, allowed_attributes_dict,
bgneal@849 7 # allowed_styles_list)
bgneal@849 8 #
bgneal@849 9 _CLEAN_PROFILES = {
bgneal@849 10 'comments': (
bgneal@849 11 [
bgneal@849 12 'a', 'b', 'blockquote', 'br', 'code', 'del', 'em',
bgneal@849 13 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
bgneal@849 14 'i', 'img', 'li', 'ol', 'p', 'pre', 'strong', 'ul',
bgneal@849 15 ],
bgneal@849 16 {
bgneal@849 17 'a': ['href'],
bgneal@849 18 'img': ['src', 'alt', 'title'],
bgneal@849 19 },
bgneal@849 20 [],
bgneal@849 21 ),
bgneal@849 22 'news': (
bgneal@849 23 [
bgneal@849 24 'a', 'b', 'blockquote', 'br', 'caption', 'center', 'code', 'col',
bgneal@849 25 'colgroup', 'dd', 'del', 'div', 'dl', 'dt', 'em',
bgneal@849 26 'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
bgneal@849 27 'i', 'img', 'ins', 'li', 'ol', 'p', 'pre', 'small', 'strike',
bgneal@849 28 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'tfoot', 'th',
bgneal@849 29 'thead', 'tr', 'tt', 'u', 'ul',
bgneal@849 30 ],
bgneal@849 31 {
bgneal@849 32 'a': ['href'],
bgneal@849 33 'img': ['src', 'alt', 'title', 'width', 'height'],
bgneal@849 34 },
bgneal@849 35 [],
bgneal@849 36 ),
bgneal@849 37 }
bgneal@849 38
bgneal@849 39
def clean_html(text, profile='comments'):
    """Cleans HTML of dangerous tags and content.

    text is stripped of leading and trailing whitespace first. None, an empty
    string, or whitespace-only input yields an empty string without calling
    bleach.

    profile names an entry in _CLEAN_PROFILES that supplies the allowed tags,
    attributes and styles; an unknown name raises KeyError.

    Returns the result of bleach.clean() called with strip=True and
    strip_comments=True.
    """
    # Treat a missing value like empty input instead of failing on
    # None.strip() with an AttributeError.
    if text is None:
        return ''

    text = text.strip()
    if not text:
        return text

    tags, attrs, styles = _CLEAN_PROFILES[profile]

    # NOTE(review): the styles keyword was removed in bleach 5.0 (replaced by
    # css_sanitizer); this call assumes an older bleach release -- confirm the
    # pinned version before upgrading.
    return bleach.clean(text, tags=tags, attributes=attrs, styles=styles,
                        strip=True, strip_comments=True)