annotate core/mdexts/urlize.py @ 861:e4f8d87c3d30

Configure Markdown logger to reduce noise in logs. Markdown is logging at the INFO level whenever it loads an extension. This looks like it has been fixed in master at GitHub. But until then we will explicitly configure the MARKDOWN logger to log at WARNING or higher.
author Brian Neal <bgneal@gmail.com>
date Mon, 01 Dec 2014 18:36:27 -0600
parents 32ebe22f0cad
children
rev   line source
bgneal@848 1 """
bgneal@848 2 This is a Python Markdown extension to automatically urlize text.
bgneal@848 3 Originally found here:
bgneal@848 4
bgneal@848 5 https://github.com/r0wb0t/markdown-urlize
bgneal@848 6
bgneal@848 7 Modified by Brian Neal to update doctest for Python Markdown 2.5.x (extension
bgneal@848 8 parameter changes).
bgneal@848 9
bgneal@848 10 Copyright (c) 2014 Rowan Nairn
bgneal@848 11 All rights reserved.
bgneal@848 12 Redistribution and use in source and binary forms, with or without
bgneal@848 13 modification, are permitted provided that the following conditions are
bgneal@848 14 met:
bgneal@848 15 1. Redistributions of source code must retain the above copyright
bgneal@848 16 notice, this list of conditions and the following disclaimer.
bgneal@848 17 2. Redistributions in binary form must reproduce the above copyright
bgneal@848 18 notice, this list of conditions and the following disclaimer in the
bgneal@848 19 documentation and/or other materials provided with the distribution.
bgneal@848 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
bgneal@848 21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
bgneal@848 22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
bgneal@848 23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
bgneal@848 24 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
bgneal@848 25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
bgneal@848 26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
bgneal@848 27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
bgneal@848 28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
bgneal@848 29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
bgneal@848 30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bgneal@848 31
bgneal@848 32 A more liberal autolinker
bgneal@356 33
bgneal@356 34 Inspired by Django's urlize function.
bgneal@356 35
bgneal@356 36 Positive examples:
bgneal@356 37
bgneal@356 38 >>> import markdown
bgneal@848 39 >>> md = markdown.Markdown(extensions=[UrlizeExtension()])
bgneal@356 40
bgneal@356 41 >>> md.convert('http://example.com/')
bgneal@356 42 u'<p><a href="http://example.com/">http://example.com/</a></p>'
bgneal@356 43
bgneal@356 44 >>> md.convert('go to http://example.com')
bgneal@356 45 u'<p>go to <a href="http://example.com">http://example.com</a></p>'
bgneal@356 46
bgneal@356 47 >>> md.convert('example.com')
bgneal@356 48 u'<p><a href="http://example.com">example.com</a></p>'
bgneal@356 49
bgneal@356 50 >>> md.convert('example.net')
bgneal@356 51 u'<p><a href="http://example.net">example.net</a></p>'
bgneal@356 52
bgneal@356 53 >>> md.convert('www.example.us')
bgneal@356 54 u'<p><a href="http://www.example.us">www.example.us</a></p>'
bgneal@356 55
bgneal@356 56 >>> md.convert('(www.example.us/path/?name=val)')
bgneal@356 57 u'<p>(<a href="http://www.example.us/path/?name=val">www.example.us/path/?name=val</a>)</p>'
bgneal@356 58
bgneal@356 59 >>> md.convert('go to <http://example.com> now!')
bgneal@356 60 u'<p>go to <a href="http://example.com">http://example.com</a> now!</p>'
bgneal@356 61
bgneal@356 62 Negative examples:
bgneal@356 63
bgneal@356 64 >>> md.convert('del.icio.us')
bgneal@356 65 u'<p>del.icio.us</p>'
bgneal@356 66
bgneal@356 67 """
bgneal@356 68
bgneal@356 69 import markdown
bgneal@356 70
# Pattern source handed to UrlizePattern. It is a single capturing group
# wrapping four alternatives, listed in the order the regex engine tries them.
URLIZE_RE = '(' + '|'.join((
    # Angle-bracketed URL with an explicit http/https/ftp/ftps scheme.
    r'<(?:f|ht)tps?://[^>]*>',
    # Bare URL with an explicit scheme; may not end in '.', ',' or ')'.
    r'\b(?:f|ht)tps?://[^)<>\s]+[^.,)<>\s]',
    # Scheme-less address beginning with "www."; same trailing rule.
    r'\bwww\.[^)<>\s]+[^.,)<>\s]',
    # Scheme-less text ending in .com, .net or .org.
    r'[^(<\s]+\.(?:com|net|org)\b',
)) + ')'
bgneal@356 78
class UrlizePattern(markdown.inlinepatterns.Pattern):
    """Return a link Element given an autolink (`http://example.com`)."""

    # Schemes that are left untouched when building the href. The list
    # includes 'ftps' because the URL patterns use `(?:f|ht)tps?://`, which
    # also accepts ftps:// URLs; without it such a URL would be prefixed
    # with 'http://' and end up as 'http://ftps://...'.
    _EXPLICIT_SCHEMES = ('http', 'https', 'ftp', 'ftps')

    def handleMatch(self, m):
        """Build the ``<a>`` element for one match.

        The matched URL text is read from group 2 of ``m``. Enclosing
        angle brackets are removed, and the remaining text becomes the
        link text unchanged. The ``href`` is that same text, prefixed with
        ``mailto:`` (contains '@' and no '/') or ``http://`` when it does
        not already start with one of the recognised schemes.

        Returns the new element.
        """
        url = m.group(2)

        # The "<scheme://...>" form is matched together with its brackets.
        if url.startswith('<'):
            url = url[1:-1]

        # Display text is the URL as written, before any scheme is added.
        text = url

        if url.split('://')[0] not in self._EXPLICIT_SCHEMES:
            if '@' in url and '/' not in url:
                url = 'mailto:' + url
            else:
                url = 'http://' + url

        el = markdown.util.etree.Element("a")
        el.set('href', url)
        el.text = markdown.util.AtomicString(text)
        return el
bgneal@356 99
class UrlizeExtension(markdown.Extension):
    """Python-Markdown extension that installs the urlize inline pattern."""

    def extendMarkdown(self, md, md_globals):
        """Put a UrlizePattern under the 'autolink' inline pattern key.

        ``md_globals`` is accepted but not used here.
        """
        urlizer = UrlizePattern(URLIZE_RE, md)
        md.inlinePatterns['autolink'] = urlizer
bgneal@356 106
def makeExtension(configs=None):
    """Create a UrlizeExtension, forwarding ``configs`` as a keyword."""
    extension = UrlizeExtension(configs=configs)
    return extension
bgneal@356 109
if __name__ == "__main__":
    # Running the module as a script executes its doctests.
    from doctest import testmod
    testmod()