bgneal@848: """
bgneal@848: This is a Python Markdown extension to automatically urlize text.
bgneal@848: Originally found here:
bgneal@848: 
bgneal@848: https://github.com/r0wb0t/markdown-urlize
bgneal@848: 
bgneal@848: Modified by Brian Neal to update doctest for Python Markdown 2.5.x (extension
bgneal@848: parameter changes).
bgneal@848: 
bgneal@848: Copyright (c) 2014 Rowan Nairn
bgneal@848: All rights reserved.
bgneal@848: Redistribution and use in source and binary forms, with or without
bgneal@848: modification, are permitted provided that the following conditions are
bgneal@848: met:
bgneal@848: 1. Redistributions of source code must retain the above copyright
bgneal@848: notice, this list of conditions and the following disclaimer.
bgneal@848: 2. Redistributions in binary form must reproduce the above copyright
bgneal@848: notice, this list of conditions and the following disclaimer in the
bgneal@848: documentation and/or other materials provided with the distribution.
bgneal@848: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
bgneal@848: "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
bgneal@848: LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
bgneal@848: A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
bgneal@848: HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
bgneal@848: SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
bgneal@848: LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
bgneal@848: DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
bgneal@848: THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
bgneal@848: (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
bgneal@848: OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bgneal@848: 
bgneal@848: A more liberal autolinker
bgneal@356: 
bgneal@356: Inspired by Django's urlize function.
bgneal@356: 
bgneal@356: Positive examples:
bgneal@356: 
bgneal@356: >>> import markdown
bgneal@848: >>> md = markdown.Markdown(extensions=[UrlizeExtension()])
bgneal@356: 
bgneal@356: >>> md.convert('http://example.com/')
bgneal@356: u'<p><a href="http://example.com/">http://example.com/</a></p>'
bgneal@356: 
bgneal@356: >>> md.convert('go to http://example.com')
bgneal@356: u'<p>go to <a href="http://example.com">http://example.com</a></p>'
bgneal@356: 
bgneal@356: >>> md.convert('example.com')
bgneal@356: u'<p><a href="http://example.com">example.com</a></p>'
bgneal@356: 
bgneal@356: >>> md.convert('example.net')
bgneal@356: u'<p><a href="http://example.net">example.net</a></p>'
bgneal@356: 
bgneal@356: >>> md.convert('www.example.us')
bgneal@356: u'<p><a href="http://www.example.us">www.example.us</a></p>'
bgneal@356: 
bgneal@356: >>> md.convert('(www.example.us/path/?name=val)')
bgneal@356: u'<p>(<a href="http://www.example.us/path/?name=val">www.example.us/path/?name=val</a>)</p>'
bgneal@356: 
bgneal@356: >>> md.convert('go to <http://example.com> now!')
bgneal@356: u'<p>go to <a href="http://example.com">http://example.com</a> now!</p>'
bgneal@356: 
bgneal@356: Negative examples:
bgneal@356: 
bgneal@356: >>> md.convert('del.icio.us')
bgneal@356: u'<p>del.icio.us</p>'
bgneal@356: 
bgneal@356: """
bgneal@356: 
bgneal@356: import markdown
bgneal@356: 
# Regular expression used to spot linkable text.  The alternatives are tried
# in the order listed and the whole expression is a single capturing group.
URLIZE_RE = '({0})'.format('|'.join((
    # An explicit autolink wrapped in angle brackets: <http://...>
    r'<(?:f|ht)tps?://[^>]*>',
    # A bare URL with an ftp/ftps/http/https scheme; the final character
    # may not be '.', ',' or ')' so trailing punctuation is left out.
    r'\b(?:f|ht)tps?://[^)<>\s]+[^.,)<>\s]',
    # A scheme-less address beginning with "www."
    r'\bwww\.[^)<>\s]+[^.,)<>\s]',
    # Anything ending in one of the .com / .net / .org suffixes.
    r'[^(<\s]+\.(?:com|net|org)\b',
)))
bgneal@356: 
class UrlizePattern(markdown.inlinepatterns.Pattern):
    """Return a link Element given an autolink (`http://example.com`)."""

    # Schemes that are left untouched when present.  URLIZE_RE matches
    # "ftps://" explicitly, so it must be listed here as well; otherwise such
    # links would be rewritten to "http://ftps://...".
    _KNOWN_SCHEMES = ('http', 'https', 'ftp', 'ftps')

    def handleMatch(self, m):
        """Build an ``<a>`` element for the text matched by URLIZE_RE.

        ``m`` is the match object handed over by Python-Markdown.  The link
        text is the matched text (minus any surrounding angle brackets); the
        ``href`` is the same text, prefixed with ``mailto:`` or ``http://``
        when it does not already start with a known scheme.
        """
        # Group 2 is read rather than group 1: Python-Markdown's Pattern
        # base class is expected to wrap the expression with a leading group
        # for the preceding text (see the Python-Markdown extension API).
        url = m.group(2)

        # Strip the angle brackets of an explicit <http://...> autolink.
        if url.startswith('<'):
            url = url[1:-1]

        # The visible link text is the URL exactly as the author typed it.
        text = url

        # URL schemes are case-insensitive, so compare in lower case.
        scheme = url.split('://')[0].lower()
        if scheme not in self._KNOWN_SCHEMES:
            if '@' in url and '/' not in url:
                # Looks like an e-mail address rather than a web address.
                url = 'mailto:' + url
            else:
                url = 'http://' + url

        el = markdown.util.etree.Element("a")
        el.set('href', url)
        # AtomicString keeps other inline patterns from re-processing the
        # link text.
        el.text = markdown.util.AtomicString(text)
        return el
bgneal@356: 
class UrlizeExtension(markdown.Extension):
    """Python-Markdown extension that installs the urlize inline pattern."""

    def extendMarkdown(self, md, md_globals):
        """Register a UrlizePattern under the 'autolink' inline-pattern key.

        ``md`` is the Markdown instance being extended; ``md_globals`` is
        accepted as part of the signature but is not used here.
        """
        urlize_pattern = UrlizePattern(URLIZE_RE, md)
        md.inlinePatterns['autolink'] = urlize_pattern
bgneal@356: 
def makeExtension(configs=None):
    """Create the UrlizeExtension instance, forwarding ``configs`` to it."""
    extension = UrlizeExtension(configs=configs)
    return extension
bgneal@356: 
if __name__ == "__main__":
    # When executed as a script, run the doctest examples found in this
    # module's docstrings.
    from doctest import testmod
    testmod()