annotate core/mdexts/urlize.py @ 1205:510ef3cbf3e6 modernize

Getting SG101 running on my macbook. This is the start of a branch to modernize the SG101 website.
author Brian Neal <bgneal@gmail.com>
date Sat, 04 Jan 2025 21:34:31 -0600
parents 32ebe22f0cad
children
rev   line source
bgneal@848 1 """
bgneal@848 2 This is a Python Markdown extension to automatically urlize text.
bgneal@848 3 Originally found here:
bgneal@848 4
bgneal@848 5 https://github.com/r0wb0t/markdown-urlize
bgneal@848 6
bgneal@848 7 Modified by Brian Neal to update doctest for Python Markdown 2.5.x (extension
bgneal@848 8 parameter changes).
bgneal@848 9
bgneal@848 10 Copyright (c) 2014 Rowan Nairn
bgneal@848 11 All rights reserved.
bgneal@848 12 Redistribution and use in source and binary forms, with or without
bgneal@848 13 modification, are permitted provided that the following conditions are
bgneal@848 14 met:
bgneal@848 15 1. Redistributions of source code must retain the above copyright
bgneal@848 16 notice, this list of conditions and the following disclaimer.
bgneal@848 17 2. Redistributions in binary form must reproduce the above copyright
bgneal@848 18 notice, this list of conditions and the following disclaimer in the
bgneal@848 19 documentation and/or other materials provided with the distribution.
bgneal@848 20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
bgneal@848 21 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
bgneal@848 22 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
bgneal@848 23 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
bgneal@848 24 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
bgneal@848 25 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
bgneal@848 26 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
bgneal@848 27 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
bgneal@848 28 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
bgneal@848 29 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
bgneal@848 30 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
bgneal@848 31
bgneal@848 32 A more liberal autolinker
bgneal@356 33
bgneal@356 34 Inspired by Django's urlize function.
bgneal@356 35
bgneal@356 36 Positive examples:
bgneal@356 37
bgneal@356 38 >>> import markdown
bgneal@848 39 >>> md = markdown.Markdown(extensions=[UrlizeExtension()])
bgneal@356 40
bgneal@356 41 >>> md.convert('http://example.com/')
bgneal@356 42 u'<p><a href="http://example.com/">http://example.com/</a></p>'
bgneal@356 43
bgneal@356 44 >>> md.convert('go to http://example.com')
bgneal@356 45 u'<p>go to <a href="http://example.com">http://example.com</a></p>'
bgneal@356 46
bgneal@356 47 >>> md.convert('example.com')
bgneal@356 48 u'<p><a href="http://example.com">example.com</a></p>'
bgneal@356 49
bgneal@356 50 >>> md.convert('example.net')
bgneal@356 51 u'<p><a href="http://example.net">example.net</a></p>'
bgneal@356 52
bgneal@356 53 >>> md.convert('www.example.us')
bgneal@356 54 u'<p><a href="http://www.example.us">www.example.us</a></p>'
bgneal@356 55
bgneal@356 56 >>> md.convert('(www.example.us/path/?name=val)')
bgneal@356 57 u'<p>(<a href="http://www.example.us/path/?name=val">www.example.us/path/?name=val</a>)</p>'
bgneal@356 58
bgneal@356 59 >>> md.convert('go to <http://example.com> now!')
bgneal@356 60 u'<p>go to <a href="http://example.com">http://example.com</a> now!</p>'
bgneal@356 61
bgneal@356 62 Negative examples:
bgneal@356 63
bgneal@356 64 >>> md.convert('del.icio.us')
bgneal@356 65 u'<p>del.icio.us</p>'
bgneal@356 66
bgneal@356 67 """
bgneal@356 68
bgneal@356 69 import markdown
bgneal@356 70
# Module-level constants
#
# URLIZE_RE is a single capturing group whose alternatives are tried in order:
#   1. an explicit <scheme://...> autolink in angle brackets,
#   2. a bare http/https/ftp(s) URL (must not end in '.', ',' or ')'),
#   3. a bare host starting with "www.",
#   4. anything ending in .com, .net or .org.
URLIZE_RE = '({0})'.format('|'.join((
    r'<(?:f|ht)tps?://[^>]*>',
    r'\b(?:f|ht)tps?://[^)<>\s]+[^.,)<>\s]',
    r'\bwww\.[^)<>\s]+[^.,)<>\s]',
    r'[^(<\s]+\.(?:com|net|org)\b',
)))
bgneal@356 78
class UrlizePattern(markdown.inlinepatterns.Pattern):
    """Return a link Element given an autolink (`http://example.com`)."""

    # Every scheme the URLIZE_RE alternatives `(?:f|ht)tps?` can produce.
    # 'ftps' must be listed too, otherwise an ftps:// link would get a
    # second, bogus 'http://' prefix.
    _KNOWN_SCHEMES = ('http', 'https', 'ftp', 'ftps')

    def handleMatch(self, m):
        """Build the <a> element for one URLIZE_RE match.

        m -- the regex match object; group 2 is read as the matched URL text.
             NOTE(review): presumably group 1 is a leading group added by the
             base Pattern class -- confirm against the Markdown version in
             use.

        Returns an "a" element whose href is the normalized URL and whose
        text is the URL exactly as the author wrote it (minus any enclosing
        angle brackets).
        """
        url = m.group(2)

        # Explicit <...> autolinks: drop the enclosing angle brackets.
        if url.startswith('<'):
            url = url[1:-1]

        # The visible link text keeps the author's spelling; only the href
        # is normalized below.
        text = url

        # Compare the scheme case-insensitively so that e.g. 'HTTP://x.com'
        # is recognized as already having a scheme.
        scheme = url.split('://')[0].lower()
        if scheme not in self._KNOWN_SCHEMES:
            if '@' in url and '/' not in url:
                # Looks like a bare e-mail address.
                url = 'mailto:' + url
            else:
                # Bare host name (example.com, www.example.us/...).
                url = 'http://' + url

        el = markdown.util.etree.Element("a")
        el.set('href', url)
        # AtomicString keeps Markdown from re-processing the link text.
        el.text = markdown.util.AtomicString(text)
        return el
bgneal@356 99
class UrlizeExtension(markdown.Extension):
    """Python-Markdown extension that installs the urlize inline pattern."""

    def extendMarkdown(self, md, md_globals):
        """Swap the 'autolink' inline pattern for a UrlizePattern.

        md         -- the Markdown instance being extended.
        md_globals -- accepted for the extension interface; not used here.
        """
        urlize_pattern = UrlizePattern(URLIZE_RE, md)
        md.inlinePatterns['autolink'] = urlize_pattern
bgneal@356 106
def makeExtension(configs=None):
    """Create the UrlizeExtension instance for Python-Markdown to load.

    configs -- optional configuration forwarded to UrlizeExtension through
               its `configs` keyword. When omitted (None), the extension is
               constructed with no arguments at all.
    """
    if configs is None:
        # Nothing to forward: build the extension the same way the module
        # doctest does, instead of passing an explicit configs=None.
        # NOTE(review): the `configs` keyword appears to be deprecated or
        # rejected by some Python-Markdown releases -- confirm against the
        # installed version.
        return UrlizeExtension()
    return UrlizeExtension(configs=configs)
bgneal@356 109
if __name__ == "__main__":
    # Executed directly: run the doctests embedded in the module docstring.
    from doctest import testmod
    testmod()