Mercurial > public > pelican-blog
changeset 10:6c03ca07a16d
Renamed my tools directory to "tools".
I named it __bgn because I was worried it would clash with a future Pelican
update. But it seems like this would only happen if I re-ran the quickstart
script. "tools" is a better name. :)
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sun, 02 Feb 2014 11:32:13 -0600 |
parents | 271bed1181df |
children | 75a003a548c4 |
files | __bgn/import_blogophile.py tools/import_blogophile.py |
diffstat | 2 files changed, 174 insertions(+), 174 deletions(-) [+] |
line wrap: on
line diff
--- a/__bgn/import_blogophile.py Sat Feb 01 14:29:54 2014 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,174 +0,0 @@ -#!/usr/bin/env python -""" -A simple script to convert my Blogofile restructured text posts into the format -expected by Pelican. - -""" -# Copyright (C) 2014 by Brian Neal. -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -import datetime -import os -import re -import time - - -SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts') -DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding') - -TITLE_RE = re.compile(r'^title: (?P<title>.*)$') -DATE_RE = re.compile(r'^date: (?P<year>\d{4})/' - r'(?P<month>\d{1,2})/' - r'(?P<day>\d{1,2})\s*' - r'(?P<time>\d{2}:\d{2}:\d{2})\s*$') -CAT_RE = re.compile(r'^categories: (?P<cats>.*)$') - -PELICAN_FMT = """\ -{title} -{title_underline} - -:date: {date} -:tags: {tags} -:slug: {slug} -:author: Brian Neal - -{content} -""" - - -class ConvertError(Exception): - """Exception class for the conversion process""" - - -def slugify(s): - """Return a slug from the string s. - - This code must match what Blogofile was doing in order to keep the URLs the - same. In this case I had customized Blogfile's functionality based on a tip - by Mike Bayer: http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/ - - """ - slug = s.lower() - - # convert ellipses to spaces - slug = re.sub(r'\.{2,}', ' ', slug) - - # flatten everything non alpha or . into a single - - slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug) - - # trim off leading/trailing - - slug = re.sub(r'^-+|-+$', '', slug) - return slug - - -def convert(src, dst): - """Convert Blogofile to Pelican.""" - print '{} -> {}'.format(src, dst) - meta, content = parse_input(src) - write_output(meta, content, dst) - - -def parse_input(src): - """Parse input Blogofile .rst input. 
- - Returns a 2-tuple: - meta - dictionary of Blogofile metadata - content - blog post body as a string - - """ - with open(src, 'r') as fp: - lines = fp.readlines() - - # Find meta block - for i, line in enumerate(lines): - if line == '---\n': - meta_start = i - break - else: - raise ConvertError("Can't find start of meta block") - - for i, line in enumerate(lines[meta_start + 1 :]): - if line == '---\n': - meta_end = meta_start + 1 + i - break - else: - raise ConvertError("Can't find end of meta block") - - meta_lines = lines[meta_start + 1 : meta_end] - meta = {} - for line in meta_lines: - m = TITLE_RE.match(line) - if m: - meta['title'] = m.group('title').strip() - continue - m = DATE_RE.match(line) - if m: - year = int(m.group('year')) - month = int(m.group('month')) - day = int(m.group('day')) - t = time.strptime(m.group('time'), '%H:%M:%S') - meta['date'] = datetime.datetime.combine( - datetime.date(year, month, day), - datetime.time(t.tm_hour, t.tm_min, t.tm_sec)) - continue - m = CAT_RE.match(line) - if m: - meta['categories'] = m.group('cats').replace(' ', '').split(',') - continue - - for k in ['title', 'date', 'categories']: - if k not in meta: - raise ConvertError("Missing {} in metadata".format(k)) - - content = ''.join(lines[meta_end + 1:]).strip() - return meta, content - - -def write_output(meta, content, dst): - """Create the Pelican style .rst file from the Blogofile metadata and - content. Output is written to the file specified by dst. 
- - """ - title = meta['title'] - date = meta['date'].strftime('%Y-%m-%d %H:%M') - tags = ', '.join(meta['categories']) - slug = slugify(title) - - post = PELICAN_FMT.format(title=title, - title_underline='#'*len(title), - date=date, - tags=tags, - slug=slug, - content=content) - - with open(dst, 'w') as fp: - fp.write(post) - - -if __name__ == '__main__': - for name in os.listdir(SRC_DIR): - if name.endswith('.rst'): - src = os.path.join(SRC_DIR, name) - dst = os.path.join(DST_DIR, name) - - try: - convert(src, dst) - except ConvertError as ex: - print "Error converting {}: {}".format(name, ex)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/import_blogophile.py Sun Feb 02 11:32:13 2014 -0600 @@ -0,0 +1,174 @@ +#!/usr/bin/env python +""" +A simple script to convert my Blogofile restructured text posts into the format +expected by Pelican. + +""" +# Copyright (C) 2014 by Brian Neal. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
+ +import datetime +import os +import re +import time + + +SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts') +DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding') + +TITLE_RE = re.compile(r'^title: (?P<title>.*)$') +DATE_RE = re.compile(r'^date: (?P<year>\d{4})/' + r'(?P<month>\d{1,2})/' + r'(?P<day>\d{1,2})\s*' + r'(?P<time>\d{2}:\d{2}:\d{2})\s*$') +CAT_RE = re.compile(r'^categories: (?P<cats>.*)$') + +PELICAN_FMT = """\ +{title} +{title_underline} + +:date: {date} +:tags: {tags} +:slug: {slug} +:author: Brian Neal + +{content} +""" + + +class ConvertError(Exception): + """Exception class for the conversion process""" + + +def slugify(s): + """Return a slug from the string s. + + This code must match what Blogofile was doing in order to keep the URLs the + same. In this case I had customized Blogfile's functionality based on a tip + by Mike Bayer: http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/ + + """ + slug = s.lower() + + # convert ellipses to spaces + slug = re.sub(r'\.{2,}', ' ', slug) + + # flatten everything non alpha or . into a single - + slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug) + + # trim off leading/trailing - + slug = re.sub(r'^-+|-+$', '', slug) + return slug + + +def convert(src, dst): + """Convert Blogofile to Pelican.""" + print '{} -> {}'.format(src, dst) + meta, content = parse_input(src) + write_output(meta, content, dst) + + +def parse_input(src): + """Parse input Blogofile .rst input. 
+ + Returns a 2-tuple: + meta - dictionary of Blogofile metadata + content - blog post body as a string + + """ + with open(src, 'r') as fp: + lines = fp.readlines() + + # Find meta block + for i, line in enumerate(lines): + if line == '---\n': + meta_start = i + break + else: + raise ConvertError("Can't find start of meta block") + + for i, line in enumerate(lines[meta_start + 1 :]): + if line == '---\n': + meta_end = meta_start + 1 + i + break + else: + raise ConvertError("Can't find end of meta block") + + meta_lines = lines[meta_start + 1 : meta_end] + meta = {} + for line in meta_lines: + m = TITLE_RE.match(line) + if m: + meta['title'] = m.group('title').strip() + continue + m = DATE_RE.match(line) + if m: + year = int(m.group('year')) + month = int(m.group('month')) + day = int(m.group('day')) + t = time.strptime(m.group('time'), '%H:%M:%S') + meta['date'] = datetime.datetime.combine( + datetime.date(year, month, day), + datetime.time(t.tm_hour, t.tm_min, t.tm_sec)) + continue + m = CAT_RE.match(line) + if m: + meta['categories'] = m.group('cats').replace(' ', '').split(',') + continue + + for k in ['title', 'date', 'categories']: + if k not in meta: + raise ConvertError("Missing {} in metadata".format(k)) + + content = ''.join(lines[meta_end + 1:]).strip() + return meta, content + + +def write_output(meta, content, dst): + """Create the Pelican style .rst file from the Blogofile metadata and + content. Output is written to the file specified by dst. 
+ + """ + title = meta['title'] + date = meta['date'].strftime('%Y-%m-%d %H:%M') + tags = ', '.join(meta['categories']) + slug = slugify(title) + + post = PELICAN_FMT.format(title=title, + title_underline='#'*len(title), + date=date, + tags=tags, + slug=slug, + content=content) + + with open(dst, 'w') as fp: + fp.write(post) + + +if __name__ == '__main__': + for name in os.listdir(SRC_DIR): + if name.endswith('.rst'): + src = os.path.join(SRC_DIR, name) + dst = os.path.join(DST_DIR, name) + + try: + convert(src, dst) + except ConvertError as ex: + print "Error converting {}: {}".format(name, ex)