Mercurial > public > pelican-blog
diff __bgn/import_blogophile.py @ 2:b7be75ff95b0
Created a script to convert my Blogofile posts to Pelican.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Wed, 29 Jan 2014 21:32:04 -0600 |
parents | |
children | c3115da3ff73 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/__bgn/import_blogophile.py Wed Jan 29 21:32:04 2014 -0600 @@ -0,0 +1,175 @@ +#!/usr/bin/env python +""" +A simple script to convert my Blogofile restructured text posts into the format +expected by Pelican. + +""" +# Copyright (C) 2014 by Brian Neal. +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. 
import datetime
import os
import re
import time


# Directory holding the Blogofile .rst posts to convert.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
# Directory that receives the converted Pelican .rst articles.
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content')

# Line that opens and closes the Blogofile metadata block.
META_DELIMITER = '---'

# Patterns for the three metadata lines this script understands.
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for the generated Pelican article.
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:category: coding
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""


class ConvertError(Exception):
    """Raised when a Blogofile post cannot be converted."""


def slugify(s):
    """Return a slug from the string s.

    This code must match what Blogofile was doing in order to keep the URLs
    the same. In this case I had customized Blogofile's functionality based on
    a tip by Mike Bayer:
    http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    slug = s.lower()

    # convert ellipses to spaces
    slug = re.sub(r'\.{2,}', ' ', slug)

    # flatten everything non alpha or . into a single -
    slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug)

    # trim off leading/trailing -
    slug = re.sub(r'^-+|-+$', '', slug)
    return slug


def convert(src, dst):
    """Convert the Blogofile post at path src into a Pelican post at path dst.

    Raises ConvertError if the source file's metadata cannot be parsed.

    """
    # A single parenthesized argument prints identically on Python 2 and 3.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)


def _find_delimiter(lines, start):
    """Return the index of the first meta delimiter line at or after start.

    Trailing whitespace is ignored so that CRLF line endings, stray spaces, or
    a final line with no newline are still recognized. Returns None if no
    delimiter line is found.

    """
    for offset, line in enumerate(lines[start:]):
        if line.rstrip() == META_DELIMITER:
            return start + offset
    return None


def parse_input(src):
    """Parse a Blogofile .rst post.

    src is the path of the file to read. The file must contain a metadata
    block delimited by two '---' lines with title, date and categories
    entries; everything after the closing delimiter is the post body.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title' (str),
               'date' (datetime.datetime) and 'categories' (list of str)
        content - blog post body as a string, stripped of surrounding
                  whitespace

    Raises ConvertError if the metadata block cannot be located, a required
    entry is missing, or the date does not name a real date/time.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block
    meta_start = _find_delimiter(lines, 0)
    if meta_start is None:
        raise ConvertError("Can't find start of meta block")

    meta_end = _find_delimiter(lines, meta_start + 1)
    if meta_end is None:
        raise ConvertError("Can't find end of meta block")

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            # The regex only checks digit counts; values such as month 13 or
            # hour 25 are rejected here and reported as a conversion error so
            # that one bad post does not abort a whole batch.
            try:
                t = time.strptime(m.group('time'), '%H:%M:%S')
                meta['date'] = datetime.datetime.combine(
                    datetime.date(int(m.group('year')),
                                  int(m.group('month')),
                                  int(m.group('day'))),
                    datetime.time(t.tm_hour, t.tm_min, t.tm_sec))
            except ValueError:
                raise ConvertError(
                    "Invalid date in metadata: {}".format(line.strip()))
            continue

        m = CAT_RE.match(line)
        if m:
            # Spaces are removed (as before); empty entries produced by a
            # trailing or doubled comma are dropped.
            cats = m.group('cats').strip().replace(' ', '')
            meta['categories'] = [c for c in cats.split(',') if c]
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content


def write_output(meta, content, dst):
    """Create the Pelican style .rst file from the Blogofile metadata and
    content. Output is written to the file specified by dst.

    meta is the dictionary returned by parse_input(); content is the post
    body string.

    """
    title = meta['title']
    date = meta['date'].strftime('%Y-%m-%d %H:%M')
    tags = ', '.join(meta['categories'])
    slug = slugify(title)

    post = PELICAN_FMT.format(title=title,
                              title_underline='#' * len(title),
                              date=date,
                              tags=tags,
                              slug=slug,
                              content=content)

    with open(dst, 'w') as fp:
        fp.write(post)


def main():
    """Convert every .rst post in SRC_DIR into DST_DIR.

    A failure on one post is reported and the remaining posts are still
    processed.

    """
    # Sorted so the progress output is deterministic across runs.
    for name in sorted(os.listdir(SRC_DIR)):
        if not name.endswith('.rst'):
            continue

        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        try:
            convert(src, dst)
        except (ConvertError, IOError, OSError) as ex:
            print("Error converting {}: {}".format(name, ex))


if __name__ == '__main__':
    main()