pelican-blog: tools/import_blogophile.py comparison

comparison tools/import_blogophile.py @ 10:6c03ca07a16d

Renamed my tools directory to "tools". I named it __bgn because I was worried it would clash with a future Pelican update. But it seems like this would only happen if I re-ran the quickstart script. "tools" is a better name. :)

author	Brian Neal <bgneal@gmail.com>
date	Sun, 02 Feb 2014 11:32:13 -0600
parents	__bgn/import_blogophile.py@c3115da3ff73
children

comparison

equal deleted inserted replaced

-:271bed1181df
+:6c03ca07a16d
+#!/usr/bin/env python
+"""
+A simple script to convert my Blogofile restructured text posts into the format
+expected by Pelican.
+"""
+# Copyright (C) 2014 by Brian Neal.
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+import datetime
+import os
+import re
+import time
# Directory holding the Blogofile .rst posts to convert.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')

# Directory the converted Pelican .rst posts are written to.
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding')

# Blogofile metadata line: "title: <text>"
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')

# Blogofile metadata line: "date: YYYY/M/D HH:MM:SS"
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')

# Blogofile metadata line: "categories: <comma separated list>"
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for a Pelican reStructuredText post.  The blank line after the
# :author: field is required: reStructuredText only ends a field list at a
# blank line, so without it docutils reports "Field list ends without a blank
# line" when the body text follows immediately.  The blank line after the
# title underline follows the layout shown in the Pelican documentation.
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""
class ConvertError(Exception):
    """Signals that a post could not be converted."""
def slugify(s):
    """Build a URL slug from the string s.

    The result has to stay identical to the slugs Blogofile generated so that
    existing URLs keep working. The rules follow a Blogofile customization
    based on a tip by Mike Bayer:
    http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    # Ordered (pattern, replacement) steps; each is applied to the output of
    # the previous one.
    steps = (
        (r'\.{2,}', ' '),             # ellipses become a space
        (r'[^0-9a-zA-Z\.]+', '-'),    # runs of anything but alphanumerics or '.' become one '-'
        (r'^-+|-+$', ''),             # drop leading/trailing '-'
    )
    result = s.lower()
    for pattern, replacement in steps:
        result = re.sub(pattern, replacement, result)
    return result
def convert(src, dst):
    """Convert one Blogofile post into a Pelican post.

    Arguments:
    src - path of the Blogofile .rst file to read
    dst - path of the Pelican .rst file to write

    The conversion being performed is announced on standard output. Any
    exception raised by parse_input() or write_output() propagates to the
    caller.

    """
    # A parenthesized single-argument print produces the same output whether
    # it is parsed as a Python 2 statement or a Python 3 function call.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)
def parse_input(src):
    """Parse a Blogofile .rst post into its metadata and body.

    The post must contain a metadata block delimited by two lines consisting
    of '---'. Everything after the closing delimiter is the post body.

    Arguments:
    src - path of the Blogofile .rst file to read

    Returns a 2-tuple:
    meta - dictionary with the keys 'title' (string), 'date'
           (datetime.datetime) and 'categories' (list of strings)
    content - blog post body as a string, stripped of surrounding whitespace

    Raises ConvertError if the metadata block cannot be found, if the date is
    not a valid calendar date and time, or if any of the title, date or
    categories entries is missing.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Locate the delimiter lines. Trailing whitespace is ignored so CRLF line
    # endings, or a delimiter on a final line without a newline, still match.
    delimiters = [i for i, line in enumerate(lines) if line.rstrip() == '---']
    if not delimiters:
        raise ConvertError("Can't find start of meta block")
    if len(delimiters) < 2:
        raise ConvertError("Can't find end of meta block")
    # Only the first two delimiters bound the meta block; any later '---'
    # lines belong to the post body.
    meta_start, meta_end = delimiters[:2]

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            hour, minute, second = [int(p) for p in m.group('time').split(':')]
            try:
                meta['date'] = datetime.datetime(int(m.group('year')),
                                                 int(m.group('month')),
                                                 int(m.group('day')),
                                                 hour, minute, second)
            except ValueError as ex:
                # Out-of-range values (e.g. month 13) are reported as a
                # conversion failure so callers handling ConvertError can
                # skip this post instead of aborting.
                raise ConvertError("Invalid date in metadata: {}".format(ex))
            continue

        m = CAT_RE.match(line)
        if m:
            meta['categories'] = m.group('cats').replace(' ', '').split(',')
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content
def write_output(meta, content, dst):
    """Render a post in Pelican's .rst layout and save it to dst.

    Arguments:
    meta - Blogofile metadata dictionary; the 'title', 'date' and
           'categories' entries are used
    content - blog post body as a string
    dst - path of the file to write

    """
    heading = meta['title']
    fields = {
        'title': heading,
        # The title underline is as long as the title itself.
        'title_underline': len(heading) * '#',
        'date': meta['date'].strftime('%Y-%m-%d %H:%M'),
        'tags': ', '.join(meta['categories']),
        'slug': slugify(heading),
        'content': content,
    }
    rendered = PELICAN_FMT.format(**fields)

    with open(dst, 'w') as out:
        out.write(rendered)
if __name__ == '__main__':
    # Convert every .rst file found in SRC_DIR into a file of the same name
    # in DST_DIR. The listing is sorted so runs proceed in a repeatable
    # order; os.listdir() makes no ordering guarantee.
    for name in sorted(os.listdir(SRC_DIR)):
        if not name.endswith('.rst'):
            continue
        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)
        try:
            convert(src, dst)
        except ConvertError as ex:
            # A post that cannot be converted is reported and skipped so the
            # remaining posts are still processed. The parenthesized
            # single-argument print works on both Python 2 and Python 3.
            print("Error converting {}: {}".format(name, ex))

Mercurial > public > pelican-blog

comparison tools/import_blogophile.py @ 10:6c03ca07a16d