annotate __bgn/import_blogophile.py @ 2:b7be75ff95b0

Created a script to convert my Blogofile posts to Pelican.
author Brian Neal <bgneal@gmail.com>
date Wed, 29 Jan 2014 21:32:04 -0600
parents
children c3115da3ff73
rev   line source
bgneal@2 1 #!/usr/bin/env python
bgneal@2 2 """
bgneal@2 3 A simple script to convert my Blogofile restructured text posts into the format
bgneal@2 4 expected by Pelican.
bgneal@2 5
bgneal@2 6 """
bgneal@2 7 # Copyright (C) 2014 by Brian Neal.
bgneal@2 8 #
bgneal@2 9 # Permission is hereby granted, free of charge, to any person obtaining a copy
bgneal@2 10 # of this software and associated documentation files (the "Software"), to deal
bgneal@2 11 # in the Software without restriction, including without limitation the rights
bgneal@2 12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
bgneal@2 13 # copies of the Software, and to permit persons to whom the Software is
bgneal@2 14 # furnished to do so, subject to the following conditions:
bgneal@2 15 #
bgneal@2 16 # The above copyright notice and this permission notice shall be included in
bgneal@2 17 # all copies or substantial portions of the Software.
bgneal@2 18 #
bgneal@2 19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
bgneal@2 20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
bgneal@2 21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
bgneal@2 22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
bgneal@2 23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
bgneal@2 24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
bgneal@2 25 # THE SOFTWARE.
bgneal@2 26
bgneal@2 27 import datetime
bgneal@2 28 import os
bgneal@2 29 import re
bgneal@2 30 import time
bgneal@2 31
bgneal@2 32
# Directory containing the Blogofile source posts (*.rst) to convert.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
# Directory that receives the converted Pelican posts.
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content')

# Patterns for the Blogofile metadata lines this script understands. Each one
# is applied with .match() to a single line of the metadata block.
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for a Pelican reStructuredText post: an underlined title, a field
# list of metadata, then the post body. The category and author fields are
# fixed; the remaining fields are filled in with str.format().
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:category: coding
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""
bgneal@2 55
bgneal@2 56
class ConvertError(Exception):
    """Raised when a Blogofile post cannot be converted.

    parse_input() raises this when a post's metadata block is missing or
    incomplete; the main loop catches it, reports the file and moves on.

    """
bgneal@2 59
bgneal@2 60
def slugify(s):
    """Return a slug from the string s.

    The string is lowercased, runs of two or more periods are treated as
    spaces, every run of characters other than ASCII letters, digits and
    periods collapses to a single hyphen, and leading/trailing hyphens are
    removed. Single periods are kept, so "Python 2.7" becomes "python-2.7".

    This code must match what Blogofile was doing in order to keep the URLs the
    same. In this case I had customized Blogofile's functionality based on a tip
    by Mike Bayer: http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    slug = s.lower()

    # convert ellipses to spaces
    slug = re.sub(r'\.{2,}', ' ', slug)

    # flatten everything non alpha or . into a single -
    slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug)

    # trim off leading/trailing -
    slug = re.sub(r'^-+|-+$', '', slug)
    return slug
bgneal@2 80
bgneal@2 81
def convert(src, dst):
    """Convert one Blogofile post into a Pelican post.

    src is the path of the Blogofile .rst file to read and dst is the path of
    the Pelican .rst file to write. Progress is reported on stdout.

    Any ConvertError raised by parse_input() propagates to the caller.

    """
    # Parenthesized single-argument form prints identically under Python 2
    # and is also valid Python 3.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)
bgneal@2 87
bgneal@2 88
def _is_meta_delimiter(line):
    """Return True if line is a '---' metadata delimiter, ignoring its line ending."""
    return line.rstrip('\r\n') == '---'


def parse_input(src):
    """Parse a Blogofile .rst post.

    The post must contain a metadata block bracketed by two '---' lines. The
    lines in between are scanned for 'title: ', 'date: ' and 'categories: '
    entries; any other metadata line is ignored, and if an entry is repeated
    the last occurrence wins. Everything after the closing delimiter is the
    post body.

    Arguments:
        src - path of the Blogofile .rst file to read

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title'
               (string), 'date' (datetime.datetime) and 'categories' (list
               of strings)
        content - blog post body as a string, with leading and trailing
                  whitespace removed

    Raises ConvertError if either delimiter is missing, if the title, date or
    categories entry is absent, or if the date does not name a real calendar
    date and time.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block. Delimiters are compared without their line ending so
    # that CRLF files and a final line lacking a newline are still recognized.
    delimiters = [i for i, line in enumerate(lines) if _is_meta_delimiter(line)]
    if not delimiters:
        raise ConvertError("Can't find start of meta block")
    if len(delimiters) < 2:
        raise ConvertError("Can't find end of meta block")
    meta_start, meta_end = delimiters[:2]

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            # DATE_RE only checks digit counts, so values such as month 13 or
            # hour 25 get this far; report them as a conversion error rather
            # than letting a bare ValueError abort the whole run.
            try:
                hour, minute, second = [int(n) for n in m.group('time').split(':')]
                meta['date'] = datetime.datetime(int(m.group('year')),
                                                 int(m.group('month')),
                                                 int(m.group('day')),
                                                 hour, minute, second)
            except ValueError as ex:
                raise ConvertError("Invalid date in metadata: {}".format(ex))
            continue

        m = CAT_RE.match(line)
        if m:
            # Spaces are removed from the whole list (as before), then each
            # entry is stripped of leftover whitespace such as a '\r', and
            # empty entries (e.g. from a trailing comma) are dropped.
            cats = m.group('cats').replace(' ', '').split(',')
            meta['categories'] = [c.strip() for c in cats if c.strip()]
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content
bgneal@2 143
bgneal@2 144
def write_output(meta, content, dst):
    """Create the Pelican style .rst file from the Blogofile metadata and
    content. Output is written to the file specified by dst.

    Arguments:
        meta - metadata dictionary as returned by parse_input(); the keys
               'title', 'date' and 'categories' are read
        content - blog post body as a string
        dst - path of the file to write; an existing file is overwritten

    The Blogofile categories become the Pelican tags, and the slug is derived
    from the title with slugify().

    """
    title = meta['title']
    date = meta['date'].strftime('%Y-%m-%d %H:%M')
    tags = ', '.join(meta['categories'])
    slug = slugify(title)

    post = PELICAN_FMT.format(title=title,
                              # one '#' per character of the title
                              title_underline='#' * len(title),
                              date=date,
                              tags=tags,
                              slug=slug,
                              content=content)

    with open(dst, 'w') as fp:
        fp.write(post)
bgneal@2 164
bgneal@2 165
if __name__ == '__main__':
    # Convert every .rst post found in SRC_DIR into a same-named file in
    # DST_DIR. The listing is sorted so the processing order (and therefore
    # the progress output) is the same on every run.
    for name in sorted(os.listdir(SRC_DIR)):
        if not name.endswith('.rst'):
            continue

        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        # A post that cannot be converted is reported and skipped so that one
        # bad file does not stop the rest of the batch.
        try:
            convert(src, dst)
        except ConvertError as ex:
            print("Error converting {}: {}".format(name, ex))