#!/usr/bin/env python
"""
A simple script to convert my Blogofile restructured text posts into the format
expected by Pelican.

"""
# Copyright (C) 2014 by Brian Neal.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import datetime
import os
import re
import time


SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding')

# Patterns for the three metadata lines this script understands. Each one is
# matched against a single line of the meta block.
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Line that opens and closes the metadata block in the input posts.
_META_MARKER = '---'

# Metadata keys that must be present for a post to be converted.
_REQUIRED_KEYS = ('title', 'date', 'categories')

# Template for the generated post; filled in by write_output().
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""


class ConvertError(Exception):
    """Exception class for the conversion process"""


def slugify(s):
    """Return a slug from the string s.

    This code must match what Blogofile was doing in order to keep the URLs the
    same. In this case I had customized Blogofile's functionality based on a
    tip by Mike Bayer: http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    slug = s.lower()

    # convert ellipses to spaces
    slug = re.sub(r'\.{2,}', ' ', slug)

    # flatten everything non alpha or . into a single -
    slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug)

    # trim off leading/trailing -
    slug = re.sub(r'^-+|-+$', '', slug)
    return slug


def convert(src, dst):
    """Convert the Blogofile post at path src into a Pelican post at path dst.

    Prints a progress line, then parses src and writes dst. Raises
    ConvertError if the input cannot be parsed.

    """
    # A single parenthesized argument prints identically on Python 2 and 3.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)


def _find_marker(lines, start, which):
    """Return the index of the first meta marker line at or after start.

    which is 'start' or 'end' and is only used to build the error message.
    Raises ConvertError if no marker line is found.

    """
    for i, line in enumerate(lines[start:], start):
        # rstrip() tolerates trailing whitespace and a marker that is the last
        # line of a file with no terminating newline.
        if line.rstrip() == _META_MARKER:
            return i
    raise ConvertError("Can't find {} of meta block".format(which))


def _parse_date(m):
    """Build a datetime from a DATE_RE match object.

    Raises ConvertError when the matched numbers do not form a real date or
    time (e.g. month 13), so callers only have to handle one exception type.

    """
    try:
        day = datetime.date(int(m.group('year')),
                            int(m.group('month')),
                            int(m.group('day')))
        t = time.strptime(m.group('time'), '%H:%M:%S')
        clock = datetime.time(t.tm_hour, t.tm_min, t.tm_sec)
    except ValueError as ex:
        raise ConvertError("Invalid date in metadata: {}".format(ex))
    return datetime.datetime.combine(day, clock)


def _parse_meta(meta_lines):
    """Return a metadata dictionary built from the lines of the meta block.

    Lines that match none of the known patterns are ignored. If a key appears
    more than once the last occurrence wins. Raises ConvertError if a required
    key is missing.

    """
    meta = {}
    for line in meta_lines:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue
        m = DATE_RE.match(line)
        if m:
            meta['date'] = _parse_date(m)
            continue
        m = CAT_RE.match(line)
        if m:
            # All spaces are removed before splitting on commas.
            meta['categories'] = m.group('cats').replace(' ', '').split(',')

    for k in _REQUIRED_KEYS:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))
    return meta


def parse_input(src):
    """Parse a Blogofile .rst post.

    src is the path of the file to read.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title' (string),
               'date' (datetime.datetime) and 'categories' (list of strings)
        content - blog post body as a string, stripped of surrounding
                  whitespace

    Raises ConvertError if the meta block cannot be located, a required key is
    missing, or the date is not valid.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # The meta block is everything between the first two marker lines.
    meta_start = _find_marker(lines, 0, 'start')
    meta_end = _find_marker(lines, meta_start + 1, 'end')

    meta = _parse_meta(lines[meta_start + 1:meta_end])
    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content


def write_output(meta, content, dst):
    """Create the Pelican style .rst file from the Blogofile metadata and
    content. Output is written to the file specified by dst.

    meta must have the keys 'title', 'date' and 'categories' as returned by
    parse_input(). The slug is derived from the title with slugify().

    """
    title = meta['title']
    date = meta['date'].strftime('%Y-%m-%d %H:%M')
    tags = ', '.join(meta['categories'])
    slug = slugify(title)

    post = PELICAN_FMT.format(title=title,
                              title_underline='#' * len(title),
                              date=date,
                              tags=tags,
                              slug=slug,
                              content=content)

    with open(dst, 'w') as fp:
        fp.write(post)


def main():
    """Convert every .rst file in SRC_DIR into a file of the same name in
    DST_DIR, reporting (but not stopping on) posts that fail to convert.

    """
    for name in os.listdir(SRC_DIR):
        if not name.endswith('.rst'):
            continue
        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        try:
            convert(src, dst)
        except ConvertError as ex:
            print("Error converting {}: {}".format(name, ex))


if __name__ == '__main__':
    main()