Mercurial > public > pelican-blog
view tools/import_blogophile.py @ 12:5ff71680269a
Set DISQUS_NO_ID so we don't confuse Disqus.
The Pelican Bootstrap3 theme is by default adding data-disqus-identifier to
my comment count links. Since my old blog didn't have these, I think Disqus
is getting confused. When I removed this, some blog entries where Disqus
didn't know what the comment count was suddenly began showing the correct
comment counts (mainly 0 I think).
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Tue, 04 Feb 2014 18:44:59 -0600 |
parents | 6c03ca07a16d |
children |
line wrap: on
line source
#!/usr/bin/env python
"""
A simple script to convert my Blogofile restructured text posts into the format
expected by Pelican.

"""
# Copyright (C) 2014 by Brian Neal.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

import datetime
import os
import re
import time


# Directory holding the Blogofile posts to read, and the Pelican content
# directory the converted posts are written to.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding')

# Patterns for the three metadata lines this script requires in every post.
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# A line consisting of this marker opens and closes the metadata block.
META_DELIMITER = '---'

# Template for the generated post: title, title underline, a reST field list
# with the metadata, then the post body.
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""


class ConvertError(Exception):
    """Exception class for the conversion process"""


def slugify(s):
    """Return a slug from the string s.

    This code must match what Blogofile was doing in order to keep the URLs the
    same. In this case I had customized Blogofile's functionality based on a tip
    by Mike Bayer: http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    slug = s.lower()

    # convert ellipses to spaces
    slug = re.sub(r'\.{2,}', ' ', slug)

    # flatten everything non alpha or . into a single -
    slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug)

    # trim off leading/trailing -
    slug = re.sub(r'^-+|-+$', '', slug)
    return slug


def convert(src, dst):
    """Convert the Blogofile post at path src to a Pelican post at path dst.

    Raises ConvertError if the source post's metadata cannot be parsed.

    """
    # A single parenthesised argument prints identically on Python 2 and 3.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)


def _find_delimiter(lines, start):
    """Return the index of the first delimiter line at or after start.

    Returns None when there is no such line. Trailing whitespace is ignored so
    that CR-LF line endings, stray spaces, and a final line without a newline
    are all still recognised as delimiters.

    """
    for i, line in enumerate(lines[start:], start):
        if line.rstrip() == META_DELIMITER:
            return i
    return None


def _parse_date(match):
    """Build a datetime from a successful DATE_RE match.

    Raises ConvertError when the matched digits do not form a real date or
    time (for example month 13), so a single bad post is reported by the
    caller instead of aborting the whole run with a ValueError.

    """
    try:
        day = datetime.date(int(match.group('year')),
                            int(match.group('month')),
                            int(match.group('day')))
        t = time.strptime(match.group('time'), '%H:%M:%S')
        clock = datetime.time(t.tm_hour, t.tm_min, t.tm_sec)
    except ValueError as ex:
        raise ConvertError("Invalid date in metadata: {}".format(ex))
    return datetime.datetime.combine(day, clock)


def parse_input(src):
    """Parse a Blogofile .rst post.

    src is the path of the file to read.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title' (str),
               'date' (datetime.datetime) and 'categories' (list of str)
        content - blog post body as a string, with surrounding whitespace
                  stripped

    Raises ConvertError if the metadata block cannot be located, if any of
    the three metadata keys is missing, or if the date is not a valid date.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block
    meta_start = _find_delimiter(lines, 0)
    if meta_start is None:
        raise ConvertError("Can't find start of meta block")

    meta_end = _find_delimiter(lines, meta_start + 1)
    if meta_end is None:
        raise ConvertError("Can't find end of meta block")

    meta = {}
    for raw_line in lines[meta_start + 1:meta_end]:
        # Drop the line terminator (and any stray trailing whitespace) so it
        # cannot leak into the captured values.
        line = raw_line.rstrip()

        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            meta['date'] = _parse_date(m)
            continue

        m = CAT_RE.match(line)
        if m:
            # All spaces are removed before splitting on commas.
            meta['categories'] = m.group('cats').replace(' ', '').split(',')
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content


def write_output(meta, content, dst):
    """Create the Pelican style .rst file from the Blogofile metadata and
    content.

    meta is a dictionary as returned by parse_input(); content is the post
    body. Output is written to the file specified by dst, replacing any
    existing file.

    """
    title = meta['title']
    date = meta['date'].strftime('%Y-%m-%d %H:%M')
    tags = ', '.join(meta['categories'])
    slug = slugify(title)

    post = PELICAN_FMT.format(title=title,
                              title_underline='#' * len(title),
                              date=date,
                              tags=tags,
                              slug=slug,
                              content=content)

    with open(dst, 'w') as fp:
        fp.write(post)


def main():
    """Convert every .rst post in SRC_DIR into DST_DIR.

    A post that fails with ConvertError is reported and skipped; the remaining
    posts are still converted.

    """
    # Sorted so the posts are processed (and reported) in a stable order.
    for name in sorted(os.listdir(SRC_DIR)):
        if not name.endswith('.rst'):
            continue
        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)
        try:
            convert(src, dst)
        except ConvertError as ex:
            print("Error converting {}: {}".format(name, ex))


if __name__ == '__main__':
    main()