pelican-blog: tools/import_blogophile.py annotate

annotate tools/import_blogophile.py @ 10:6c03ca07a16d

Renamed my tools directory to "tools". I named it __bgn because I was worried it would clash with a future Pelican update. But it seems like this would only happen if I re-ran the quickstart script. "tools" is a better name. :)

author	Brian Neal <bgneal@gmail.com>
date	Sun, 02 Feb 2014 11:32:13 -0600
parents	__bgn/import_blogophile.py@c3115da3ff73
children

rev	line source
bgneal@2	1 #!/usr/bin/env python
bgneal@2	2 """
bgneal@2	3 A simple script to convert my Blogofile restructured text posts into the format
bgneal@2	4 expected by Pelican.
bgneal@2	5
bgneal@2	6 """
bgneal@2	7 # Copyright (C) 2014 by Brian Neal.
bgneal@2	8 #
bgneal@2	9 # Permission is hereby granted, free of charge, to any person obtaining a copy
bgneal@2	10 # of this software and associated documentation files (the "Software"), to deal
bgneal@2	11 # in the Software without restriction, including without limitation the rights
bgneal@2	12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
bgneal@2	13 # copies of the Software, and to permit persons to whom the Software is
bgneal@2	14 # furnished to do so, subject to the following conditions:
bgneal@2	15 #
bgneal@2	16 # The above copyright notice and this permission notice shall be included in
bgneal@2	17 # all copies or substantial portions of the Software.
bgneal@2	18 #
bgneal@2	19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
bgneal@2	20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
bgneal@2	21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
bgneal@2	22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
bgneal@2	23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
bgneal@2	24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
bgneal@2	25 # THE SOFTWARE.
bgneal@2	26
bgneal@2	27 import datetime
bgneal@2	28 import os
bgneal@2	29 import re
bgneal@2	30 import time
bgneal@2	31
bgneal@2	32
# Directory holding the Blogofile posts that are read by this script.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
# Directory that receives the converted Pelican posts.
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding')

# Patterns for the Blogofile metadata lines this script understands.
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
DATE_RE = re.compile(
    r'^date: (?P<year>\d{4})/(?P<month>\d{1,2})/(?P<day>\d{1,2})\s*'
    r'(?P<time>\d{2}:\d{2}:\d{2})\s*$'
)
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for a Pelican post: underlined title, metadata fields, then body.
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""
bgneal@2	54
bgneal@2	55
class ConvertError(Exception):
    """Raised when a Blogofile post cannot be converted."""
    pass
bgneal@2	58
bgneal@2	59
def slugify(s):
    """Return a slug from the string s.

    This code must match what Blogofile was doing in order to keep the URLs the
    same. In this case I had customized Blogofile's functionality based on a tip
    by Mike Bayer: http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    The string is lowercased, runs of two or more periods become a space,
    every run of characters other than ASCII letters, digits and '.' is
    collapsed into a single '-', and leading/trailing '-' are removed.

    """
    slug = s.lower()

    # convert ellipses to spaces
    slug = re.sub(r'\.{2,}', ' ', slug)

    # flatten everything non alpha or . into a single -
    slug = re.sub(r'[^0-9a-zA-Z\.]+', '-', slug)

    # trim off leading/trailing - (str.strip handles both ends, so no
    # regex alternation is needed here)
    return slug.strip('-')
bgneal@2	79
bgneal@2	80
def convert(src, dst):
    """Convert Blogofile to Pelican.

    src - path of the Blogofile .rst post to read
    dst - path of the Pelican .rst post to write

    Prints a "src -> dst" progress line, then parses src and writes dst.
    Exceptions raised by parse_input() and write_output() propagate.

    """
    # The single-argument call form prints the same text whether it is run
    # as a Python 2 print statement or as the Python 3 print function.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)
bgneal@2	86
bgneal@2	87
def _find_meta_delimiter(lines, start, error_msg):
    """Return the index of the first '---' line at or after index start.

    Raises ConvertError(error_msg) if no such line exists.

    """
    for i in range(start, len(lines)):
        # Ignore trailing whitespace so that CRLF line endings and a final
        # line without a newline still count as a delimiter.
        if lines[i].rstrip() == '---':
            return i
    raise ConvertError(error_msg)


def parse_input(src):
    """Parse input Blogofile .rst input.

    The metadata block is the run of lines between the first two '---'
    lines of the file. Only the title, date and categories entries are
    read from it; any other metadata line is ignored.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title'
               (string), 'date' (datetime.datetime) and 'categories'
               (list of strings with spaces removed)
        content - blog post body as a string, stripped of leading and
                  trailing whitespace

    Raises ConvertError if the metadata block cannot be found, if the date
    holds out-of-range values, or if any of the three entries is missing.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block
    meta_start = _find_meta_delimiter(
        lines, 0, "Can't find start of meta block")
    meta_end = _find_meta_delimiter(
        lines, meta_start + 1, "Can't find end of meta block")

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            try:
                t = time.strptime(m.group('time'), '%H:%M:%S')
                meta['date'] = datetime.datetime(
                    int(m.group('year')),
                    int(m.group('month')),
                    int(m.group('day')),
                    t.tm_hour, t.tm_min, t.tm_sec)
            except ValueError as ex:
                # The regex only checks the digit layout; values such as
                # month 13 or hour 25 are rejected here. Report them as a
                # conversion error so one bad post does not stop the batch.
                raise ConvertError(
                    "Invalid date in metadata: {}".format(ex))
            continue

        m = CAT_RE.match(line)
        if m:
            # strip() first so a stray '\r' or tab at either end does not
            # end up attached to a category name.
            cats = m.group('cats').strip()
            meta['categories'] = cats.replace(' ', '').split(',')
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content
bgneal@2	142
bgneal@2	143
def write_output(meta, content, dst):
    """Render a post in the Pelican .rst layout and save it to dst.

    meta - Blogofile metadata dictionary; the 'title', 'date' and
           'categories' entries are used
    content - blog post body as a string
    dst - path of the file to write

    """
    heading = meta['title']
    fields = {
        'title': heading,
        # The underline is exactly as long as the title.
        'title_underline': len(heading) * '#',
        'date': meta['date'].strftime('%Y-%m-%d %H:%M'),
        'tags': ', '.join(meta['categories']),
        # The slug is derived from the title.
        'slug': slugify(heading),
        'content': content,
    }
    rendered = PELICAN_FMT.format(**fields)

    with open(dst, 'w') as out:
        out.write(rendered)
bgneal@2	163
bgneal@2	164
if __name__ == '__main__':
    # Convert every .rst post found in SRC_DIR into DST_DIR under the same
    # file name. os.listdir() returns entries in arbitrary order, so sort
    # them to make the processing order (and the log output) reproducible.
    for name in sorted(os.listdir(SRC_DIR)):
        if not name.endswith('.rst'):
            continue

        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        try:
            convert(src, dst)
        except ConvertError as ex:
            # Report the failure and carry on with the remaining posts.
            # The call form prints the same text on Python 2 and Python 3.
            print("Error converting {}: {}".format(name, ex))

Mercurial > public > pelican-blog

annotate tools/import_blogophile.py @ 10:6c03ca07a16d