comparison tools/import_blogophile.py @ 10:6c03ca07a16d

Renamed my tools directory to "tools". I named it __bgn because I was worried it would clash with a future Pelican update. But it seems like this would only happen if I re-ran the quickstart script. "tools" is a better name. :)
author Brian Neal <bgneal@gmail.com>
date Sun, 02 Feb 2014 11:32:13 -0600
parents __bgn/import_blogophile.py@c3115da3ff73
children
comparison
equal deleted inserted replaced
9:271bed1181df 10:6c03ca07a16d
1 #!/usr/bin/env python
2 """
3 A simple script to convert my Blogofile restructured text posts into the format
4 expected by Pelican.
5
6 """
7 # Copyright (C) 2014 by Brian Neal.
8 #
9 # Permission is hereby granted, free of charge, to any person obtaining a copy
10 # of this software and associated documentation files (the "Software"), to deal
11 # in the Software without restriction, including without limitation the rights
12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 # copies of the Software, and to permit persons to whom the Software is
14 # furnished to do so, subject to the following conditions:
15 #
16 # The above copyright notice and this permission notice shall be included in
17 # all copies or substantial portions of the Software.
18 #
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
25 # THE SOFTWARE.
26
27 import datetime
28 import os
29 import re
30 import time
31
32
# Directory that is scanned for the original Blogofile .rst posts.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
# Directory that receives the converted Pelican posts (same file names).
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding')

# Metadata line holding the post title, e.g. "title: My Post".
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
# Metadata line holding the timestamp, e.g. "date: 2014/2/2 11:32:13":
# 4-digit year, 1-2 digit month and day, then HH:MM:SS. The pieces are
# captured separately as year, month, day and time.
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
# Metadata line holding the categories; the captured text is later split on
# commas.
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for the generated Pelican post: title with underline, a block of
# :field: metadata lines, then the body. The trailing backslash after the
# opening quotes keeps the output from starting with a blank line.
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""
54
55
class ConvertError(Exception):
    """Raised when a Blogofile post cannot be converted."""
58
59
def slugify(s):
    """Build the URL slug for the string s.

    The result has to be identical to what my customized Blogofile setup
    produced, otherwise the existing URLs would change. The customization was
    based on a tip by Mike Bayer:
    http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    # (pattern, replacement) pairs, applied in this order to the lower-cased
    # input.
    substitutions = (
        # runs of two or more dots (ellipses) turn into a space
        (r'\.{2,}', ' '),
        # every run of characters other than alphanumerics and . becomes one -
        (r'[^0-9a-zA-Z\.]+', '-'),
        # drop any - left at the very start or end
        (r'^-+|-+$', ''),
    )

    result = s.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result
79
80
def convert(src, dst):
    """Convert one Blogofile post to Pelican format.

    src - path of the Blogofile .rst file to read
    dst - path of the Pelican .rst file to write

    Prints a "src -> dst" progress line, parses src and writes the result to
    dst. Exceptions raised while parsing or writing are not caught here.

    """
    # Parenthesized single-argument form: behaves the same as the old print
    # statement on Python 2 and is a valid function call on Python 3.
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)
86
87
def parse_input(src):
    """Parse a Blogofile .rst post.

    The file must contain a metadata block enclosed by two '---' lines.
    Everything after the closing '---' line is the post body.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title'
               (string), 'date' (datetime.datetime) and 'categories' (list of
               strings)
        content - blog post body as a string, with surrounding whitespace
                  stripped

    Raises ConvertError if a '---' line is missing, if the date line holds an
    out-of-range date or time, or if the title, date or categories entry is
    absent.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block. Trailing whitespace is ignored when looking for the
    # '---' lines so that '\r\n' line endings, or a closing '---' that is the
    # last line of the file without a newline, are still recognized.
    markers = [i for i, line in enumerate(lines) if line.rstrip() == '---']
    if not markers:
        raise ConvertError("Can't find start of meta block")
    if len(markers) < 2:
        raise ConvertError("Can't find end of meta block")
    meta_start, meta_end = markers[:2]

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            # The regex only checks the digit layout; values such as month 13
            # or hour 25 surface as ValueError here. Report them as a
            # ConvertError so one bad post is handled like any other
            # conversion failure.
            try:
                t = time.strptime(m.group('time'), '%H:%M:%S')
                meta['date'] = datetime.datetime(
                    int(m.group('year')),
                    int(m.group('month')),
                    int(m.group('day')),
                    t.tm_hour, t.tm_min, t.tm_sec)
            except ValueError as ex:
                raise ConvertError(
                    "Invalid date in metadata: {}".format(ex))
            continue

        m = CAT_RE.match(line)
        if m:
            # All spaces are removed before splitting on commas; the strip()
            # also discards a trailing '\r' left by '\r\n' line endings.
            cats = m.group('cats').strip().replace(' ', '')
            meta['categories'] = cats.split(',')

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content
142
143
def write_output(meta, content, dst):
    """Render a post in the Pelican .rst layout and save it to the file dst.

    meta is the Blogofile metadata dictionary (its 'title', 'date' and
    'categories' entries are used) and content is the post body; both are
    substituted into PELICAN_FMT.

    """
    heading = meta['title']
    fields = {
        'title': heading,
        'date': meta['date'].strftime('%Y-%m-%d %H:%M'),
        'tags': ', '.join(meta['categories']),
        'slug': slugify(heading),
        # title underline: as many '#' characters as len() of the title
        'title_underline': '#' * len(heading),
        'content': content,
    }

    with open(dst, 'w') as out:
        out.write(PELICAN_FMT.format(**fields))
163
164
if __name__ == '__main__':
    # Convert every .rst file found in SRC_DIR into a file of the same name
    # in DST_DIR.
    for name in os.listdir(SRC_DIR):
        if not name.endswith('.rst'):
            continue

        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        try:
            convert(src, dst)
        except ConvertError as ex:
            # Report the failed post and carry on with the remaining ones.
            # Parenthesized single-argument print works on Python 2 and 3.
            print("Error converting {}: {}".format(name, ex))