Mercurial > public > pelican-blog
comparison tools/import_blogophile.py @ 10:6c03ca07a16d
Renamed my tools directory to "tools".
I named it __bgn because I was worried it would clash with a future Pelican
update. But it seems like this would only happen if I re-ran the quickstart
script. "tools" is a better name. :)
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sun, 02 Feb 2014 11:32:13 -0600 |
parents | __bgn/import_blogophile.py@c3115da3ff73 |
children |
comparison
equal
deleted
inserted
replaced
9:271bed1181df | 10:6c03ca07a16d |
---|---|
1 #!/usr/bin/env python | |
2 """ | |
3 A simple script to convert my Blogofile restructured text posts into the format | |
4 expected by Pelican. | |
5 | |
6 """ | |
7 # Copyright (C) 2014 by Brian Neal. | |
8 # | |
9 # Permission is hereby granted, free of charge, to any person obtaining a copy | |
10 # of this software and associated documentation files (the "Software"), to deal | |
11 # in the Software without restriction, including without limitation the rights | |
12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |
13 # copies of the Software, and to permit persons to whom the Software is | |
14 # furnished to do so, subject to the following conditions: | |
15 # | |
16 # The above copyright notice and this permission notice shall be included in | |
17 # all copies or substantial portions of the Software. | |
18 # | |
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN | |
25 # THE SOFTWARE. | |
26 | |
27 import datetime | |
28 import os | |
29 import re | |
30 import time | |
31 | |
32 | |
# Directory that is scanned for the original Blogofile .rst posts.
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
# Directory that receives the converted posts (same file names as the source).
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content/Coding')

# Patterns for the metadata lines found between the two '---' delimiter lines
# of a Blogofile post. Each is applied to a single line with re.match().
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
# Date lines look like "date: YYYY/M/D HH:MM:SS"; month and day may be one or
# two digits, and the whitespace before the time is optional.
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
# The rest of a "categories:" line is captured as one string; it is split on
# commas by the code that uses this pattern.
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for the generated post: a title with an underline, a block of
# :field: metadata lines, then the post body. Filled in with str.format().
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""
54 | |
55 | |
class ConvertError(Exception):
    """Raised when a Blogofile post cannot be converted.

    Used for problems in the input file itself, such as a missing metadata
    block or a missing metadata field.

    """
58 | |
59 | |
def slugify(s):
    """Build a URL slug from the string s and return it.

    The rules here have to reproduce what my customized Blogofile setup did,
    otherwise the URLs of existing posts would change. The customization was
    based on a tip by Mike Bayer:
    http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    """
    # Applied in order; each entry is (pattern, replacement).
    substitutions = (
        # runs of two or more dots (ellipses) become a space
        (r'\.{2,}', ' '),
        # any run of characters other than digits, letters or '.' becomes
        # a single '-'
        (r'[^0-9a-zA-Z\.]+', '-'),
        # drop any '-' characters at the very start or end
        (r'^-+|-+$', ''),
    )

    result = s.lower()
    for pattern, replacement in substitutions:
        result = re.sub(pattern, replacement, result)
    return result
79 | |
80 | |
def convert(src, dst):
    """Convert one Blogofile post to Pelican format.

    src - path of the Blogofile .rst file to read
    dst - path of the Pelican .rst file to write

    Prints a "src -> dst" progress line, parses the input with parse_input()
    and writes the result with write_output(). Any ConvertError raised while
    parsing propagates to the caller.

    """
    # Single-argument call form: prints the same text under Python 2 and is
    # also valid syntax under Python 3 (the print statement is not).
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)
86 | |
87 | |
def _find_meta_delimiter(lines, start, which):
    """Return the index of the first '---' line at or after index start.

    which is 'start' or 'end' and is only used in the error message.
    Raises ConvertError if no delimiter line is found.

    """
    for i in range(start, len(lines)):
        # Ignore trailing whitespace so that CRLF line endings, stray spaces
        # and a delimiter on the last line without a newline are all accepted.
        if lines[i].rstrip() == '---':
            return i
    raise ConvertError("Can't find {} of meta block".format(which))


def parse_input(src):
    """Parse a Blogofile .rst post.

    The file named by src must contain a metadata block delimited by two
    lines consisting of '---'. Inside that block the 'title:', 'date:' and
    'categories:' lines are recognized; other lines are ignored. Everything
    after the closing delimiter is the post body.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title'
               (string), 'date' (datetime.datetime) and 'categories' (list
               of strings)
        content - blog post body as a string, with leading and trailing
                  whitespace removed

    Raises ConvertError if a delimiter line or one of the three metadata
    fields is missing, or if the date line holds an impossible date or time.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block
    meta_start = _find_meta_delimiter(lines, 0, 'start')
    meta_end = _find_meta_delimiter(lines, meta_start + 1, 'end')

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            # DATE_RE only checks the digit counts, so values such as month
            # 13 or hour 25 get this far. Report them as a ConvertError so
            # the caller can skip this file instead of crashing.
            try:
                t = time.strptime(m.group('time'), '%H:%M:%S')
                meta['date'] = datetime.datetime(
                        int(m.group('year')),
                        int(m.group('month')),
                        int(m.group('day')),
                        t.tm_hour, t.tm_min, t.tm_sec)
            except ValueError as ex:
                raise ConvertError("Invalid date in metadata: {}".format(ex))
            continue

        m = CAT_RE.match(line)
        if m:
            # NOTE: every space is removed, not only those around the commas,
            # so a multi-word category loses its internal spaces.
            cats = m.group('cats').strip()
            meta['categories'] = cats.replace(' ', '').split(',')
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content
142 | |
143 | |
def write_output(meta, content, dst):
    """Write a Pelican style .rst post to the file named by dst.

    meta is the metadata dictionary ('title', 'date' and 'categories' are
    read from it) and content is the post body. The title is underlined with
    '#' characters, the categories become the tags, and the slug is derived
    from the title with slugify().

    """
    heading = meta['title']
    fields = {
        'title': heading,
        'title_underline': len(heading) * '#',
        'date': meta['date'].strftime('%Y-%m-%d %H:%M'),
        'tags': ', '.join(meta['categories']),
        'slug': slugify(heading),
        'content': content,
    }

    # Render the whole post before the output file is opened.
    text = PELICAN_FMT.format(**fields)

    with open(dst, 'w') as out:
        out.write(text)
163 | |
164 | |
if __name__ == '__main__':
    # Convert every .rst file found in SRC_DIR into a file of the same name
    # in DST_DIR. Sorting makes the processing order (and therefore the
    # printed output) the same on every run.
    for name in sorted(os.listdir(SRC_DIR)):
        if not name.endswith('.rst'):
            continue

        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        try:
            convert(src, dst)
        except ConvertError as ex:
            # A bad post is reported and skipped; the remaining posts are
            # still converted. The call form of print works on both
            # Python 2 and Python 3.
            print("Error converting {}: {}".format(name, ex))