bgneal@2
|
1 #!/usr/bin/env python
|
bgneal@2
|
2 """
|
bgneal@2
|
3 A simple script to convert my Blogofile restructured text posts into the format
|
bgneal@2
|
4 expected by Pelican.
|
bgneal@2
|
5
|
bgneal@2
|
6 """
|
bgneal@2
|
7 # Copyright (C) 2014 by Brian Neal.
|
bgneal@2
|
8 #
|
bgneal@2
|
9 # Permission is hereby granted, free of charge, to any person obtaining a copy
|
bgneal@2
|
10 # of this software and associated documentation files (the "Software"), to deal
|
bgneal@2
|
11 # in the Software without restriction, including without limitation the rights
|
bgneal@2
|
12 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
bgneal@2
|
13 # copies of the Software, and to permit persons to whom the Software is
|
bgneal@2
|
14 # furnished to do so, subject to the following conditions:
|
bgneal@2
|
15 #
|
bgneal@2
|
16 # The above copyright notice and this permission notice shall be included in
|
bgneal@2
|
17 # all copies or substantial portions of the Software.
|
bgneal@2
|
18 #
|
bgneal@2
|
19 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
bgneal@2
|
20 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
bgneal@2
|
21 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
bgneal@2
|
22 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
bgneal@2
|
23 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
bgneal@2
|
24 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
bgneal@2
|
25 # THE SOFTWARE.
|
bgneal@2
|
26
|
bgneal@2
|
27 import datetime
|
bgneal@2
|
28 import os
|
bgneal@2
|
29 import re
|
bgneal@2
|
30 import time
|
bgneal@2
|
31
|
bgneal@2
|
32
|
bgneal@2
|
# Directory that is scanned for the Blogofile .rst posts to convert (input).
SRC_DIR = os.path.expanduser('~/coding/python/virtualenvs/blogofile/blog/_posts')
# Directory that receives the converted Pelican .rst files (output).
DST_DIR = os.path.expanduser('~/coding/python/venvs/blog/blog-pelican/content')

# Patterns for the Blogofile metadata lines understood by parse_input().
# "title: <text>"
TITLE_RE = re.compile(r'^title: (?P<title>.*)$')
# "date: YYYY/MM/DD HH:MM:SS" -- month and day may be one or two digits, the
# time fields are always two digits each.
DATE_RE = re.compile(r'^date: (?P<year>\d{4})/'
                     r'(?P<month>\d{1,2})/'
                     r'(?P<day>\d{1,2})\s*'
                     r'(?P<time>\d{2}:\d{2}:\d{2})\s*$')
# "categories: <text>" -- parse_input() splits the captured text on commas.
CAT_RE = re.compile(r'^categories: (?P<cats>.*)$')

# Template for a Pelican post: an underlined title, a block of metadata
# fields, then the post body. The category and author values are fixed text;
# the remaining fields are filled in by write_output() via str.format().
PELICAN_FMT = """\
{title}
{title_underline}

:date: {date}
:category: coding
:tags: {tags}
:slug: {slug}
:author: Brian Neal

{content}
"""
|
bgneal@2
|
55
|
bgneal@2
|
56
|
bgneal@2
|
class ConvertError(Exception):
    """Raised when a Blogofile post cannot be converted.

    parse_input() raises this when the metadata block delimiters or a
    required metadata entry cannot be found; the script's main loop catches
    it, reports the problem and moves on to the next post.

    """
|
bgneal@2
|
59
|
bgneal@2
|
60
|
bgneal@2
|
def slugify(s):
    """Build the URL slug for the title string s and return it.

    The result has to be identical to what Blogofile produced so that the
    existing post URLs do not change. Blogofile's stock behaviour had been
    customized based on a tip by Mike Bayer:
    http://techspot.zzzeek.org/2010/12/06/my-blogofile-hacks/

    The title is lower-cased, runs of two or more dots are treated as a word
    break, every run of characters other than letters, digits and '.' becomes
    a single '-', and dashes at either end are removed.

    """
    # An ellipsis ("..", "...", etc.) becomes a space so that the next step
    # folds it into a dash; a lone '.' is left in place.
    spaced = re.sub(r'\.{2,}', ' ', s.lower())

    # Collapse each run of anything that is not alphanumeric or '.' into '-'.
    dashed = re.sub(r'[^0-9a-zA-Z\.]+', '-', spaced)

    # Strip the dashes left over at the start and end.
    return re.sub(r'^-+|-+$', '', dashed)
|
bgneal@2
|
80
|
bgneal@2
|
81
|
bgneal@2
|
def convert(src, dst):
    """Convert a single Blogofile post to Pelican format.

    src - path of the Blogofile .rst file to read
    dst - path of the Pelican .rst file to write

    A "src -> dst" progress line is printed first, then the post is parsed
    with parse_input() and written with write_output(). A ConvertError raised
    while parsing propagates to the caller.

    """
    # The parenthesized single-argument form prints the same text under both
    # Python 2 and Python 3 (the bare print statement is a syntax error on 3).
    print('{} -> {}'.format(src, dst))
    meta, content = parse_input(src)
    write_output(meta, content, dst)
|
bgneal@2
|
87
|
bgneal@2
|
88
|
bgneal@2
|
def parse_input(src):
    """Parse a Blogofile .rst post into its metadata and body.

    The file must contain a metadata block enclosed by two lines consisting
    of '---'. Within the block the 'title:', 'date:' (YYYY/MM/DD HH:MM:SS)
    and 'categories:' (comma separated) entries are recognized; any other
    line is ignored.

    Returns a 2-tuple:
        meta - dictionary of Blogofile metadata with the keys 'title'
               (string), 'date' (datetime.datetime) and 'categories' (list
               of strings with all spaces removed)
        content - blog post body (everything after the metadata block) as a
                  string with surrounding whitespace stripped

    Raises ConvertError if a block delimiter cannot be found, if the date or
    time values are out of range, or if any of the three entries is missing.

    """
    with open(src, 'r') as fp:
        lines = fp.readlines()

    # Find meta block. Trailing whitespace is ignored when looking for the
    # delimiters so that CRLF line endings, stray blanks, or a delimiter on
    # the final line without a newline are all still recognized.
    delimiters = [i for i, line in enumerate(lines) if line.rstrip() == '---']
    if not delimiters:
        raise ConvertError("Can't find start of meta block")
    if len(delimiters) < 2:
        raise ConvertError("Can't find end of meta block")
    meta_start, meta_end = delimiters[:2]

    meta = {}
    for line in lines[meta_start + 1:meta_end]:
        m = TITLE_RE.match(line)
        if m:
            meta['title'] = m.group('title').strip()
            continue

        m = DATE_RE.match(line)
        if m:
            # The pattern only checks the digit counts; values such as month
            # 13 or hour 25 are rejected here. Report them as a ConvertError
            # so the caller's per-post error handling applies instead of an
            # unexpected ValueError.
            try:
                t = time.strptime(m.group('time'), '%H:%M:%S')
                meta['date'] = datetime.datetime(int(m.group('year')),
                                                 int(m.group('month')),
                                                 int(m.group('day')),
                                                 t.tm_hour,
                                                 t.tm_min,
                                                 t.tm_sec)
            except ValueError as ex:
                raise ConvertError("Invalid date in metadata: {}".format(ex))
            continue

        m = CAT_RE.match(line)
        if m:
            # strip() drops a trailing '\r' (or other edge whitespace) that
            # would otherwise end up inside the last category name.
            cats = m.group('cats').strip()
            meta['categories'] = cats.replace(' ', '').split(',')
            continue

    for k in ['title', 'date', 'categories']:
        if k not in meta:
            raise ConvertError("Missing {} in metadata".format(k))

    content = ''.join(lines[meta_end + 1:]).strip()
    return meta, content
|
bgneal@2
|
143
|
bgneal@2
|
144
|
bgneal@2
|
def write_output(meta, content, dst):
    """Render a post in Pelican's .rst layout and save it to the file dst.

    meta - metadata dictionary; the 'title', 'date' and 'categories' entries
           are used
    content - blog post body as a string
    dst - path of the file to write

    The title is underlined with one '#' per character, the date is written
    as YYYY-MM-DD HH:MM, the categories become the comma separated tags and
    the slug is derived from the title with slugify().

    """
    heading = meta['title']
    fields = {
        'title': heading,
        'date': meta['date'].strftime('%Y-%m-%d %H:%M'),
        'tags': ', '.join(meta['categories']),
        'slug': slugify(heading),
        'title_underline': len(heading) * '#',
        'content': content,
    }

    # Render before opening dst so a formatting failure cannot leave behind
    # an empty output file.
    rendered = PELICAN_FMT.format(**fields)

    with open(dst, 'w') as out:
        out.write(rendered)
|
bgneal@2
|
164
|
bgneal@2
|
165
|
bgneal@2
|
if __name__ == '__main__':
    # Convert every .rst post found in SRC_DIR into a file of the same name
    # in DST_DIR.
    for name in os.listdir(SRC_DIR):
        if not name.endswith('.rst'):
            continue

        src = os.path.join(SRC_DIR, name)
        dst = os.path.join(DST_DIR, name)

        try:
            convert(src, dst)
        except ConvertError as ex:
            # Report the post that failed and carry on with the rest. The
            # parenthesized single-argument print works on Python 2 and 3.
            print("Error converting {}: {}".format(name, ex))
|