Mercurial > public > sg101
comparison legacy/management/commands/translate_old_posts.py @ 581:ee87ea74d46b
For Django 1.4, rearranged project structure for new manage.py.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 May 2012 17:10:48 -0500 |
parents | gpp/legacy/management/commands/translate_old_posts.py@28de6caa4e6d |
children |
comparison
equal
deleted
inserted
replaced
580:c525f3e0b5d0 | 581:ee87ea74d46b |
---|---|
1 """ | |
2 translate_old_posts.py - A management command to join the bbposts and | |
3 bbposts_text tables together and output as a .csv file, suitable for use as an | |
4 input to mysqlimport into the new database. This method bypasses the Django ORM | |
5 as it was too slow given the number of old posts to import. | |
6 | |
7 """ | |
8 from __future__ import with_statement | |
9 import csv | |
10 import optparse | |
11 from datetime import datetime | |
12 | |
13 import MySQLdb | |
14 import postmarkup | |
15 | |
16 from django.core.management.base import NoArgsCommand, CommandError | |
17 | |
18 from legacy.phpbb import unphpbb | |
19 from legacy.html2md import MarkdownWriter | |
20 from core.markup import SiteMarkup | |
21 | |
22 | |
def convert_ip(s):
    """
    Turn a hexadecimal string holding an IP address into dotted notation.

    The string is parsed as a base-16 integer; its four low-order bytes are
    emitted most-significant first, e.g. "7f000001" -> "127.0.0.1".
    """
    address = int(s, 16)
    # One shift per octet, walking from the high byte down to the low byte.
    octets = [(address >> shift) & 0xff for shift in (24, 16, 8, 0)]
    return "%d.%d.%d.%d" % tuple(octets)
33 | |
34 | |
class Command(NoArgsCommand):
    """
    Management command that exports the legacy forum posts to a .csv file.

    Rows are read from the join of the sln_bbposts and sln_bbposts_text
    tables. Each post body is converted to Markdown and then to HTML, and one
    CSV row per post is written to the output file.
    """
    help = """\
This command joins the SG101 1.0 posts to 2.0 format and outputs the
data as a .csv file suitable for importing into the new database scheme with
the mysqlimport utility.
"""
    option_list = NoArgsCommand.option_list + (
        optparse.make_option("-s", "--progress", action="store_true",
            help="Output a . after every 100 posts to show progress"),
        optparse.make_option("-a", "--host", help="set MySQL host name"),
        optparse.make_option("-u", "--user", help="set MySQL user name"),
        optparse.make_option("-p", "--password", help="set MySQL user password"),
        optparse.make_option("-d", "--database", help="set MySQL database name"),
        optparse.make_option("-o", "--out-file", help="set output filename"),
    )
    bb_parser = postmarkup.create(use_pygments=False, annotate_links=False)
    md_writer = MarkdownWriter()
    site_markup = SiteMarkup()

    # Number of posts between progress dots when --progress is given.
    progress_interval = 100

    def handle_noargs(self, **opts):
        """
        Export every legacy post to the output CSV file.

        Options read from opts: host, user, password, database, out_file and
        progress. Missing or empty values fall back to 'localhost', 'root',
        '' and 'forums_post.csv' respectively; database is mandatory.

        Raises CommandError when no database name is given or when the
        connection to the legacy database fails.
        """
        # Local import so this block does not depend on the file's import list.
        import sys

        host = opts.get('host') or 'localhost'
        user = opts.get('user') or 'root'
        password = opts.get('password') or ''
        database = opts.get('database')
        out_filename = opts.get('out_file') or 'forums_post.csv'
        show_progress = bool(opts.get('progress'))

        if not database:
            raise CommandError("Please specify a database option")

        # Connect to the legacy database before touching the output file, so
        # a failed connection does not truncate an existing CSV.
        try:
            db = MySQLdb.connect(host=host,
                                 user=user,
                                 passwd=password,
                                 db=database)
        except MySQLdb.DatabaseError:
            # sys.exc_info() instead of "except ..., e" / "except ... as e":
            # this form is valid syntax on every Python version.
            raise CommandError(str(sys.exc_info()[1]))

        # try/finally guarantees the connection and the file are released
        # even when the query or a row conversion raises.
        try:
            out_file = open(out_filename, "wb")
            try:
                self._export_posts(db, out_file, show_progress)
            finally:
                out_file.close()
        finally:
            db.close()

    def _export_posts(self, db, out_file, show_progress):
        """
        Write the header row plus one converted row per legacy post.

        db is the open legacy database connection, out_file the open output
        file. When show_progress is true a '.' is written to stdout after
        every progress_interval posts.
        """
        import sys

        # database columns (fieldnames) for the output CSV file:
        cols = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
                'body', 'html', 'user_ip')
        self.writer = csv.writer(out_file)

        # Write an initial row of fieldnames to the output file
        self.writer.writerow(cols)

        # query the legacy database
        sql = ('SELECT * FROM sln_bbposts as p, sln_bbposts_text as t WHERE '
               'p.post_id = t.post_id ORDER BY p.post_id')

        c = db.cursor(MySQLdb.cursors.DictCursor)
        try:
            c.execute(sql)

            # convert the old data and write the output to the file;
            # fetchone() returns None once the result set is exhausted.
            count = 0
            for row in iter(c.fetchone, None):
                self.process_row(row)
                count += 1
                if show_progress and count % self.progress_interval == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()
        finally:
            c.close()

    def to_html(self, s):
        """
        Render the legacy post text s to HTML: unphpbb() is applied first and
        the result is passed to the BBCode parser.
        """
        return self.bb_parser.render_to_html(unphpbb(s), cosmetic_replace=False)

    def to_markdown(self, s):
        """
        Convert the legacy post text s to Markdown by rendering it to HTML
        and feeding that HTML through the (reset) Markdown writer.
        """
        self.md_writer.reset()
        self.md_writer.feed(self.to_html(s))
        return self.md_writer.markdown()

    def process_row(self, row):
        """
        This function accepts one row from the legacy database and converts the
        contents to the new database format, and calls the writer to write the new
        row to the output file.

        The row is accessed by column name: post_id, topic_id, poster_id,
        post_time, post_edit_time, post_text and poster_ip.
        """
        creation_date = datetime.fromtimestamp(float(row['post_time']))

        # Posts that were never edited reuse the creation date.
        if row['post_edit_time']:
            update_date = datetime.fromtimestamp(float(row['post_edit_time']))
        else:
            update_date = creation_date

        body = self.to_markdown(row['post_text'])
        html = self.site_markup.convert(body)

        self.writer.writerow([row['post_id'],
                              row['topic_id'],
                              row['poster_id'],
                              creation_date,
                              update_date,
                              body.encode("utf-8"),
                              html.encode("utf-8"),
                              convert_ip(row['poster_ip'])])