comparison gpp/legacy/management/commands/translate_old_posts.py @ 294:254db4cb6a86

Changes / scripts to import forums. Other tweaks and moving other import scripts to the legacy application.
author Brian Neal <bgneal@gmail.com>
date Wed, 05 Jan 2011 04:09:35 +0000
parents
children 28de6caa4e6d
comparison
equal deleted inserted replaced
293:c92fb89dbc7d 294:254db4cb6a86
1 """
2 translate_old_posts.py - A management command to join the bbposts and
3 bbposts_text tables together and output as a .csv file, suitable for use as an
4 input to mysqlimport into the new database. This method bypasses the Django ORM
5 as it was too slow given the number of old posts to import.
6
7 """
8 from __future__ import with_statement
9 import csv
10 import optparse
11 from datetime import datetime
12
13 import MySQLdb
14 import postmarkup
15
16 from django.core.management.base import NoArgsCommand, CommandError
17
18 from legacy.phpbb import unphpbb
19 from legacy.html2md import MarkdownWriter
20 from core.markup import SiteMarkup
21
22
def convert_ip(s):
    """
    Render a hexadecimal IP address string in dotted-quad notation.

    The string is parsed as a single base-16 integer and its four low-order
    bytes are emitted most significant first, e.g. "7f000001" becomes
    "127.0.0.1".
    """
    value = int(s, 16)
    octets = [(value >> shift) & 0xff for shift in (24, 16, 8, 0)]
    return ".".join(str(octet) for octet in octets)
33
34
class Command(NoArgsCommand):
    """
    Management command that dumps the legacy forum posts to a CSV file.

    Rows are read from the join of the sln_bbposts and sln_bbposts_text tables,
    converted one at a time by process_row(), and written with the csv module.
    """
    help = """\
This command joins converts the SG101 1.0 posts to 2.0 format and outputs the
data as a .csv file suitable for importing into the new database scheme with
the mysqlimport utility.
"""
    option_list = NoArgsCommand.option_list + (
        optparse.make_option("-s", "--progress", action="store_true",
            help="Output a . after every 100 posts to show progress"),
        optparse.make_option("-a", "--host", help="set MySQL host name"),
        optparse.make_option("-u", "--user", help="set MySQL user name"),
        optparse.make_option("-p", "--password", help="set MySQL user password"),
        optparse.make_option("-d", "--database", help="set MySQL database name"),
        optparse.make_option("-o", "--out-file", help="set output filename"),
    )

    # Converters are built once, when the class is defined, and reused for
    # every row that is processed.
    bb_parser = postmarkup.create(use_pygments=False, annotate_links=False)
    md_writer = MarkdownWriter()
    site_markup = SiteMarkup()

    def handle_noargs(self, **opts):
        """
        Run the export.

        Recognised options: host (default 'localhost'), user (default 'root'),
        password (default ''), database (required), out_file (default
        'forums_post.csv') and progress (print a '.' every 100 posts).

        Raises CommandError when no database name is given or when the
        connection to the legacy database fails.
        """
        # Local import: sys is needed here for progress output and for
        # fetching the active exception in a version-neutral way.
        import sys

        host = opts.get('host', 'localhost') or 'localhost'
        user = opts.get('user', 'root') or 'root'
        password = opts.get('password', '') or ''
        database = opts.get('database')
        out_filename = opts.get('out_file', 'forums_post.csv') or 'forums_post.csv'
        show_progress = bool(opts.get('progress'))

        # An empty name is as unusable as a missing one.
        if not database:
            raise CommandError("Please specify a database option")

        # database columns (fieldnames) for the output CSV file:
        cols = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
                'body', 'html', 'user_ip')

        # Connect to the legacy database before touching the output file, so
        # that a failed connection does not leave a header-only CSV behind.
        try:
            db = MySQLdb.connect(host=host,
                                 user=user,
                                 passwd=password,
                                 db=database)
        except MySQLdb.DatabaseError:
            # exc_info() instead of "except ..., e" / "except ... as e" so
            # this parses on Python 2.5 as well as on newer interpreters.
            raise CommandError(str(sys.exc_info()[1]))

        # query the legacy database
        sql = ('SELECT * FROM sln_bbposts as p, sln_bbposts_text as t WHERE '
               'p.post_id = t.post_id ORDER BY p.post_id')

        # try/finally and "with" guarantee that the connection, the cursor and
        # the output file are released even if a row fails to convert.
        try:
            cursor = db.cursor(MySQLdb.cursors.DictCursor)
            try:
                cursor.execute(sql)

                with open(out_filename, "wb") as out_file:
                    self.writer = csv.writer(out_file)

                    # Write an initial row of fieldnames to the output file
                    self.writer.writerow(cols)

                    # convert the old data and write the output to the file
                    count = 0
                    while True:
                        row = cursor.fetchone()
                        if row is None:
                            break

                        self.process_row(row)
                        count += 1

                        if show_progress and count % 100 == 0:
                            sys.stdout.write('.')
                            sys.stdout.flush()

                    # Terminate the line of dots, if any were printed.
                    if show_progress and count >= 100:
                        sys.stdout.write('\n')
            finally:
                cursor.close()
        finally:
            db.close()

    def to_html(self, s):
        """
        Pass s through unphpbb() and render the result to HTML with the
        postmarkup parser.
        """
        return self.bb_parser.render_to_html(unphpbb(s), cosmetic_replace=False)

    def to_markdown(self, s):
        """
        Convert s to HTML via to_html() and feed that HTML to the shared
        MarkdownWriter, returning the Markdown it produces.
        """
        # The writer is shared between rows, so clear its state first.
        self.md_writer.reset()
        self.md_writer.feed(self.to_html(s))
        return self.md_writer.markdown()

    def process_row(self, row):
        """
        This function accepts one row from the legacy database and converts the
        contents to the new database format, and calls the writer to write the new
        row to the output file.

        row is a mapping with the keys post_id, topic_id, poster_id, post_time,
        post_edit_time, post_text and poster_ip. self.writer must already have
        been set up by handle_noargs().
        """
        creation_date = datetime.fromtimestamp(float(row['post_time']))

        # A false post_edit_time is taken to mean the post was never edited
        # and falls back to the creation date.
        if row['post_edit_time']:
            update_date = datetime.fromtimestamp(float(row['post_edit_time']))
        else:
            update_date = creation_date

        # The stored HTML is rendered from the converted Markdown body.
        body = self.to_markdown(row['post_text'])
        html = self.site_markup.convert(body)

        self.writer.writerow([row['post_id'],
                              row['topic_id'],
                              row['poster_id'],
                              creation_date,
                              update_date,
                              body.encode("utf-8"),
                              html.encode("utf-8"),
                              convert_ip(row['poster_ip'])])