bgneal@294
|
1 """
|
bgneal@294
|
2 translate_old_posts.py - A management command to join the bbposts and
|
bgneal@294
|
3 bbposts_text tables together and output as a .csv file, suitable for use as an
|
bgneal@294
|
4 input to mysqlimport into the new database. This method bypasses the Django ORM
|
bgneal@294
|
5 as it was too slow given the number of old posts to import.
|
bgneal@294
|
6
|
bgneal@294
|
7 """
|
bgneal@294
|
8 from __future__ import with_statement
|
bgneal@294
|
9 import csv
|
bgneal@294
|
10 import optparse
|
bgneal@294
|
11 from datetime import datetime
|
bgneal@294
|
12
|
bgneal@294
|
13 import MySQLdb
|
bgneal@294
|
14 import postmarkup
|
bgneal@294
|
15
|
bgneal@294
|
16 from django.core.management.base import NoArgsCommand, CommandError
|
bgneal@294
|
17
|
bgneal@294
|
18 from legacy.phpbb import unphpbb
|
bgneal@294
|
19 from legacy.html2md import MarkdownWriter
|
bgneal@294
|
20 from core.markup import SiteMarkup
|
bgneal@294
|
21
|
bgneal@294
|
22
|
bgneal@294
|
def convert_ip(s):
    """
    Translate a hexadecimal string holding an IP address into the familiar
    dotted-decimal form (e.g. "7f000001" becomes "127.0.0.1").
    """
    address = int(s, 16)
    # Peel the four octets off the integer, most significant byte first.
    octets = [(address >> shift) & 0xff for shift in (24, 16, 8, 0)]
    return ".".join(["%d" % octet for octet in octets])
|
bgneal@294
|
33
|
bgneal@294
|
34
|
bgneal@294
|
class Command(NoArgsCommand):
    """
    Management command that reads the legacy sln_bbposts / sln_bbposts_text
    tables directly through MySQLdb, converts every post, and writes the
    results to a .csv file.
    """
    help = """\
This command joins converts the SG101 1.0 posts to 2.0 format and outputs the
data as a .csv file suitable for importing into the new database scheme with
the mysqlimport utility.
"""
    option_list = NoArgsCommand.option_list + (
        optparse.make_option("-s", "--progress", action="store_true",
            help="Output a . after every 100 posts to show progress"),
        optparse.make_option("-a", "--host", help="set MySQL host name"),
        optparse.make_option("-u", "--user", help="set MySQL user name"),
        optparse.make_option("-p", "--password", help="set MySQL user password"),
        optparse.make_option("-d", "--database", help="set MySQL database name"),
        optparse.make_option("-o", "--out-file", help="set output filename"),
    )
    # Converters shared by all rows; created once when the class is defined.
    bb_parser = postmarkup.create(use_pygments=False, annotate_links=False)
    md_writer = MarkdownWriter()
    site_markup = SiteMarkup()

    def handle_noargs(self, **opts):
        """
        Entry point of the command.

        Reads the connection and output options from ``opts``, writes a
        header row of column names to the output file, then converts and
        writes every legacy post.

        Raises CommandError if no database name was given or if the
        connection to the legacy database fails.
        """
        # Imported locally so this block does not depend on a module-level
        # import that the file does not have.
        import sys

        host = opts.get('host', 'localhost') or 'localhost'
        user = opts.get('user', 'root') or 'root'
        password = opts.get('password', '') or ''
        database = opts.get('database')
        out_filename = opts.get('out_file', 'forums_post.csv') or 'forums_post.csv'
        show_progress = bool(opts.get('progress'))

        if database is None:
            raise CommandError("Please specify a database option")

        # database columns (fieldnames) for the output CSV file:
        cols = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
                'body', 'html', 'user_ip')

        # The with-block guarantees the output file is flushed and closed even
        # when the connection or a row conversion fails part-way through.
        with open(out_filename, "wb") as out_file:
            self.writer = csv.writer(out_file)

            # Write an initial row of fieldnames to the output file
            self.writer.writerow(cols)

            # connect to the legacy database
            try:
                db = MySQLdb.connect(host=host,
                                     user=user,
                                     passwd=password,
                                     db=database)
            except MySQLdb.DatabaseError:
                # sys.exc_info() is used instead of "except X, e" / "except X
                # as e" so the syntax is valid on every Python version.
                e = sys.exc_info()[1]
                raise CommandError(str(e))

            try:
                count = self._export_posts(db, show_progress)
            finally:
                db.close()

        # Terminate the line of progress dots, if any were printed.
        if show_progress and count >= 100:
            sys.stdout.write("\n")
            sys.stdout.flush()

    def _export_posts(self, db, show_progress):
        """
        Query all legacy posts over the open connection ``db`` and pass each
        row to process_row(). When ``show_progress`` is true a "." is written
        to stdout after every 100 posts. Returns the number of rows processed.
        """
        import sys

        # query the legacy database
        sql = ('SELECT * FROM sln_bbposts as p, sln_bbposts_text as t WHERE '
               'p.post_id = t.post_id ORDER BY p.post_id')

        count = 0
        c = db.cursor(MySQLdb.cursors.DictCursor)
        try:
            c.execute(sql)

            # convert the old data and write the output to the file
            while True:
                row = c.fetchone()
                if row is None:
                    break

                self.process_row(row)

                count += 1
                if show_progress and count % 100 == 0:
                    sys.stdout.write(".")
                    sys.stdout.flush()
        finally:
            # Release the cursor even if a row fails to convert.
            c.close()

        return count

    def to_html(self, s):
        """
        Pass the legacy post text ``s`` through unphpbb() and render the
        result to HTML with the class-level postmarkup parser.
        """
        return self.bb_parser.render_to_html(unphpbb(s), cosmetic_replace=False)

    def to_markdown(self, s):
        """
        Convert the legacy post text ``s`` to Markdown by first rendering it
        to HTML and then feeding that HTML to the shared MarkdownWriter.
        """
        # The writer is shared between rows, so clear its state first.
        self.md_writer.reset()
        self.md_writer.feed(self.to_html(s))
        return self.md_writer.markdown()

    def process_row(self, row):
        """
        This function accepts one row from the legacy database and converts the
        contents to the new database format, and calls the writer to write the
        new row to the output file.

        ``row`` is a dict keyed by column name; the keys read are post_id,
        topic_id, poster_id, post_time, post_edit_time, post_text and
        poster_ip.
        """
        creation_date = datetime.fromtimestamp(float(row['post_time']))

        # Posts that were never edited reuse the creation time.
        if row['post_edit_time']:
            update_date = datetime.fromtimestamp(float(row['post_edit_time']))
        else:
            update_date = creation_date

        body = self.to_markdown(row['post_text'])
        html = self.site_markup.convert(body)

        self.writer.writerow([row['post_id'],
                              row['topic_id'],
                              row['poster_id'],
                              creation_date,
                              update_date,
                              body.encode("utf-8"),
                              html.encode("utf-8"),
                              convert_ip(row['poster_ip'])])
|