bgneal@294: """ bgneal@294: translate_old_posts.py - A management command to join the bbposts and bgneal@294: bbposts_text tables together and output as a .csv file, suitable for use as an bgneal@294: input to mysqlimport into the new database. This method bypasses the Django ORM bgneal@294: as it was too slow given the number of old posts to import. bgneal@294: bgneal@294: """ bgneal@294: from __future__ import with_statement bgneal@294: import csv bgneal@294: import optparse bgneal@294: from datetime import datetime bgneal@294: bgneal@294: import MySQLdb bgneal@294: import postmarkup bgneal@294: bgneal@294: from django.core.management.base import NoArgsCommand, CommandError bgneal@294: bgneal@294: from legacy.phpbb import unphpbb bgneal@294: from legacy.html2md import MarkdownWriter bgneal@294: from core.markup import SiteMarkup bgneal@294: bgneal@294: bgneal@294: def convert_ip(s): bgneal@294: """ bgneal@294: Converts a hex string representing an IP address into dotted notation. bgneal@294: """ bgneal@294: n = int(s, 16) bgneal@294: return "%d.%d.%d.%d" % ( bgneal@294: ((n >> 24) & 0xff), bgneal@294: ((n >> 16) & 0xff), bgneal@294: ((n >> 8) & 0xff), bgneal@294: n & 0xff) bgneal@294: bgneal@294: bgneal@294: class Command(NoArgsCommand): bgneal@294: help = """\ bgneal@294: This command joins converts the SG101 1.0 posts to 2.0 format and outputs the bgneal@294: data as a .csv file suitable for importing into the new database scheme with bgneal@294: the mysqlimport utility. bgneal@294: """ bgneal@294: option_list = NoArgsCommand.option_list + ( bgneal@294: optparse.make_option("-s", "--progress", action="store_true", bgneal@294: help="Output a . after every 100 posts to show progress"), bgneal@294: optparse.make_option("-a", "--host", help="set MySQL host name"), bgneal@294: optparse.make_option("-u", "--user", help="set MySQL user name"), bgneal@294: optparse.make_option("-p", "--password", help="set MySQL user password"), bgneal@294: optparse.make_option("-d", "--database", help="set MySQL database name"), bgneal@294: optparse.make_option("-o", "--out-file", help="set output filename"), bgneal@294: ) bgneal@294: bb_parser = postmarkup.create(use_pygments=False, annotate_links=False) bgneal@294: md_writer = MarkdownWriter() bgneal@294: site_markup = SiteMarkup() bgneal@294: bgneal@294: def handle_noargs(self, **opts): bgneal@294: bgneal@294: host = opts.get('host', 'localhost') or 'localhost' bgneal@294: user = opts.get('user', 'root') or 'root' bgneal@294: password = opts.get('password', '') or '' bgneal@294: database = opts.get('database') bgneal@294: out_filename = opts.get('out_file', 'forums_post.csv') or 'forums_post.csv' bgneal@294: bgneal@294: if database is None: bgneal@294: raise CommandError("Please specify a database option") bgneal@294: bgneal@294: out_file = open(out_filename, "wb") bgneal@294: bgneal@294: # database columns (fieldnames) for the output CSV file: bgneal@294: cols = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date', bgneal@294: 'body', 'html', 'user_ip') bgneal@294: self.writer = csv.writer(out_file) bgneal@294: bgneal@294: # Write an initial row of fieldnames to the output file bgneal@294: self.writer.writerow(cols) bgneal@294: bgneal@294: # connect to the legacy database bgneal@294: try: bgneal@294: db = MySQLdb.connect(host=host, bgneal@294: user=user, bgneal@294: passwd=password, bgneal@294: db=database) bgneal@294: except MySQLdb.DatabaseError, e: bgneal@294: raise CommandError(str(e)) bgneal@294: bgneal@294: c = db.cursor(MySQLdb.cursors.DictCursor) bgneal@294: bgneal@294: # query the legacy database bgneal@294: sql = ('SELECT * FROM sln_bbposts as p, sln_bbposts_text as t WHERE ' bgneal@294: 'p.post_id = t.post_id ORDER BY p.post_id') bgneal@294: c.execute(sql) bgneal@294: bgneal@294: # convert the old data and write the output to the file bgneal@294: while True: bgneal@294: row = c.fetchone() bgneal@294: if row is None: bgneal@294: break bgneal@294: bgneal@294: self.process_row(row) bgneal@294: bgneal@294: c.close() bgneal@294: db.close() bgneal@294: out_file.close() bgneal@294: bgneal@294: def to_html(self, s): bgneal@294: return self.bb_parser.render_to_html(unphpbb(s), cosmetic_replace=False) bgneal@294: bgneal@294: def to_markdown(self, s): bgneal@294: self.md_writer.reset() bgneal@294: self.md_writer.feed(self.to_html(s)) bgneal@294: return self.md_writer.markdown() bgneal@294: bgneal@294: def process_row(self, row): bgneal@294: """ bgneal@294: This function accepts one row from the legacy database and converts the bgneal@294: contents to the new database format, and calls the writer to write the new bgneal@294: row to the output file. bgneal@294: """ bgneal@294: creation_date = datetime.fromtimestamp(float(row['post_time'])) bgneal@294: bgneal@294: if row['post_edit_time']: bgneal@294: update_date = datetime.fromtimestamp(float(row['post_edit_time'])) bgneal@294: else: bgneal@294: update_date = creation_date bgneal@294: bgneal@294: body = self.to_markdown(row['post_text']) bgneal@294: html = self.site_markup.convert(body) bgneal@294: bgneal@294: self.writer.writerow([row['post_id'], bgneal@294: row['topic_id'], bgneal@294: row['poster_id'], bgneal@294: creation_date, bgneal@294: update_date, bgneal@294: body.encode("utf-8"), bgneal@294: html.encode("utf-8"), bgneal@294: convert_ip(row['poster_ip'])])