bgneal@294: """
bgneal@321: translate_old_posts.py - A management command to join the bbposts and
bgneal@294: bbposts_text tables together and output as a .csv file, suitable for use as an
bgneal@294: input to mysqlimport into the new database. This method bypasses the Django ORM
bgneal@294: as it was too slow given the number of old posts to import.
bgneal@294: 
bgneal@294: """
bgneal@294: from __future__ import with_statement
bgneal@294: import csv
bgneal@294: import optparse
bgneal@294: from datetime import datetime
bgneal@294: 
bgneal@294: import MySQLdb
bgneal@294: import postmarkup
bgneal@294: 
bgneal@294: from django.core.management.base import NoArgsCommand, CommandError
bgneal@294: 
bgneal@294: from legacy.phpbb import unphpbb
bgneal@294: from legacy.html2md import MarkdownWriter
bgneal@294: from core.markup import SiteMarkup
bgneal@294: 
bgneal@294: 
bgneal@294: def convert_ip(s):
bgneal@294:     """
bgneal@294:     Converts a hex string representing an IP address into dotted notation.
bgneal@294:     """
bgneal@294:     n = int(s, 16)
bgneal@294:     return "%d.%d.%d.%d" % (
bgneal@294:             ((n >> 24) & 0xff),
bgneal@294:             ((n >> 16) & 0xff),
bgneal@294:             ((n >> 8) & 0xff),
bgneal@294:             n & 0xff)
bgneal@294: 
bgneal@294: 
bgneal@294: class Command(NoArgsCommand):
bgneal@294:     help = """\
bgneal@321: This command joins the SG101 1.0 posts to 2.0 format and outputs the
bgneal@294: data as a .csv file suitable for importing into the new database scheme with
bgneal@294: the mysqlimport utility.
bgneal@294: """
bgneal@294:     option_list = NoArgsCommand.option_list + (
bgneal@294:         optparse.make_option("-s", "--progress", action="store_true",
bgneal@294:             help="Output a . after every 100 posts to show progress"),
bgneal@294:         optparse.make_option("-a", "--host", help="set MySQL host name"),
bgneal@294:         optparse.make_option("-u", "--user", help="set MySQL user name"),
bgneal@294:         optparse.make_option("-p", "--password", help="set MySQL user password"),
bgneal@294:         optparse.make_option("-d", "--database", help="set MySQL database name"),
bgneal@294:         optparse.make_option("-o", "--out-file", help="set output filename"),
bgneal@294:     )
bgneal@294:     bb_parser = postmarkup.create(use_pygments=False, annotate_links=False)
bgneal@294:     md_writer = MarkdownWriter()
bgneal@294:     site_markup = SiteMarkup()
bgneal@294: 
bgneal@294:     def handle_noargs(self, **opts):
bgneal@294: 
bgneal@294:         host = opts.get('host', 'localhost') or 'localhost'
bgneal@294:         user = opts.get('user', 'root') or 'root'
bgneal@294:         password = opts.get('password', '') or ''
bgneal@294:         database = opts.get('database')
bgneal@294:         out_filename = opts.get('out_file', 'forums_post.csv') or 'forums_post.csv'
bgneal@294: 
bgneal@294:         if database is None:
bgneal@294:             raise CommandError("Please specify a database option")
bgneal@294: 
bgneal@294:         out_file = open(out_filename, "wb")
bgneal@294: 
bgneal@294:         # database columns (fieldnames) for the output CSV file:
bgneal@294:         cols = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
bgneal@294:                 'body', 'html', 'user_ip')
bgneal@294:         self.writer = csv.writer(out_file)
bgneal@294: 
bgneal@294:         # Write an initial row of fieldnames to the output file 
bgneal@294:         self.writer.writerow(cols)
bgneal@294: 
bgneal@294:         # connect to the legacy database
bgneal@294:         try:
bgneal@294:             db = MySQLdb.connect(host=host,
bgneal@294:                     user=user,
bgneal@294:                     passwd=password,
bgneal@294:                     db=database)
bgneal@294:         except MySQLdb.DatabaseError, e:
bgneal@294:             raise CommandError(str(e))
bgneal@294: 
bgneal@294:         c = db.cursor(MySQLdb.cursors.DictCursor)
bgneal@294: 
bgneal@294:         # query the legacy database
bgneal@294:         sql = ('SELECT * FROM sln_bbposts as p, sln_bbposts_text as t WHERE '
bgneal@294:                 'p.post_id = t.post_id ORDER BY p.post_id')
bgneal@294:         c.execute(sql)
bgneal@294: 
bgneal@294:         # convert the old data and write the output to the file
bgneal@294:         while True:
bgneal@294:             row = c.fetchone()
bgneal@294:             if row is None:
bgneal@294:                 break
bgneal@294: 
bgneal@294:             self.process_row(row)
bgneal@294: 
bgneal@294:         c.close()
bgneal@294:         db.close()
bgneal@294:         out_file.close()
bgneal@294: 
bgneal@294:     def to_html(self, s):
bgneal@294:         return self.bb_parser.render_to_html(unphpbb(s), cosmetic_replace=False)
bgneal@294: 
bgneal@294:     def to_markdown(self, s):
bgneal@294:         self.md_writer.reset()
bgneal@294:         self.md_writer.feed(self.to_html(s))
bgneal@294:         return self.md_writer.markdown()
bgneal@294: 
bgneal@294:     def process_row(self, row):
bgneal@294:         """
bgneal@294:         This function accepts one row from the legacy database and converts the
bgneal@294:         contents to the new database format, and calls the writer to write the new
bgneal@294:         row to the output file.
bgneal@294:         """
bgneal@294:         creation_date = datetime.fromtimestamp(float(row['post_time']))
bgneal@294: 
bgneal@294:         if row['post_edit_time']:
bgneal@294:             update_date = datetime.fromtimestamp(float(row['post_edit_time'])) 
bgneal@294:         else:
bgneal@294:             update_date = creation_date
bgneal@294: 
bgneal@294:         body = self.to_markdown(row['post_text'])
bgneal@294:         html = self.site_markup.convert(body)
bgneal@294: 
bgneal@294:         self.writer.writerow([row['post_id'],
bgneal@294:                 row['topic_id'],
bgneal@294:                 row['poster_id'],
bgneal@294:                 creation_date,
bgneal@294:                 update_date,
bgneal@294:                 body.encode("utf-8"),
bgneal@294:                 html.encode("utf-8"),
bgneal@294:                 convert_ip(row['poster_ip'])])