Mercurial > public > sg101
changeset 292:2367c4795c92
Added a legacy management command to import old news comments.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Fri, 24 Dec 2010 22:20:30 +0000 (2010-12-24) |
parents | a6357f2bcbbc |
children | c92fb89dbc7d |
files | gpp/legacy/data.py gpp/legacy/html2md.py gpp/legacy/management/commands/import_old_news.py gpp/legacy/management/commands/import_old_news_comments.py |
diffstat | 4 files changed, 159 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gpp/legacy/data.py Fri Dec 24 22:20:30 2010 +0000 @@ -0,0 +1,17 @@ +""" +Misc data for the legacy management commands. + +""" + +# Over time various users asked me to change their username. The legacy site +# rarely stored foreign keys to users; instead it stored the name of the user +# at the time. This dictionary contains mappings from old usernames to new +# usernames. + +KNOWN_USERNAME_CHANGES = { + 'cavefishbutchdelux': 'butchdelux', + 'Findicator1': 'WaveOhhh', + 'Tikimania': 'Tikitena', + 'sandyfeet': 'RickRhoades', +} +
--- a/gpp/legacy/html2md.py Fri Dec 24 20:45:33 2010 +0000 +++ b/gpp/legacy/html2md.py Fri Dec 24 22:20:30 2010 +0000 @@ -120,7 +120,11 @@ """ def markdown(self): - url = self.attrs['href'] + try: + url = self.attrs['href'] + except KeyError: + return self.data if self.data else u'' + text = self.data if self.data else url return u'[%s](%s)' % (text, url) @@ -133,7 +137,10 @@ """ def markdown(self): - url = self.attrs['src'] + try: + url = self.attrs['src'] + except KeyError: + return u' (missing image) ' return u'![image](%s)' % url @@ -261,7 +268,12 @@ self.elem_stack.append(tag) def _pop_elem(self): - element = self.elem_stack.pop() + try: + element = self.elem_stack.pop() + except IndexError: + # pop from empty list => bad HTML input; ignore it + return + if isinstance(element, ListElement): self.list_nesting -= 1 if len(self.elem_stack):
--- a/gpp/legacy/management/commands/import_old_news.py Fri Dec 24 20:45:33 2010 +0000 +++ b/gpp/legacy/management/commands/import_old_news.py Fri Dec 24 22:20:30 2010 +0000 @@ -12,11 +12,8 @@ from news.models import Category, Story from legacy.phpbb import unescape +import legacy.data -KNOWN_USERNAME_CHANGES = { - 'cavefishbutchdelux': 'butchdelux', - 'Findicator1': 'WaveOhhh', -} class Command(LabelCommand): args = '<filename filename ...>' @@ -114,7 +111,8 @@ user = User.objects.get(username=username) except User.DoesNotExist: try: - user = User.objects.get(username=KNOWN_USERNAME_CHANGES[username]) + user = User.objects.get( + username=legacy.data.KNOWN_USERNAME_CHANGES[username]) except KeyError: raise User.DoesNotExist
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/gpp/legacy/management/commands/import_old_news_comments.py Fri Dec 24 22:20:30 2010 +0000 @@ -0,0 +1,124 @@ +""" +import_old_news_comments.py - For importing comments on news stories from SG101 1.0 as csv files. +""" +from __future__ import with_statement +import csv +import optparse +import sys +from datetime import datetime + +from django.core.management.base import LabelCommand, CommandError +from django.contrib.auth.models import User +from django.contrib.contenttypes.models import ContentType + +from comments.models import Comment +from news.models import Story +import legacy.data +from legacy.html2md import MarkdownWriter + + +class Command(LabelCommand): + args = '<filename filename ...>' + help = 'Imports news story comments from the old database in CSV format' + option_list = LabelCommand.option_list + ( + optparse.make_option("-p", "--progress", action="store_true", + help="Output a . after every 20 comments to show progress"), + ) + md_writer = MarkdownWriter() + + def handle_label(self, filename, **options): + """ + Process each line in the CSV file given by filename by + creating a new story comment. + + """ + self.show_progress = options.get('progress') + self.users = {} + + try: + with open(filename, "rb") as f: + self.reader = csv.DictReader(f) + num_rows = 0 + try: + for row in self.reader: + self.process_row(row) + num_rows += 1 + if self.show_progress and num_rows % 20 == 0: + sys.stdout.write('.') + sys.stdout.flush() + except csv.Error, e: + raise CommandError("CSV error: %s %s %s" % ( + filename, self.reader.line_num, e)) + + print + + except IOError: + raise CommandError("Could not open file: %s" % filename) + + def process_row(self, row): + """ + Process one row from the CSV file: create a Comment object for + the row and save it in the database. + + """ + row = dict((k, v if v != 'NULL' else '') for k, v in row.iteritems()) + + try: + user = self._get_user(row['name']) + except User.DoesNotExist: + print "Could not find user %s for comment %s; skipping." % ( + row['name'], row['tid']) + return + + try: + story = Story.objects.get(id=int(row['sid'])) + except Story.DoesNotExist: + print "Could not find story %s for comment %s; skipping." % ( + row['sid'], row['tid']) + return + + comment = Comment( + id=int(row['tid']), + content_type = ContentType.objects.get_for_model(story), + object_id = story.id, + user = user, + comment = self.to_markdown(row['comment']), + creation_date = datetime.strptime(row['date'], "%Y-%m-%d %H:%M:%S"), + ip_address = row['host_name'], + is_public = True, + is_removed = False, + ) + + comment.save() + + def _get_user(self, username): + """ + Returns the user object with the given username. + Throws User.DoesNotExist if not found. + + """ + try: + return self.users[username] + except KeyError: + pass + + try: + user = User.objects.get(username=username) + except User.DoesNotExist: + try: + user = User.objects.get( + username=legacy.data.KNOWN_USERNAME_CHANGES[username]) + except KeyError: + raise User.DoesNotExist + + self.users[username] = user + return user + + def to_markdown(self, s): + self.md_writer.reset() + + if not isinstance(s, unicode): + s = s.decode('utf-8', 'replace') + + self.md_writer.feed(s) + return self.md_writer.markdown()