annotate gpp/legacy/management/commands/import_old_news_comments.py @ 323:0c18dfb1da1c

Fixing #149; adding the ygroup application: an archive of the old Yahoo Group messages.
author Brian Neal <bgneal@gmail.com>
date Sun, 20 Feb 2011 00:31:54 +0000
parents 254db4cb6a86
children 4021ea1045f7
rev   line source
bgneal@292 1 """
bgneal@292 2 import_old_news_comments.py - For importing comments on news stories from SG101 1.0 as csv files.
bgneal@292 3 """
bgneal@292 4 from __future__ import with_statement
bgneal@292 5 import csv
bgneal@292 6 import optparse
bgneal@292 7 import sys
bgneal@292 8 from datetime import datetime
bgneal@292 9
bgneal@292 10 from django.core.management.base import LabelCommand, CommandError
bgneal@292 11 from django.contrib.auth.models import User
bgneal@292 12 from django.contrib.contenttypes.models import ContentType
bgneal@292 13
bgneal@292 14 from comments.models import Comment
bgneal@292 15 from news.models import Story
bgneal@292 16 import legacy.data
bgneal@292 17 from legacy.html2md import MarkdownWriter
bgneal@292 18
bgneal@292 19
class Command(LabelCommand):
    """
    Management command that imports comments on news stories from one or
    more CSV files; each filename given on the command line is one label.

    Every CSV row must supply the columns read by process_row():
    name, tid, sid, comment, date and host_name.

    """
    args = '<filename filename ...>'
    help = 'Imports news story comments from the old database in CSV format'
    option_list = LabelCommand.option_list + (
        optparse.make_option("-p", "--progress", action="store_true",
            help="Output a . after every 20 comments to show progress"),
    )
    # A single converter is shared by all rows; to_markdown() resets it
    # before every use.
    md_writer = MarkdownWriter()

    def handle_label(self, filename, **options):
        """
        Process each line in the CSV file given by filename by
        creating a new story comment.

        Raises CommandError if the file cannot be opened or if the csv
        module reports a parse error while reading it.

        """
        self.show_progress = options.get('progress')
        # Cache of users already looked up for this file, keyed by the
        # username found in the CSV.
        self.users = {}

        # Guard only the open() call: an IOError raised later (while
        # reading rows or writing progress output) must not be reported
        # as a failure to open the file.
        try:
            f = open(filename, "rb")
        except IOError:
            raise CommandError("Could not open file: %s" % filename)

        with f:
            self.reader = csv.DictReader(f)
            num_rows = 0
            try:
                for row in self.reader:
                    self.process_row(row)
                    # Skipped rows are counted too; the counter tracks
                    # rows read, not comments saved.
                    num_rows += 1
                    if self.show_progress and num_rows % 20 == 0:
                        sys.stdout.write('.')
                        sys.stdout.flush()
            except csv.Error:
                # sys.exc_info() fetches the active exception in a way
                # that parses on every Python version.
                err = sys.exc_info()[1]
                raise CommandError("CSV error: %s %s %s" % (
                    filename, self.reader.line_num, err))

            # Terminate the line of progress dots (always emitted, even
            # when progress output is off).
            sys.stdout.write("\n")

    def process_row(self, row):
        """
        Process one row from the CSV file: create a Comment object for
        the row and save it in the database.

        Rows whose user or story cannot be found are reported on stdout
        and skipped. Malformed 'sid', 'tid' or 'date' values are not
        handled here and propagate as ValueError.

        """
        # Treat the literal text 'NULL' as an empty value.
        row = dict((k, v if v != 'NULL' else '') for k, v in row.iteritems())

        try:
            user = self._get_user(row['name'])
        except User.DoesNotExist:
            sys.stdout.write(
                "Could not find user %s for comment %s; skipping.\n" % (
                    row['name'], row['tid']))
            return

        try:
            story = Story.objects.get(id=int(row['sid']))
        except Story.DoesNotExist:
            sys.stdout.write(
                "Could not find story %s for comment %s; skipping.\n" % (
                    row['sid'], row['tid']))
            return

        # The comment keeps its old primary key ('tid') and is attached
        # to the story through the generic content type / object id pair.
        comment = Comment(
            id=int(row['tid']),
            content_type=ContentType.objects.get_for_model(story),
            object_id=story.id,
            user=user,
            comment=self.to_markdown(row['comment']),
            creation_date=datetime.strptime(row['date'], "%Y-%m-%d %H:%M:%S"),
            ip_address=row['host_name'],
            is_public=True,
            is_removed=False,
        )

        comment.save()

    def _get_user(self, username):
        """
        Returns the user object with the given username.
        Throws User.DoesNotExist if not found.

        If no user has that exact name, the name is looked up in
        legacy.data.KNOWN_USERNAME_CHANGES and the mapped username is
        tried instead. Successful lookups are cached in self.users;
        failed lookups are not.

        """
        try:
            return self.users[username]
        except KeyError:
            pass

        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            # Only the rename-table lookup is guarded for KeyError; a
            # missing renamed user raises User.DoesNotExist on its own.
            try:
                new_name = legacy.data.KNOWN_USERNAME_CHANGES[username]
            except KeyError:
                raise User.DoesNotExist
            user = User.objects.get(username=new_name)

        self.users[username] = user
        return user

    def to_markdown(self, s):
        """
        Feeds s through the shared MarkdownWriter and returns the
        resulting markdown text.

        Byte strings are decoded as latin-1 first; unicode input is
        passed through unchanged.

        """
        self.md_writer.reset()

        if not isinstance(s, unicode):
            s = s.decode('latin-1', 'replace')

        self.md_writer.feed(s)
        return self.md_writer.markdown()