annotate legacy/management/commands/import_old_downloads.py @ 1203:8cd15df9b563

Controlling the xapian install script in tools.
author Brian Neal <bgneal@gmail.com>
date Sat, 04 Jan 2025 14:19:19 -0600
parents ee87ea74d46b
children
rev   line source
bgneal@412 1 """
bgneal@412 2 import_old_downloads.py - For importing downloads from SG101 1.0 as csv files.
bgneal@412 3 """
bgneal@412 4 from __future__ import with_statement
bgneal@412 5 import csv
bgneal@412 6 import datetime
bgneal@412 7
bgneal@412 8 from django.core.management.base import LabelCommand, CommandError
bgneal@412 9 from django.contrib.auth.models import User
bgneal@412 10
bgneal@412 11 from downloads.models import Download, Category
bgneal@412 12 from legacy.html2md import MarkdownWriter
bgneal@412 13
bgneal@412 14
# Old download ids (the "lid" column) that are never imported.
EXCLUDE_SET = set((1, 2, 3, 4, 277))

# Old category id -> new category id. A value of None marks a category
# that is not being carried over to the new site.
CAT_MAP = {
    1: None,    # Demos
    3: None,    # Music
    4: None,    # Misc
    6: 2,       # Gear Samples
    8: 4,       # Ringtones
    9: 8,       # Tablature
    10: 6,      # Interviews
    11: None,   # 2008 MP3 Comp
    12: 1,      # Backing Tracks
    13: None,   # 2009 MP3 Comp
}

# Absolute-URL prefix that is stripped from old download URLs.
SG101_PREFIX = 'http://surfguitar101.com/'
bgneal@412 33
bgneal@412 34
class Command(LabelCommand):
    """
    Management command that loads SG101 1.0 download records from CSV files.

    Each file named on the command line is read with csv.DictReader, and
    every row that passes the exclusion and category filters is saved as a
    Download object.

    """
    args = '<filename filename ...>'
    help = 'Imports downloads from the old database in CSV format'
    md_writer = MarkdownWriter()

    def handle_label(self, filename, **options):
        """
        Import every row of the CSV file named by filename.

        Raises CommandError when the user with pk=2 does not exist, when an
        IOError occurs while opening or processing the file, or when the csv
        module reports an error.

        """
        # Cache of Category objects, keyed by new category id.
        self.cats = {}
        try:
            self.default_user = User.objects.get(pk=2)
        except User.DoesNotExist:
            raise CommandError("Need a default user with pk=2")

        try:
            with open(filename, "rb") as csv_file:
                self.reader = csv.DictReader(csv_file)
                try:
                    for record in self.reader:
                        self.process_row(record)
                except csv.Error as err:
                    where = (filename, self.reader.line_num, err)
                    raise CommandError("CSV error: %s %s %s" % where)
        except IOError:
            raise CommandError("Could not open file: %s" % filename)

    def get_category(self, old_cat_id):
        """
        Map an old category id to its new Category object.

        Objects are fetched from the database once and then served from the
        self.cats cache. Raises CommandError if the mapped category does not
        exist.

        """
        new_id = CAT_MAP[old_cat_id]
        if new_id in self.cats:
            return self.cats[new_id]

        try:
            category = Category.objects.get(pk=new_id)
        except Category.DoesNotExist:
            raise CommandError("Category does not exist: %s on line %s" % (
                new_id, self.reader.line_num))

        self.cats[new_id] = category
        return category

    def get_user(self, username):
        """
        Look up a User by username, falling back to self.default_user
        when no such user exists.

        """
        try:
            user = User.objects.get(username=username)
        except User.DoesNotExist:
            user = self.default_user
        return user

    def process_row(self, row):
        """
        Turn one CSV row into a Download and save it.

        Rows whose lid is in EXCLUDE_SET, or whose cid does not map to a
        new category in CAT_MAP, are silently skipped.

        """
        download_id = int(row['lid'])
        if download_id in EXCLUDE_SET:
            return

        old_cat_id = int(row['cid'])
        if CAT_MAP.get(old_cat_id) is None:
            # unknown category, or one we are not carrying over
            return

        added = datetime.datetime.strptime(row['date'], "%Y-%m-%d %H:%M:%S")

        # Strip the site prefix and then the old "dls/" directory, each only
        # if present, and re-root the path under downloads/1.0/.
        path = row['url'].decode('latin-1')
        for prefix in (SG101_PREFIX, 'dls/'):
            if path.startswith(prefix):
                path = path[len(prefix):]
        new_url = u'downloads/1.0/%s' % path

        title = row['title'].decode('latin-1')
        category = self.get_category(old_cat_id)
        description = self.to_markdown(row['description'].decode('latin-1'))
        submitter = self.get_user(row['submitter'])
        hits = int(row['hits'])
        # the old rating summary is halved for the new average_score field
        score = float(row['downloadratingsummary']) / 2.0
        votes = int(row['totalvotes'])

        download = Download(
            id=download_id,
            title=title,
            category=category,
            description=description,
            file=new_url,
            user=submitter,
            date_added=added,
            ip_address='127.0.0.1',     # not available
            hits=hits,
            average_score=score,
            total_votes=votes,
            is_public=True)
        download.save()

    def to_markdown(self, s):
        """
        Run s through the shared MarkdownWriter and return the result of
        its markdown() call.

        """
        writer = self.md_writer
        writer.reset()
        writer.feed(s)
        return writer.markdown()