comparison tools/post_sub.py @ 359:e877b9c05740

Fixed #158; Wrote a script to replace text in forum posts. Used it to fix up the beta.surfguitar101.com/media/ problem that came up during the initial conversion to the new site.
author Brian Neal <bgneal@gmail.com>
date Sat, 05 Mar 2011 02:00:13 +0000
parents
children
comparison
equal deleted inserted replaced
358:768e3568addf 359:e877b9c05740
1 """
2 This script reads a .csv dump of the forums post table. It writes a new file,
3 performing a search and replace over a given field.
4
5 The output file can be imported into MySQL with:
6
7 LOAD DATA LOCAL INFILE 'forums_post.csv' REPLACE INTO TABLE forums_post
8 CHARACTER SET utf8
9 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY ''
10 LINES TERMINATED BY '\r\n';
11 SHOW WARNINGS;
12
13 """
14 from __future__ import with_statement
15 import csv
16 import re
17 import optparse
18 import sys
19
20
# Usage line and description handed to optparse.OptionParser in main().
USAGE = "usage: %prog [options] infile outfile"
DESCRIPTION = (
    "Performs a search and replace on a field in a forums post .csv file.\n"
)

# Field names, in order, given to csv.DictWriter when writing the output file.
POST_FIELDS = (
    'id',
    'topic_id',
    'user_id',
    'creation_date',
    'update_date',
    'body',
    'html',
    'user_ip',
)
28
29
def main(argv=None):
    """
    Run a regular expression search and replace over one field of a forums
    post .csv file, writing every (possibly modified) row to a new file.

    argv -- list of command line arguments to parse; when None, optparse
            reads them from sys.argv[1:].

    The two positional arguments are the input and output file names. The
    first row of the input is consumed as the header that names the fields;
    the output is written without a header row, in POST_FIELDS order.

    Calls sys.exit() with a message when the positional arguments, the search
    pattern or the replacement text are missing, or when the requested field
    is not present in the input file's header. IOError from opening or
    reading/writing the files is left for the caller to handle.

    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        field='body',
    )
    parser.add_option("-p", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-f", "--field",
        help="Name of the field to search [default: %default]")
    parser.add_option("-s", "--search", help="The search pattern")
    parser.add_option("-r", "--replace", help="The replacement text")

    opts, args = parser.parse_args(args=argv)

    if len(args) != 2:
        sys.exit("Please supply input and output file arguments.")

    if opts.search is None:
        sys.exit("Please specify a search pattern.")
    search_re = re.compile(opts.search)

    if opts.replace is None:
        sys.exit("Please specify replacement text.")

    # Binary mode is what the Python 2 csv module expects; it keeps the
    # line terminators under the csv module's control.
    with open(args[0], "rb") as infile:
        reader = csv.DictReader(infile)

        # fieldnames is None when the input file is empty; treat that the
        # same as "field not found" instead of failing with a TypeError.
        fieldnames = reader.fieldnames or []
        if opts.field not in fieldnames:
            sys.exit("Error, invalid field option: %s" % opts.field)

        with open(args[1], "wb") as outfile:
            writer = csv.DictWriter(outfile, POST_FIELDS)

            n = 0
            for row in reader:
                row[opts.field] = search_re.sub(opts.replace, row[opts.field])
                writer.writerow(row)

                # Count the row first so that a dot appears after each
                # complete batch of 100 posts, and only when requested.
                n += 1
                if opts.progress and n % 100 == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()

    # Terminate the line of progress dots.
    if opts.progress:
        sys.stdout.write('\n')
73
# Script entry point: run main() and turn the two expected failure modes into
# a short message on exit rather than a traceback.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")
    except IOError as ex:
        sys.exit("IO Error: %s" % ex)