Mercurial > public > sg101
comparison tools/post_sub.py @ 359:e877b9c05740
Fixed #158; Wrote a script to replace text in forum posts. Used it to fix up the beta.surfguitar101.com/media/ problem that came up during the initial conversion to the new site.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 Mar 2011 02:00:13 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
358:768e3568addf | 359:e877b9c05740 |
---|---|
1 """ | |
2 This script reads a .csv dump of the forums post table. It writes a new file, | |
3 performing a search and replace over a given field. | |
4 | |
5 The output file can be imported into MySQL with: | |
6 | |
7 LOAD DATA LOCAL INFILE 'forums_post.csv' REPLACE INTO TABLE forums_post | |
8 CHARACTER SET utf8 | |
9 FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY '' | |
10 LINES TERMINATED BY '\r\n'; | |
11 SHOW WARNINGS; | |
12 | |
13 """ | |
14 from __future__ import with_statement | |
15 import csv | |
16 import re | |
17 import optparse | |
18 import sys | |
19 | |
20 | |
USAGE = "usage: %prog [options] infile outfile"
DESCRIPTION = """\
Performs a search and replace on a field in a forums post .csv file.
"""

# Field names handed to the DictWriter; this fixes the output column order.
POST_FIELDS = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
               'body', 'html', 'user_ip')


def _open_csv(path, mode):
    """Open ``path`` in the way the running interpreter's csv module expects.

    ``mode`` is "r" or "w". The Python 2 csv module needs binary-mode files,
    while the Python 3 csv module needs text-mode files opened with
    newline='' so that it controls line endings itself. UTF-8 is used on
    Python 3 because the MySQL import shown in the module docstring loads the
    file with CHARACTER SET utf8.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline='', encoding='utf-8')
    return open(path, mode + 'b')


def main(argv=None):
    """Copy a forums post .csv file, applying a regex substitution to one field.

    argv -- list of command line arguments (options followed by the input and
            output file names); None lets optparse read sys.argv[1:].

    The first row of the input file is consumed as the header that names the
    columns. No header row is written to the output file.

    Calls sys.exit() with an error message when the file arguments, the search
    pattern or the replacement text are missing, when the search pattern is
    not a valid regular expression, or when the requested field is not a
    column of the input file (which includes an empty input file).
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        field='body',
    )
    parser.add_option("-p", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-f", "--field",
        help="Name of the field to search [default: %default]")
    parser.add_option("-s", "--search", help="The search pattern")
    parser.add_option("-r", "--replace", help="The replacement text")

    opts, args = parser.parse_args(args=argv)

    if len(args) != 2:
        sys.exit("Please supply input and output file arguments.")

    if opts.search is None:
        sys.exit("Please specify a search pattern.")
    try:
        search_re = re.compile(opts.search)
    except re.error as ex:
        # Report a bad pattern the same way as the other usage errors
        # instead of dumping a traceback.
        sys.exit("Error, invalid search pattern: %s" % ex)

    if opts.replace is None:
        sys.exit("Please specify replacement text.")

    with _open_csv(args[0], "r") as infile:
        reader = csv.DictReader(infile)
        # fieldnames is None for an empty file; treat that as "no columns"
        # so the membership test below cannot raise TypeError.
        fieldnames = reader.fieldnames or ()
        if opts.field not in fieldnames:
            sys.exit("Error, invalid field option: %s" % opts.field)

        with _open_csv(args[1], "w") as outfile:
            writer = csv.DictWriter(outfile, POST_FIELDS)

            n = 0
            for row in reader:
                row[opts.field] = search_re.sub(opts.replace, row[opts.field])
                writer.writerow(row)

                # Count the post just written, so a dot appears after every
                # 100th post, and only when --progress was requested.
                n += 1
                if opts.progress and n % 100 == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()

    # Finish the line of progress dots; nothing was printed otherwise.
    if opts.progress:
        sys.stdout.write('\n')
73 | |
if __name__ == '__main__':
    # Turn the two expected failure modes into a one-line message rather than
    # a traceback; sys.exit() with a string prints it to stderr and exits
    # with a non-zero status.
    try:
        main()
    except IOError as ex:
        # "except ... as" parses on Python 2.6+ and on Python 3; the older
        # comma form is a syntax error on Python 3.
        sys.exit("IO Error: %s" % ex)
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")