Mercurial > public > sg101
changeset 359:e877b9c05740
Fixed #158; Wrote a script to replace text in forum posts. Used it to fix up the beta.surfguitar101.com/media/ problem that came up during the initial conversion to the new site.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 05 Mar 2011 02:00:13 +0000 |
parents | 768e3568addf |
children | 2e90b63520b8 |
files | tools/post_sub.py |
diffstat | 1 files changed, 80 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
r"""
This script reads a .csv dump of the forums post table. It writes a new file,
performing a search and replace over a given field.

The first row of the input file is read as the column header and is used to
validate the --field option. The output file has no header row; its columns
are written in POST_FIELDS order.

The output file can be imported into MySQL with:

LOAD DATA LOCAL INFILE 'forums_post.csv' REPLACE INTO TABLE forums_post
CHARACTER SET utf8
FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY ''
LINES TERMINATED BY '\r\n';
SHOW WARNINGS;

Runs on Python 2.6+ and Python 3.
"""
import csv
import optparse
import re
import sys


USAGE = "usage: %prog [options] infile outfile"
DESCRIPTION = """\
Performs a search and replace on a field in a forums post .csv file.
"""

# Columns written to the output file, in this order.
POST_FIELDS = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
               'body', 'html', 'user_ip')

# Number of posts processed between progress dots (see --progress).
PROGRESS_INTERVAL = 100


def _open_csv(path, mode):
    """Open `path` in the way the csv module expects on this interpreter.

    `mode` is "r" or "w". Python 2's csv module wants binary files; Python 3's
    wants text files opened with newline='' so that the csv module itself
    controls line endings (the writer emits '\r\n' by default).
    """
    if sys.version_info[0] < 3:
        return open(path, mode + "b")
    return open(path, mode, newline="", encoding="utf-8")


def main(argv=None):
    """Run the search and replace described by the command line.

    `argv` is the list of command-line arguments (without the program name);
    when it is None, optparse falls back to sys.argv[1:].

    Exits via sys.exit() with a message when the arguments are invalid, the
    search pattern does not compile, the input file is empty, or the requested
    field is not one of the input file's header columns. IOError from opening
    or reading/writing the files is left for the caller to handle.
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        field='body',
    )
    parser.add_option("-p", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress "
             "[default: %default]")
    parser.add_option("-f", "--field",
        help="Name of the field to search [default: %default]")
    parser.add_option("-s", "--search", help="The search pattern")
    parser.add_option("-r", "--replace", help="The replacement text")

    opts, args = parser.parse_args(args=argv)

    if len(args) != 2:
        sys.exit("Please supply input and output file arguments.")

    if opts.search is None:
        sys.exit("Please specify a search pattern.")
    try:
        search_re = re.compile(opts.search)
    except re.error as ex:
        # Report a bad pattern as a usage error instead of a traceback.
        sys.exit("Invalid search pattern: %s" % ex)

    if opts.replace is None:
        sys.exit("Please specify replacement text.")

    infile_path, outfile_path = args

    with _open_csv(infile_path, "r") as infile:
        # No fieldnames are passed, so DictReader takes them from the first row.
        reader = csv.DictReader(infile)
        fieldnames = reader.fieldnames
        if not fieldnames:
            # An empty file yields no header row at all.
            sys.exit("Error, input file is empty: %s" % infile_path)
        if opts.field not in fieldnames:
            sys.exit("Error, invalid field option: %s" % opts.field)

        with _open_csv(outfile_path, "w") as outfile:
            writer = csv.DictWriter(outfile, POST_FIELDS)

            for n, row in enumerate(reader, 1):
                value = row[opts.field]
                # DictReader fills columns missing from a short row with None;
                # leave those alone rather than crash in re.sub().
                if value is not None:
                    row[opts.field] = search_re.sub(opts.replace, value)
                writer.writerow(row)

                if opts.progress and n % PROGRESS_INTERVAL == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()

    if opts.progress:
        # Terminate the line of progress dots.
        sys.stdout.write('\n')


if __name__ == '__main__':
    try:
        main()
    except IOError as ex:
        sys.exit("IO Error: %s" % ex)
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")