"""
This script reads a .csv dump of the forums post table. It writes a new file,
performing a regular-expression search and replace over a given field.

The first line of the input file is used as the header row that names the
columns. The output file is written without a header row, using the column
order given by POST_FIELDS.

The output file can be imported into MySQL with:

    LOAD DATA LOCAL INFILE 'forums_post.csv' REPLACE INTO TABLE forums_post
    CHARACTER SET utf8
    FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY ''
    LINES TERMINATED BY '\\r\\n';
    SHOW WARNINGS;

"""
from __future__ import with_statement
import csv
import re
import optparse
import sys


USAGE = "usage: %prog [options] infile outfile"
DESCRIPTION = """\
Performs a search and replace on a field in a forums post .csv file.
"""

# Column order used for every row written to the output file.
POST_FIELDS = ('id', 'topic_id', 'user_id', 'creation_date', 'update_date',
        'body', 'html', 'user_ip')


def _open_csv(path, write=False):
    """Open `path` in the mode the csv module expects on this interpreter.

    Python 2's csv module works on binary files. Python 3's works on text
    files opened with newline='' so that the csv module itself controls the
    line terminators. On Python 3 the file is treated as UTF-8, matching the
    CHARACTER SET named in the module docstring.
    """
    if sys.version_info[0] >= 3:
        return open(path, "w" if write else "r", newline="", encoding="utf-8")
    return open(path, "wb" if write else "rb")


def main(argv=None):
    """Run the search and replace described by the command-line arguments.

    argv -- list of argument strings to parse; None lets optparse fall back
            to sys.argv[1:].

    Exits through sys.exit() with a message when the two file arguments, the
    search pattern, or the replacement text are missing, or when the chosen
    field is not one of the input file's header names.
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        field='body',
    )
    parser.add_option("-p", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-f", "--field",
        help="Name of the field to search [default: %default]")
    parser.add_option("-s", "--search", help="The search pattern")
    parser.add_option("-r", "--replace", help="The replacement text")

    opts, args = parser.parse_args(args=argv)

    if len(args) != 2:
        sys.exit("Please supply input and output file arguments.")

    if opts.search is None:
        sys.exit("Please specify a search pattern.")
    search_re = re.compile(opts.search)

    if opts.replace is None:
        sys.exit("Please specify replacement text.")

    with _open_csv(args[0]) as infile:
        reader = csv.DictReader(infile)
        # fieldnames is None for an empty input file; treat that the same as
        # an unknown field instead of failing with a TypeError.
        if opts.field not in (reader.fieldnames or ()):
            sys.exit("Error, invalid field option: %s" % opts.field)

        # The output file is opened only after the field has been validated,
        # so a bad --field value never truncates an existing output file.
        with _open_csv(args[1], write=True) as outfile:
            writer = csv.DictWriter(outfile, POST_FIELDS)

            n = 0
            for row in reader:
                row[opts.field] = search_re.sub(opts.replace, row[opts.field])
                writer.writerow(row)

                n += 1
                if opts.progress and n % 100 == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()

    if opts.progress:
        # Terminate the line of progress dots.
        sys.stdout.write('\n')

if __name__ == '__main__':
    try:
        main()
    except IOError:
        # sys.exc_info() is used instead of "except IOError, ex" so that the
        # file parses under both Python 2 and Python 3.
        sys.exit("IO Error: %s" % sys.exc_info()[1])
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")