bgneal@359: """
bgneal@359: This script reads a .csv dump of the forums post table. It writes a new file,
bgneal@359: performing a search and replace over a given field.
bgneal@359: 
bgneal@359: The output file can be imported into MySQL with:
bgneal@359: 
bgneal@359: LOAD DATA LOCAL INFILE 'forums_post.csv' REPLACE INTO TABLE forums_post
bgneal@359: CHARACTER SET utf8
bgneal@359: FIELDS TERMINATED BY ',' OPTIONALLY ENCLOSED BY '"' ESCAPED BY ''
bgneal@359: LINES TERMINATED BY '\r\n';
bgneal@359: SHOW WARNINGS;
bgneal@359: 
bgneal@359: """
bgneal@359: from __future__ import with_statement
bgneal@359: import csv
bgneal@359: import re
bgneal@359: import optparse
bgneal@359: import sys
bgneal@359: 
bgneal@359: 
# Help text handed to optparse.OptionParser in main(); optparse expands
# "%prog" to the name of the running program.
USAGE = 'usage: %prog [options] infile outfile'
DESCRIPTION = (
    "Performs a search and replace on a field in a forums post .csv file.\n"
)

# Column names, in order, that main() gives to csv.DictWriter for every row
# written to the output file.
POST_FIELDS = (
    'id',
    'topic_id',
    'user_id',
    'creation_date',
    'update_date',
    'body',
    'html',
    'user_ip',
)
bgneal@359: 
bgneal@359: 
def main(argv=None):
    """Search and replace over one field of a forums post .csv file.

    Parses the command line, then copies every row of the input file to the
    output file with the --search regular expression replaced by the
    --replace text in the selected field (--field, default 'body').

    The input's first row is taken as its column names, because the
    csv.DictReader is created without explicit fieldnames. Output rows are
    written in POST_FIELDS order and no header row is written.

    argv -- command-line arguments without the program name; when None,
            optparse falls back to sys.argv[1:].

    Exits through sys.exit() with a message when the two file arguments, the
    search pattern or the replacement text are missing, or when the requested
    field is not a column of the input file.
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        field='body',
    )
    parser.add_option("-p", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-f", "--field",
        help="Name of the field to search [default: %default]")
    parser.add_option("-s", "--search", help="The search pattern")
    parser.add_option("-r", "--replace", help="The replacement text")

    opts, args = parser.parse_args(args=argv)

    if len(args) != 2:
        sys.exit("Please supply input and output file arguments.")

    if opts.search is None:
        sys.exit("Please specify a search pattern.")
    search_re = re.compile(opts.search)

    if opts.replace is None:
        sys.exit("Please specify replacement text.")

    # Binary mode on both files: the Python 2 csv module does its own line
    # terminator handling.
    with open(args[0], "rb") as infile:
        # NOTE(review): column names come from the first input row, while the
        # output gets no header row -- confirm the dump starts with a header.
        reader = csv.DictReader(infile)

        # fieldnames is None for an empty input file; treat that as "no
        # columns" so the check below exits cleanly instead of raising
        # TypeError on the membership test.
        fieldnames = reader.fieldnames or ()
        if opts.field not in fieldnames:
            sys.exit("Error, invalid field option: %s" % opts.field)

        with open(args[1], "wb") as outfile:
            writer = csv.DictWriter(outfile, POST_FIELDS)

            # Count rows from 1 so a dot appears after each full hundred.
            for n, row in enumerate(reader, 1):
                row[opts.field] = search_re.sub(opts.replace, row[opts.field])
                writer.writerow(row)

                # Only report progress when asked to with -p/--progress.
                if opts.progress and n % 100 == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()

            # Finish the line of dots so the shell prompt starts on a new line.
            if opts.progress:
                sys.stdout.write('\n')
bgneal@359: 
if __name__ == '__main__':
    # Turn the expected failure modes into a short message and a non-zero
    # exit status (sys.exit with a string argument) instead of a traceback.
    # The "except ... as" spelling is accepted by Python 2.6+ and Python 3;
    # the older "except IOError, ex" form is a syntax error on Python 3.
    try:
        main()
    except IOError as ex:
        sys.exit("IO Error: %s" % ex)
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")