# HG changeset patch
# User Brian Neal
# Date 1298683702 0
# Node ID 6805d15cda13c03e3e3e4c80ae7ab2de8e2a4e53
# Parent 0bf5a567706758278b63e710181ad7c7b6553610
#
# Adding a script I had to write on the fly to filter out posts from the posts
# csv file that had no parent topics. MyISAM let me get away with that, but
# InnoDB won't.
#
# diff -r 0bf5a5677067 -r 6805d15cda13 tools/filter_posts.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/tools/filter_posts.py Sat Feb 26 01:28:22 2011 +0000
"""
filter_posts.py - A script to filter out posts that have no parent topic in the
new database.

The topic ids are read from the first column of the topics csv file. Every row
of the posts csv file whose second column holds one of those ids is copied to
the output csv file; all other posts are dropped. The first row of the posts
file is treated as a header: it is printed to stdout and is not copied.

Runs on Python 2.6+ and Python 3.

"""
import csv
import optparse
import sys


USAGE = "usage: %prog [options]"
DESCRIPTION = """Filters out posts that have no parent topic."""

# Number of posts read between progress dots when --progress is given.
PROGRESS_INTERVAL = 100


def _open_csv(path, mode):
    """Open `path` the way the csv module of the running Python expects.

    `mode` is "r" or "w". Python 2's csv module wants binary files; Python 3's
    wants text files opened with newline="". On Python 3 the data is decoded
    as UTF-8 with the "surrogateescape" error handler so that bytes which are
    not valid UTF-8 are written back out unchanged instead of raising.
    """
    if sys.version_info[0] >= 3:
        return open(path, mode, newline="", encoding="utf-8",
                    errors="surrogateescape")
    return open(path, mode + "b")


def _load_topic_ids(path):
    """Return the set of integer topic ids found in column 0 of `path`.

    Blank lines are ignored. Raises ValueError if a non-blank row does not
    start with an integer, and IOError if the file cannot be opened.
    """
    topic_ids = set()
    with _open_csv(path, "r") as topic_file:
        for row in csv.reader(topic_file):
            # The csv reader yields [] for blank lines; there is no id to read.
            if row:
                topic_ids.add(int(row[0]))
    return topic_ids


def _filter_posts(post_path, output_path, topic_ids, show_progress=False):
    """Copy the posts whose topic id (column 1) is in `topic_ids`.

    The first row of `post_path` is printed to stdout and skipped. When
    `show_progress` is true a "." is written to stdout after every
    PROGRESS_INTERVAL posts read, followed by a newline at the end if any dot
    was written. Blank lines are ignored. Raises ValueError if a post row has a
    non-integer topic id and IOError if a file cannot be opened.
    """
    with _open_csv(post_path, "r") as post_file:
        reader = csv.reader(post_file)

        # Skip the first (header) row. next() with a default keeps an empty
        # posts file from escaping as an uncaught StopIteration.
        header = next(reader, None)
        if header is not None:
            print(header)

        with _open_csv(output_path, "w") as output_file:
            writer = csv.writer(output_file)
            posts_read = 0
            for row in reader:
                if not row:
                    continue
                posts_read += 1
                if int(row[1]) in topic_ids:
                    writer.writerow(row)
                if show_progress and posts_read % PROGRESS_INTERVAL == 0:
                    sys.stdout.write(".")
                    # Flush so the dot appears immediately, not at exit.
                    sys.stdout.flush()

            # Terminate the line of dots, if at least one was written.
            if show_progress and posts_read >= PROGRESS_INTERVAL:
                sys.stdout.write("\n")


def main(argv=None):
    """Parse the command line and write the filtered posts file.

    `argv` is the list of command-line arguments, without the program name;
    when it is None, optparse falls back to sys.argv[1:].
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        topic_file='forums_topic.csv',
        post_file='forums_post.csv',
        output_file='forums_post_filtered.csv',
    )
    parser.add_option("-s", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-t", "--topic-file",
        help="Name of the topics csv file [default: %default]")
    parser.add_option("-p", "--post-file",
        help="Name of the posts csv file [default: %default]")
    parser.add_option("-o", "--output-file",
        help="Name of the output posts csv file [default: %default]")

    opts, _ = parser.parse_args(args=argv)

    topic_ids = _load_topic_ids(opts.topic_file)
    _filter_posts(opts.post_file, opts.output_file, topic_ids, opts.progress)


if __name__ == '__main__':
    try:
        main()
    except IOError as ex:
        sys.exit("IO Error: %s" % ex)
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")