Mercurial > public > sg101
view tools/filter_posts.py @ 339:b871892264f2
Adding the sg101 IRC bot code to SVN. This code is pretty rough and needs love, but it gets the job done (one of my first Python apps). This fixes #150.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Sat, 26 Feb 2011 21:27:49 +0000 |
parents | 6805d15cda13 |
children |
line wrap: on
line source
""" filter_posts.py - A script to filter out posts that have no parent topic in the new database. """ from __future__ import with_statement import csv import optparse import sys USAGE = "usage: %prog [options]" DESCRIPTION = """Filters out posts that have no parent topic.""" def main(argv=None): parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION) parser.set_defaults( topic_file='forums_topic.csv', post_file='forums_post.csv', output_file='forums_post_filtered.csv', ) parser.add_option("-s", "--progress", action="store_true", help="Output a . after every 100 posts to show progress [default: %default]") parser.add_option("-t", "--topic-file", help="Name of the topics csv file [default: %default]") parser.add_option("-p", "--post-file", help="Name of the posts csv file [default: %default]") parser.add_option("-o", "--output-file", help="Name of the output posts csv file [default: %default]") opts, args = parser.parse_args(args=argv) topics = set() with open(opts.topic_file, "rb") as topic_file: reader = csv.reader(topic_file) for row in reader: topics.add(int(row[0])) with open(opts.post_file, "rb") as post_file: reader = csv.reader(post_file) # skip first row print reader.next() with open(opts.output_file, "wb") as output_file: writer = csv.writer(output_file) for row in reader: topic = int(row[1]) if topic in topics: writer.writerow(row) if __name__ == '__main__': try: main() except IOError, ex: sys.exit("IO Error: %s" % ex) except KeyboardInterrupt: sys.exit("Control-C interrupt")