Mercurial > public > sg101
view tools/filter_posts.py @ 629:f4c043cf55ac
Wiki integration. Requests don't always have sessions.
In particular this occurs when a request is made without a trailing slash.
The Common middleware redirects when this happens, and the middleware
process_request() processing stops before a session can get added.
So just set an attribute on the request object for each operation.
This seemed weird to me at first, but there are plenty of examples of this
in the Django code base already.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Tue, 13 Nov 2012 13:50:06 -0600 |
parents | 6805d15cda13 |
children |
line wrap: on
line source
""" filter_posts.py - A script to filter out posts that have no parent topic in the new database. """ from __future__ import with_statement import csv import optparse import sys USAGE = "usage: %prog [options]" DESCRIPTION = """Filters out posts that have no parent topic.""" def main(argv=None): parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION) parser.set_defaults( topic_file='forums_topic.csv', post_file='forums_post.csv', output_file='forums_post_filtered.csv', ) parser.add_option("-s", "--progress", action="store_true", help="Output a . after every 100 posts to show progress [default: %default]") parser.add_option("-t", "--topic-file", help="Name of the topics csv file [default: %default]") parser.add_option("-p", "--post-file", help="Name of the posts csv file [default: %default]") parser.add_option("-o", "--output-file", help="Name of the output posts csv file [default: %default]") opts, args = parser.parse_args(args=argv) topics = set() with open(opts.topic_file, "rb") as topic_file: reader = csv.reader(topic_file) for row in reader: topics.add(int(row[0])) with open(opts.post_file, "rb") as post_file: reader = csv.reader(post_file) # skip first row print reader.next() with open(opts.output_file, "wb") as output_file: writer = csv.writer(output_file) for row in reader: topic = int(row[1]) if topic in topics: writer.writerow(row) if __name__ == '__main__': try: main() except IOError, ex: sys.exit("IO Error: %s" % ex) except KeyboardInterrupt: sys.exit("Control-C interrupt")