annotate tools/filter_posts.py @ 887:9a15f7c27526

Actually save model object upon change. This commit was tested on the comments model. Additional logging added. Added check for Markdown image references. Added TODOs after observing behavior on comments.
author Brian Neal <bgneal@gmail.com>
date Tue, 03 Feb 2015 21:09:44 -0600
parents 6805d15cda13
children
rev   line source
bgneal@334 1 """
bgneal@334 2 filter_posts.py - A script to filter out posts that have no parent topic in the
bgneal@334 3 new database.
bgneal@334 4
bgneal@334 5 """
bgneal@334 6 from __future__ import with_statement
bgneal@334 7 import csv
bgneal@334 8 import optparse
bgneal@334 9 import sys
bgneal@334 10
bgneal@334 11
# Usage line and description that optparse shows in the --help output.
USAGE = "usage: %prog [options]"
DESCRIPTION = """Filters out posts that have no parent topic."""


def _open_csv(path, mode):
    """Open ``path`` the way the csv module of the running Python expects.

    mode -- "r" to read or "w" to write.
    """
    if sys.version_info[0] < 3:
        # Python 2's csv module works on byte strings and needs binary mode.
        return open(path, mode + "b")
    # Python 3's csv module works on text and wants newline translation
    # turned off. surrogateescape lets bytes that are not valid UTF-8
    # round-trip unchanged, like the byte pass-through on Python 2.
    return open(path, mode, newline="", encoding="utf-8",
                errors="surrogateescape")


def main(argv=None):
    """Copy the posts whose parent topic exists into a new csv file.

    Topic ids are read from the first column of every row of the topics
    file; no row is skipped there, so that file must not contain a header
    row. The first row of the posts file is treated as a header: it is
    echoed to stdout and is not written to the output. Every following post
    row whose second column is a known topic id is written unchanged to the
    output file. An empty posts file produces an empty output file.

    argv -- list of command-line arguments; None lets optparse fall back to
            sys.argv[1:].

    Raises IOError when a file cannot be opened, ValueError when an id
    field is not an integer and IndexError when a row is too short.
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,  # gives the %default in the --progress help a value
        topic_file='forums_topic.csv',
        post_file='forums_post.csv',
        output_file='forums_post_filtered.csv',
    )
    parser.add_option("-s", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-t", "--topic-file",
        help="Name of the topics csv file [default: %default]")
    parser.add_option("-p", "--post-file",
        help="Name of the posts csv file [default: %default]")
    parser.add_option("-o", "--output-file",
        help="Name of the output posts csv file [default: %default]")

    # Positional arguments are accepted by optparse but not used here.
    opts = parser.parse_args(args=argv)[0]

    topics = set()
    with _open_csv(opts.topic_file, "r") as topic_file:
        for row in csv.reader(topic_file):
            topics.add(int(row[0]))

    with _open_csv(opts.post_file, "r") as post_file:
        reader = csv.reader(post_file)
        # Skip the first row. The default keeps an empty posts file from
        # surfacing as a bare StopIteration.
        header = next(reader, None)
        if header is not None:
            print(header)
        with _open_csv(opts.output_file, "w") as output_file:
            writer = csv.writer(output_file)
            seen = 0
            for row in reader:
                if int(row[1]) in topics:
                    writer.writerow(row)
                seen += 1
                # Progress counts every post examined, kept or dropped.
                if opts.progress and seen % 100 == 0:
                    sys.stdout.write('.')
                    sys.stdout.flush()
            if opts.progress and seen >= 100:
                # Finish the line of dots so later output starts cleanly.
                sys.stdout.write('\n')
bgneal@334 52
if __name__ == '__main__':
    # Report the expected failure modes as a short message (sys.exit with a
    # string prints it to stderr and exits non-zero) instead of a traceback.
    try:
        main()
    except IOError as ex:  # "as" form parses on Python 2.6+ and Python 3
        sys.exit("IO Error: %s" % ex)
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")