annotate tools/filter_posts.py @ 821:71db8076dc3d

Bandmap WIP: geocoding integrated with add form. Add form works. Before submitting the form, client side JS makes a geocode request to Google and populates hidden lat/lon fields with the result. Successfully created a model instance on the server side. Still need to update admin dashboard, admin approval, and give out badges for adding bands to the map. Once that is done, then work on displaying the map with filtering.
author Brian Neal <bgneal@gmail.com>
date Tue, 23 Sep 2014 20:40:31 -0500
parents 6805d15cda13
children
rev   line source
bgneal@334 1 """
bgneal@334 2 filter_posts.py - A script to filter out posts that have no parent topic in the
bgneal@334 3 new database.
bgneal@334 4
bgneal@334 5 """
bgneal@334 6 from __future__ import with_statement
bgneal@334 7 import csv
bgneal@334 8 import optparse
bgneal@334 9 import sys
bgneal@334 10
bgneal@334 11
USAGE = "usage: %prog [options]"
DESCRIPTION = """Filters out posts that have no parent topic."""

# Number of processed posts between progress dots when --progress is given.
_PROGRESS_INTERVAL = 100


def _open_csv(path, mode):
    """Open *path* in the mode the csv module expects on this interpreter.

    *mode* is "r" or "w". Python 2's csv module wants binary files, while
    Python 3's wants text files opened with newline="" so that the csv
    module handles line endings (and embedded newlines) itself.
    """
    if sys.version_info[0] < 3:
        return open(path, mode + "b")
    return open(path, mode, newline="")


def main(argv=None):
    """Copy the posts whose topic exists in the topic file to a new csv file.

    The first column of every row in the topic file is read as an integer
    topic id. The first row of the post file is treated as a header: it is
    echoed to stdout and is not written to the output file. Every following
    post row is written to the output file only if its second column, read
    as an integer, is one of the known topic ids.

    argv -- list of command line arguments handed to optparse; when None,
            optparse falls back to sys.argv[1:].

    Raises IOError if a file cannot be opened and ValueError if an id column
    does not hold an integer.
    """
    parser = optparse.OptionParser(usage=USAGE, description=DESCRIPTION)
    parser.set_defaults(
        progress=False,
        topic_file='forums_topic.csv',
        post_file='forums_post.csv',
        output_file='forums_post_filtered.csv',
    )
    parser.add_option("-s", "--progress", action="store_true",
        help="Output a . after every 100 posts to show progress [default: %default]")
    parser.add_option("-t", "--topic-file",
        help="Name of the topics csv file [default: %default]")
    parser.add_option("-p", "--post-file",
        help="Name of the posts csv file [default: %default]")
    parser.add_option("-o", "--output-file",
        help="Name of the output posts csv file [default: %default]")

    opts, _args = parser.parse_args(args=argv)

    # Collect the ids of every topic that exists in the new database.
    topics = set()
    with _open_csv(opts.topic_file, "r") as topic_file:
        for row in csv.reader(topic_file):
            # csv.reader yields [] for blank lines; there is no id to read.
            if row:
                topics.add(int(row[0]))

    with _open_csv(opts.post_file, "r") as post_file:
        reader = csv.reader(post_file)
        # Skip the first (header) row, echoing it to stdout. An empty post
        # file has no header; fall through and produce an empty output file.
        header = next(reader, None)
        if header is not None:
            print(header)

        with _open_csv(opts.output_file, "w") as output_file:
            writer = csv.writer(output_file)
            processed = 0
            for row in reader:
                if not row:
                    continue
                processed += 1
                if int(row[1]) in topics:
                    writer.writerow(row)
                if opts.progress and processed % _PROGRESS_INTERVAL == 0:
                    # Flush so the dot shows up immediately on a terminal.
                    sys.stdout.write(".")
                    sys.stdout.flush()
            if opts.progress:
                # Terminate the line of dots.
                sys.stdout.write("\n")
bgneal@334 52
if __name__ == '__main__':
    # Turn I/O failures and Ctrl-C into a short message on stderr with a
    # non-zero exit status instead of a traceback.
    try:
        main()
    except IOError as ex:
        sys.exit("IO Error: %s" % ex)
    except KeyboardInterrupt:
        sys.exit("Control-C interrupt")