annotate ch5ex7.py @ 42:039249efe42f

Chapter 6, exercise 2, #4. Wrote a program to output the center column of a rule 30 CA as a stream of bytes. It is very slow though. It has to run a very long time to produce enough data for dieharder. Committing it now but will have to let it run overnight or something to generate a large file.
author Brian Neal <bgneal@gmail.com>
date Sun, 13 Jan 2013 16:24:00 -0600
parents d876711b88b5
children
rev   line source
bgneal@37 1 """Chapter 5.6, exercise 7 in Allen Downey's Think Complexity book.
bgneal@37 2
bgneal@37 3 "The Stanford Large Network Dataset Collection is a repository of datasets from
bgneal@37 4 a variety of networks, including social networks, communication and
bgneal@37 5 collaboration, Internet and road networks. See
bgneal@37 6 http://snap.stanford.edu/data/index.html. Download one of these datasets and
bgneal@37 7 explore. Is there evidence of small-world behavior? Is the network scale-free?
bgneal@37 8 What else can you discover?"
bgneal@37 9
bgneal@37 10 Here we will analyze the Texas road network:
bgneal@37 11 http://snap.stanford.edu/data/roadNet-TX.html
bgneal@37 12
bgneal@37 13 """
bgneal@37 14 import sys
bgneal@37 15 from collections import defaultdict
bgneal@37 16
bgneal@37 17 from matplotlib import pyplot
bgneal@37 18
bgneal@37 19 from Graph import Edge, Vertex, Graph
bgneal@37 20
bgneal@37 21
bgneal@37 22 def main(script, datafile):
bgneal@37 23
bgneal@37 24 node_map = defaultdict(Vertex)
bgneal@37 25 g = Graph()
bgneal@37 26 print 'Reading road network data...'
bgneal@37 27 with open(datafile, 'r') as fp:
bgneal@37 28 for line in fp:
bgneal@37 29 if line.startswith('#'):
bgneal@37 30 continue
bgneal@37 31 vn, wn = line.split()
bgneal@37 32
bgneal@37 33 v = node_map[vn]
bgneal@37 34 w = node_map[wn]
bgneal@37 35
bgneal@37 36 if v not in g:
bgneal@37 37 g.add_vertex(v)
bgneal@37 38 if w not in g:
bgneal@37 39 g.add_vertex(w)
bgneal@37 40
bgneal@37 41 g.add_edge(Edge(v, w))
bgneal@37 42
bgneal@37 43 print 'Calculating P(k)...'
bgneal@37 44 # retrieve probabilities
bgneal@37 45 p = g.get_p()
bgneal@37 46
bgneal@37 47 # plot P(k) versus k on a log-log scale
bgneal@37 48
bgneal@37 49 vals = p.items()
bgneal@37 50 print 'Sorting data...'
bgneal@37 51 vals.sort(key=lambda t: t[0])
bgneal@37 52 x, y = zip(*vals)
bgneal@37 53
bgneal@37 54 assert abs(sum(y) - 1.0) < 1e-6
bgneal@37 55
bgneal@37 56 print 'Calculating clustering coefficient...'
bgneal@37 57 c = g.clustering_coefficient()
bgneal@37 58 print 'c =', c
bgneal@38 59
bgneal@38 60 # This may take a looong time......
bgneal@38 61 #print 'Setting edge lengths...'
bgneal@38 62 #g.set_edge_length(1)
bgneal@38 63 #print 'Calculating characteristic length L...'
bgneal@38 64 #l3 = g.big_l3()
bgneal@38 65 #print 'L =', l3
bgneal@37 66
bgneal@37 67 print 'Plotting data...'
bgneal@37 68 pyplot.clf()
bgneal@37 69 pyplot.xscale('log')
bgneal@37 70 pyplot.yscale('log')
bgneal@37 71 pyplot.title('P(k) versus k')
bgneal@37 72 pyplot.xlabel('k')
bgneal@37 73 pyplot.ylabel('P(k)')
bgneal@37 74 pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
bgneal@37 75 pyplot.legend(loc='upper right')
bgneal@37 76 pyplot.show()
bgneal@37 77
bgneal@37 78
bgneal@37 79 if __name__ == '__main__':
bgneal@37 80 main(*sys.argv)