changeset 37:931f60dee99e

Chapter 5.6, exercise 7. Exploring the Texas road network.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 20:23:52 -0600
parents 305cc03c2750
children d876711b88b5
files ch5ex7.py
diffstat 1 files changed, 78 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ch5ex7.py	Thu Jan 10 20:23:52 2013 -0600
@@ -0,0 +1,78 @@
+"""Chapter 5.6, exercise 7 in Allen Downey's Think Complexity book.
+
+"The Stanford Large Network Dataset Collection is a repository of datasets from
+a variety of networks, including social networks, communication and
+collaboration, Internet and road networks. See
+http://snap.stanford.edu/data/index.html.  Download one of these datasets and
+explore. Is there evidence of small-world behavior? Is the network scale-free?
+What else can you discover?"
+
+Here we will analyze the Texas road network:
+http://snap.stanford.edu/data/roadNet-TX.html
+
+"""
+import sys
+from collections import defaultdict
+
+from matplotlib import pyplot
+
+from Graph import Edge, Vertex, Graph
+
+
+def main(script, datafile):
+
+    node_map = defaultdict(Vertex)
+    g = Graph()
+    print 'Reading road network data...'
+    with open(datafile, 'r') as fp:
+        for line in fp:
+            if line.startswith('#'):
+                continue
+            vn, wn = line.split()
+
+            v = node_map[vn]
+            w = node_map[wn]
+
+            if v not in g:
+                g.add_vertex(v)
+            if w not in g:
+                g.add_vertex(w)
+
+            g.add_edge(Edge(v, w))
+
+    print 'Calculating P(k)...'
+    # retrieve probabilities
+    p = g.get_p()
+
+    # plot P(k) versus k on a log-log scale
+
+    vals = p.items()
+    print 'Sorting data...'
+    vals.sort(key=lambda t: t[0])
+    x, y = zip(*vals)
+
+    assert abs(sum(y) - 1.0) < 1e-6
+
+    print 'Calculating clustering coefficient...'
+    c = g.clustering_coefficient()
+    print 'c =', c
+    print 'Setting edge lengths...'
+    g.set_edge_length(1)
+    print 'Calculating characteristic length L...'
+    l3 = g.big_l3()
+    print 'L =', l3
+
+    print 'Plotting data...'
+    pyplot.clf()
+    pyplot.xscale('log')
+    pyplot.yscale('log')
+    pyplot.title('P(k) versus k')
+    pyplot.xlabel('k')
+    pyplot.ylabel('P(k)')
+    pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
+    pyplot.legend(loc='upper right')
+    pyplot.show()
+
+
+if __name__ == '__main__':
+    main(*sys.argv)