bgneal@37: """Chapter 5.6, exercise 7 in Allen Downey's Think Complexity book. bgneal@37: bgneal@37: "The Stanford Large Network Dataset Collection is a repository of datasets from bgneal@37: a variety of networks, including social networks, communication and bgneal@37: collaboration, Internet and road networks. See bgneal@37: http://snap.stanford.edu/data/index.html. Download one of these datasets and bgneal@37: explore. Is there evidence of small-world behavior? Is the network scale-free? bgneal@37: What else can you discover?" bgneal@37: bgneal@37: Here we will analyze the Texas road network: bgneal@37: http://snap.stanford.edu/data/roadNet-TX.html bgneal@37: bgneal@37: """ bgneal@37: import sys bgneal@37: from collections import defaultdict bgneal@37: bgneal@37: from matplotlib import pyplot bgneal@37: bgneal@37: from Graph import Edge, Vertex, Graph bgneal@37: bgneal@37: bgneal@37: def main(script, datafile): bgneal@37: bgneal@37: node_map = defaultdict(Vertex) bgneal@37: g = Graph() bgneal@37: print 'Reading road network data...' bgneal@37: with open(datafile, 'r') as fp: bgneal@37: for line in fp: bgneal@37: if line.startswith('#'): bgneal@37: continue bgneal@37: vn, wn = line.split() bgneal@37: bgneal@37: v = node_map[vn] bgneal@37: w = node_map[wn] bgneal@37: bgneal@37: if v not in g: bgneal@37: g.add_vertex(v) bgneal@37: if w not in g: bgneal@37: g.add_vertex(w) bgneal@37: bgneal@37: g.add_edge(Edge(v, w)) bgneal@37: bgneal@37: print 'Calculating P(k)...' bgneal@37: # retrieve probabilities bgneal@37: p = g.get_p() bgneal@37: bgneal@37: # plot P(k) versus k on a log-log scale bgneal@37: bgneal@37: vals = p.items() bgneal@37: print 'Sorting data...' bgneal@37: vals.sort(key=lambda t: t[0]) bgneal@37: x, y = zip(*vals) bgneal@37: bgneal@37: assert abs(sum(y) - 1.0) < 1e-6 bgneal@37: bgneal@37: print 'Calculating clustering coefficient...' bgneal@37: c = g.clustering_coefficient() bgneal@37: print 'c =', c bgneal@38: bgneal@38: # This may take a looong time...... bgneal@38: #print 'Setting edge lengths...' bgneal@38: #g.set_edge_length(1) bgneal@38: #print 'Calculating characteristic length L...' bgneal@38: #l3 = g.big_l3() bgneal@38: #print 'L =', l3 bgneal@37: bgneal@37: print 'Plotting data...' bgneal@37: pyplot.clf() bgneal@37: pyplot.xscale('log') bgneal@37: pyplot.yscale('log') bgneal@37: pyplot.title('P(k) versus k') bgneal@37: pyplot.xlabel('k') bgneal@37: pyplot.ylabel('P(k)') bgneal@37: pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3) bgneal@37: pyplot.legend(loc='upper right') bgneal@37: pyplot.show() bgneal@37: bgneal@37: bgneal@37: if __name__ == '__main__': bgneal@37: main(*sys.argv)