bgneal@37
|
1 """Chapter 5.6, exercise 7 in Allen Downey's Think Complexity book.
|
bgneal@37
|
2
|
bgneal@37
|
3 "The Stanford Large Network Dataset Collection is a repository of datasets from
|
bgneal@37
|
4 a variety of networks, including social networks, communication and
|
bgneal@37
|
5 collaboration, Internet and road networks. See
|
bgneal@37
|
6 http://snap.stanford.edu/data/index.html. Download one of these datasets and
|
bgneal@37
|
7 explore. Is there evidence of small-world behavior? Is the network scale-free?
|
bgneal@37
|
8 What else can you discover?"
|
bgneal@37
|
9
|
bgneal@37
|
10 Here we will analyze the Texas road network:
|
bgneal@37
|
11 http://snap.stanford.edu/data/roadNet-TX.html
|
bgneal@37
|
12
|
bgneal@37
|
13 """
|
bgneal@37
|
14 import sys
|
bgneal@37
|
15 from collections import defaultdict
|
bgneal@37
|
16
|
bgneal@37
|
17 from matplotlib import pyplot
|
bgneal@37
|
18
|
bgneal@37
|
19 from Graph import Edge, Vertex, Graph
|
bgneal@37
|
20
|
bgneal@37
|
21
|
bgneal@37
|
22 def main(script, datafile):
|
bgneal@37
|
23
|
bgneal@37
|
24 node_map = defaultdict(Vertex)
|
bgneal@37
|
25 g = Graph()
|
bgneal@37
|
26 print 'Reading road network data...'
|
bgneal@37
|
27 with open(datafile, 'r') as fp:
|
bgneal@37
|
28 for line in fp:
|
bgneal@37
|
29 if line.startswith('#'):
|
bgneal@37
|
30 continue
|
bgneal@37
|
31 vn, wn = line.split()
|
bgneal@37
|
32
|
bgneal@37
|
33 v = node_map[vn]
|
bgneal@37
|
34 w = node_map[wn]
|
bgneal@37
|
35
|
bgneal@37
|
36 if v not in g:
|
bgneal@37
|
37 g.add_vertex(v)
|
bgneal@37
|
38 if w not in g:
|
bgneal@37
|
39 g.add_vertex(w)
|
bgneal@37
|
40
|
bgneal@37
|
41 g.add_edge(Edge(v, w))
|
bgneal@37
|
42
|
bgneal@37
|
43 print 'Calculating P(k)...'
|
bgneal@37
|
44 # retrieve probabilities
|
bgneal@37
|
45 p = g.get_p()
|
bgneal@37
|
46
|
bgneal@37
|
47 # plot P(k) versus k on a log-log scale
|
bgneal@37
|
48
|
bgneal@37
|
49 vals = p.items()
|
bgneal@37
|
50 print 'Sorting data...'
|
bgneal@37
|
51 vals.sort(key=lambda t: t[0])
|
bgneal@37
|
52 x, y = zip(*vals)
|
bgneal@37
|
53
|
bgneal@37
|
54 assert abs(sum(y) - 1.0) < 1e-6
|
bgneal@37
|
55
|
bgneal@37
|
56 print 'Calculating clustering coefficient...'
|
bgneal@37
|
57 c = g.clustering_coefficient()
|
bgneal@37
|
58 print 'c =', c
|
bgneal@37
|
59 print 'Setting edge lengths...'
|
bgneal@37
|
60 g.set_edge_length(1)
|
bgneal@37
|
61 print 'Calculating characteristic length L...'
|
bgneal@37
|
62 l3 = g.big_l3()
|
bgneal@37
|
63 print 'L =', l3
|
bgneal@37
|
64
|
bgneal@37
|
65 print 'Plotting data...'
|
bgneal@37
|
66 pyplot.clf()
|
bgneal@37
|
67 pyplot.xscale('log')
|
bgneal@37
|
68 pyplot.yscale('log')
|
bgneal@37
|
69 pyplot.title('P(k) versus k')
|
bgneal@37
|
70 pyplot.xlabel('k')
|
bgneal@37
|
71 pyplot.ylabel('P(k)')
|
bgneal@37
|
72 pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
|
bgneal@37
|
73 pyplot.legend(loc='upper right')
|
bgneal@37
|
74 pyplot.show()
|
bgneal@37
|
75
|
bgneal@37
|
76
|
bgneal@37
|
77 if __name__ == '__main__':
|
bgneal@37
|
78 main(*sys.argv)
|