view ch5ex7.py @ 38:d876711b88b5

Comment out the calculation of L as it may take a long time.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 20:24:56 -0600
parents 931f60dee99e
children
line wrap: on
line source
"""Chapter 5.6, exercise 7 in Allen Downey's Think Complexity book.

"The Stanford Large Network Dataset Collection is a repository of datasets from
a variety of networks, including social networks, communication and
collaboration, Internet and road networks. See
http://snap.stanford.edu/data/index.html.  Download one of these datasets and
explore. Is there evidence of small-world behavior? Is the network scale-free?
What else can you discover?"

Here we will analyze the Texas road network:
http://snap.stanford.edu/data/roadNet-TX.html

"""
import sys
from collections import defaultdict

from matplotlib import pyplot

from Graph import Edge, Vertex, Graph


def main(script, datafile):

    node_map = defaultdict(Vertex)
    g = Graph()
    print 'Reading road network data...'
    with open(datafile, 'r') as fp:
        for line in fp:
            if line.startswith('#'):
                continue
            vn, wn = line.split()

            v = node_map[vn]
            w = node_map[wn]

            if v not in g:
                g.add_vertex(v)
            if w not in g:
                g.add_vertex(w)

            g.add_edge(Edge(v, w))

    print 'Calculating P(k)...'
    # retrieve probabilities
    p = g.get_p()

    # plot P(k) versus k on a log-log scale

    vals = p.items()
    print 'Sorting data...'
    vals.sort(key=lambda t: t[0])
    x, y = zip(*vals)

    assert abs(sum(y) - 1.0) < 1e-6

    print 'Calculating clustering coefficient...'
    c = g.clustering_coefficient()
    print 'c =', c

    # This may take a looong time......
    #print 'Setting edge lengths...'
    #g.set_edge_length(1)
    #print 'Calculating characteristic length L...'
    #l3 = g.big_l3()
    #print 'L =', l3

    print 'Plotting data...'
    pyplot.clf()
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.title('P(k) versus k')
    pyplot.xlabel('k')
    pyplot.ylabel('P(k)')
    pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
    pyplot.legend(loc='upper right')
    pyplot.show()


if __name__ == '__main__':
    main(*sys.argv)