view ch5ex6.py @ 34:66a5e7f7c10f

Added a check to see if we were calculating the probabilities correctly.
author Brian Neal <bgneal@gmail.com>
date Wed, 09 Jan 2013 20:19:49 -0600
parents a13c00c0dfe5
children 10db8c3a6b83
line wrap: on
line source
# -*- coding: utf-8 -*-
"""Chapter 5.5, exercise 6 in Allen Downey's Think Complexity book.

1. Read Barabási and Albert’s paper and implement their algorithm for generating
   graphs. See if you can replicate their Figure 2(A), which shows P(k) versus
   k for a graph with 150 000 vertices.

"""
import collections
import random
import sys

from matplotlib import pyplot

from Graph import Graph, Vertex, Edge


class BAGraph(Graph):
    """BAGraph implements the algorithm described in the Barabási and
    Albert paper "Emergence of Scaling in Random Networks" from Science
    Magazine Vol. 286, 15 October 1999.

    """
    def __init__(self, m0, m):
        """Create a graph with m0 vertices and m connecting edges.

        """
        self.m0 = m0
        self.m = m
        self.histogram = []

        initial_vertices = [Vertex() for i in xrange(m0)]

        super(BAGraph, self).__init__(vs=initial_vertices)

        # Add initial edges between nodes randomly
        n = 0
        while n != m:
            # pick two vertices at random
            v = random.choice(initial_vertices)
            w = random.choice(initial_vertices)

            # if they aren't the same and don't already have an edge, add one
            if v is not w and not self.get_edge(v, w):
                self.add_edge(Edge(v, w))
                n += 1

    def add_edge(self, edge):
        """Our version of add_edge() adds the two vertices on either end of the
        edge to a histogram. This allows us to more easily pick popular vertices
        when adding edges as part of the step() method.

        """
        super(BAGraph, self).add_edge(edge)     # invoke base class version

        v, w = edge
        self.histogram.append(v)
        self.histogram.append(w)

    def step(self):
        """This method adds a new vertex to the graph, then adds m edges that
        link the new vertex to m different vertices already present in the
        graph. Preferential treatment is given towards vertices that already
        have high connectivity.

        The paper describes this preferential choosing as creating a probability
        P that a new vertex will be connected to vertex i depends on the
        connectivity ki of that vertex, so that P(ki) = ki / sum(kj).

        We implement that by keeping a list of vertices (histogram), where we
        insert a vertex into this list whenever it gets an edge added to it. We
        then randomly choose a vertex from this list. Thus the vertices with
        more edges will be more likely chosen.

        """
        # pick m unique vertices to attach to the new vertex
        vs = set()
        while len(vs) < self.m:
            w = random.choice(self.histogram)
            if w not in vs:
                vs.add(w)

        # Add the new vertex to the graph and create edges
        v = Vertex()
        self.add_vertex(v)
        for w in vs:
            self.add_edge(Edge(v, w))

    def get_p(self):
        """This method returns a dictionary of probabilities where each key is
        the connectivity k and the value is the probability [0-1] for this
        graph.

        """
        # First, for each vertex, count up how many neighbors it has
        vs = self.vertices()

        c = collections.Counter()
        for v in vs:
            n = len(self.out_vertices(v))
            c[n] += 1

        n = len(vs)
        if n > 0:
            for k in c:
                c[k] = float(c[k]) / n

        return c


def main(script, m0, m, n):

    # create a BAGraph with parameters m0 & m:
    m0, m, n = int(m0), int(m), int(n)
    g = BAGraph(m0, m)

    # step the graph n times:
    for i in xrange(n):
        g.step()

    # retrieve probabilities
    p = g.get_p()

    # plot P(k) versus k on a log-log scale

    vals = p.items()
    vals.sort(key=lambda t: t[0])
    x, y = zip(*vals)

    assert abs(sum(y) - 1.0) < 1e-6

    pyplot.clf()
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.title('P(k) versus k')
    pyplot.xlabel('k')
    pyplot.ylabel('P(k)')
    pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
    pyplot.legend(loc='upper right')
    pyplot.show()


if __name__ == '__main__':
    main(*sys.argv)