diff ch5ex6.py @ 32:a13c00c0dfe5

Chapter 5, exercise 6, #1: Barab?si and Albert.
author Brian Neal <bgneal@gmail.com>
date Wed, 09 Jan 2013 20:09:19 -0600
parents
children 66a5e7f7c10f
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ch5ex6.py	Wed Jan 09 20:09:19 2013 -0600
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+"""Chapter 5.5, exercise 6 in Allen Downey's Think Complexity book.
+
+1. Read Barabási and Albert’s paper and implement their algorithm for generating
+   graphs. See if you can replicate their Figure 2(A), which shows P(k) versus
+   k for a graph with 150 000 vertices.
+
+"""
+import collections
+import random
+import sys
+
+from matplotlib import pyplot
+
+from Graph import Graph, Vertex, Edge
+
+
+class BAGraph(Graph):
+    """BAGraph implements the algorithm described in the Barabási and
+    Albert paper "Emergence of Scaling in Random Networks" from Science
+    Magazine Vol. 286, 15 October 1999.
+
+    """
+    def __init__(self, m0, m):
+        """Create a graph with m0 vertices and m connecting edges.
+
+        """
+        self.m0 = m0
+        self.m = m
+        self.histogram = []
+
+        initial_vertices = [Vertex() for i in xrange(m0)]
+
+        super(BAGraph, self).__init__(vs=initial_vertices)
+
+        # Add initial edges between nodes randomly
+        n = 0
+        while n != m:
+            # pick two vertices at random
+            v = random.choice(initial_vertices)
+            w = random.choice(initial_vertices)
+
+            # if they aren't the same and don't already have an edge, add one
+            if v is not w and not self.get_edge(v, w):
+                self.add_edge(Edge(v, w))
+                n += 1
+
+    def add_edge(self, edge):
+        """Our version of add_edge() adds the two vertices on either end of the
+        edge to a histogram. This allows us to more easily pick popular vertices
+        when adding edges as part of the step() method.
+
+        """
+        super(BAGraph, self).add_edge(edge)     # invoke base class version
+
+        v, w = edge
+        self.histogram.append(v)
+        self.histogram.append(w)
+
+    def step(self):
+        """This method adds a new vertex to the graph, then adds m edges that
+        link the new vertex to m different vertices already present in the
+        graph. Preferential treatment is given towards vertices that already
+        have high connectivity.
+
+        The paper describes this preferential choosing as creating a probability
+        P that a new vertex will be connected to vertex i depends on the
+        connectivity ki of that vertex, so that P(ki) = ki / sum(kj).
+
+        We implement that by keeping a list of vertices (histogram), where we
+        insert a vertex into this list whenever it gets an edge added to it. We
+        then randomly choose a vertex from this list. Thus the vertices with
+        more edges will be more likely chosen.
+
+        """
+        # pick m unique vertices to attach to the new vertex
+        vs = set()
+        while len(vs) < self.m:
+            w = random.choice(self.histogram)
+            if w not in vs:
+                vs.add(w)
+
+        # Add the new vertex to the graph and create edges
+        v = Vertex()
+        self.add_vertex(v)
+        for w in vs:
+            self.add_edge(Edge(v, w))
+
+    def get_p(self):
+        """This method returns a dictionary of probabilities where each key is
+        the connectivity k and the value is the probability [0-1] for this
+        graph.
+
+        """
+        # First, for each vertex, count up how many neighbors it has
+        vs = self.vertices()
+
+        c = collections.Counter()
+        for v in vs:
+            n = len(self.out_vertices(v))
+            c[n] += 1
+
+        n = len(vs)
+        if n > 0:
+            for k in c:
+                c[k] = float(c[k]) / n
+
+        return c
+
+
+def main(script, m0, m, n):
+
+    # create a BAGraph with parameters m0 & m:
+    m0, m, n = int(m0), int(m), int(n)
+    g = BAGraph(m0, m)
+
+    # step the graph n times:
+    for i in xrange(n):
+        g.step()
+
+    # retrieve probabilities
+    p = g.get_p()
+
+    # plot P(k) versus k on a log-log scale
+
+    vals = p.items()
+    vals.sort(key=lambda t: t[0])
+    x, y = zip(*vals)
+
+    pyplot.clf()
+    pyplot.xscale('log')
+    pyplot.yscale('log')
+    pyplot.title('P(k) versus k')
+    pyplot.xlabel('k')
+    pyplot.ylabel('P(k)')
+    pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
+    pyplot.legend(loc='upper right')
+    pyplot.show()
+
+
+if __name__ == '__main__':
+    main(*sys.argv)