annotate ch5ex6.py @ 34:66a5e7f7c10f

Added a check to see if we were calculating the probabilities correctly.
author Brian Neal <bgneal@gmail.com>
date Wed, 09 Jan 2013 20:19:49 -0600
parents a13c00c0dfe5
children 10db8c3a6b83
rev   line source
bgneal@32 1 # -*- coding: utf-8 -*-
bgneal@32 2 """Chapter 5.5, exercise 6 in Allen Downey's Think Complexity book.
bgneal@32 3
bgneal@32 4 1. Read Barabási and Albert’s paper and implement their algorithm for generating
bgneal@32 5 graphs. See if you can replicate their Figure 2(A), which shows P(k) versus
bgneal@32 6 k for a graph with 150 000 vertices.
bgneal@32 7
bgneal@32 8 """
bgneal@32 9 import collections
bgneal@32 10 import random
bgneal@32 11 import sys
bgneal@32 12
bgneal@32 13 from matplotlib import pyplot
bgneal@32 14
bgneal@32 15 from Graph import Graph, Vertex, Edge
bgneal@32 16
bgneal@32 17
bgneal@32 18 class BAGraph(Graph):
bgneal@32 19 """BAGraph implements the algorithm described in the Barabási and
bgneal@32 20 Albert paper "Emergence of Scaling in Random Networks" from Science
bgneal@32 21 Magazine Vol. 286, 15 October 1999.
bgneal@32 22
bgneal@32 23 """
bgneal@32 24 def __init__(self, m0, m):
bgneal@32 25 """Create a graph with m0 vertices and m connecting edges.
bgneal@32 26
bgneal@32 27 """
bgneal@32 28 self.m0 = m0
bgneal@32 29 self.m = m
bgneal@32 30 self.histogram = []
bgneal@32 31
bgneal@32 32 initial_vertices = [Vertex() for i in xrange(m0)]
bgneal@32 33
bgneal@32 34 super(BAGraph, self).__init__(vs=initial_vertices)
bgneal@32 35
bgneal@32 36 # Add initial edges between nodes randomly
bgneal@32 37 n = 0
bgneal@32 38 while n != m:
bgneal@32 39 # pick two vertices at random
bgneal@32 40 v = random.choice(initial_vertices)
bgneal@32 41 w = random.choice(initial_vertices)
bgneal@32 42
bgneal@32 43 # if they aren't the same and don't already have an edge, add one
bgneal@32 44 if v is not w and not self.get_edge(v, w):
bgneal@32 45 self.add_edge(Edge(v, w))
bgneal@32 46 n += 1
bgneal@32 47
bgneal@32 48 def add_edge(self, edge):
bgneal@32 49 """Our version of add_edge() adds the two vertices on either end of the
bgneal@32 50 edge to a histogram. This allows us to more easily pick popular vertices
bgneal@32 51 when adding edges as part of the step() method.
bgneal@32 52
bgneal@32 53 """
bgneal@32 54 super(BAGraph, self).add_edge(edge) # invoke base class version
bgneal@32 55
bgneal@32 56 v, w = edge
bgneal@32 57 self.histogram.append(v)
bgneal@32 58 self.histogram.append(w)
bgneal@32 59
bgneal@32 60 def step(self):
bgneal@32 61 """This method adds a new vertex to the graph, then adds m edges that
bgneal@32 62 link the new vertex to m different vertices already present in the
bgneal@32 63 graph. Preferential treatment is given towards vertices that already
bgneal@32 64 have high connectivity.
bgneal@32 65
bgneal@32 66 The paper describes this preferential choosing as creating a probability
bgneal@32 67 P that a new vertex will be connected to vertex i depends on the
bgneal@32 68 connectivity ki of that vertex, so that P(ki) = ki / sum(kj).
bgneal@32 69
bgneal@32 70 We implement that by keeping a list of vertices (histogram), where we
bgneal@32 71 insert a vertex into this list whenever it gets an edge added to it. We
bgneal@32 72 then randomly choose a vertex from this list. Thus the vertices with
bgneal@32 73 more edges will be more likely chosen.
bgneal@32 74
bgneal@32 75 """
bgneal@32 76 # pick m unique vertices to attach to the new vertex
bgneal@32 77 vs = set()
bgneal@32 78 while len(vs) < self.m:
bgneal@32 79 w = random.choice(self.histogram)
bgneal@32 80 if w not in vs:
bgneal@32 81 vs.add(w)
bgneal@32 82
bgneal@32 83 # Add the new vertex to the graph and create edges
bgneal@32 84 v = Vertex()
bgneal@32 85 self.add_vertex(v)
bgneal@32 86 for w in vs:
bgneal@32 87 self.add_edge(Edge(v, w))
bgneal@32 88
bgneal@32 89 def get_p(self):
bgneal@32 90 """This method returns a dictionary of probabilities where each key is
bgneal@32 91 the connectivity k and the value is the probability [0-1] for this
bgneal@32 92 graph.
bgneal@32 93
bgneal@32 94 """
bgneal@32 95 # First, for each vertex, count up how many neighbors it has
bgneal@32 96 vs = self.vertices()
bgneal@32 97
bgneal@32 98 c = collections.Counter()
bgneal@32 99 for v in vs:
bgneal@32 100 n = len(self.out_vertices(v))
bgneal@32 101 c[n] += 1
bgneal@32 102
bgneal@32 103 n = len(vs)
bgneal@32 104 if n > 0:
bgneal@32 105 for k in c:
bgneal@32 106 c[k] = float(c[k]) / n
bgneal@32 107
bgneal@32 108 return c
bgneal@32 109
bgneal@32 110
bgneal@32 111 def main(script, m0, m, n):
bgneal@32 112
bgneal@32 113 # create a BAGraph with parameters m0 & m:
bgneal@32 114 m0, m, n = int(m0), int(m), int(n)
bgneal@32 115 g = BAGraph(m0, m)
bgneal@32 116
bgneal@32 117 # step the graph n times:
bgneal@32 118 for i in xrange(n):
bgneal@32 119 g.step()
bgneal@32 120
bgneal@32 121 # retrieve probabilities
bgneal@32 122 p = g.get_p()
bgneal@32 123
bgneal@32 124 # plot P(k) versus k on a log-log scale
bgneal@32 125
bgneal@32 126 vals = p.items()
bgneal@32 127 vals.sort(key=lambda t: t[0])
bgneal@32 128 x, y = zip(*vals)
bgneal@32 129
bgneal@34 130 assert abs(sum(y) - 1.0) < 1e-6
bgneal@34 131
bgneal@32 132 pyplot.clf()
bgneal@32 133 pyplot.xscale('log')
bgneal@32 134 pyplot.yscale('log')
bgneal@32 135 pyplot.title('P(k) versus k')
bgneal@32 136 pyplot.xlabel('k')
bgneal@32 137 pyplot.ylabel('P(k)')
bgneal@32 138 pyplot.plot(x, y, label='P(k) vs. k', color='green', linewidth=3)
bgneal@32 139 pyplot.legend(loc='upper right')
bgneal@32 140 pyplot.show()
bgneal@32 141
bgneal@32 142
bgneal@32 143 if __name__ == '__main__':
bgneal@32 144 main(*sys.argv)