annotate SmallWorldGraph.py @ 27:78116556b491

Exercise 1 in chapter 5.1 (Zipf's law).
author Brian Neal <bgneal@gmail.com>
date Sat, 05 Jan 2013 16:38:24 -0600
parents f6073c187926
children 305cc03c2750
rev   line source
bgneal@23 1 """This module contains the SmallWorldGraph class for 4.4, exercise 4.
bgneal@23 2
bgneal@23 3 """
bgneal@26 4 import heapq
bgneal@23 5 import itertools
bgneal@23 6 import random
bgneal@23 7
bgneal@23 8 from Graph import Edge
bgneal@23 9 from RandomGraph import RandomGraph
bgneal@23 10
bgneal@23 11
bgneal@24 12 INFINITY = float('Inf')
bgneal@24 13
bgneal@23 14 class SmallWorldGraph(RandomGraph):
bgneal@23 15 """A small world graph for 4.4, exercise 4 in Think Complexity."""
bgneal@23 16
bgneal@23 17 def __init__(self, vs, k, p):
bgneal@23 18 """Create a small world graph. Parameters:
bgneal@23 19 vs - a list of vertices
bgneal@23 20 k - regular graph parameter k: the number of edges between vertices
bgneal@23 21 p - probability for rewiring.
bgneal@23 22
bgneal@23 23 First a regular graph is created from the list of vertices and parameter
bgneal@23 24 k, which specifies how many edges each vertex should have. Then the
bgneal@23 25 rewire() method is called with parameter p to rewire the graph according
bgneal@23 26 to the algorithm by Watts and Strogatz.
bgneal@23 27
bgneal@23 28 """
bgneal@23 29 super(SmallWorldGraph, self).__init__(vs=vs)
bgneal@23 30 self.add_regular_edges(k)
bgneal@23 31 self.rewire(p)
bgneal@23 32
bgneal@24 33 # give each edge a default length of 1; this is used by the
bgneal@24 34 # shortest_path_tree() method
bgneal@24 35 for e in self.edges():
bgneal@24 36 e.length = 1
bgneal@24 37
bgneal@23 38 def rewire(self, p):
bgneal@23 39 """Assuming the graph is initially a regular graph, rewires the graph
bgneal@23 40 using the Watts and Strogatz algorithm.
bgneal@23 41
bgneal@23 42 """
bgneal@23 43 # We iterate over the edges in a random order:
bgneal@23 44 edges = self.edges()
bgneal@23 45 random.shuffle(edges)
bgneal@23 46 vertices = self.vertices()
bgneal@23 47
bgneal@23 48 for e in edges:
bgneal@23 49 if random.random() < p:
bgneal@23 50 v, w = e # remember the 2 vertices this edge connected
bgneal@23 51 self.remove_edge(e) # remove from graph
bgneal@23 52
bgneal@23 53 # pick another vertex to connect to v; duplicate edges are
bgneal@23 54 # forbidden
bgneal@23 55 while True:
bgneal@23 56 x = random.choice(vertices)
bgneal@23 57 if v is not x and not self.get_edge(v, x):
bgneal@23 58 self.add_edge(Edge(v, x))
bgneal@23 59 break
bgneal@23 60
bgneal@23 61 def clustering_coefficient(self):
bgneal@23 62 """Compute the clustering coefficient for this graph as defined by Watts
bgneal@23 63 and Strogatz.
bgneal@23 64
bgneal@23 65 """
bgneal@23 66 cv = {}
bgneal@23 67 for v in self:
bgneal@23 68 # consider a node and its neighbors
bgneal@23 69 nodes = self.out_vertices(v)
bgneal@23 70 nodes.append(v)
bgneal@23 71
bgneal@23 72 # compute the maximum number of possible edges between these nodes
bgneal@23 73 # if they were all connected to each other:
bgneal@23 74 n = len(nodes)
bgneal@23 75 if n == 1:
bgneal@23 76 # edge case of only 1 node; handle this case to avoid division
bgneal@23 77 # by zero in the general case
bgneal@23 78 cv[v] = 1.0
bgneal@23 79 continue
bgneal@23 80
bgneal@23 81 possible = n * (n - 1) / 2.0
bgneal@23 82
bgneal@23 83 # now compute how many edges actually exist between the nodes
bgneal@23 84 actual = 0
bgneal@23 85 for x, y in itertools.combinations(nodes, 2):
bgneal@23 86 if self.get_edge(x, y):
bgneal@23 87 actual += 1
bgneal@23 88
bgneal@23 89 # the fraction of actual / possible is this nodes C sub v value
bgneal@23 90 cv[v] = actual / possible
bgneal@23 91
bgneal@23 92 # The clustering coefficient is the average of all C sub v values
bgneal@23 93 if len(cv):
bgneal@23 94 return sum(cv.values()) / float(len(cv))
bgneal@23 95 return 0.0
bgneal@24 96
bgneal@24 97 def shortest_path_tree(self, source, hint=None):
bgneal@24 98 """Finds the length of the shortest path from the source vertex to all
bgneal@24 99 other vertices in the graph. This length is stored on the vertices as an
bgneal@24 100 attribute named 'dist'. The algorithm used is Dijkstra's.
bgneal@24 101
bgneal@24 102 hint: if provided, must be a dictionary mapping tuples to already known
bgneal@24 103 shortest path distances. This can be used to speed up the algorithm.
bgneal@24 104
bgneal@24 105 """
bgneal@24 106 if not hint:
bgneal@24 107 hint = {}
bgneal@24 108
bgneal@24 109 for v in self.vertices():
bgneal@24 110 v.dist = hint.get((source, v), INFINITY)
bgneal@24 111 source.dist = 0
bgneal@24 112
bgneal@24 113 queue = [v for v in self.vertices() if v.dist < INFINITY]
bgneal@24 114 sort_flag = True
bgneal@24 115 while len(queue):
bgneal@24 116
bgneal@24 117 if sort_flag:
bgneal@24 118 queue.sort(key=lambda v: v.dist)
bgneal@24 119 sort_flag = False
bgneal@24 120
bgneal@24 121 v = queue.pop(0)
bgneal@24 122
bgneal@24 123 # for each neighbor of v, see if we found a new shortest path
bgneal@24 124 for w, e in self[v].iteritems():
bgneal@24 125 d = v.dist + e.length
bgneal@24 126 if d < w.dist:
bgneal@24 127 w.dist = d
bgneal@24 128 queue.append(w)
bgneal@24 129 sort_flag = True
bgneal@24 130
bgneal@26 131 def shortest_path_tree2(self, source):
bgneal@26 132 """Finds the length of the shortest path from the source vertex to all
bgneal@26 133 other vertices in the graph. This length is stored on the vertices as an
bgneal@26 134 attribute named 'dist'. The algorithm used is Dijkstra's with a Heap
bgneal@26 135 used to sort/store pending nodes to be examined.
bgneal@26 136
bgneal@26 137 """
bgneal@26 138 for v in self.vertices():
bgneal@26 139 v.dist = INFINITY
bgneal@26 140 source.dist = 0
bgneal@26 141
bgneal@26 142 queue = []
bgneal@26 143 heapq.heappush(queue, (0, source))
bgneal@26 144 while len(queue):
bgneal@26 145
bgneal@26 146 _, v = heapq.heappop(queue)
bgneal@26 147
bgneal@26 148 # for each neighbor of v, see if we found a new shortest path
bgneal@26 149 for w, e in self[v].iteritems():
bgneal@26 150 d = v.dist + e.length
bgneal@26 151 if d < w.dist:
bgneal@26 152 w.dist = d
bgneal@26 153 heapq.heappush(queue, (d, w))
bgneal@26 154
bgneal@25 155 def all_pairs_floyd_warshall(self):
bgneal@25 156 """Finds the shortest paths between all pairs of vertices using the
bgneal@25 157 Floyd-Warshall algorithm.
bgneal@25 158
bgneal@25 159 http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm
bgneal@25 160
bgneal@25 161 """
bgneal@25 162 vertices = self.vertices()
bgneal@25 163 dist = {}
bgneal@25 164 for i in vertices:
bgneal@25 165 for j in vertices:
bgneal@25 166 if i is j:
bgneal@25 167 dist[i, j] = 0.0
bgneal@25 168 else:
bgneal@25 169 e = self.get_edge(i, j)
bgneal@25 170 dist[i, j] = e.length if e else INFINITY
bgneal@25 171
bgneal@25 172 for k in vertices:
bgneal@25 173 for i in vertices:
bgneal@25 174 for j in vertices:
bgneal@25 175 d_ik = dist[i, k]
bgneal@25 176 d_kj = dist[k, j]
bgneal@25 177 new_cost = d_ik + d_kj
bgneal@25 178 if new_cost < dist[i, j]:
bgneal@25 179 dist[i, j] = new_cost
bgneal@25 180
bgneal@25 181 return dist
bgneal@25 182
bgneal@24 183 def big_l(self):
bgneal@24 184 """Computes the "big-L" value for the graph as per Watts & Strogatz.
bgneal@24 185
bgneal@26 186 L is defined as the number of edges in the shortest path between
bgneal@24 187 two vertices, averaged over all vertices.
bgneal@24 188
bgneal@25 189 Uses the shortest_path_tree() method, called once for every node.
bgneal@25 190
bgneal@24 191 """
bgneal@24 192 d = {}
bgneal@24 193 for v in self.vertices():
bgneal@24 194 self.shortest_path_tree(v, d)
bgneal@24 195 t = [((w, v), w.dist) for w in self.vertices() if v is not w]
bgneal@24 196 d.update(t)
bgneal@24 197
bgneal@24 198 if len(d):
bgneal@24 199 return sum(d.values()) / float(len(d))
bgneal@24 200 return 0.0
bgneal@25 201
bgneal@25 202 def big_l2(self):
bgneal@25 203 """Computes the "big-L" value for the graph as per Watts & Strogatz.
bgneal@25 204
bgneal@26 205 L is defined as the number of edges in the shortest path between
bgneal@25 206 two vertices, averaged over all vertices.
bgneal@25 207
bgneal@25 208 Uses the all_pairs_floyd_warshall() method.
bgneal@25 209
bgneal@25 210 """
bgneal@25 211 dist = self.all_pairs_floyd_warshall()
bgneal@25 212 vertices = self.vertices()
bgneal@25 213 result = [dist[i, j] for i in vertices for j in vertices if i is not j]
bgneal@25 214
bgneal@25 215 if len(result):
bgneal@25 216 return sum(result) / float(len(result))
bgneal@25 217 return 0.0
bgneal@26 218
bgneal@26 219 def big_l3(self):
bgneal@26 220 """Computes the "big-L" value for the graph as per Watts & Strogatz.
bgneal@26 221
bgneal@26 222 L is defined as the number of edges in the shortest path between
bgneal@26 223 two vertices, averaged over all vertices.
bgneal@26 224
bgneal@26 225 Uses the shortest_path_tree2() method, called once for every node.
bgneal@26 226
bgneal@26 227 """
bgneal@26 228 d = {}
bgneal@26 229 for v in self.vertices():
bgneal@26 230 self.shortest_path_tree2(v)
bgneal@26 231 t = [((v, w), w.dist) for w in self.vertices() if v is not w]
bgneal@26 232 d.update(t)
bgneal@26 233
bgneal@26 234 if len(d):
bgneal@26 235 return sum(d.values()) / float(len(d))
bgneal@26 236 return 0.0
bgneal@26 237