annotate Graph.py @ 36:305cc03c2750

Chapter 5.5, exercise 6, #3: compute WS clustering coefficient and characteristic length on a BA model graph.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 19:24:02 -0600
parents 10db8c3a6b83
children
rev   line source
bgneal@1 1 """ Code example from Complexity and Computation, a book about
bgneal@1 2 exploring complexity science with Python. Available free from
bgneal@1 3
bgneal@1 4 http://greenteapress.com/complexity
bgneal@1 5
bgneal@1 6 Copyright 2011 Allen B. Downey.
bgneal@1 7 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html.
bgneal@1 8 """
bgneal@36 9 import heapq
bgneal@1 10 import itertools
bgneal@35 11 from collections import deque, Counter
bgneal@1 12
bgneal@1 13
bgneal@36 14 INFINITY = float('Inf')
bgneal@36 15
bgneal@36 16
bgneal@5 17 class GraphError(Exception):
bgneal@5 18 """Exception for Graph errors"""
bgneal@5 19 pass
bgneal@5 20
bgneal@5 21
bgneal@1 22 class Vertex(object):
bgneal@1 23 """A Vertex is a node in a graph."""
bgneal@1 24
bgneal@1 25 def __init__(self, label=''):
bgneal@1 26 self.label = label
bgneal@1 27
bgneal@1 28 def __repr__(self):
bgneal@1 29 """Returns a string representation of this object that can
bgneal@1 30 be evaluated as a Python expression."""
bgneal@1 31 return 'Vertex(%s)' % repr(self.label)
bgneal@1 32
bgneal@1 33 __str__ = __repr__
bgneal@1 34 """The str and repr forms of this object are the same."""
bgneal@1 35
bgneal@1 36
bgneal@1 37 class Edge(tuple):
bgneal@1 38 """An Edge is a list of two vertices."""
bgneal@1 39
bgneal@1 40 def __new__(cls, *vs):
bgneal@1 41 """The Edge constructor takes two vertices."""
bgneal@1 42 if len(vs) != 2:
bgneal@1 43 raise ValueError, 'Edges must connect exactly two vertices.'
bgneal@1 44 return tuple.__new__(cls, vs)
bgneal@1 45
bgneal@1 46 def __repr__(self):
bgneal@1 47 """Return a string representation of this object that can
bgneal@1 48 be evaluated as a Python expression."""
bgneal@1 49 return 'Edge(%s, %s)' % (repr(self[0]), repr(self[1]))
bgneal@1 50
bgneal@1 51 __str__ = __repr__
bgneal@1 52 """The str and repr forms of this object are the same."""
bgneal@1 53
bgneal@1 54
bgneal@1 55 class Graph(dict):
bgneal@1 56 """A Graph is a dictionary of dictionaries. The outer
bgneal@1 57 dictionary maps from a vertex to an inner dictionary.
bgneal@1 58 The inner dictionary maps from other vertices to edges.
bgneal@1 59
bgneal@1 60 For vertices a and b, graph[a][b] maps
bgneal@1 61 to the edge that connects a->b, if it exists."""
bgneal@1 62
bgneal@6 63 def __init__(self, vs=None, es=None):
bgneal@1 64 """Creates a new graph.
bgneal@1 65 vs: list of vertices;
bgneal@1 66 es: list of edges.
bgneal@1 67 """
bgneal@6 68 if vs:
bgneal@6 69 for v in vs:
bgneal@6 70 self.add_vertex(v)
bgneal@1 71
bgneal@6 72 if es:
bgneal@6 73 for e in es:
bgneal@6 74 self.add_edge(e)
bgneal@1 75
bgneal@36 76 def set_edge_length(self, n=1):
bgneal@36 77 """Give each edge a length of n; this is used by the
bgneal@36 78 shortest_path_tree() method.
bgneal@36 79
bgneal@36 80 """
bgneal@36 81 for e in self.edges():
bgneal@36 82 e.length = n
bgneal@36 83
bgneal@1 84 def add_vertex(self, v):
bgneal@1 85 """Add a vertex to the graph."""
bgneal@1 86 self[v] = {}
bgneal@1 87
bgneal@1 88 def add_edge(self, e):
bgneal@1 89 """Adds and edge to the graph by adding an entry in both directions.
bgneal@1 90
bgneal@1 91 If there is already an edge connecting these Vertices, the
bgneal@1 92 new edge replaces it.
bgneal@1 93 """
bgneal@1 94 v, w = e
bgneal@1 95 self[v][w] = e
bgneal@1 96 self[w][v] = e
bgneal@1 97
bgneal@1 98 def get_edge(self, v, w):
bgneal@1 99 """Returns the edge object that exists between the two vertices v & w,
bgneal@1 100 or None if no such edge exists.
bgneal@1 101
bgneal@1 102 """
bgneal@1 103 try:
bgneal@1 104 return self[v][w]
bgneal@1 105 except KeyError:
bgneal@1 106 return None
bgneal@1 107
bgneal@1 108 def remove_edge(self, e):
bgneal@1 109 """Removes the edge e from the graph."""
bgneal@1 110
bgneal@4 111 v, w = e
bgneal@4 112 del self[v][w]
bgneal@4 113 del self[w][v]
bgneal@1 114
bgneal@1 115 def vertices(self):
bgneal@1 116 """Returns a list of the vertices in the graph."""
bgneal@1 117
bgneal@1 118 return self.keys()
bgneal@1 119
bgneal@1 120 def edges(self):
bgneal@1 121 """"Returns a list of the edges in the graph."""
bgneal@1 122
bgneal@1 123 edge_set = set()
bgneal@4 124 for d in self.itervalues():
bgneal@4 125 edge_set.update(d.itervalues())
bgneal@1 126
bgneal@1 127 return list(edge_set)
bgneal@1 128
bgneal@1 129 def out_vertices(self, v):
bgneal@1 130 """Returns a list of vertices that are adjacent to the given vertex v.
bgneal@1 131
bgneal@1 132 """
bgneal@1 133 return self[v].keys()
bgneal@1 134
bgneal@1 135 def out_edges(self, v):
bgneal@1 136 """Returns a list of edges connected to a given vertex v."""
bgneal@1 137
bgneal@1 138 return self[v].values()
bgneal@1 139
bgneal@5 140 def remove_all_edges(self):
bgneal@5 141 """Removes all edges in the graph."""
bgneal@5 142 for v in self.iterkeys():
bgneal@5 143 self[v] = {}
bgneal@5 144
bgneal@1 145 def add_all_edges(self):
bgneal@1 146 """Makes the graph complete by adding edges between all pairs of
bgneal@1 147 vertices.
bgneal@1 148
bgneal@1 149 """
bgneal@1 150 # Clear all edges first
bgneal@5 151 self.remove_all_edges()
bgneal@1 152
bgneal@5 153 # For each combination of 2 vertices, create an edge between them:
bgneal@1 154 for v, w in itertools.combinations(self.iterkeys(), 2):
bgneal@6 155 self.add_edge(Edge(v, w))
bgneal@1 156
bgneal@5 157 def add_regular_edges(self, k):
bgneal@5 158 """Makes the graph regular by making every vertex have k edges.
bgneal@5 159
bgneal@5 160 It is not always possible to create a regular graph with a given degree.
bgneal@5 161 If a graph has n vertices, then a regular graph can be constructed with
bgneal@5 162 degree k if n >= k + 1 and n * k is even. If these conditions are not
bgneal@5 163 met a GraphError exception is raised.
bgneal@5 164
bgneal@5 165 """
bgneal@5 166 n = len(self.vertices())
bgneal@5 167 if n < k + 1:
bgneal@5 168 raise GraphError("Can't make a regular graph with degree >= number"
bgneal@5 169 " of vertices")
bgneal@5 170 if (n * k) % 2 != 0:
bgneal@5 171 raise GraphError("Can't make a regular graph of degree k and"
bgneal@5 172 " order n where k * n is odd")
bgneal@5 173
bgneal@5 174 # Remove all edges first
bgneal@5 175 self.remove_all_edges()
bgneal@5 176
bgneal@5 177 if k % 2 != 0: # if k is odd
bgneal@5 178 self._add_regular_edges_even(k - 1)
bgneal@5 179 self._add_regular_edges_odd()
bgneal@5 180 else:
bgneal@5 181 self._add_regular_edges_even(k)
bgneal@5 182
bgneal@5 183 def _add_regular_edges_even(self, k):
bgneal@5 184 """Make a regular graph with degree k. k must be even."""
bgneal@5 185
bgneal@5 186 vs = self.vertices()
bgneal@5 187 vs2 = vs * 2
bgneal@5 188
bgneal@5 189 for i, v in enumerate(vs):
bgneal@5 190 for j in range(1, k / 2 + 1):
bgneal@5 191 w = vs2[i + j]
bgneal@5 192 self.add_edge(Edge(v, w))
bgneal@5 193
bgneal@5 194 def _add_regular_edges_odd(self):
bgneal@5 195 """Adds an extra edge across the graph to finish off a regular graph
bgneal@5 196 with odd degree. The number of vertices must be even.
bgneal@5 197
bgneal@5 198 """
bgneal@5 199 vs = self.vertices()
bgneal@5 200 vs2 = vs * 2
bgneal@5 201 n = len(vs)
bgneal@5 202
bgneal@5 203 for i in range(n / 2):
bgneal@5 204 v = vs2[i]
bgneal@5 205 w = vs2[i + n / 2]
bgneal@5 206 self.add_edge(Edge(v, w))
bgneal@5 207
bgneal@7 208 def bfs(self, start, visit_func=None):
bgneal@7 209 """Perform a breadth first search starting at node start.
bgneal@7 210
bgneal@7 211 The function visit_func, if supplied, is invoked on each node.
bgneal@7 212
bgneal@8 213 The set of visited nodes is returned.
bgneal@8 214
bgneal@7 215 """
bgneal@8 216 visited = set()
bgneal@7 217
bgneal@7 218 # Create a work queue consisting initially of the starting node
bgneal@8 219 queue = deque([start])
bgneal@7 220
bgneal@7 221 while queue:
bgneal@7 222 # retrieve first item from the queue
bgneal@8 223 v = queue.popleft()
bgneal@7 224
bgneal@8 225 if v in visited:
bgneal@7 226 continue # Skip this one if we've seen it before
bgneal@7 227
bgneal@7 228 # Mark it as visited and invoke user's function on it
bgneal@8 229 visited.add(v)
bgneal@7 230 if visit_func:
bgneal@7 231 visit_func(v)
bgneal@7 232
bgneal@7 233 # Add the adjacent neigbors to the node to the queue
bgneal@22 234 queue.extend(c for c in self.out_vertices(v) if c not in visited)
bgneal@7 235
bgneal@8 236 return visited
bgneal@8 237
bgneal@7 238 def is_connected(self):
bgneal@7 239 """Returns True if the graph is connected (there is a path from every
bgneal@7 240 node to every other node) and False otherwise.
bgneal@7 241
bgneal@7 242 """
bgneal@7 243 vs = self.vertices()
bgneal@7 244 if len(vs):
bgneal@8 245 visited = self.bfs(vs[0])
bgneal@7 246 # See if all nodes have been visited
bgneal@8 247 return len(vs) == len(visited)
bgneal@7 248
bgneal@7 249 return False # Graph is empty
bgneal@7 250
bgneal@35 251 def get_p(self):
bgneal@35 252 """This method returns a dictionary of probabilities where each key is
bgneal@35 253 the connectivity k and the value is the probability [0-1] for this
bgneal@35 254 graph.
bgneal@35 255
bgneal@35 256 """
bgneal@35 257 # First, for each vertex, count up how many neighbors it has
bgneal@35 258 vs = self.vertices()
bgneal@35 259
bgneal@35 260 c = Counter()
bgneal@35 261 for v in vs:
bgneal@35 262 n = len(self.out_vertices(v))
bgneal@35 263 c[n] += 1
bgneal@35 264
bgneal@35 265 n = len(vs)
bgneal@35 266 if n > 0:
bgneal@35 267 for k in c:
bgneal@35 268 c[k] = float(c[k]) / n
bgneal@35 269
bgneal@35 270 return c
bgneal@35 271
bgneal@36 272 def clustering_coefficient(self):
bgneal@36 273 """Compute the clustering coefficient for this graph as defined by Watts
bgneal@36 274 and Strogatz.
bgneal@36 275
bgneal@36 276 """
bgneal@36 277 cv = {}
bgneal@36 278 for v in self:
bgneal@36 279 # consider a node and its neighbors
bgneal@36 280 nodes = self.out_vertices(v)
bgneal@36 281 nodes.append(v)
bgneal@36 282
bgneal@36 283 # compute the maximum number of possible edges between these nodes
bgneal@36 284 # if they were all connected to each other:
bgneal@36 285 n = len(nodes)
bgneal@36 286 if n == 1:
bgneal@36 287 # edge case of only 1 node; handle this case to avoid division
bgneal@36 288 # by zero in the general case
bgneal@36 289 cv[v] = 1.0
bgneal@36 290 continue
bgneal@36 291
bgneal@36 292 possible = n * (n - 1) / 2.0
bgneal@36 293
bgneal@36 294 # now compute how many edges actually exist between the nodes
bgneal@36 295 actual = 0
bgneal@36 296 for x, y in itertools.combinations(nodes, 2):
bgneal@36 297 if self.get_edge(x, y):
bgneal@36 298 actual += 1
bgneal@36 299
bgneal@36 300 # the fraction of actual / possible is this nodes C sub v value
bgneal@36 301 cv[v] = actual / possible
bgneal@36 302
bgneal@36 303 # The clustering coefficient is the average of all C sub v values
bgneal@36 304 if len(cv):
bgneal@36 305 return sum(cv.values()) / float(len(cv))
bgneal@36 306 return 0.0
bgneal@36 307
bgneal@36 308 def shortest_path_tree(self, source, hint=None):
bgneal@36 309 """Finds the length of the shortest path from the source vertex to all
bgneal@36 310 other vertices in the graph. This length is stored on the vertices as an
bgneal@36 311 attribute named 'dist'. The algorithm used is Dijkstra's.
bgneal@36 312
bgneal@36 313 hint: if provided, must be a dictionary mapping tuples to already known
bgneal@36 314 shortest path distances. This can be used to speed up the algorithm.
bgneal@36 315
bgneal@36 316 """
bgneal@36 317 if not hint:
bgneal@36 318 hint = {}
bgneal@36 319
bgneal@36 320 for v in self.vertices():
bgneal@36 321 v.dist = hint.get((source, v), INFINITY)
bgneal@36 322 source.dist = 0
bgneal@36 323
bgneal@36 324 queue = [v for v in self.vertices() if v.dist < INFINITY]
bgneal@36 325 sort_flag = True
bgneal@36 326 while len(queue):
bgneal@36 327
bgneal@36 328 if sort_flag:
bgneal@36 329 queue.sort(key=lambda v: v.dist)
bgneal@36 330 sort_flag = False
bgneal@36 331
bgneal@36 332 v = queue.pop(0)
bgneal@36 333
bgneal@36 334 # for each neighbor of v, see if we found a new shortest path
bgneal@36 335 for w, e in self[v].iteritems():
bgneal@36 336 d = v.dist + e.length
bgneal@36 337 if d < w.dist:
bgneal@36 338 w.dist = d
bgneal@36 339 queue.append(w)
bgneal@36 340 sort_flag = True
bgneal@36 341
bgneal@36 342 def shortest_path_tree2(self, source):
bgneal@36 343 """Finds the length of the shortest path from the source vertex to all
bgneal@36 344 other vertices in the graph. This length is stored on the vertices as an
bgneal@36 345 attribute named 'dist'. The algorithm used is Dijkstra's with a Heap
bgneal@36 346 used to sort/store pending nodes to be examined.
bgneal@36 347
bgneal@36 348 """
bgneal@36 349 for v in self.vertices():
bgneal@36 350 v.dist = INFINITY
bgneal@36 351 source.dist = 0
bgneal@36 352
bgneal@36 353 queue = []
bgneal@36 354 heapq.heappush(queue, (0, source))
bgneal@36 355 while len(queue):
bgneal@36 356
bgneal@36 357 _, v = heapq.heappop(queue)
bgneal@36 358
bgneal@36 359 # for each neighbor of v, see if we found a new shortest path
bgneal@36 360 for w, e in self[v].iteritems():
bgneal@36 361 d = v.dist + e.length
bgneal@36 362 if d < w.dist:
bgneal@36 363 w.dist = d
bgneal@36 364 heapq.heappush(queue, (d, w))
bgneal@36 365
bgneal@36 366 def all_pairs_floyd_warshall(self):
bgneal@36 367 """Finds the shortest paths between all pairs of vertices using the
bgneal@36 368 Floyd-Warshall algorithm.
bgneal@36 369
bgneal@36 370 http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm
bgneal@36 371
bgneal@36 372 """
bgneal@36 373 vertices = self.vertices()
bgneal@36 374 dist = {}
bgneal@36 375 for i in vertices:
bgneal@36 376 for j in vertices:
bgneal@36 377 if i is j:
bgneal@36 378 dist[i, j] = 0.0
bgneal@36 379 else:
bgneal@36 380 e = self.get_edge(i, j)
bgneal@36 381 dist[i, j] = e.length if e else INFINITY
bgneal@36 382
bgneal@36 383 for k in vertices:
bgneal@36 384 for i in vertices:
bgneal@36 385 for j in vertices:
bgneal@36 386 d_ik = dist[i, k]
bgneal@36 387 d_kj = dist[k, j]
bgneal@36 388 new_cost = d_ik + d_kj
bgneal@36 389 if new_cost < dist[i, j]:
bgneal@36 390 dist[i, j] = new_cost
bgneal@36 391
bgneal@36 392 return dist
bgneal@36 393
bgneal@36 394 def big_l(self):
bgneal@36 395 """Computes the "big-L" value for the graph as per Watts & Strogatz.
bgneal@36 396
bgneal@36 397 L is defined as the number of edges in the shortest path between
bgneal@36 398 two vertices, averaged over all vertices.
bgneal@36 399
bgneal@36 400 Uses the shortest_path_tree() method, called once for every node.
bgneal@36 401
bgneal@36 402 """
bgneal@36 403 d = {}
bgneal@36 404 for v in self.vertices():
bgneal@36 405 self.shortest_path_tree(v, d)
bgneal@36 406 t = [((w, v), w.dist) for w in self.vertices() if v is not w]
bgneal@36 407 d.update(t)
bgneal@36 408
bgneal@36 409 if len(d):
bgneal@36 410 return sum(d.values()) / float(len(d))
bgneal@36 411 return 0.0
bgneal@36 412
bgneal@36 413 def big_l2(self):
bgneal@36 414 """Computes the "big-L" value for the graph as per Watts & Strogatz.
bgneal@36 415
bgneal@36 416 L is defined as the number of edges in the shortest path between
bgneal@36 417 two vertices, averaged over all vertices.
bgneal@36 418
bgneal@36 419 Uses the all_pairs_floyd_warshall() method.
bgneal@36 420
bgneal@36 421 """
bgneal@36 422 dist = self.all_pairs_floyd_warshall()
bgneal@36 423 vertices = self.vertices()
bgneal@36 424 result = [dist[i, j] for i in vertices for j in vertices if i is not j]
bgneal@36 425
bgneal@36 426 if len(result):
bgneal@36 427 return sum(result) / float(len(result))
bgneal@36 428 return 0.0
bgneal@36 429
bgneal@36 430 def big_l3(self):
bgneal@36 431 """Computes the "big-L" value for the graph as per Watts & Strogatz.
bgneal@36 432
bgneal@36 433 L is defined as the number of edges in the shortest path between
bgneal@36 434 two vertices, averaged over all vertices.
bgneal@36 435
bgneal@36 436 Uses the shortest_path_tree2() method, called once for every node.
bgneal@36 437
bgneal@36 438 """
bgneal@36 439 d = {}
bgneal@36 440 for v in self.vertices():
bgneal@36 441 self.shortest_path_tree2(v)
bgneal@36 442 t = [((v, w), w.dist) for w in self.vertices() if v is not w]
bgneal@36 443 d.update(t)
bgneal@36 444
bgneal@36 445 if len(d):
bgneal@36 446 return sum(d.values()) / float(len(d))
bgneal@36 447 return 0.0
bgneal@36 448
bgneal@1 449
bgneal@1 450 def main(script, *args):
bgneal@1 451 import pprint
bgneal@1 452
bgneal@1 453 v = Vertex('v')
bgneal@1 454 print v
bgneal@1 455 w = Vertex('w')
bgneal@1 456 print w
bgneal@1 457 e = Edge(v, w)
bgneal@1 458 print e
bgneal@1 459 g = Graph([v,w], [e])
bgneal@1 460 pprint.pprint(g)
bgneal@1 461
bgneal@1 462 print "g.get_edge(v, w): ", g.get_edge(v, w)
bgneal@1 463 x = Vertex('x')
bgneal@1 464 print "g.get_edge(v, x): ", g.get_edge(v, x)
bgneal@1 465
bgneal@1 466 g.remove_edge(e)
bgneal@1 467 pprint.pprint(g)
bgneal@1 468
bgneal@1 469 print "vertices: ", g.vertices()
bgneal@1 470 print "edges: ", g.edges()
bgneal@1 471
bgneal@1 472 g.add_edge(e)
bgneal@1 473 u = Vertex('u')
bgneal@1 474 e1 = Edge(u, v)
bgneal@1 475 e2 = Edge(u, w)
bgneal@1 476 g.add_vertex(u)
bgneal@1 477 g.add_edge(e1)
bgneal@1 478 g.add_edge(e2)
bgneal@1 479 print "Adding vertex u and edges:"
bgneal@1 480 pprint.pprint(g)
bgneal@1 481 print "vertices: ", g.vertices()
bgneal@1 482 print "edges: ", g.edges()
bgneal@1 483
bgneal@1 484 print "Out vertices for v: ", g.out_vertices(v)
bgneal@1 485 print "Out edges for v: ", g.out_edges(v)
bgneal@1 486
bgneal@1 487 x = Vertex('x')
bgneal@1 488 g.add_vertex(x)
bgneal@1 489 g.add_all_edges()
bgneal@1 490 pprint.pprint(g)
bgneal@1 491
bgneal@7 492 print "g is connected?", g.is_connected()
bgneal@7 493 edges = g.out_edges(v)
bgneal@7 494 for e in edges:
bgneal@7 495 g.remove_edge(e)
bgneal@7 496 pprint.pprint(g)
bgneal@7 497 print "g is connected?", g.is_connected()
bgneal@7 498
bgneal@7 499 # Isolate v and check is_connected() again
bgneal@7 500
bgneal@1 501
bgneal@1 502 if __name__ == '__main__':
bgneal@1 503 import sys
bgneal@1 504 main(*sys.argv)