comparison Graph.py @ 36:305cc03c2750

Chapter 5.5, exercise 6, #3: compute WS clustering coefficient and characteristic length on a BA model graph.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 19:24:02 -0600
parents 10db8c3a6b83
children
comparison
equal deleted inserted replaced
35:10db8c3a6b83 36:305cc03c2750
4 http://greenteapress.com/complexity 4 http://greenteapress.com/complexity
5 5
6 Copyright 2011 Allen B. Downey. 6 Copyright 2011 Allen B. Downey.
7 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html. 7 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html.
8 """ 8 """
9 import heapq
9 import itertools 10 import itertools
10 from collections import deque, Counter 11 from collections import deque, Counter
12
13
14 INFINITY = float('Inf')
11 15
12 16
13 class GraphError(Exception): 17 class GraphError(Exception):
14 """Exception for Graph errors""" 18 """Exception for Graph errors"""
15 pass 19 pass
66 self.add_vertex(v) 70 self.add_vertex(v)
67 71
68 if es: 72 if es:
69 for e in es: 73 for e in es:
70 self.add_edge(e) 74 self.add_edge(e)
75
76 def set_edge_length(self, n=1):
77 """Give each edge a length of n; this is used by the
78 shortest_path_tree() method.
79
80 """
81 for e in self.edges():
82 e.length = n
71 83
72 def add_vertex(self, v): 84 def add_vertex(self, v):
73 """Add a vertex to the graph.""" 85 """Add a vertex to the graph."""
74 self[v] = {} 86 self[v] = {}
75 87
254 if n > 0: 266 if n > 0:
255 for k in c: 267 for k in c:
256 c[k] = float(c[k]) / n 268 c[k] = float(c[k]) / n
257 269
258 return c 270 return c
271
272 def clustering_coefficient(self):
273 """Compute the clustering coefficient for this graph as defined by Watts
274 and Strogatz.
275
276 """
277 cv = {}
278 for v in self:
279 # consider a node and its neighbors
280 nodes = self.out_vertices(v)
281 nodes.append(v)
282
283 # compute the maximum number of possible edges between these nodes
284 # if they were all connected to each other:
285 n = len(nodes)
286 if n == 1:
287 # edge case of only 1 node; handle this case to avoid division
288 # by zero in the general case
289 cv[v] = 1.0
290 continue
291
292 possible = n * (n - 1) / 2.0
293
294 # now compute how many edges actually exist between the nodes
295 actual = 0
296 for x, y in itertools.combinations(nodes, 2):
297 if self.get_edge(x, y):
298 actual += 1
299
300 # the fraction of actual / possible is this nodes C sub v value
301 cv[v] = actual / possible
302
303 # The clustering coefficient is the average of all C sub v values
304 if len(cv):
305 return sum(cv.values()) / float(len(cv))
306 return 0.0
307
308 def shortest_path_tree(self, source, hint=None):
309 """Finds the length of the shortest path from the source vertex to all
310 other vertices in the graph. This length is stored on the vertices as an
311 attribute named 'dist'. The algorithm used is Dijkstra's.
312
313 hint: if provided, must be a dictionary mapping tuples to already known
314 shortest path distances. This can be used to speed up the algorithm.
315
316 """
317 if not hint:
318 hint = {}
319
320 for v in self.vertices():
321 v.dist = hint.get((source, v), INFINITY)
322 source.dist = 0
323
324 queue = [v for v in self.vertices() if v.dist < INFINITY]
325 sort_flag = True
326 while len(queue):
327
328 if sort_flag:
329 queue.sort(key=lambda v: v.dist)
330 sort_flag = False
331
332 v = queue.pop(0)
333
334 # for each neighbor of v, see if we found a new shortest path
335 for w, e in self[v].iteritems():
336 d = v.dist + e.length
337 if d < w.dist:
338 w.dist = d
339 queue.append(w)
340 sort_flag = True
341
342 def shortest_path_tree2(self, source):
343 """Finds the length of the shortest path from the source vertex to all
344 other vertices in the graph. This length is stored on the vertices as an
345 attribute named 'dist'. The algorithm used is Dijkstra's with a Heap
346 used to sort/store pending nodes to be examined.
347
348 """
349 for v in self.vertices():
350 v.dist = INFINITY
351 source.dist = 0
352
353 queue = []
354 heapq.heappush(queue, (0, source))
355 while len(queue):
356
357 _, v = heapq.heappop(queue)
358
359 # for each neighbor of v, see if we found a new shortest path
360 for w, e in self[v].iteritems():
361 d = v.dist + e.length
362 if d < w.dist:
363 w.dist = d
364 heapq.heappush(queue, (d, w))
365
366 def all_pairs_floyd_warshall(self):
367 """Finds the shortest paths between all pairs of vertices using the
368 Floyd-Warshall algorithm.
369
370 http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm
371
372 """
373 vertices = self.vertices()
374 dist = {}
375 for i in vertices:
376 for j in vertices:
377 if i is j:
378 dist[i, j] = 0.0
379 else:
380 e = self.get_edge(i, j)
381 dist[i, j] = e.length if e else INFINITY
382
383 for k in vertices:
384 for i in vertices:
385 for j in vertices:
386 d_ik = dist[i, k]
387 d_kj = dist[k, j]
388 new_cost = d_ik + d_kj
389 if new_cost < dist[i, j]:
390 dist[i, j] = new_cost
391
392 return dist
393
394 def big_l(self):
395 """Computes the "big-L" value for the graph as per Watts & Strogatz.
396
397 L is defined as the number of edges in the shortest path between
398 two vertices, averaged over all vertices.
399
400 Uses the shortest_path_tree() method, called once for every node.
401
402 """
403 d = {}
404 for v in self.vertices():
405 self.shortest_path_tree(v, d)
406 t = [((w, v), w.dist) for w in self.vertices() if v is not w]
407 d.update(t)
408
409 if len(d):
410 return sum(d.values()) / float(len(d))
411 return 0.0
412
413 def big_l2(self):
414 """Computes the "big-L" value for the graph as per Watts & Strogatz.
415
416 L is defined as the number of edges in the shortest path between
417 two vertices, averaged over all vertices.
418
419 Uses the all_pairs_floyd_warshall() method.
420
421 """
422 dist = self.all_pairs_floyd_warshall()
423 vertices = self.vertices()
424 result = [dist[i, j] for i in vertices for j in vertices if i is not j]
425
426 if len(result):
427 return sum(result) / float(len(result))
428 return 0.0
429
430 def big_l3(self):
431 """Computes the "big-L" value for the graph as per Watts & Strogatz.
432
433 L is defined as the number of edges in the shortest path between
434 two vertices, averaged over all vertices.
435
436 Uses the shortest_path_tree2() method, called once for every node.
437
438 """
439 d = {}
440 for v in self.vertices():
441 self.shortest_path_tree2(v)
442 t = [((v, w), w.dist) for w in self.vertices() if v is not w]
443 d.update(t)
444
445 if len(d):
446 return sum(d.values()) / float(len(d))
447 return 0.0
259 448
260 449
261 def main(script, *args): 450 def main(script, *args):
262 import pprint 451 import pprint
263 452