Mercurial > public > think_complexity
comparison Graph.py @ 36:305cc03c2750
Chapter 5.5, exercise 6, #3: compute WS clustering coefficient
and characteristic length on a BA model graph.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Thu, 10 Jan 2013 19:24:02 -0600 |
parents | 10db8c3a6b83 |
children |
comparison
equal
deleted
inserted
replaced
35:10db8c3a6b83 | 36:305cc03c2750 |
---|---|
4 http://greenteapress.com/complexity | 4 http://greenteapress.com/complexity |
5 | 5 |
6 Copyright 2011 Allen B. Downey. | 6 Copyright 2011 Allen B. Downey. |
7 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html. | 7 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html. |
8 """ | 8 """ |
9 import heapq | |
9 import itertools | 10 import itertools |
10 from collections import deque, Counter | 11 from collections import deque, Counter |
12 | |
13 | |
14 INFINITY = float('Inf') | |
11 | 15 |
12 | 16 |
13 class GraphError(Exception): | 17 class GraphError(Exception): |
14 """Exception for Graph errors""" | 18 """Exception for Graph errors""" |
15 pass | 19 pass |
66 self.add_vertex(v) | 70 self.add_vertex(v) |
67 | 71 |
68 if es: | 72 if es: |
69 for e in es: | 73 for e in es: |
70 self.add_edge(e) | 74 self.add_edge(e) |
75 | |
76 def set_edge_length(self, n=1): | |
77 """Give each edge a length of n; this is used by the | |
78 shortest_path_tree() method. | |
79 | |
80 """ | |
81 for e in self.edges(): | |
82 e.length = n | |
71 | 83 |
72 def add_vertex(self, v): | 84 def add_vertex(self, v): |
73 """Add a vertex to the graph.""" | 85 """Add a vertex to the graph.""" |
74 self[v] = {} | 86 self[v] = {} |
75 | 87 |
254 if n > 0: | 266 if n > 0: |
255 for k in c: | 267 for k in c: |
256 c[k] = float(c[k]) / n | 268 c[k] = float(c[k]) / n |
257 | 269 |
258 return c | 270 return c |
271 | |
272 def clustering_coefficient(self): | |
273 """Compute the clustering coefficient for this graph as defined by Watts | |
274 and Strogatz. | |
275 | |
276 """ | |
277 cv = {} | |
278 for v in self: | |
279 # consider a node and its neighbors | |
280 nodes = self.out_vertices(v) | |
281 nodes.append(v) | |
282 | |
283 # compute the maximum number of possible edges between these nodes | |
284 # if they were all connected to each other: | |
285 n = len(nodes) | |
286 if n == 1: | |
287 # edge case of only 1 node; handle this case to avoid division | |
288 # by zero in the general case | |
289 cv[v] = 1.0 | |
290 continue | |
291 | |
292 possible = n * (n - 1) / 2.0 | |
293 | |
294 # now compute how many edges actually exist between the nodes | |
295 actual = 0 | |
296 for x, y in itertools.combinations(nodes, 2): | |
297 if self.get_edge(x, y): | |
298 actual += 1 | |
299 | |
300 # the fraction of actual / possible is this nodes C sub v value | |
301 cv[v] = actual / possible | |
302 | |
303 # The clustering coefficient is the average of all C sub v values | |
304 if len(cv): | |
305 return sum(cv.values()) / float(len(cv)) | |
306 return 0.0 | |
307 | |
308 def shortest_path_tree(self, source, hint=None): | |
309 """Finds the length of the shortest path from the source vertex to all | |
310 other vertices in the graph. This length is stored on the vertices as an | |
311 attribute named 'dist'. The algorithm used is Dijkstra's. | |
312 | |
313 hint: if provided, must be a dictionary mapping tuples to already known | |
314 shortest path distances. This can be used to speed up the algorithm. | |
315 | |
316 """ | |
317 if not hint: | |
318 hint = {} | |
319 | |
320 for v in self.vertices(): | |
321 v.dist = hint.get((source, v), INFINITY) | |
322 source.dist = 0 | |
323 | |
324 queue = [v for v in self.vertices() if v.dist < INFINITY] | |
325 sort_flag = True | |
326 while len(queue): | |
327 | |
328 if sort_flag: | |
329 queue.sort(key=lambda v: v.dist) | |
330 sort_flag = False | |
331 | |
332 v = queue.pop(0) | |
333 | |
334 # for each neighbor of v, see if we found a new shortest path | |
335 for w, e in self[v].iteritems(): | |
336 d = v.dist + e.length | |
337 if d < w.dist: | |
338 w.dist = d | |
339 queue.append(w) | |
340 sort_flag = True | |
341 | |
342 def shortest_path_tree2(self, source): | |
343 """Finds the length of the shortest path from the source vertex to all | |
344 other vertices in the graph. This length is stored on the vertices as an | |
345 attribute named 'dist'. The algorithm used is Dijkstra's with a Heap | |
346 used to sort/store pending nodes to be examined. | |
347 | |
348 """ | |
349 for v in self.vertices(): | |
350 v.dist = INFINITY | |
351 source.dist = 0 | |
352 | |
353 queue = [] | |
354 heapq.heappush(queue, (0, source)) | |
355 while len(queue): | |
356 | |
357 _, v = heapq.heappop(queue) | |
358 | |
359 # for each neighbor of v, see if we found a new shortest path | |
360 for w, e in self[v].iteritems(): | |
361 d = v.dist + e.length | |
362 if d < w.dist: | |
363 w.dist = d | |
364 heapq.heappush(queue, (d, w)) | |
365 | |
366 def all_pairs_floyd_warshall(self): | |
367 """Finds the shortest paths between all pairs of vertices using the | |
368 Floyd-Warshall algorithm. | |
369 | |
370 http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm | |
371 | |
372 """ | |
373 vertices = self.vertices() | |
374 dist = {} | |
375 for i in vertices: | |
376 for j in vertices: | |
377 if i is j: | |
378 dist[i, j] = 0.0 | |
379 else: | |
380 e = self.get_edge(i, j) | |
381 dist[i, j] = e.length if e else INFINITY | |
382 | |
383 for k in vertices: | |
384 for i in vertices: | |
385 for j in vertices: | |
386 d_ik = dist[i, k] | |
387 d_kj = dist[k, j] | |
388 new_cost = d_ik + d_kj | |
389 if new_cost < dist[i, j]: | |
390 dist[i, j] = new_cost | |
391 | |
392 return dist | |
393 | |
394 def big_l(self): | |
395 """Computes the "big-L" value for the graph as per Watts & Strogatz. | |
396 | |
397 L is defined as the number of edges in the shortest path between | |
398 two vertices, averaged over all vertices. | |
399 | |
400 Uses the shortest_path_tree() method, called once for every node. | |
401 | |
402 """ | |
403 d = {} | |
404 for v in self.vertices(): | |
405 self.shortest_path_tree(v, d) | |
406 t = [((w, v), w.dist) for w in self.vertices() if v is not w] | |
407 d.update(t) | |
408 | |
409 if len(d): | |
410 return sum(d.values()) / float(len(d)) | |
411 return 0.0 | |
412 | |
413 def big_l2(self): | |
414 """Computes the "big-L" value for the graph as per Watts & Strogatz. | |
415 | |
416 L is defined as the number of edges in the shortest path between | |
417 two vertices, averaged over all vertices. | |
418 | |
419 Uses the all_pairs_floyd_warshall() method. | |
420 | |
421 """ | |
422 dist = self.all_pairs_floyd_warshall() | |
423 vertices = self.vertices() | |
424 result = [dist[i, j] for i in vertices for j in vertices if i is not j] | |
425 | |
426 if len(result): | |
427 return sum(result) / float(len(result)) | |
428 return 0.0 | |
429 | |
430 def big_l3(self): | |
431 """Computes the "big-L" value for the graph as per Watts & Strogatz. | |
432 | |
433 L is defined as the number of edges in the shortest path between | |
434 two vertices, averaged over all vertices. | |
435 | |
436 Uses the shortest_path_tree2() method, called once for every node. | |
437 | |
438 """ | |
439 d = {} | |
440 for v in self.vertices(): | |
441 self.shortest_path_tree2(v) | |
442 t = [((v, w), w.dist) for w in self.vertices() if v is not w] | |
443 d.update(t) | |
444 | |
445 if len(d): | |
446 return sum(d.values()) / float(len(d)) | |
447 return 0.0 | |
259 | 448 |
260 | 449 |
261 def main(script, *args): | 450 def main(script, *args): |
262 import pprint | 451 import pprint |
263 | 452 |