Mercurial > public > think_complexity
diff Graph.py @ 36:305cc03c2750
Chapter 5.5, exercise 6, #3: compute WS clustering coefficient
and characteristic length on a BA model graph.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Thu, 10 Jan 2013 19:24:02 -0600 |
parents | 10db8c3a6b83 |
children |
line wrap: on
line diff
--- a/Graph.py Wed Jan 09 20:51:16 2013 -0600 +++ b/Graph.py Thu Jan 10 19:24:02 2013 -0600 @@ -6,10 +6,14 @@ Copyright 2011 Allen B. Downey. Distributed under the GNU General Public License at gnu.org/licenses/gpl.html. """ +import heapq import itertools from collections import deque, Counter +INFINITY = float('Inf') + + class GraphError(Exception): """Exception for Graph errors""" pass @@ -69,6 +73,14 @@ for e in es: self.add_edge(e) + def set_edge_length(self, n=1): + """Give each edge a length of n; this is used by the + shortest_path_tree() method. + + """ + for e in self.edges(): + e.length = n + def add_vertex(self, v): """Add a vertex to the graph.""" self[v] = {} @@ -257,6 +269,183 @@ return c + def clustering_coefficient(self): + """Compute the clustering coefficient for this graph as defined by Watts + and Strogatz. + + """ + cv = {} + for v in self: + # consider a node and its neighbors + nodes = self.out_vertices(v) + nodes.append(v) + + # compute the maximum number of possible edges between these nodes + # if they were all connected to each other: + n = len(nodes) + if n == 1: + # edge case of only 1 node; handle this case to avoid division + # by zero in the general case + cv[v] = 1.0 + continue + + possible = n * (n - 1) / 2.0 + + # now compute how many edges actually exist between the nodes + actual = 0 + for x, y in itertools.combinations(nodes, 2): + if self.get_edge(x, y): + actual += 1 + + # the fraction of actual / possible is this nodes C sub v value + cv[v] = actual / possible + + # The clustering coefficient is the average of all C sub v values + if len(cv): + return sum(cv.values()) / float(len(cv)) + return 0.0 + + def shortest_path_tree(self, source, hint=None): + """Finds the length of the shortest path from the source vertex to all + other vertices in the graph. This length is stored on the vertices as an + attribute named 'dist'. The algorithm used is Dijkstra's. + + hint: if provided, must be a dictionary mapping tuples to already known + shortest path distances. This can be used to speed up the algorithm. + + """ + if not hint: + hint = {} + + for v in self.vertices(): + v.dist = hint.get((source, v), INFINITY) + source.dist = 0 + + queue = [v for v in self.vertices() if v.dist < INFINITY] + sort_flag = True + while len(queue): + + if sort_flag: + queue.sort(key=lambda v: v.dist) + sort_flag = False + + v = queue.pop(0) + + # for each neighbor of v, see if we found a new shortest path + for w, e in self[v].iteritems(): + d = v.dist + e.length + if d < w.dist: + w.dist = d + queue.append(w) + sort_flag = True + + def shortest_path_tree2(self, source): + """Finds the length of the shortest path from the source vertex to all + other vertices in the graph. This length is stored on the vertices as an + attribute named 'dist'. The algorithm used is Dijkstra's with a Heap + used to sort/store pending nodes to be examined. + + """ + for v in self.vertices(): + v.dist = INFINITY + source.dist = 0 + + queue = [] + heapq.heappush(queue, (0, source)) + while len(queue): + + _, v = heapq.heappop(queue) + + # for each neighbor of v, see if we found a new shortest path + for w, e in self[v].iteritems(): + d = v.dist + e.length + if d < w.dist: + w.dist = d + heapq.heappush(queue, (d, w)) + + def all_pairs_floyd_warshall(self): + """Finds the shortest paths between all pairs of vertices using the + Floyd-Warshall algorithm. + + http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm + + """ + vertices = self.vertices() + dist = {} + for i in vertices: + for j in vertices: + if i is j: + dist[i, j] = 0.0 + else: + e = self.get_edge(i, j) + dist[i, j] = e.length if e else INFINITY + + for k in vertices: + for i in vertices: + for j in vertices: + d_ik = dist[i, k] + d_kj = dist[k, j] + new_cost = d_ik + d_kj + if new_cost < dist[i, j]: + dist[i, j] = new_cost + + return dist + + def big_l(self): + """Computes the "big-L" value for the graph as per Watts & Strogatz. + + L is defined as the number of edges in the shortest path between + two vertices, averaged over all vertices. + + Uses the shortest_path_tree() method, called once for every node. + + """ + d = {} + for v in self.vertices(): + self.shortest_path_tree(v, d) + t = [((w, v), w.dist) for w in self.vertices() if v is not w] + d.update(t) + + if len(d): + return sum(d.values()) / float(len(d)) + return 0.0 + + def big_l2(self): + """Computes the "big-L" value for the graph as per Watts & Strogatz. + + L is defined as the number of edges in the shortest path between + two vertices, averaged over all vertices. + + Uses the all_pairs_floyd_warshall() method. + + """ + dist = self.all_pairs_floyd_warshall() + vertices = self.vertices() + result = [dist[i, j] for i in vertices for j in vertices if i is not j] + + if len(result): + return sum(result) / float(len(result)) + return 0.0 + + def big_l3(self): + """Computes the "big-L" value for the graph as per Watts & Strogatz. + + L is defined as the number of edges in the shortest path between + two vertices, averaged over all vertices. + + Uses the shortest_path_tree2() method, called once for every node. + + """ + d = {} + for v in self.vertices(): + self.shortest_path_tree2(v) + t = [((v, w), w.dist) for w in self.vertices() if v is not w] + d.update(t) + + if len(d): + return sum(d.values()) / float(len(d)) + return 0.0 + def main(script, *args): import pprint