Mercurial > public > think_complexity
changeset 36:305cc03c2750
Chapter 5.5, exercise 6, #3: compute WS clustering coefficient
and characteristic length on a BA model graph.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Thu, 10 Jan 2013 19:24:02 -0600 |
parents | 10db8c3a6b83 |
children | 931f60dee99e |
files | Graph.py SmallWorldGraph.py ch5ex6-3.py |
diffstat | 3 files changed, 209 insertions(+), 186 deletions(-) [+] |
line wrap: on
line diff
--- a/Graph.py Wed Jan 09 20:51:16 2013 -0600 +++ b/Graph.py Thu Jan 10 19:24:02 2013 -0600 @@ -6,10 +6,14 @@ Copyright 2011 Allen B. Downey. Distributed under the GNU General Public License at gnu.org/licenses/gpl.html. """ +import heapq import itertools from collections import deque, Counter +INFINITY = float('Inf') + + class GraphError(Exception): """Exception for Graph errors""" pass @@ -69,6 +73,14 @@ for e in es: self.add_edge(e) + def set_edge_length(self, n=1): + """Give each edge a length of n; this is used by the + shortest_path_tree() method. + + """ + for e in self.edges(): + e.length = n + def add_vertex(self, v): """Add a vertex to the graph.""" self[v] = {} @@ -257,6 +269,183 @@ return c + def clustering_coefficient(self): + """Compute the clustering coefficient for this graph as defined by Watts + and Strogatz. + + """ + cv = {} + for v in self: + # consider a node and its neighbors + nodes = self.out_vertices(v) + nodes.append(v) + + # compute the maximum number of possible edges between these nodes + # if they were all connected to each other: + n = len(nodes) + if n == 1: + # edge case of only 1 node; handle this case to avoid division + # by zero in the general case + cv[v] = 1.0 + continue + + possible = n * (n - 1) / 2.0 + + # now compute how many edges actually exist between the nodes + actual = 0 + for x, y in itertools.combinations(nodes, 2): + if self.get_edge(x, y): + actual += 1 + + # the fraction of actual / possible is this nodes C sub v value + cv[v] = actual / possible + + # The clustering coefficient is the average of all C sub v values + if len(cv): + return sum(cv.values()) / float(len(cv)) + return 0.0 + + def shortest_path_tree(self, source, hint=None): + """Finds the length of the shortest path from the source vertex to all + other vertices in the graph. This length is stored on the vertices as an + attribute named 'dist'. The algorithm used is Dijkstra's. + + hint: if provided, must be a dictionary mapping tuples to already known + shortest path distances. This can be used to speed up the algorithm. + + """ + if not hint: + hint = {} + + for v in self.vertices(): + v.dist = hint.get((source, v), INFINITY) + source.dist = 0 + + queue = [v for v in self.vertices() if v.dist < INFINITY] + sort_flag = True + while len(queue): + + if sort_flag: + queue.sort(key=lambda v: v.dist) + sort_flag = False + + v = queue.pop(0) + + # for each neighbor of v, see if we found a new shortest path + for w, e in self[v].iteritems(): + d = v.dist + e.length + if d < w.dist: + w.dist = d + queue.append(w) + sort_flag = True + + def shortest_path_tree2(self, source): + """Finds the length of the shortest path from the source vertex to all + other vertices in the graph. This length is stored on the vertices as an + attribute named 'dist'. The algorithm used is Dijkstra's with a Heap + used to sort/store pending nodes to be examined. + + """ + for v in self.vertices(): + v.dist = INFINITY + source.dist = 0 + + queue = [] + heapq.heappush(queue, (0, source)) + while len(queue): + + _, v = heapq.heappop(queue) + + # for each neighbor of v, see if we found a new shortest path + for w, e in self[v].iteritems(): + d = v.dist + e.length + if d < w.dist: + w.dist = d + heapq.heappush(queue, (d, w)) + + def all_pairs_floyd_warshall(self): + """Finds the shortest paths between all pairs of vertices using the + Floyd-Warshall algorithm. + + http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm + + """ + vertices = self.vertices() + dist = {} + for i in vertices: + for j in vertices: + if i is j: + dist[i, j] = 0.0 + else: + e = self.get_edge(i, j) + dist[i, j] = e.length if e else INFINITY + + for k in vertices: + for i in vertices: + for j in vertices: + d_ik = dist[i, k] + d_kj = dist[k, j] + new_cost = d_ik + d_kj + if new_cost < dist[i, j]: + dist[i, j] = new_cost + + return dist + + def big_l(self): + """Computes the "big-L" value for the graph as per Watts & Strogatz. + + L is defined as the number of edges in the shortest path between + two vertices, averaged over all vertices. + + Uses the shortest_path_tree() method, called once for every node. + + """ + d = {} + for v in self.vertices(): + self.shortest_path_tree(v, d) + t = [((w, v), w.dist) for w in self.vertices() if v is not w] + d.update(t) + + if len(d): + return sum(d.values()) / float(len(d)) + return 0.0 + + def big_l2(self): + """Computes the "big-L" value for the graph as per Watts & Strogatz. + + L is defined as the number of edges in the shortest path between + two vertices, averaged over all vertices. + + Uses the all_pairs_floyd_warshall() method. + + """ + dist = self.all_pairs_floyd_warshall() + vertices = self.vertices() + result = [dist[i, j] for i in vertices for j in vertices if i is not j] + + if len(result): + return sum(result) / float(len(result)) + return 0.0 + + def big_l3(self): + """Computes the "big-L" value for the graph as per Watts & Strogatz. + + L is defined as the number of edges in the shortest path between + two vertices, averaged over all vertices. + + Uses the shortest_path_tree2() method, called once for every node. + + """ + d = {} + for v in self.vertices(): + self.shortest_path_tree2(v) + t = [((v, w), w.dist) for w in self.vertices() if v is not w] + d.update(t) + + if len(d): + return sum(d.values()) / float(len(d)) + return 0.0 + def main(script, *args): import pprint
--- a/SmallWorldGraph.py Wed Jan 09 20:51:16 2013 -0600 +++ b/SmallWorldGraph.py Thu Jan 10 19:24:02 2013 -0600 @@ -1,16 +1,12 @@ """This module contains the SmallWorldGraph class for 4.4, exercise 4. """ -import heapq -import itertools import random from Graph import Edge from RandomGraph import RandomGraph -INFINITY = float('Inf') - class SmallWorldGraph(RandomGraph): """A small world graph for 4.4, exercise 4 in Think Complexity.""" @@ -29,11 +25,7 @@ super(SmallWorldGraph, self).__init__(vs=vs) self.add_regular_edges(k) self.rewire(p) - - # give each edge a default length of 1; this is used by the - # shortest_path_tree() method - for e in self.edges(): - e.length = 1 + self.set_edge_length(1) def rewire(self, p): """Assuming the graph is initially a regular graph, rewires the graph @@ -58,180 +50,3 @@ self.add_edge(Edge(v, x)) break - def clustering_coefficient(self): - """Compute the clustering coefficient for this graph as defined by Watts - and Strogatz. - - """ - cv = {} - for v in self: - # consider a node and its neighbors - nodes = self.out_vertices(v) - nodes.append(v) - - # compute the maximum number of possible edges between these nodes - # if they were all connected to each other: - n = len(nodes) - if n == 1: - # edge case of only 1 node; handle this case to avoid division - # by zero in the general case - cv[v] = 1.0 - continue - - possible = n * (n - 1) / 2.0 - - # now compute how many edges actually exist between the nodes - actual = 0 - for x, y in itertools.combinations(nodes, 2): - if self.get_edge(x, y): - actual += 1 - - # the fraction of actual / possible is this nodes C sub v value - cv[v] = actual / possible - - # The clustering coefficient is the average of all C sub v values - if len(cv): - return sum(cv.values()) / float(len(cv)) - return 0.0 - - def shortest_path_tree(self, source, hint=None): - """Finds the length of the shortest path from the source vertex to all - other vertices in the graph. This length is stored on the vertices as an - attribute named 'dist'. The algorithm used is Dijkstra's. - - hint: if provided, must be a dictionary mapping tuples to already known - shortest path distances. This can be used to speed up the algorithm. - - """ - if not hint: - hint = {} - - for v in self.vertices(): - v.dist = hint.get((source, v), INFINITY) - source.dist = 0 - - queue = [v for v in self.vertices() if v.dist < INFINITY] - sort_flag = True - while len(queue): - - if sort_flag: - queue.sort(key=lambda v: v.dist) - sort_flag = False - - v = queue.pop(0) - - # for each neighbor of v, see if we found a new shortest path - for w, e in self[v].iteritems(): - d = v.dist + e.length - if d < w.dist: - w.dist = d - queue.append(w) - sort_flag = True - - def shortest_path_tree2(self, source): - """Finds the length of the shortest path from the source vertex to all - other vertices in the graph. This length is stored on the vertices as an - attribute named 'dist'. The algorithm used is Dijkstra's with a Heap - used to sort/store pending nodes to be examined. - - """ - for v in self.vertices(): - v.dist = INFINITY - source.dist = 0 - - queue = [] - heapq.heappush(queue, (0, source)) - while len(queue): - - _, v = heapq.heappop(queue) - - # for each neighbor of v, see if we found a new shortest path - for w, e in self[v].iteritems(): - d = v.dist + e.length - if d < w.dist: - w.dist = d - heapq.heappush(queue, (d, w)) - - def all_pairs_floyd_warshall(self): - """Finds the shortest paths between all pairs of vertices using the - Floyd-Warshall algorithm. - - http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm - - """ - vertices = self.vertices() - dist = {} - for i in vertices: - for j in vertices: - if i is j: - dist[i, j] = 0.0 - else: - e = self.get_edge(i, j) - dist[i, j] = e.length if e else INFINITY - - for k in vertices: - for i in vertices: - for j in vertices: - d_ik = dist[i, k] - d_kj = dist[k, j] - new_cost = d_ik + d_kj - if new_cost < dist[i, j]: - dist[i, j] = new_cost - - return dist - - def big_l(self): - """Computes the "big-L" value for the graph as per Watts & Strogatz. - - L is defined as the number of edges in the shortest path between - two vertices, averaged over all vertices. - - Uses the shortest_path_tree() method, called once for every node. - - """ - d = {} - for v in self.vertices(): - self.shortest_path_tree(v, d) - t = [((w, v), w.dist) for w in self.vertices() if v is not w] - d.update(t) - - if len(d): - return sum(d.values()) / float(len(d)) - return 0.0 - - def big_l2(self): - """Computes the "big-L" value for the graph as per Watts & Strogatz. - - L is defined as the number of edges in the shortest path between - two vertices, averaged over all vertices. - - Uses the all_pairs_floyd_warshall() method. - - """ - dist = self.all_pairs_floyd_warshall() - vertices = self.vertices() - result = [dist[i, j] for i in vertices for j in vertices if i is not j] - - if len(result): - return sum(result) / float(len(result)) - return 0.0 - - def big_l3(self): - """Computes the "big-L" value for the graph as per Watts & Strogatz. - - L is defined as the number of edges in the shortest path between - two vertices, averaged over all vertices. - - Uses the shortest_path_tree2() method, called once for every node. - - """ - d = {} - for v in self.vertices(): - self.shortest_path_tree2(v) - t = [((v, w), w.dist) for w in self.vertices() if v is not w] - d.update(t) - - if len(d): - return sum(d.values()) / float(len(d)) - return 0.0 -
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/ch5ex6-3.py Thu Jan 10 19:24:02 2013 -0600 @@ -0,0 +1,19 @@ +"""Chapter 5.5, exercise 6 in Allen Downey's Think Complexity book. + +3. Use the BA model to generate a graph with about 1000 vertices and compute the + characteristic length and clustering coefficient as defined in the Watts and + Strogatz paper. Do scale-free networks have the characteristics of + a small-world graph? + +""" + +from ch5ex6 import BAGraph + +g = BAGraph(5, 5) + +for i in xrange(1000): + g.step() + +g.set_edge_length(1) +print "Clustering coefficient:", g.clustering_coefficient() +print "Characteristic length:", g.big_l3()