changeset 36:305cc03c2750

Chapter 5.5, exercise 6, #3: compute WS clustering coefficient and characteristic length on a BA model graph.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 19:24:02 -0600
parents 10db8c3a6b83
children 931f60dee99e
files Graph.py SmallWorldGraph.py ch5ex6-3.py
diffstat 3 files changed, 209 insertions(+), 186 deletions(-) [+]
line wrap: on
line diff
--- a/Graph.py	Wed Jan 09 20:51:16 2013 -0600
+++ b/Graph.py	Thu Jan 10 19:24:02 2013 -0600
@@ -6,10 +6,14 @@
 Copyright 2011 Allen B. Downey.
 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html.
 """
+import heapq
 import itertools
 from collections import deque, Counter
 
 
+INFINITY = float('Inf')
+
+
 class GraphError(Exception):
     """Exception for Graph errors"""
     pass
@@ -69,6 +73,14 @@
             for e in es:
                 self.add_edge(e)
 
+    def set_edge_length(self, n=1):
+        """Give each edge a length of n; this is used by the
+        shortest_path_tree() method.
+
+        """
+        for e in self.edges():
+            e.length = n
+
     def add_vertex(self, v):
         """Add a vertex to the graph."""
         self[v] = {}
@@ -257,6 +269,183 @@
 
         return c
 
+    def clustering_coefficient(self):
+        """Compute the clustering coefficient for this graph as defined by Watts
+        and Strogatz.
+
+        """
+        cv = {}
+        for v in self:
+            # consider a node and its neighbors
+            nodes = self.out_vertices(v)
+            nodes.append(v)
+
+            # compute the maximum number of possible edges between these nodes
+            # if they were all connected to each other:
+            n = len(nodes)
+            if n == 1:
+                # edge case of only 1 node; handle this case to avoid division
+                # by zero in the general case
+                cv[v] = 1.0
+                continue
+
+            possible = n * (n - 1) / 2.0
+
+            # now compute how many edges actually exist between the nodes
+            actual = 0
+            for x, y in itertools.combinations(nodes, 2):
+                if self.get_edge(x, y):
+                    actual += 1
+
+            # the fraction of actual / possible is this nodes C sub v value
+            cv[v] = actual / possible
+
+        # The clustering coefficient is the average of all C sub v values
+        if len(cv):
+            return sum(cv.values()) / float(len(cv))
+        return 0.0
+
+    def shortest_path_tree(self, source, hint=None):
+        """Finds the length of the shortest path from the source vertex to all
+        other vertices in the graph. This length is stored on the vertices as an
+        attribute named 'dist'. The algorithm used is Dijkstra's.
+
+        hint: if provided, must be a dictionary mapping tuples to already known
+        shortest path distances. This can be used to speed up the algorithm.
+
+        """
+        if not hint:
+            hint = {}
+
+        for v in self.vertices():
+            v.dist = hint.get((source, v), INFINITY)
+        source.dist = 0
+
+        queue = [v for v in self.vertices() if v.dist < INFINITY]
+        sort_flag = True
+        while len(queue):
+
+            if sort_flag:
+                queue.sort(key=lambda v: v.dist)
+                sort_flag = False
+
+            v = queue.pop(0)
+
+            # for each neighbor of v, see if we found a new shortest path
+            for w, e in self[v].iteritems():
+                d = v.dist + e.length
+                if d < w.dist:
+                    w.dist = d
+                    queue.append(w)
+                    sort_flag = True
+
+    def shortest_path_tree2(self, source):
+        """Finds the length of the shortest path from the source vertex to all
+        other vertices in the graph. This length is stored on the vertices as an
+        attribute named 'dist'. The algorithm used is Dijkstra's with a Heap
+        used to sort/store pending nodes to be examined.
+
+        """
+        for v in self.vertices():
+            v.dist = INFINITY
+        source.dist = 0
+
+        queue = []
+        heapq.heappush(queue, (0, source))
+        while len(queue):
+
+            _, v = heapq.heappop(queue)
+
+            # for each neighbor of v, see if we found a new shortest path
+            for w, e in self[v].iteritems():
+                d = v.dist + e.length
+                if d < w.dist:
+                    w.dist = d
+                    heapq.heappush(queue, (d, w))
+
+    def all_pairs_floyd_warshall(self):
+        """Finds the shortest paths between all pairs of vertices using the
+        Floyd-Warshall algorithm.
+
+        http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm
+
+        """
+        vertices = self.vertices()
+        dist = {}
+        for i in vertices:
+            for j in vertices:
+                if i is j:
+                    dist[i, j] = 0.0
+                else:
+                    e = self.get_edge(i, j)
+                    dist[i, j] = e.length if e else INFINITY
+
+        for k in vertices:
+            for i in vertices:
+                for j in vertices:
+                    d_ik = dist[i, k]
+                    d_kj = dist[k, j]
+                    new_cost = d_ik + d_kj
+                    if new_cost < dist[i, j]:
+                        dist[i, j] = new_cost
+
+        return dist
+
+    def big_l(self):
+        """Computes the "big-L" value for the graph as per Watts & Strogatz.
+
+        L is defined as the number of edges in the shortest path between
+        two vertices, averaged over all vertices.
+
+        Uses the shortest_path_tree() method, called once for every node.
+
+        """
+        d = {}
+        for v in self.vertices():
+            self.shortest_path_tree(v, d)
+            t = [((w, v), w.dist) for w in self.vertices() if v is not w]
+            d.update(t)
+
+        if len(d):
+            return sum(d.values()) / float(len(d))
+        return 0.0
+
+    def big_l2(self):
+        """Computes the "big-L" value for the graph as per Watts & Strogatz.
+
+        L is defined as the number of edges in the shortest path between
+        two vertices, averaged over all vertices.
+
+        Uses the all_pairs_floyd_warshall() method.
+
+        """
+        dist = self.all_pairs_floyd_warshall()
+        vertices = self.vertices()
+        result = [dist[i, j] for i in vertices for j in vertices if i is not j]
+
+        if len(result):
+            return sum(result) / float(len(result))
+        return 0.0
+
+    def big_l3(self):
+        """Computes the "big-L" value for the graph as per Watts & Strogatz.
+
+        L is defined as the number of edges in the shortest path between
+        two vertices, averaged over all vertices.
+
+        Uses the shortest_path_tree2() method, called once for every node.
+
+        """
+        d = {}
+        for v in self.vertices():
+            self.shortest_path_tree2(v)
+            t = [((v, w), w.dist) for w in self.vertices() if v is not w]
+            d.update(t)
+
+        if len(d):
+            return sum(d.values()) / float(len(d))
+        return 0.0
+
 
 def main(script, *args):
     import pprint
--- a/SmallWorldGraph.py	Wed Jan 09 20:51:16 2013 -0600
+++ b/SmallWorldGraph.py	Thu Jan 10 19:24:02 2013 -0600
@@ -1,16 +1,12 @@
 """This module contains the SmallWorldGraph class for 4.4, exercise 4.
 
 """
-import heapq
-import itertools
 import random
 
 from Graph import Edge
 from RandomGraph import RandomGraph
 
 
-INFINITY = float('Inf')
-
 class SmallWorldGraph(RandomGraph):
     """A small world graph for 4.4, exercise 4 in Think Complexity."""
 
@@ -29,11 +25,7 @@
         super(SmallWorldGraph, self).__init__(vs=vs)
         self.add_regular_edges(k)
         self.rewire(p)
-
-        # give each edge a default length of 1; this is used by the
-        # shortest_path_tree() method
-        for e in self.edges():
-            e.length = 1
+        self.set_edge_length(1)
 
     def rewire(self, p):
         """Assuming the graph is initially a regular graph, rewires the graph
@@ -58,180 +50,3 @@
                         self.add_edge(Edge(v, x))
                         break
 
-    def clustering_coefficient(self):
-        """Compute the clustering coefficient for this graph as defined by Watts
-        and Strogatz.
-
-        """
-        cv = {}
-        for v in self:
-            # consider a node and its neighbors
-            nodes = self.out_vertices(v)
-            nodes.append(v)
-
-            # compute the maximum number of possible edges between these nodes
-            # if they were all connected to each other:
-            n = len(nodes)
-            if n == 1:
-                # edge case of only 1 node; handle this case to avoid division
-                # by zero in the general case
-                cv[v] = 1.0
-                continue
-
-            possible = n * (n - 1) / 2.0
-
-            # now compute how many edges actually exist between the nodes
-            actual = 0
-            for x, y in itertools.combinations(nodes, 2):
-                if self.get_edge(x, y):
-                    actual += 1
-
-            # the fraction of actual / possible is this nodes C sub v value
-            cv[v] = actual / possible
-
-        # The clustering coefficient is the average of all C sub v values
-        if len(cv):
-            return sum(cv.values()) / float(len(cv))
-        return 0.0
-
-    def shortest_path_tree(self, source, hint=None):
-        """Finds the length of the shortest path from the source vertex to all
-        other vertices in the graph. This length is stored on the vertices as an
-        attribute named 'dist'. The algorithm used is Dijkstra's.
-
-        hint: if provided, must be a dictionary mapping tuples to already known
-        shortest path distances. This can be used to speed up the algorithm.
-
-        """
-        if not hint:
-            hint = {}
-
-        for v in self.vertices():
-            v.dist = hint.get((source, v), INFINITY)
-        source.dist = 0
-
-        queue = [v for v in self.vertices() if v.dist < INFINITY]
-        sort_flag = True
-        while len(queue):
-
-            if sort_flag:
-                queue.sort(key=lambda v: v.dist)
-                sort_flag = False
-
-            v = queue.pop(0)
-
-            # for each neighbor of v, see if we found a new shortest path
-            for w, e in self[v].iteritems():
-                d = v.dist + e.length
-                if d < w.dist:
-                    w.dist = d
-                    queue.append(w)
-                    sort_flag = True
-
-    def shortest_path_tree2(self, source):
-        """Finds the length of the shortest path from the source vertex to all
-        other vertices in the graph. This length is stored on the vertices as an
-        attribute named 'dist'. The algorithm used is Dijkstra's with a Heap
-        used to sort/store pending nodes to be examined.
-
-        """
-        for v in self.vertices():
-            v.dist = INFINITY
-        source.dist = 0
-
-        queue = []
-        heapq.heappush(queue, (0, source))
-        while len(queue):
-
-            _, v = heapq.heappop(queue)
-
-            # for each neighbor of v, see if we found a new shortest path
-            for w, e in self[v].iteritems():
-                d = v.dist + e.length
-                if d < w.dist:
-                    w.dist = d
-                    heapq.heappush(queue, (d, w))
-
-    def all_pairs_floyd_warshall(self):
-        """Finds the shortest paths between all pairs of vertices using the
-        Floyd-Warshall algorithm.
-
-        http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm
-
-        """
-        vertices = self.vertices()
-        dist = {}
-        for i in vertices:
-            for j in vertices:
-                if i is j:
-                    dist[i, j] = 0.0
-                else:
-                    e = self.get_edge(i, j)
-                    dist[i, j] = e.length if e else INFINITY
-
-        for k in vertices:
-            for i in vertices:
-                for j in vertices:
-                    d_ik = dist[i, k]
-                    d_kj = dist[k, j]
-                    new_cost = d_ik + d_kj
-                    if new_cost < dist[i, j]:
-                        dist[i, j] = new_cost
-
-        return dist
-
-    def big_l(self):
-        """Computes the "big-L" value for the graph as per Watts & Strogatz.
-
-        L is defined as the number of edges in the shortest path between
-        two vertices, averaged over all vertices.
-
-        Uses the shortest_path_tree() method, called once for every node.
-
-        """
-        d = {}
-        for v in self.vertices():
-            self.shortest_path_tree(v, d)
-            t = [((w, v), w.dist) for w in self.vertices() if v is not w]
-            d.update(t)
-
-        if len(d):
-            return sum(d.values()) / float(len(d))
-        return 0.0
-
-    def big_l2(self):
-        """Computes the "big-L" value for the graph as per Watts & Strogatz.
-
-        L is defined as the number of edges in the shortest path between
-        two vertices, averaged over all vertices.
-
-        Uses the all_pairs_floyd_warshall() method.
-
-        """
-        dist = self.all_pairs_floyd_warshall()
-        vertices = self.vertices()
-        result = [dist[i, j] for i in vertices for j in vertices if i is not j]
-
-        if len(result):
-            return sum(result) / float(len(result))
-        return 0.0
-
-    def big_l3(self):
-        """Computes the "big-L" value for the graph as per Watts & Strogatz.
-
-        L is defined as the number of edges in the shortest path between
-        two vertices, averaged over all vertices.
-
-        Uses the shortest_path_tree2() method, called once for every node.
-
-        """
-        d = {}
-        for v in self.vertices():
-            self.shortest_path_tree2(v)
-            t = [((v, w), w.dist) for w in self.vertices() if v is not w]
-            d.update(t)
-
-        if len(d):
-            return sum(d.values()) / float(len(d))
-        return 0.0
-
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ch5ex6-3.py	Thu Jan 10 19:24:02 2013 -0600
@@ -0,0 +1,19 @@
+"""Chapter 5.5, exercise 6 in Allen Downey's Think Complexity book.
+
+3. Use the BA model to generate a graph with about 1000 vertices and compute the
+   characteristic length and clustering coefficient as defined in the Watts and
+   Strogatz paper. Do scale-free networks have the characteristics of
+   a small-world graph?
+
+"""
+
+from ch5ex6 import BAGraph
+
+g = BAGraph(5, 5)
+
+for i in xrange(1000):
+    g.step()
+
+g.set_edge_length(1)
+print "Clustering coefficient:", g.clustering_coefficient()
+print "Characteristic length:", g.big_l3()