diff Graph.py @ 36:305cc03c2750

Chapter 5.5, exercise 6, #3: compute WS clustering coefficient and characteristic length on a BA model graph.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 19:24:02 -0600
parents 10db8c3a6b83
children
line wrap: on
line diff
--- a/Graph.py	Wed Jan 09 20:51:16 2013 -0600
+++ b/Graph.py	Thu Jan 10 19:24:02 2013 -0600
@@ -6,10 +6,14 @@
 Copyright 2011 Allen B. Downey.
 Distributed under the GNU General Public License at gnu.org/licenses/gpl.html.
 """
+import heapq
 import itertools
 from collections import deque, Counter
 
 
+INFINITY = float('Inf')
+
+
 class GraphError(Exception):
     """Exception for Graph errors"""
     pass
@@ -69,6 +73,14 @@
             for e in es:
                 self.add_edge(e)
 
+    def set_edge_length(self, n=1):
+        """Give each edge a length of n; this is used by the
+        shortest_path_tree() method.
+
+        """
+        for e in self.edges():
+            e.length = n
+
     def add_vertex(self, v):
         """Add a vertex to the graph."""
         self[v] = {}
@@ -257,6 +269,183 @@
 
         return c
 
+    def clustering_coefficient(self):
+        """Compute the clustering coefficient for this graph as defined by Watts
+        and Strogatz.
+
+        """
+        cv = {}
+        for v in self:
+            # consider a node and its neighbors
+            nodes = self.out_vertices(v)
+            nodes.append(v)
+
+            # compute the maximum number of possible edges between these nodes
+            # if they were all connected to each other:
+            n = len(nodes)
+            if n == 1:
+                # edge case of only 1 node; handle this case to avoid division
+                # by zero in the general case
+                cv[v] = 1.0
+                continue
+
+            possible = n * (n - 1) / 2.0
+
+            # now compute how many edges actually exist between the nodes
+            actual = 0
+            for x, y in itertools.combinations(nodes, 2):
+                if self.get_edge(x, y):
+                    actual += 1
+
+            # the fraction of actual / possible is this nodes C sub v value
+            cv[v] = actual / possible
+
+        # The clustering coefficient is the average of all C sub v values
+        if len(cv):
+            return sum(cv.values()) / float(len(cv))
+        return 0.0
+
+    def shortest_path_tree(self, source, hint=None):
+        """Finds the length of the shortest path from the source vertex to all
+        other vertices in the graph. This length is stored on the vertices as an
+        attribute named 'dist'. The algorithm used is Dijkstra's.
+
+        hint: if provided, must be a dictionary mapping tuples to already known
+        shortest path distances. This can be used to speed up the algorithm.
+
+        """
+        if not hint:
+            hint = {}
+
+        for v in self.vertices():
+            v.dist = hint.get((source, v), INFINITY)
+        source.dist = 0
+
+        queue = [v for v in self.vertices() if v.dist < INFINITY]
+        sort_flag = True
+        while len(queue):
+
+            if sort_flag:
+                queue.sort(key=lambda v: v.dist)
+                sort_flag = False
+
+            v = queue.pop(0)
+
+            # for each neighbor of v, see if we found a new shortest path
+            for w, e in self[v].iteritems():
+                d = v.dist + e.length
+                if d < w.dist:
+                    w.dist = d
+                    queue.append(w)
+                    sort_flag = True
+
+    def shortest_path_tree2(self, source):
+        """Finds the length of the shortest path from the source vertex to all
+        other vertices in the graph. This length is stored on the vertices as an
+        attribute named 'dist'. The algorithm used is Dijkstra's with a Heap
+        used to sort/store pending nodes to be examined.
+
+        """
+        for v in self.vertices():
+            v.dist = INFINITY
+        source.dist = 0
+
+        queue = []
+        heapq.heappush(queue, (0, source))
+        while len(queue):
+
+            _, v = heapq.heappop(queue)
+
+            # for each neighbor of v, see if we found a new shortest path
+            for w, e in self[v].iteritems():
+                d = v.dist + e.length
+                if d < w.dist:
+                    w.dist = d
+                    heapq.heappush(queue, (d, w))
+
+    def all_pairs_floyd_warshall(self):
+        """Finds the shortest paths between all pairs of vertices using the
+        Floyd-Warshall algorithm.
+
+        http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm
+
+        """
+        vertices = self.vertices()
+        dist = {}
+        for i in vertices:
+            for j in vertices:
+                if i is j:
+                    dist[i, j] = 0.0
+                else:
+                    e = self.get_edge(i, j)
+                    dist[i, j] = e.length if e else INFINITY
+
+        for k in vertices:
+            for i in vertices:
+                for j in vertices:
+                    d_ik = dist[i, k]
+                    d_kj = dist[k, j]
+                    new_cost = d_ik + d_kj
+                    if new_cost < dist[i, j]:
+                        dist[i, j] = new_cost
+
+        return dist
+
+    def big_l(self):
+        """Computes the "big-L" value for the graph as per Watts & Strogatz.
+
+        L is defined as the number of edges in the shortest path between
+        two vertices, averaged over all vertices.
+
+        Uses the shortest_path_tree() method, called once for every node.
+
+        """
+        d = {}
+        for v in self.vertices():
+            self.shortest_path_tree(v, d)
+            t = [((w, v), w.dist) for w in self.vertices() if v is not w]
+            d.update(t)
+
+        if len(d):
+            return sum(d.values()) / float(len(d))
+        return 0.0
+
+    def big_l2(self):
+        """Computes the "big-L" value for the graph as per Watts & Strogatz.
+
+        L is defined as the number of edges in the shortest path between
+        two vertices, averaged over all vertices.
+
+        Uses the all_pairs_floyd_warshall() method.
+
+        """
+        dist = self.all_pairs_floyd_warshall()
+        vertices = self.vertices()
+        result = [dist[i, j] for i in vertices for j in vertices if i is not j]
+
+        if len(result):
+            return sum(result) / float(len(result))
+        return 0.0
+
+    def big_l3(self):
+        """Computes the "big-L" value for the graph as per Watts & Strogatz.
+
+        L is defined as the number of edges in the shortest path between
+        two vertices, averaged over all vertices.
+
+        Uses the shortest_path_tree2() method, called once for every node.
+
+        """
+        d = {}
+        for v in self.vertices():
+            self.shortest_path_tree2(v)
+            t = [((v, w), w.dist) for w in self.vertices() if v is not w]
+            d.update(t)
+
+        if len(d):
+            return sum(d.values()) / float(len(d))
+        return 0.0
+
 
 def main(script, *args):
     import pprint