Mercurial > public > think_complexity
view Graph.py @ 36:305cc03c2750
Chapter 5.5, exercise 6, #3: compute WS clustering coefficient
and characteristic length on a BA model graph.
author | Brian Neal <bgneal@gmail.com> |
---|---|
date | Thu, 10 Jan 2013 19:24:02 -0600 |
parents | 10db8c3a6b83 |
children |
line wrap: on
line source
""" Code example from Complexity and Computation, a book about exploring complexity science with Python. Available free from http://greenteapress.com/complexity Copyright 2011 Allen B. Downey. Distributed under the GNU General Public License at gnu.org/licenses/gpl.html. """ import heapq import itertools from collections import deque, Counter INFINITY = float('Inf') class GraphError(Exception): """Exception for Graph errors""" pass class Vertex(object): """A Vertex is a node in a graph.""" def __init__(self, label=''): self.label = label def __repr__(self): """Returns a string representation of this object that can be evaluated as a Python expression.""" return 'Vertex(%s)' % repr(self.label) __str__ = __repr__ """The str and repr forms of this object are the same.""" class Edge(tuple): """An Edge is a list of two vertices.""" def __new__(cls, *vs): """The Edge constructor takes two vertices.""" if len(vs) != 2: raise ValueError, 'Edges must connect exactly two vertices.' return tuple.__new__(cls, vs) def __repr__(self): """Return a string representation of this object that can be evaluated as a Python expression.""" return 'Edge(%s, %s)' % (repr(self[0]), repr(self[1])) __str__ = __repr__ """The str and repr forms of this object are the same.""" class Graph(dict): """A Graph is a dictionary of dictionaries. The outer dictionary maps from a vertex to an inner dictionary. The inner dictionary maps from other vertices to edges. For vertices a and b, graph[a][b] maps to the edge that connects a->b, if it exists.""" def __init__(self, vs=None, es=None): """Creates a new graph. vs: list of vertices; es: list of edges. """ if vs: for v in vs: self.add_vertex(v) if es: for e in es: self.add_edge(e) def set_edge_length(self, n=1): """Give each edge a length of n; this is used by the shortest_path_tree() method. """ for e in self.edges(): e.length = n def add_vertex(self, v): """Add a vertex to the graph.""" self[v] = {} def add_edge(self, e): """Adds and edge to the graph by adding an entry in both directions. If there is already an edge connecting these Vertices, the new edge replaces it. """ v, w = e self[v][w] = e self[w][v] = e def get_edge(self, v, w): """Returns the edge object that exists between the two vertices v & w, or None if no such edge exists. """ try: return self[v][w] except KeyError: return None def remove_edge(self, e): """Removes the edge e from the graph.""" v, w = e del self[v][w] del self[w][v] def vertices(self): """Returns a list of the vertices in the graph.""" return self.keys() def edges(self): """"Returns a list of the edges in the graph.""" edge_set = set() for d in self.itervalues(): edge_set.update(d.itervalues()) return list(edge_set) def out_vertices(self, v): """Returns a list of vertices that are adjacent to the given vertex v. """ return self[v].keys() def out_edges(self, v): """Returns a list of edges connected to a given vertex v.""" return self[v].values() def remove_all_edges(self): """Removes all edges in the graph.""" for v in self.iterkeys(): self[v] = {} def add_all_edges(self): """Makes the graph complete by adding edges between all pairs of vertices. """ # Clear all edges first self.remove_all_edges() # For each combination of 2 vertices, create an edge between them: for v, w in itertools.combinations(self.iterkeys(), 2): self.add_edge(Edge(v, w)) def add_regular_edges(self, k): """Makes the graph regular by making every vertex have k edges. It is not always possible to create a regular graph with a given degree. If a graph has n vertices, then a regular graph can be constructed with degree k if n >= k + 1 and n * k is even. If these conditions are not met a GraphError exception is raised. """ n = len(self.vertices()) if n < k + 1: raise GraphError("Can't make a regular graph with degree >= number" " of vertices") if (n * k) % 2 != 0: raise GraphError("Can't make a regular graph of degree k and" " order n where k * n is odd") # Remove all edges first self.remove_all_edges() if k % 2 != 0: # if k is odd self._add_regular_edges_even(k - 1) self._add_regular_edges_odd() else: self._add_regular_edges_even(k) def _add_regular_edges_even(self, k): """Make a regular graph with degree k. k must be even.""" vs = self.vertices() vs2 = vs * 2 for i, v in enumerate(vs): for j in range(1, k / 2 + 1): w = vs2[i + j] self.add_edge(Edge(v, w)) def _add_regular_edges_odd(self): """Adds an extra edge across the graph to finish off a regular graph with odd degree. The number of vertices must be even. """ vs = self.vertices() vs2 = vs * 2 n = len(vs) for i in range(n / 2): v = vs2[i] w = vs2[i + n / 2] self.add_edge(Edge(v, w)) def bfs(self, start, visit_func=None): """Perform a breadth first search starting at node start. The function visit_func, if supplied, is invoked on each node. The set of visited nodes is returned. """ visited = set() # Create a work queue consisting initially of the starting node queue = deque([start]) while queue: # retrieve first item from the queue v = queue.popleft() if v in visited: continue # Skip this one if we've seen it before # Mark it as visited and invoke user's function on it visited.add(v) if visit_func: visit_func(v) # Add the adjacent neigbors to the node to the queue queue.extend(c for c in self.out_vertices(v) if c not in visited) return visited def is_connected(self): """Returns True if the graph is connected (there is a path from every node to every other node) and False otherwise. """ vs = self.vertices() if len(vs): visited = self.bfs(vs[0]) # See if all nodes have been visited return len(vs) == len(visited) return False # Graph is empty def get_p(self): """This method returns a dictionary of probabilities where each key is the connectivity k and the value is the probability [0-1] for this graph. """ # First, for each vertex, count up how many neighbors it has vs = self.vertices() c = Counter() for v in vs: n = len(self.out_vertices(v)) c[n] += 1 n = len(vs) if n > 0: for k in c: c[k] = float(c[k]) / n return c def clustering_coefficient(self): """Compute the clustering coefficient for this graph as defined by Watts and Strogatz. """ cv = {} for v in self: # consider a node and its neighbors nodes = self.out_vertices(v) nodes.append(v) # compute the maximum number of possible edges between these nodes # if they were all connected to each other: n = len(nodes) if n == 1: # edge case of only 1 node; handle this case to avoid division # by zero in the general case cv[v] = 1.0 continue possible = n * (n - 1) / 2.0 # now compute how many edges actually exist between the nodes actual = 0 for x, y in itertools.combinations(nodes, 2): if self.get_edge(x, y): actual += 1 # the fraction of actual / possible is this nodes C sub v value cv[v] = actual / possible # The clustering coefficient is the average of all C sub v values if len(cv): return sum(cv.values()) / float(len(cv)) return 0.0 def shortest_path_tree(self, source, hint=None): """Finds the length of the shortest path from the source vertex to all other vertices in the graph. This length is stored on the vertices as an attribute named 'dist'. The algorithm used is Dijkstra's. hint: if provided, must be a dictionary mapping tuples to already known shortest path distances. This can be used to speed up the algorithm. """ if not hint: hint = {} for v in self.vertices(): v.dist = hint.get((source, v), INFINITY) source.dist = 0 queue = [v for v in self.vertices() if v.dist < INFINITY] sort_flag = True while len(queue): if sort_flag: queue.sort(key=lambda v: v.dist) sort_flag = False v = queue.pop(0) # for each neighbor of v, see if we found a new shortest path for w, e in self[v].iteritems(): d = v.dist + e.length if d < w.dist: w.dist = d queue.append(w) sort_flag = True def shortest_path_tree2(self, source): """Finds the length of the shortest path from the source vertex to all other vertices in the graph. This length is stored on the vertices as an attribute named 'dist'. The algorithm used is Dijkstra's with a Heap used to sort/store pending nodes to be examined. """ for v in self.vertices(): v.dist = INFINITY source.dist = 0 queue = [] heapq.heappush(queue, (0, source)) while len(queue): _, v = heapq.heappop(queue) # for each neighbor of v, see if we found a new shortest path for w, e in self[v].iteritems(): d = v.dist + e.length if d < w.dist: w.dist = d heapq.heappush(queue, (d, w)) def all_pairs_floyd_warshall(self): """Finds the shortest paths between all pairs of vertices using the Floyd-Warshall algorithm. http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm """ vertices = self.vertices() dist = {} for i in vertices: for j in vertices: if i is j: dist[i, j] = 0.0 else: e = self.get_edge(i, j) dist[i, j] = e.length if e else INFINITY for k in vertices: for i in vertices: for j in vertices: d_ik = dist[i, k] d_kj = dist[k, j] new_cost = d_ik + d_kj if new_cost < dist[i, j]: dist[i, j] = new_cost return dist def big_l(self): """Computes the "big-L" value for the graph as per Watts & Strogatz. L is defined as the number of edges in the shortest path between two vertices, averaged over all vertices. Uses the shortest_path_tree() method, called once for every node. """ d = {} for v in self.vertices(): self.shortest_path_tree(v, d) t = [((w, v), w.dist) for w in self.vertices() if v is not w] d.update(t) if len(d): return sum(d.values()) / float(len(d)) return 0.0 def big_l2(self): """Computes the "big-L" value for the graph as per Watts & Strogatz. L is defined as the number of edges in the shortest path between two vertices, averaged over all vertices. Uses the all_pairs_floyd_warshall() method. """ dist = self.all_pairs_floyd_warshall() vertices = self.vertices() result = [dist[i, j] for i in vertices for j in vertices if i is not j] if len(result): return sum(result) / float(len(result)) return 0.0 def big_l3(self): """Computes the "big-L" value for the graph as per Watts & Strogatz. L is defined as the number of edges in the shortest path between two vertices, averaged over all vertices. Uses the shortest_path_tree2() method, called once for every node. """ d = {} for v in self.vertices(): self.shortest_path_tree2(v) t = [((v, w), w.dist) for w in self.vertices() if v is not w] d.update(t) if len(d): return sum(d.values()) / float(len(d)) return 0.0 def main(script, *args): import pprint v = Vertex('v') print v w = Vertex('w') print w e = Edge(v, w) print e g = Graph([v,w], [e]) pprint.pprint(g) print "g.get_edge(v, w): ", g.get_edge(v, w) x = Vertex('x') print "g.get_edge(v, x): ", g.get_edge(v, x) g.remove_edge(e) pprint.pprint(g) print "vertices: ", g.vertices() print "edges: ", g.edges() g.add_edge(e) u = Vertex('u') e1 = Edge(u, v) e2 = Edge(u, w) g.add_vertex(u) g.add_edge(e1) g.add_edge(e2) print "Adding vertex u and edges:" pprint.pprint(g) print "vertices: ", g.vertices() print "edges: ", g.edges() print "Out vertices for v: ", g.out_vertices(v) print "Out edges for v: ", g.out_edges(v) x = Vertex('x') g.add_vertex(x) g.add_all_edges() pprint.pprint(g) print "g is connected?", g.is_connected() edges = g.out_edges(v) for e in edges: g.remove_edge(e) pprint.pprint(g) print "g is connected?", g.is_connected() # Isolate v and check is_connected() again if __name__ == '__main__': import sys main(*sys.argv)