view Graph.py @ 36:305cc03c2750

Chapter 5.5, exercise 6, #3: compute WS clustering coefficient and characteristic length on a BA model graph.
author Brian Neal <bgneal@gmail.com>
date Thu, 10 Jan 2013 19:24:02 -0600
parents 10db8c3a6b83
children
line wrap: on
line source
""" Code example from Complexity and Computation, a book about
exploring complexity science with Python.  Available free from

http://greenteapress.com/complexity

Copyright 2011 Allen B. Downey.
Distributed under the GNU General Public License at gnu.org/licenses/gpl.html.
"""
import heapq
import itertools
from collections import deque, Counter


# Sentinel distance used for vertices (or vertex pairs) with no known
# connecting path; it compares greater than every finite path length.
INFINITY = float('Inf')


class GraphError(Exception):
    """Raised when a requested Graph operation cannot be carried out."""


class Vertex(object):
    """A node in a graph, identified by an arbitrary label."""

    def __init__(self, label=''):
        self.label = label

    def __repr__(self):
        """Return a string that, evaluated as a Python expression,
        rebuilds this Vertex."""
        return 'Vertex(%r)' % (self.label,)

    # str() and repr() deliberately produce the same text.
    __str__ = __repr__


class Edge(tuple):
    """An Edge is an immutable pair (a 2-tuple) of vertices.

    Because Edge subclasses tuple, it can be unpacked (``v, w = e``),
    hashed and compared like any other tuple. Instances still accept extra
    attributes; Graph.set_edge_length() stores a ``length`` on each edge.
    """

    def __new__(cls, *vs):
        """Create an edge from exactly two vertices.

        Raises ValueError if any other number of vertices is supplied.
        """
        if len(vs) != 2:
            # Call form of raise: valid on both Python 2 and Python 3.
            raise ValueError('Edges must connect exactly two vertices.')
        return tuple.__new__(cls, vs)

    def __repr__(self):
        """Return a string representation of this object that can
        be evaluated as a Python expression."""
        return 'Edge(%r, %r)' % (self[0], self[1])

    # str() and repr() deliberately produce the same text.
    __str__ = __repr__


class Graph(dict):
    """A Graph is a dictionary of dictionaries.  The outer
    dictionary maps from a vertex to an inner dictionary.
    The inner dictionary maps from other vertices to edges.

    For vertices a and b, graph[a][b] maps
    to the edge that connects a->b, if it exists."""

    def __init__(self, vs=None, es=None):
        """Creates a new graph.
        vs: list of vertices;
        es: list of edges.
        """
        if vs:
            for v in vs:
                self.add_vertex(v)

        if es:
            for e in es:
                self.add_edge(e)

    def set_edge_length(self, n=1):
        """Give each edge a length of n; this is used by the
        shortest_path_tree() method.

        """
        for e in self.edges():
            e.length = n

    def add_vertex(self, v):
        """Add a vertex to the graph."""
        self[v] = {}

    def add_edge(self, e):
        """Adds and edge to the graph by adding an entry in both directions.

        If there is already an edge connecting these Vertices, the
        new edge replaces it.
        """
        v, w = e
        self[v][w] = e
        self[w][v] = e

    def get_edge(self, v, w):
        """Returns the edge object that exists between the two vertices v & w,
        or None if no such edge exists.

        """
        try:
            return self[v][w]
        except KeyError:
            return None

    def remove_edge(self, e):
        """Removes the edge e from the graph."""

        v, w = e
        del self[v][w]
        del self[w][v]

    def vertices(self):
        """Returns a list of the vertices in the graph."""

        return self.keys()

    def edges(self):
        """"Returns a list of the edges in the graph."""

        edge_set = set()
        for d in self.itervalues():
            edge_set.update(d.itervalues())

        return list(edge_set)

    def out_vertices(self, v):
        """Returns a list of vertices that are adjacent to the given vertex v.

        """
        return self[v].keys()

    def out_edges(self, v):
        """Returns a list of edges connected to a given vertex v."""

        return self[v].values()

    def remove_all_edges(self):
        """Removes all edges in the graph."""
        for v in self.iterkeys():
            self[v] = {}

    def add_all_edges(self):
        """Makes the graph complete by adding edges between all pairs of
        vertices.

        Any existing edges are discarded first, so the graph ends up with
        exactly one new Edge object per unordered pair of vertices.

        """
        # Clear all edges first
        self.remove_all_edges()

        # For each combination of 2 vertices, create an edge between them.
        # Iterating the graph yields its vertices on both Python 2 and 3
        # (iterkeys() is Python 2 only).
        for v, w in itertools.combinations(self, 2):
            self.add_edge(Edge(v, w))

    def add_regular_edges(self, k):
        """Replace the graph's edges so that every vertex has degree k.

        A regular graph of degree k on n vertices is built only when
        n >= k + 1 and n * k is even; otherwise a GraphError is raised and
        the existing edges are left untouched.

        Even degrees are handled by _add_regular_edges_even(). For an odd
        degree, a regular graph of degree k - 1 is built first and
        _add_regular_edges_odd() then adds one more edge per vertex.

        """
        order = len(self.vertices())
        if order < k + 1:
            raise GraphError("Can't make a regular graph with degree >= number"
                             " of vertices")
        if (order * k) % 2:
            raise GraphError("Can't make a regular graph of degree k and"
                             " order n where k * n is odd")

        # Start from a graph without edges
        self.remove_all_edges()

        odd_degree = bool(k % 2)
        self._add_regular_edges_even(k - 1 if odd_degree else k)
        if odd_degree:
            self._add_regular_edges_odd()

    def _add_regular_edges_even(self, k):
        """Make a regular graph with degree k. k must be even.

        The vertices are treated as a ring (in iteration order) and each
        vertex is joined to the k / 2 vertices that follow it; the k / 2
        vertices that precede it supply the remaining edges.
        """
        # Materialise a list so the vertices can be indexed on Python 3 too.
        vs = list(self.vertices())
        n = len(vs)

        for i, v in enumerate(vs):
            # Floor division keeps the range bound an int on Python 3.
            for j in range(1, k // 2 + 1):
                w = vs[(i + j) % n]      # wrap around the ring
                self.add_edge(Edge(v, w))

    def _add_regular_edges_odd(self):
        """Adds an extra edge across the graph to finish off a regular graph
        with odd degree. The number of vertices must be even.

        Vertex i (in iteration order) is joined to vertex i + n / 2, i.e.
        the vertex directly opposite it when the vertices form a ring.

        """
        # Materialise a list so the vertices can be sliced on Python 3 too.
        vs = list(self.vertices())
        # Floor division keeps the slice index an int on Python 3.
        half = len(vs) // 2

        for v, w in zip(vs[:half], vs[half:]):
            self.add_edge(Edge(v, w))

    def bfs(self, start, visit_func=None):
        """Perform a breadth first search starting at node start.

        The function visit_func, if supplied, is invoked on each node.

        The set of visited nodes is returned.

        """
        visited = set()

        # Create a work queue consisting initially of the starting node
        queue = deque([start])

        while queue:
            # retrieve first item from the queue
            v = queue.popleft()

            if v in visited:
                continue            # Skip this one if we've seen it before

            # Mark it as visited and invoke user's function on it
            visited.add(v)
            if visit_func:
                visit_func(v)

            # Add the adjacent neigbors to the node to the queue
            queue.extend(c for c in self.out_vertices(v) if c not in visited)

        return visited

    def is_connected(self):
        """Returns True if the graph is connected (there is a path from every
        node to every other node) and False otherwise.

        """
        vs = self.vertices()
        if len(vs):
            visited = self.bfs(vs[0])
            # See if all nodes have been visited
            return len(vs) == len(visited)

        return False        # Graph is empty

    def get_p(self):
        """This method returns a dictionary of probabilities where each key is
        the connectivity k and the value is the probability [0-1] for this
        graph.

        """
        # First, for each vertex, count up how many neighbors it has
        vs = self.vertices()

        c = Counter()
        for v in vs:
            n = len(self.out_vertices(v))
            c[n] += 1

        n = len(vs)
        if n > 0:
            for k in c:
                c[k] = float(c[k]) / n

        return c

    def clustering_coefficient(self):
        """Compute the clustering coefficient for this graph as defined by Watts
        and Strogatz.

        """
        cv = {}
        for v in self:
            # consider a node and its neighbors
            nodes = self.out_vertices(v)
            nodes.append(v)

            # compute the maximum number of possible edges between these nodes
            # if they were all connected to each other:
            n = len(nodes)
            if n == 1:
                # edge case of only 1 node; handle this case to avoid division
                # by zero in the general case
                cv[v] = 1.0
                continue

            possible = n * (n - 1) / 2.0

            # now compute how many edges actually exist between the nodes
            actual = 0
            for x, y in itertools.combinations(nodes, 2):
                if self.get_edge(x, y):
                    actual += 1

            # the fraction of actual / possible is this nodes C sub v value
            cv[v] = actual / possible

        # The clustering coefficient is the average of all C sub v values
        if len(cv):
            return sum(cv.values()) / float(len(cv))
        return 0.0

    def shortest_path_tree(self, source, hint=None):
        """Finds the length of the shortest path from the source vertex to all
        other vertices in the graph. This length is stored on the vertices as an
        attribute named 'dist'; vertices that cannot be reached keep a dist
        of INFINITY. Nothing is returned.

        The algorithm is Dijkstra-style edge relaxation driven by a work
        list that is re-sorted by distance whenever a distance improves.
        Every edge must have a 'length' attribute (see set_edge_length()).

        hint: if provided, must be a dictionary mapping (source, vertex)
        tuples to already known shortest path distances. This can be used
        to speed up the algorithm.

        """
        if not hint:
            hint = {}

        for v in self.vertices():
            v.dist = hint.get((source, v), INFINITY)
        source.dist = 0

        # Start from every vertex whose distance is already known
        queue = [v for v in self.vertices() if v.dist < INFINITY]
        sort_flag = True
        while queue:

            if sort_flag:
                queue.sort(key=lambda v: v.dist)
                sort_flag = False

            v = queue.pop(0)

            # for each neighbor of v, see if we found a new shortest path;
            # items() exists on both Python 2 and 3 (iteritems() does not)
            for w, e in self[v].items():
                d = v.dist + e.length
                if d < w.dist:
                    w.dist = d
                    queue.append(w)
                    sort_flag = True

    def shortest_path_tree2(self, source):
        """Finds the length of the shortest path from the source vertex to all
        other vertices in the graph. This length is stored on the vertices as an
        attribute named 'dist'; vertices that cannot be reached keep a dist
        of INFINITY. Nothing is returned.

        The algorithm used is Dijkstra's with a Heap used to sort/store
        pending nodes to be examined. Every edge must have a 'length'
        attribute (see set_edge_length()).

        """
        for v in self.vertices():
            v.dist = INFINITY
        source.dist = 0

        # Heap entries are (distance, sequence number, vertex). The sequence
        # number breaks ties between equal distances so that the vertices
        # themselves are never compared (they define no ordering).
        sequence = itertools.count()
        queue = [(0, next(sequence), source)]
        while queue:

            d, _, v = heapq.heappop(queue)
            if d > v.dist:
                # Stale entry: a shorter path to v was found after this
                # entry was pushed, and v was already processed with it.
                continue

            # for each neighbor of v, see if we found a new shortest path;
            # items() exists on both Python 2 and 3 (iteritems() does not)
            for w, e in self[v].items():
                d_w = v.dist + e.length
                if d_w < w.dist:
                    w.dist = d_w
                    heapq.heappush(queue, (d_w, next(sequence), w))

    def all_pairs_floyd_warshall(self):
        """Finds the shortest paths between all pairs of vertices using the
        Floyd-Warshall algorithm.

        http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm

        """
        vertices = self.vertices()
        dist = {}
        for i in vertices:
            for j in vertices:
                if i is j:
                    dist[i, j] = 0.0
                else:
                    e = self.get_edge(i, j)
                    dist[i, j] = e.length if e else INFINITY

        for k in vertices:
            for i in vertices:
                for j in vertices:
                    d_ik = dist[i, k]
                    d_kj = dist[k, j]
                    new_cost = d_ik + d_kj
                    if new_cost < dist[i, j]:
                        dist[i, j] = new_cost

        return dist

    def big_l(self):
        """Computes the "big-L" value for the graph as per Watts & Strogatz.

        L is defined as the number of edges in the shortest path between
        two vertices, averaged over all vertices.

        Uses the shortest_path_tree() method, called once for every node.

        """
        d = {}
        for v in self.vertices():
            self.shortest_path_tree(v, d)
            t = [((w, v), w.dist) for w in self.vertices() if v is not w]
            d.update(t)

        if len(d):
            return sum(d.values()) / float(len(d))
        return 0.0

    def big_l2(self):
        """Computes the "big-L" value for the graph as per Watts & Strogatz.

        L is defined as the number of edges in the shortest path between
        two vertices, averaged over all vertices.

        Uses the all_pairs_floyd_warshall() method.

        """
        dist = self.all_pairs_floyd_warshall()
        vertices = self.vertices()
        result = [dist[i, j] for i in vertices for j in vertices if i is not j]

        if len(result):
            return sum(result) / float(len(result))
        return 0.0

    def big_l3(self):
        """Computes the "big-L" value for the graph as per Watts & Strogatz.

        L is defined as the number of edges in the shortest path between
        two vertices, averaged over all vertices.

        Uses the shortest_path_tree2() method, called once for every node.

        """
        d = {}
        for v in self.vertices():
            self.shortest_path_tree2(v)
            t = [((v, w), w.dist) for w in self.vertices() if v is not w]
            d.update(t)

        if len(d):
            return sum(d.values()) / float(len(d))
        return 0.0


def main(script, *args):
    """Small demonstration of the Vertex, Edge and Graph classes.

    Builds a tiny graph, modifies it step by step and prints the state
    after each step.

    script: name of the running script (unused);
    args: remaining command-line arguments (unused).
    """
    import pprint

    def show(label, value):
        # Produces the same text as the Python 2 statement
        # `print label, value`, spelled so it is also valid on Python 3.
        print('%s %s' % (label, value))

    v = Vertex('v')
    print(v)
    w = Vertex('w')
    print(w)
    e = Edge(v, w)
    print(e)
    g = Graph([v, w], [e])
    pprint.pprint(g)

    show("g.get_edge(v, w): ", g.get_edge(v, w))
    x = Vertex('x')
    show("g.get_edge(v, x): ", g.get_edge(v, x))

    g.remove_edge(e)
    pprint.pprint(g)

    show("vertices: ", list(g.vertices()))
    show("edges: ", list(g.edges()))

    g.add_edge(e)
    u = Vertex('u')
    e1 = Edge(u, v)
    e2 = Edge(u, w)
    g.add_vertex(u)
    g.add_edge(e1)
    g.add_edge(e2)
    print("Adding vertex u and edges:")
    pprint.pprint(g)
    show("vertices: ", list(g.vertices()))
    show("edges: ", list(g.edges()))

    show("Out vertices for v: ", list(g.out_vertices(v)))
    show("Out edges for v: ", list(g.out_edges(v)))

    x = Vertex('x')
    g.add_vertex(x)
    g.add_all_edges()
    pprint.pprint(g)

    show("g is connected?", g.is_connected())

    # Isolate v and check is_connected() again. Iterate over a snapshot of
    # v's edges, because remove_edge() modifies v's adjacency dictionary.
    edges = list(g.out_edges(v))
    for e in edges:
        g.remove_edge(e)
    pprint.pprint(g)
    show("g is connected?", g.is_connected())


# Run the demonstration when this file is executed as a script: sys.argv[0]
# becomes main()'s `script` argument and the remaining items become *args.
if __name__ == '__main__':
    import sys
    main(*sys.argv)