view ch4ex2.py @ 42:039249efe42f

Chapter 6, exercise 2, #4. Wrote a program to output the center column of a rule 30 CA as a stream of bytes. It is very slow though. It has to run a very long time to produce enough data for dieharder. Committing it now but will have to let it run overnight or something to generate a large file.
author Brian Neal <bgneal@gmail.com>
date Sun, 13 Jan 2013 16:24:00 -0600
parents 84e183b40c63
children
line wrap: on
line source
"""Section 4.2, exercise 2.

"The following implementation of a BFS contains two performance errors. What are
they? What is the actual order of growth for this algorithm?

def bfs(top_node, visit):
    'Breadth-first search on a graph, starting at top_node.'
    visited = set()
    queue = [top_node]
    while len(queue):
        curr_node = queue.pop(0)    # Dequeue
        visit(curr_node)            # Visit the node
        visited.add(curr_node)

        # Enqueue non-visited and non-enqueued children
        queue.extend(c for c in curr_node.children
                     if c not in visited and c not in queue)

Test this code with a range of graph sizes and check your analysis. Then use
a FIFO implementation to fix the errors and confirm that your algorithm is
linear."

----

I think the 2 performance errors are:
    1) Using a list as the queue, and popping element 0.
    2) Doing a linear search to see if the child node is already in the queue.

To characterize this bfs, I'll inherit from my Graph class and add
a flawed_bfs() method which implements the above. I'll then plot both the
flawed_bfs and the bfs method I wrote earlier.

This program tests complete graphs. The flawed_bfs is plotted along with a curve
of slope 3, which fits well, and shows that this search is O(|V|^3). Next, the bfs
method is plotted with a curve of slope 2, which fits well. This shows that bfs
is quadratic, or O(|V|^2), on a complete graph, which is what we expect.

"""
import timeit
import matplotlib.pyplot as pyplot

from Graph import Graph, Vertex


class FlawedGraph(Graph):

    def flawed_bfs(self, start, visit=None):
        """A breadth-first search with performance problems."""
        visited = set()
        queue = [start]
        while len(queue):
            curr_node = queue.pop(0)
            if visit:
                visit(curr_node)
            visited.add(curr_node)

            queue.extend(c for c in self.out_vertices(curr_node)
                            if c not in visited and c not in queue)

GRAPH = None
METHOD = None
V = None


def test_graph_search():

    if METHOD == 'flawed_bfs':
        GRAPH.flawed_bfs(V)
    else:
        GRAPH.bfs(V)


def test_func():

    elapsed = timeit.timeit('test_graph_search()',
        setup='from __main__ import test_graph_search', number=1)
    return elapsed


def test(method):
    """Tests the given function with a range of values for n.

    Returns a list of ns and a list of run times.
    """
    global GRAPH, METHOD, V

    METHOD = method
    factor = 10

    # test (f) over a range of values for (n)
    ns = []
    ts = []
    for i in range(10, 50):
        n = factor * i

        GRAPH = FlawedGraph()
        V = None
        # Add n vertices and connect them
        for i in range(n):
            v = Vertex()
            if not V:
                V = v
            GRAPH.add_vertex(v)

        GRAPH.add_all_edges()   # make g a complete graph

        t = test_func()

        print n, t
        ns.append(n)
        ts.append(t)

    return ns, ts


def plot(ns, ts, label, color='blue', exp=1.0):
    """Plots data and a fitted curve.

    ns: sequence of n (problem size)
    ts: sequence of t (run time)
    label: string label for the data curve
    color: string color for the data curve
    exp: exponent (slope) for the fitted curve
    """
    tfit = fit(ns, ts, exp)
    pyplot.plot(ns, tfit, color='0.7', linewidth=2, linestyle='dashed')
    pyplot.plot(ns, ts, label=label, color=color, linewidth=3)


def fit(ns, ts, exp=1.0, index=-1):
    """Fits a curve with the given exponent.

    Use the given index as a reference point, and scale all other
    points accordingly.
    """
    nref = ns[index]
    tref = ts[index]

    tfit = []
    for n in ns:
        ratio = float(n) / nref
        t = ratio**exp * tref
        tfit.append(t)

    return tfit


def make_fig(funcs):
    pyplot.clf()
    pyplot.xscale('log')
    pyplot.yscale('log')
    pyplot.title('')
    pyplot.xlabel('n')
    pyplot.ylabel('run time (s)')

    colors = ['red', 'green']

    for func, color in zip(funcs, colors):
        data = test(func)
        exp = 3.0 if func == 'flawed_bfs' else 2.0
        plot(*data, label=func, color=color, exp=exp)

    pyplot.legend(loc=4)

    pyplot.show()


def main(script):
    make_fig(['flawed_bfs', 'bfs'])


if __name__ == '__main__':
    import sys
    main(*sys.argv)