#! /usr/bin/python

from Numeric import *
verbose = 0

#                                        (c) David MacKay  Dec 2005
#  - writes a huffman code               This is Free Software. License: GPL
# usage:
# $        ./huffman.py
# (the counts vector is hard-coded in the program)

# define objects that contain count information (inputs)
class node:
    def __init__(self, count, index , name="" ):
        self.count = count
        self.index = index
        self.name  = name
        if self.name=="" : self.name = index
    def __cmp__(self, other):
        return cmp(self.count, other.count)
    def report(self):
        if verbose: 
            print '%3d (%s) %2.2g' % (self.index,self.name,self.count)
        else:
            print '%s\t(%2.2g)' % (self.name,self.count)

# define objects that contain codewords (outputs)
## I make this inherit from node, but I overwrite the cmp operator
class nodeword(node):
    def __init__(self, simplenode , word ):
        self.index = simplenode.index
        self.count = simplenode.count
        self.name  = simplenode.name  ## can I just say self = simplenode? Apparently not!
        self.word = word
    def __cmp__(self, other):
        return cmp(self.index, other.index)
    def __repr__(self): ## not used
        return self.index
    def __str__(self): ## not used
        return self.index
    def report(self):
        if verbose : 
            print '%3d (%2.2g)\t%s' % (self.index,self.count,self.word)
        else:
            print '%s\t(%2.2g)\t%s' % (self.name,self.count,self.word)

def find(f, seq):
  """Return first item in sequence where f(item) == True."""
  for item in seq:
    if f(item): 
      return item

## recursive function that reads in a count list and returns the codeword list
## The object c is destroyed as we go; the returned list of codewords 
##  should retain copies of all the lost information
def iterate (c , m) : 
## c is the list of values and associated numbers; m is the number of 
## nodes remaining, which must be at least 1.

    c.sort() ## sort the nodes by count, using the __cmp__ function defined in the node class

    if ( m > 1 ) :
        first  = c[0].index ## the index of the smallest node
        second = c[1].index ##       and second smallest
        bot =    c[0].count  ## the count in the bottom

        # MERGE THE BOTTOM TWO
        c[1].count += bot  ## Notice that this merged node retains the name of the bigger leaf.
        deletednode = c[0] ## keep a copy so that we can put it back into the returned answer
        del c[0]

        if verbose :
            # report what has been done
            print "combining",first,second
            for v in c :
                v.report() 

	codewords = iterate ( c , m-1 )
        ## find the codeword that has been split/joined at this step
        ## used trick from http://naeblis.cx/rtomayko/2004/09/13/cleanest-python-find-in-list-function
        co = find(lambda p: p.index == second, codewords)
        thestring = co.word
        co.word += '1'
        co.count -= bot 
        codewords.append( nodeword( deletednode, thestring+'0' ) )  
            
    else :
        ## we have been passed a signle object. We can give it the
        ## zero-length codeword, "" .
        codewords = [ nodeword( c[0] , "" ) ]
        
    return codewords
## that's all for iterate

counts = [ 10, 5, 2, 3 ]
counts = [ 1,2,3,4,5,6,7,8,9,10 ]

c=[]
m=0
for co in counts :
    m += 1
    c.append( node( co, m ) )
## equivalent to, for example:  
## c = [ node( 10 , 1 ) , node( 5 , 2 ) , node( 2 , 3 ) , node( 3 , 4 ) ] 
        
## main program starts here
M = len(c)
if verbose : 
    print  M

if (M <= 1):
    print "too few symbols\n"
    exit(0)

#
# make huffman code
#
answer = iterate ( c , M ) 

# write the answer
answer.sort()
if verbose : print "Done ========= "
for co in answer :
    co.report()
# end


