#!/usr/bin/env python
"""
Lempel-Ziv code for compression

This simple version of Lempel-Ziv does no dictionary management and
is inefficient in several ways. See MacKay (2003) Chapter 6. page 119

http://www.aims.ac.za/~mackay/itila/

  LZ.py is free software (c) David MacKay December 2005. License: GPL
"""
## For license statement see  http://www.gnu.org/copyleft/gpl.html

from IntegerCodes import *

verbose=0
class node:
    def __init__(self,substring,pointer):
        self.child = []
        self.substring = substring
        self.pointer = pointer
        if verbose:
            print pointer, " is pointer for ", `substring`
        pass

def find(f, seq):
    """Return first item in sequence where f(item) == True."""
    for item in seq:
        if f(item): 
            return item
    return None

def encode ( c, pretty=1 ): ## c is a list of characters (0/1) ; p is whether to print out prettily
    """
    Encode using Lempel-Ziv, as in MacKay (2003) Chapter 6. page 119
    Pretty printing
    >>> print encode(list("000000000000100000000000"),1)
    (,0)(1,0)(10,0)(11,0)(010,1)(100,0)(110,0)

    Normal printing
    >>> print encode(list("000000000000100000000000"),0)
    010100110010110001100
    """
    output =[]
    
    #initialize dictionary
    dic = [] 
    dic.append( node("",0) )
    
    while(len(c)>0): # point G
        substring = "";         latest = -1
        # read bits from c until we DON'T get a match with the dictionary
        while(len(c)>=0):
            ans = find( lambda p: p.substring == substring , dic )
            if ((ans == None) or (len(c)==0)):
                assert latest != -1 ## we should have gone round this loop once already
                # print out prevanswer's pointer and latest bit
                digits = ceillog ( len(dic) )
                output.append( printout( dec_to_bin( prevanswer.pointer , digits ) , latest ,pretty) )
                # append new string to dictionary
                dic.append( node(substring, len(dic) ) )
                break # go back to G
            else:
                prevanswer = ans
                latest=c.pop(0)
                substring = substring+latest
            pass
        pass
    return "".join(output)

def printout( pointerstring, latest , pretty=1):
    if pretty:
        return "("+pointerstring+","+latest+")"
    else:
        return pointerstring+latest

def decode( c ):
    """
    >>> print decode(list("100011101100001000010"))
    1011010100010
    """
    output = [] 
    #initialize dictionary
    dic = [] 
    dic.append( node("",0) )
    while(len(c)>0):
        digits = ceillog ( len(dic) )
        pointer = bin_to_dec( c , digits )
        # find the dictionary entry with that pointer
        ans = find( lambda p: p.pointer == pointer , dic )
        substring = ans.substring
        output.append( substring )
        if (len(c)>0):
            latest=c.pop(0)
            output.append( latest )
            substring = substring+latest
            dic.append( node(substring, len(dic) ) )
            pass
        pass
    return "".join(output)
            
def test():
    print "encoding examples:"
    examples = [ "101101010001000000", "000000000000100000000000" ]            
    for ex in examples :
        print ex, encode( list(ex) )

    print "decoding examples:"
    examples = [ "100011101100001000010"]
    for ex in examples :
        print ex, decode( list(ex) )

    import doctest
    verbose=1
    if(verbose):
        doctest.testmod(None,None,None,True)
    else:
        doctest.testmod()
        pass

if __name__ == '__main__': test()

