#!/usr/bin/env python
"""
This is a compression algorithm for compressing
files containing the 4 symbols {a,b,c,d}.
The assumed probabilities are {0.5,0.25,0.125,0.125}.

This example uses the Huffman package to create its Huffman code
and to handle encoding and decoding.

If you run this file from within emacs with C-c C-c,
it runs test(), which calls easytest() and then the module's doctests.

The package can also be used directly from a shell to compress
or uncompress data read from stdin; the result is written to stdout.

The default behaviour is compression; to get uncompression,
give an additional argument (for example --uncompress).

Usage: at shell prompt - compression first, then uncompression
    $ echo -n "aaaabbcd" > Example.txt
    $ python Example.py              < Example.txt > Example.zip
    $ python Example.py --uncompress < Example.zip > Example.unc
    $ diff Example.unc Example.txt
"""

## /home/mackay/python/compression/huffman/Huffman3.py
## This supplies the huffman algorithm, complete with encoders and decoders:
from Huffman3  import  *
# Module-level verbosity flag. No code in this file reads this module-level
# name. NOTE(review): it may be kept for importers or interactive use - confirm
# before removing.
verbose=0

def easytest():
    """
    Build the Huffman code for {a,b,c,d} and round-trip a short message.

    Creates the symbol nodes for the probabilities {0.5,0.25,0.125,0.125},
    builds the code with iterate(), reports every symbol in index order,
    then prints the encoding of the list aabacbcd followed by the result
    of decoding that encoding again.

    >>> easytest()            # doctest: +NORMALIZE_WHITESPACE
    #Symbol	Count	Codeword
    a	(0.5)	1
    b	(0.25)	01
    c	(0.12)	000
    d	(0.12)	001
    1101100001000001
    ['a', 'a', 'b', 'a', 'c', 'b', 'c', 'd']
    """
    probs = [('a', 0.5), ('b', 0.25), ('c', 0.125), ('d', 0.125)]
    symbols = makenodes(probs)  # nodes for use when encoding
    # iterate() makes the huffman code, stores it in the symbols' nodes and
    # returns the root of the decoding tree.
    root = iterate(symbols)

    # Sort by index using a key function: same (stable) ordering as the old
    # cmp-style comparator, without relying on the Python-2-only cmp().
    symbols.sort(key=lambda node: node.index)
    for node in symbols:
        node.report()

    message = ['a', 'a', 'b', 'a', 'c', 'b', 'c', 'd']
    zipped = encode(message, symbols)
    # Single-argument parenthesised print behaves the same as the print
    # statement on Python 2.
    print(zipped)
    answer = decode(zipped, root)
    print(answer)


def compress_it( inputfile, outputfile ):
    """
    Encode everything read from inputfile and write the result to outputfile.

    The Huffman code is built for the symbols {a,b,c,d} with the assumed
    probabilities {0.5,0.25,0.125,0.125}.

    Usage: at shell prompt - compression first, then uncompression
    $ echo -n "aaaabbcd" > Example.txt
    $ python Example.py              < Example.txt > Example.zip
    $ python Example.py --uncompress < Example.zip > Example.unc
    $ diff Example.unc Example.txt
    """
    alphabet = makenodes([('a',0.5), ('b',0.25), ('c',0.125), ('d',0.125)])
    # iterate() is called so that the huffman code is put into the nodes;
    # the decoding-tree root it returns is not needed for encoding.
    iterate(alphabet)

    text = inputfile.read()
    encoded = encode(list(text), alphabet)  # encode one character at a time
    outputfile.write(encoded)

def uncompress_it( inputfile, outputfile ):
    """
    Decode everything read from inputfile and write the result to outputfile.

    The same Huffman code as in compress_it() is rebuilt (symbols {a,b,c,d},
    assumed probabilities {0.5,0.25,0.125,0.125}) and its decoding tree is
    used to decode the input.

    Usage: at shell prompt - compression first, then uncompression
    $ echo -n "aaaabbcd" > Example.txt
    $ python Example.py              < Example.txt > Example.zip
    $ python Example.py --uncompress < Example.zip > Example.unc
    $ diff Example.unc Example.txt
    """
    nodes = makenodes([('a',0.5), ('b',0.25), ('c',0.125), ('d',0.125)])
    tree = iterate(nodes)  # root of the decoding tree

    encoded = inputfile.read()
    decoded_symbols = decode(encoded, tree)
    # Join the decoded symbols with the empty string as separator.
    outputfile.write(''.join(decoded_symbols))

def test():
    """
    Run easytest() once directly, then run this module's doctests verbosely.

    The doctest run is always verbose. An earlier version set a local
    flag to 1 immediately before testing it, so its non-verbose branch could
    never execute; that dead branch has been removed.
    """
    easytest()
    import doctest
    # Equivalent to the former positional call testmod(None, None, None, True).
    doctest.testmod(verbose=True)

if __name__ == '__main__':
    import sys
    # Three ways of being invoked:
    #   argv == ['']     -> probably C-c C-c in emacs: run the tests
    #   no extra args    -> COMPRESS stdin to stdout (the default)
    #   any extra args   -> UNCOMPRESS stdin to stdout
    if sys.argv == ['']:
        test()
    elif len(sys.argv) == 1:
        print >> sys.stderr, "Compressing"
        compress_it(sys.stdin, sys.stdout)
    else:
        print >> sys.stderr,  "UNCompressing"
        uncompress_it(sys.stdin, sys.stdout)

    

