{-
    The HuffmanTree class, which implements a Huffman tree algorithm for
    the compression of text. The algorithm comes from:
    
    http://q-lang.sourceforge.net/examples/huffman.q
    
    It was translated in dodo to illustrate functional style programming.
    Usage:
      def message = "Alice in Wonderland"
      def H = HuffmanTree.newWithWeights(weights(message))
      def code = H.encode(message)
      console!out.Printf("%d bits: %s", code.count, H.decode(code))
    
    Original header:
    /* huffman.q: Huffman encoding trees */

    /* written by Albert Graef, 05-08-1993 (see also Abelson/Sussman: Structure
       and Interpretation of Computer Programs, MIT Press, 1985)
       revised 11-26-1993, 3-27-1997, 12-19-2000, 03-02-2002, 01-30-04 AG */

    /* 

    NOTE: Alphabets should consist of two symbols at least. Otherwise the single
    symbol will be encoded as the empty list, which leads to infinite recursion
    when decoded. This could be considered a bug. ;-)

    For instance, try the following:

    ==> def message = chars "Alice in Wonderland"
    ==> def H = huffman_tree (weights message)
    ==> def code = encode H message
    ==> strcat (decode H code)

    A more amusing example is perhaps the following exercise from Abelson/Sussman
    1985, p. 125:

    "The following eight-symbol alphabet with associated relative frequencies
    was designed to efficiently encode the lyrics of 1950s rock songs. (Note
    that the "symbols" of an "alphabet" need not be individual letters.)

    A       2           NA      16
    BOOM    1           SHA     3
    GET     2           YIP     10
    JOB     2           WAH     1

    Generate a corresponding Huffman tree, and use it to encode the following
    message:

    Get a job
    Sha na na na na na na na na
    Get a job
    Sha na na na na na na na na
    Wah yip yip yip yip yip yip yip yip
    Sha boom

    How many bits are required for the encoding? What is the smallest number
    of bits that would be needed to encode this song if we used a fixed-length
    code for the eight-symbol alphabet?"

    */
-}
__Module__ huffmantree
__Header__

    class HuffmanTree
    {
        # The decoding function. It takes as its arguments a
        # list of zeros and ones, and reconstructs the original message.
        String decode(flag[])
        
        # The encoding function. It takes as its arguments a
        # list of symbols, and returns the coded message.
        flag[] encode(char[])
        
        # Construct a Huffman tree, starting from a list of
        # (symbol,weight) pairs.
        Self HuffmanTree.newWithWeights(XW[])
    }

    struct XW
    
    # Determine the (symbol,weight) pairs for a particular message
    XW[] weights(char[])

__Main__
{- 
   Huffman encoding trees are represented as binary trees whose leaves
   (represented by the tip symbol) carry individual symbols and their
   weights (frequencies), and whose interior nodes (represented with the bin
   symbol) store the sets of symbols (represented as lists) found in the
   corresponding subtrees, together with the corresponding weights (which are
   the sums of the subtree weights). The nil symbol denotes an empty tree.
-}

    HuffmanTree nil

    HuffmanTree[] leafSet(HW[] xws) =
        addTree(`from HTtip.newWithXW(`all xws`)`, `to []`)

    HuffmanTree[] addTree(HuffmanTree h1, HuffmanTree[] hs) =
        if (hs.empty) [h1]
        else hs[1] -> h2
            if (h1.weight > h2.weight) [h2] + addTree(h1, hs[2+])
            else                       [h1] + hs;

    class HuffmanTree
    {
        char[] syms() #empty
        int weight()  #zero
        String decode(flag[] bs) = decode1(self, bs)
        flag[] encode(char[] s) = encode1(`from s`) + `to []`
        Self HuffmanTree.newWithWeights(XW[] xws) =
            newWithLeafSet(leafSet(xws))

        String decode1(HuffmanTree h, flag[] bs) =
            if (bs.empty) ""
            else throw(IllegalArgument.newWithMessage(
                "Too many bits"))
        flag[] encode1(char x) =
            throw(IllegalArgument.newWithMessage(
                "Cannot encode with empty tree"))
        Self HuffmanTree.newWithLeafSet(HuffmanTree[] hs) = (hs ~ 
            `match []` ->
                nil
            | `match [$h]` ->
                h
            | `match [$h1, $h2, $hs*]` ->
                newWithLeafSet(
                    addTree(
                        HTbin.newWithLeftRight(h1, h2),
                        hs)))
    }

    def HTtip = new HuffmanTree()
    {
        char x
        int w
    
        char[] syms() = [x]
        int weight() = w
        String decode(flag[] bs) =
            if (bs.empty) x     #fix the infinite recursion
            else throw(IllegalArgument.newWithMessage(
                "Too many bits"))
        Self HTtip.newWithXW(XW xw) =
            new instance(x: xw.x, w: xw.w)
    
        String decode1(HuffmanTree h, flag[] bs) =
            x + h.decode(bs)
        flag[] encode1(char x)  #empty
    }

    def HTbin = new HuffmanTree()
    {
        char[] xs
        int w
        HuffmanTree h1, h2
    
        char[] syms() = xs
        int weight() = w
        Self HTbin.newWithLeftRight(HuffmanTree h1, h2) =
            new instance(
                xs: h1.syms + h2.syms,
                w:  h1.weight + h2.weight,
                h1: h1,
                h2: h2)

        String decode1(HuffmanTree h, flag[] bs) =
            if (bs[1] = 0) h1.decode1(h, bs[2+])
            else           h2.decode1(h, bs[2+])
        flag[] encode1(char x) =
            if (h1.syms.contains(x))      [0b] + h1.encode1(x)
            else if (h2.syms.contains(x)) [1b] + h2.encode1(x)
            else throw(IllegalArgument.newWithMessage(
                "Unknown symbol: " + x))
    }

    struct XW: char x; int w.

    XW[] weights(char[] xs) =
        addWeight(`from xs.sort`, `to []`)

    XW[] addWeights(char x, XW[] xws) =
        if (xws.empty)    [new XW.instance(x: x, w: 1)]
        else xws[1] -> xw
            if (x = xw.x) [new XW.instance(x: x, w: ++xw.w)] + xws[2+]
            else          [new XW.instance(x: x, w: 1), xw] + xws[2+];