{-
The HuffmanTree class, which implements a Huffman tree algorithm for
the compression of text. The algorithm comes from:

http://q-lang.sourceforge.net/examples/huffman.q It was translated in dodo to illustrate functional style programming. Usage: def message = "Alice in Wonderland" def H = HuffmanTree.newWithWeights(weights(message)) def code = H.encode(message) console!out.Printf("%d bits: %s", code.count, H.decode(code)) Original header: /* huffman.q: Huffman encoding trees */ /* written by Albert Graef, 05-08-1993 (see also Abelson/Sussman: Structure and Interpretation of Computer Programs, MIT Press, 1985) revised 11-26-1993, 3-27-1997, 12-19-2000, 03-02-2002, 01-30-04 AG */ /* NOTE: Alphabets should consist of two symbols at least. Otherwise the single symbol will be encoded as the empty list, which leads to infinite recursion when decoded. This could be considered a bug. ;-) For instance, try the following: ==> def message = chars "Alice in Wonderland" ==> def H = huffman_tree (weights message) ==> def code = encode H message ==> strcat (decode H code) A more amusing example is perhaps the following exercise from Abelson/Sussman 1985, p. 125: "The following eight-symbol alphabet with associated relative frequencies was designed to efficiently encode the lyrics of 1950s rock songs. (Note that the "symbols" of an "alphabet" need not be individual letters.) A 2 NA 16 BOOM 1 SHA 3 GET 2 YIP 10 JOB 2 WAH 1 Generate a corresponding Huffman tree, and use it to encode the following message: Get a job Sha na na na na na na na na Get a job Sha na na na na na na na na Wah yip yip yip yip yip yip yip yip Sha boom How many bits are required for the encoding? What is the smallest number of bits that would be needed to encode this song if we used a fixed-length code for the eight-symbol alphabet?" */ -}
__Module__ huffmantree __Header__ class HuffmanTree
{
# The decoding function. It takes as its arguments a # list of zeros and ones, and reconstructs the original message. String decode(flag[])

# The encoding function. It takes as its arguments a # list of symbols, and returns the coded message. flag[] encode(char[])

# Construct a Huffman tree, starting from a list of # (symbol,weight) pairs. Class HuffmanTree.newWithWeights(XW[])
}

struct XW

# Determine the (symbol,weight) pairs for a particular message XW[] weights(char[])

__Main__ {-
Huffman encoding trees are represented as binary trees whose leaves
(represented by the tip symbol) carry individual symbols and their
weights (frequencies), and whose interior nodes (represented with the bin
symbol) store the sets of symbols (represented as lists) found in the
corresponding subtrees, together with the corresponding weights (which are
the sums of the subtree weights). The nil symbol denotes an empty tree.
-}
HuffmanTree nil

HuffmanTree[] leafSet(HW[] xws) =
addTree(`pick 1 in HTtip.newWithXW(`all xws`)`, `first []`)

HuffmanTree[] addTree(HuffmanTree h1, HuffmanTree[] hs) =
if (hs.null) [h1] #hs is empty else hs[1] -> h2
if (h1.weight > h2.weight) [h2] + addTree(h1, hs[2+])
else [h1] + hs;

class HuffmanTree
{
char[] syms() #empty list int weight() #zero String decode(flag[] bs) = decode1(self, bs)
flag[] encode(char[] s) = encode1(`pick 1 in s`) + `first []`
Class HuffmanTree.newWithWeights(XW[] xws) =
newWithLeafSet(leafSet(xws))

String decode1(HuffmanTree h, flag[] bs) =
if (bs.null) "" #bs is empty else throw(IllegalArgument.newWithMessage( "Too many bits"))
flag[] encode1(char x) =
throw(IllegalArgument.newWithMessage(
"Cannot encode with empty tree"))
Class HuffmanTree.newWithLeafSet(HuffmanTree[] hs) = (hs ~
match [] ->
nil
| match [$h] ->
h
| match [$h1, $h2, $hs*] ->
newWithLeafSet(
addTree(
HTbin.newWithLeftRight(h1, h2),
hs)))
}

def HTtip = new HuffmanTree()
{
char x
int w

char[] ^syms() = [x] int ^weight() = w
String ^decode(flag[] bs) =
if (bs.null) x #fix the infinite recursion else throw(IllegalArgument.newWithMessage( "Too many bits"))
Class HTtip.newWithXW(XW xw) =
new instance(x: xw.x, w: xw.w)

String ^decode1(HuffmanTree h, flag[] bs) =
x + h.decode(bs)
flag[] ^encode1(char x) #empty list } def HTbin = new HuffmanTree()
{
char[] xs
int w
HuffmanTree h1, h2

char[] ^syms() = xs
int ^weight() = w
Class HTbin.newWithLeftRight(HuffmanTree h1, h2) =
new instance(
xs: h1.syms + h2.syms,
w: h1.weight + h2.weight,
h1: h1,
h2: h2)

String ^decode1(HuffmanTree h, flag[] bs) =
if (bs[1] = 0) h1.decode1(h, bs[2...])
else h2.decode1(h, bs[2...])
flag[] ^encode1(char x) =
if (h1.syms.contains(x)) [0] + h1.encode1(x)
else if (h2.syms.contains(x)) [1] + h2.encode1(x)
else throw(IllegalArgument.newWithMessage(
"Unknown symbol: " + x))
}

struct XW: char x; int w.

XW[] weights(char[] xs) =
addWeight(`pick 1 in xs.sort`, `first []`)

XW[] addWeights(char x, XW[] xws) =
if (xws.null) [new XW.instance(x: x, w: 1)] else xws[1] -> xw
if (x = xw.x) [new XW.instance(x: x, w: ++xw.w)] + xws[2...] else [new XW.instance(x: x, w: 1), xw] + xws[2...];