C ALGORITHM 673, COLLECTED ALGORITHMS FROM ACM.
C THIS WORK PUBLISHED IN TRANSACTIONS ON MATHEMATICAL SOFTWARE,
C VOL. 15, NO. 2, PP. 158-167.
program DynamicHuffman (input, output);
{ Written by J. S. Vitter in Berkeley Pascal and run on a VAX 780 running
Berkeley UNIX 4.3BSD. This Pascal program implements Algorithm Lambda,
described and analyzed in the article by J. S. Vitter entitled
"Design and Analysis of Dynamic Huffman Codes," JACM, Vol. 34, No. 4,
October 1987. There are a couple things to note:
1. The output is done bit by bit using individual write statements,
which typically output one byte rather than a bit. So the "compressed"
file is typically much larger than the original file, because
each bit is written as a byte. This was left
this way because it makes it easier for the implementor to check
the algorithm during implementation. A packing routine for output
can be added later with an easy modification.
2. When the program is executed,
the user is prompted for the following information (in that order):
a. whether encoding or decoding is desired (reply "en" or "de").
b. which alphabet to use (reply "keys" for size 96, "ascii" for 256).
c. input file name
d. output file name
Each reply is followed by a carraige return.
The "keys" alphabet includes the printable ASCII characters, plus
the newline character. The programmer would have to modify the code
to accomodate arbitrary alphabet sizes. The code for "ascii" alphabet
uses the fact that in Berkely UNIX, characters are represented by
8-bit values in two's complement, in the range from -128 to 127.
Since the program expects alphabet values in the range from 1 to 256,
some conversion is necessary when characters are read in.
This is easy to modify for any particular implementation.
3. Because of a Berkeley Pascal quirk, the program effectively adds
an end-of-line character to the end of the last line of the file
if it does not already have one. The encoded file consists
of a several line of 0s and 1s (the number of 0s and 1s per line
is specified by the value of lineLengthLimit),
terminated with the end-of-line and end-of-file characters.}
const
lineLengthLimit = 78; { Number of 0s and 1s in encoded file per line}
type
str = packed array [1..80] of char;
SmallArray = array[1..256] of integer;
LargeArray = array[1..512] of integer;
LargeLongArray = array[1..512] of integer;
AlphabetChoices = (keys, ascii);
RoleChoices = (encode, decode);
var
lineLength : integer;
EOLNcode : integer;
response : char;
theRole : RoleChoices;
theAlphabet : AlphabetChoices;
outputfilename, inputfilename : str;
n : integer;
inp, out : text;
stack, alpha, rep : SmallArray;
M, E, R : integer;
availBlock, Z : integer;
parent, rtChild, parity, block, prevBlock, nextBlock,
first, last : LargeArray;
weight : LargeLongArray;
function FindChild (j, parity : integer) : integer;
var delta, right, gap : integer;
begin
delta := 2 * (first[block[j]] - j) + 1 - parity;
right := rtChild[block[j]]; gap := right - last[block[right]];
if delta <= gap then FindChild := right - delta
else begin
delta := delta - gap - 1;
right := first[prevBlock[block[right]]]; gap := right - last[block[right]];
if delta <= gap then FindChild := right - delta
else FindChild := first[prevBlock[block[right]]] - delta + gap + 1
end;
end;
function Receive : integer;
var v : char;
begin
if eoln(inp) then
begin
Receive := EOLNcode;
readln(inp)
end
else
begin
read(inp, v);
if theAlphabet = keys then Receive := ord(v) - ord(' ') + 2
else Receive := ord(v) + 129
end
end;
function BitReceive : integer;
var c : char;
begin
lineLength := lineLength + 1;
read(inp, c);
BitReceive := ord(c) - ord('0');
if lineLength = lineLengthLimit then
begin readln(inp); lineLength:= 0 end
end;
procedure EncodeAndTransmit (k : integer);
var i, ii, q, t, root : integer;
begin
q := rep[k]; i := 0;
if q <= M then begin { Encode letter of zero weight }
q := q - 1;
if q < 2 * R then t := E + 1
else begin q := q - R; t := E end;
for ii := 1 to t do begin
i := i + 1; stack[i] := q mod 2;
q := q div 2
end;
q := M;
end;
if M = n then root := n else root := Z;
while q <> root do begin { Traverse up the tree }
i := i + 1; stack[i] := (first[block[q]] - q + parity[block[q]]) mod 2;
q := parent[block[q]] - (first[block[q]] - q + 1 - parity[block[q]]) div 2
end;
{ This version of the algorithm outputs each bit in a simple-minded way,
which in most implentations uses one byte to store each bit. This can
be easily fixed for any given implementation. We have it as is so that
the implementor can more easily interpret the codes by hand in case there
are any problems during implementation. Later, when everything is verified,
a more efficient output routine that packs the bits together
can be used instead. One thing our code does do, however, is limit the
length of each line in the encoded file to some prespecified number of
zeros and ones, in case long lines cause a problem for your
particular Pascal implementation. These extra end-of-line markers
are ignored during decoding. The const lineLengthLimit specifies
the limit; if it is a negative number no extra end-of-line markers
are used and this feature is turned off. }
for ii := i downto 1 do begin
lineLength := lineLength + 1;
write(out,stack[ii]:1);
if lineLength = lineLengthLimit then
begin writeln(out); lineLength:= 0 end;
end
end;
function ReceiveAndDecode : integer;
var j, q : integer;
begin
if M = n then q := n else q := Z; { Set |q| to the root node }
while q > n do { Traverse down the tree }
q := FindChild(q, BitReceive);
if q = M then begin { Decode 0-node }
q := 0;
for j := 1 to E do q := 2 * q + BitReceive;
if q < R then q := 2 * q + BitReceive
else q := q + R;
q := q + 1
end;
ReceiveAndDecode := alpha[q]
end;
procedure InterchangeLeaves (e1, e2 : integer);
var temp : integer;
begin
rep[alpha[e1]] := e2; rep[alpha[e2]] := e1;
temp := alpha[e1]; alpha[e1] := alpha[e2]; alpha[e2] := temp
end;
procedure Update (k : integer);
var
q, leafToIncrement, bq, b, oldParent, oldParity, nbq, par, bpar : integer;
slide : boolean;
procedure FindNode;
begin
q := rep[k]; leafToIncrement := 0;
if q <= M then begin { A zero weight becomes positive }
InterchangeLeaves(q, M);
if R = 0 then begin
R := M div 2;
if R > 0 then E := E - 1
end;
M := M - 1; R := R - 1; q := M + 1; bq := block[q];
if M > 0 then begin
{ New 0-node is node |M|; old 0-node is node |M + 1|;
new parent of nodes |M| and |M + 1| is node |M + n| }
block[M] := bq; last[bq] := M; oldParent := parent[bq];
parent[bq] := M + n; parity[bq] := 1;
{ Create new internal block of zero weight for node |M + n| }
b := availBlock; availBlock := nextBlock[availBlock];
prevBlock[b] := bq; nextBlock[b] := nextBlock[bq];
prevBlock[nextBlock[bq]] := b; nextBlock[bq] := b;
parent[b] := oldParent; parity[b] := 0; rtChild[b] := q;
block[M + n] := b; weight[b] := 0;
first[b] := M + n; last[b] := M + n;
leafToIncrement := q; q := M + n
end
end
else begin { Interchange |q| with the first node in |q|'s block }
InterchangeLeaves(q, first[block[q]]);
q := first[block[q]];
if (q = M + 1) and (M > 0) then
begin leafToIncrement := q; q := parent[block[q]] end
end
end;
procedure SlideAndIncrement;
begin { |q| is currently the first node in its block }
bq := block[q]; nbq := nextBlock[bq];
par := parent[bq]; oldParent := par; oldParity := parity[bq];
if ((q <= n) and (first[nbq] > n) and (weight[nbq] = weight[bq]))
or ((q > n) and (first[nbq] <= n) and (weight[nbq] = weight[bq] + 1)) then
begin { Slide |q| over the next block }
slide := true;
oldParent := parent[nbq]; oldParity := parity[nbq];
{ Adjust child pointers for next-higher level in tree }
if par > 0 then begin
bpar := block[par];
if rtChild[bpar] = q then rtChild[bpar] := last[nbq]
else if rtChild[bpar] = first[nbq] then rtChild[bpar] := q
else rtChild[bpar] := rtChild[bpar] + 1;
if par <> Z then
if block[par + 1] <> bpar then
if rtChild[block[par + 1]] = first[nbq] then
rtChild[block[par + 1]] := q
else if block[rtChild[block[par + 1]]] = nbq then
rtChild[block[par + 1]] := rtChild[block[par + 1]] + 1
end;
{ Adjust parent pointers for block |nbq| }
parent[nbq] := parent[nbq] -1 +parity[nbq]; parity[nbq] := 1 -parity[nbq];
nbq := nextBlock[nbq];
end
else slide := false;
if (((q <= n) and (first[nbq] <= n)) or ((q > n) and (first[nbq] > n)))
and (weight[nbq] = weight[bq] + 1) then
begin { Merge |q| into the block of weight one higher }
block[q] := nbq; last[nbq] := q;
if last[bq] = q then begin { |q|'s old block disappears }
nextBlock[prevBlock[bq]] := nextBlock[bq];
prevBlock[nextBlock[bq]] := prevBlock[bq];
nextBlock[bq] := availBlock; availBlock := bq
end
else begin
if q > n then rtChild[bq] := FindChild(q - 1, 1);
if parity[bq] = 0 then parent[bq] := parent[bq] - 1;
parity[bq] := 1 - parity[bq];
first[bq] := q - 1
end
end
else if last[bq] = q then begin
if slide then begin { |q|'s block is slid forward in the block list }
prevBlock[nextBlock[bq]] := prevBlock[bq];
nextBlock[prevBlock[bq]] := nextBlock[bq];
prevBlock[bq] := prevBlock[nbq]; nextBlock[bq] := nbq;
prevBlock[nbq] := bq; nextBlock[prevBlock[bq]] := bq;
parent[bq] := oldParent; parity[bq] := oldParity
end;
weight[bq] := weight[bq] + 1;
end
else begin { A new block is created for |q| }
b := availBlock; availBlock := nextBlock[availBlock];
block[q] := b; first[b] := q; last[b] := q;
if q > n then begin
rtChild[b] := rtChild[bq];
rtChild[bq] := FindChild(q - 1, 1);
if rtChild[b] = q - 1 then parent[bq] := q
else if parity[bq] = 0 then parent[bq] := parent[bq] - 1;
end
else if parity[bq] = 0 then parent[bq] := parent[bq] - 1;
first[bq] := q - 1; parity[bq] := 1 - parity[bq];
{ Insert |q|'s block in its proper place in the block list }
prevBlock[b] := prevBlock[nbq]; nextBlock[b] := nbq;
prevBlock[nbq] := b; nextBlock[prevBlock[b]] := b;
weight[b] := weight[bq] + 1;
parent[b] := oldParent; parity[b] := oldParity
end;
{ Move |q| one level higher in the tree }
if q <= n then q := oldParent else q := par
end; { SlideAndIncrement }
begin { Update }
{ Set |q| to the node whose weight should increase }
FindNode;
while q > 0 do
{ At this point, |q| is the first node in its block. Increment
|q|'s weight by 1 and slide if necessary to maintain invariant (*) }
SlideAndIncrement;
{ Finish up some special cases involving the 0-node }
if leafToIncrement <> 0 then begin q := leafToIncrement; SlideAndIncrement end
end;
procedure Initialize;
var i : integer;
begin
M := 0; E := 0; R := -1; Z := 2 * n - 1;
for i := 1 to n do begin
M := M + 1; R := R + 1;
if 2 * R = M then begin E := E + 1; R := 0 end;
alpha[i] := i; rep[i] := i
end;
{ Initialize node |n| as the 0-node }
block[n] := 1; prevBlock[1] := 1; nextBlock[1] := 1; weight[1] := 0;
first[1] := n; last[1] := n; parity[1] := 0; parent[1] := 0;
{ Initialize available block list }
availBlock := 2;
for i := availBlock to Z - 1 do
nextBlock[i] := i + 1;
nextBlock[Z] := 0;
end;
procedure ReadString ( var s : str);
var i : integer;
begin
i := 0;
while not eoln do begin i := i + 1; read(s[i]) end;
readln;
while i < 80 do begin i := i + 1; s[i] := ' ' end
end;
procedure GetOptions;
begin
writeln('Enter type of coding desired (encode, decode):');
readln(response);
if response = 'e' then theRole := encode else theRole := decode;
writeln('Enter alphabet model (keys, ascii):');
readln(response);
if response = 'k' then theAlphabet := keys else theAlphabet := ascii;
case theAlphabet of
keys : { Alphabet size = 96 (printable ASCII characters, plus linefeed) }
begin n := 96; EOLNcode := 1 end;
ascii : { Alphabet size = 256. Note that 139 is equal to 129 +
(code for UNIX linefeed character in two's complement) }
begin n := 256; EOLNcode := 139 end;
end;
writeln('Enter the input filename: ');
ReadString(inputfilename);
reset(inp, inputfilename);
writeln('Enter the output filename: ');
ReadString(outputfilename);
rewrite(out, outputfilename);
end;
procedure PutName (val : integer);
begin
if val = EOLNcode then writeln(out)
else if theAlphabet = keys then write(out, chr(val + ord(' ') - 2))
else write(out, chr(val - 129));
end;
procedure Runit ;
var c : integer;
begin
lineLength := 0;
if theRole = encode then
begin
while not eof(inp) do begin
c := Receive;
EncodeAndTransmit(c);
Update(c);
end;
writeln(out);
end
else if not eof(inp) then
while not eoln(inp) do
begin
c := ReceiveAndDecode;
PutName(c);
Update(c);
end;
end;
begin { Mainline }
GetOptions;
Initialize;
Runit;
end.