I don't know if this is what you are looking for, but I just wrote this code that encodes a text string using Huffman compression.
It handles the single-character case correctly, at least.
There are probably a few bugs in this code, I wrote it in sort of a hurry.
Code:
include <iostream>
#include <map>
#include <algorithm>
#include <fstream>
using namespace std;
void writeByte(char)
{
}
int totalNumberOfBits = 0;
void writeBits(int bit, int numberOfBits )
{
totalNumberOfBits += numberOfBits;
for (int i=0;i<numberOfBits;++i)
cout << bit;
}
struct CharData
{
CharData()
{
freq = ch = 0;
}
unsigned char ch;
int freq;
};
bool freqLess(const CharData& left, const CharData& right)
{
return left.freq > right.freq;
}
void huffmanCompress(unsigned char data[], int length)
{
//Count the frequencies of the different
//characters
CharData chData[256];
for (int c=0; c<length; ++c)
{
chData[data[c]].ch = data[c];
chData[data[c]].freq ++;
}
//Sort the frequencies
std::sort(chData, chData+256, freqLess);
//Create a vector that converts a character
//into the appropiate number of ones
int charToNumber[256];
//Go through the frequency table
int maximumBits = 1;
for (int i = 0; i<256 && chData[i].freq != 0; ++i)
{
//Save in the conversion table
charToNumber[ chData[i].ch ] = i;
if (maximumBits <= i)
maximumBits = i;
//Save to the beginning of the file which
//is the frequency table
writeByte( chData[i].ch );
//DEBUG
cout << "\'" << chData[i].ch << "\' will be represented with " << i << " ones." << endl;
}
//Mark the end of the header
writeByte(0);
writeByte(0);
cout << endl;
//Save the characters
for (int c=0; c<length; ++c)
{
writeBits( 1, charToNumber[data[c]] );
if (charToNumber[data[c]] < maximumBits)
//Write a zero
writeBits(0,1);
cout << " ";
}
}
int main()
{
unsigned char dataFile[10240] = "";
ifstream fin("C:\\Petter\\Text.txt");
int p=0;
while (fin)
dataFile[p++] = fin.get();
p--;
huffmanCompress(dataFile, p);
cout << endl << "Total number of original bytes: " << p;
cout << endl << "Total number of compresssed bytes: " << (totalNumberOfBits>>3);
cin.get();
}
EDIT: Perhaps I'm not implementing the algorithm right (I've never looked at it before). I'm getting
Code:
Total number of original bytes: 9895
Total number of compresssed bytes: 13335
For some larger text I tried to compress. Compresses fine for small amounts, though.