/****************************************************************************
 * fbwtenc.cpp                         the "final" combined BWT/MTF encoder *
 ****************************************************************************
 * made by Fabian Giesen aka RYG/Chrome Design                              *
 * for my BWT article in hugi #13                                           *
 *                                                                          *
 * I don't ask you to credit me when using this because you wouldn't use    *
 * THIS version of the encoder                                              *
 ****************************************************************************
 * WARNING: This code assumes that char is unsigned and memcmp compares     *
 *          unsigned chars! If that isn't right for your platform, you have *
 *          to write an own memcmp function and change all chars in this    *
 *          source to unsigned ones!                                        *
 ****************************************************************************/

// This code works differently from our two example encoders. It has an
// imaginary "end of buffer" character which is said to be bigger than any
// other byte in the input stream. For binary data, it may be impossible to
// find such a byte, so a different approach is done: We don't compare with
// that byte. If we hit the end of the buffer, we simply say the string which
// hit it first was greater. This enlarges our output data by one byte, as
// this imaginary character must be put (it is represented by '?'). It won't
// be read by the decoder, which also knows about this trick. (I adapted this
// trick from Mark Nelsons code)

// Why I do this? Because doing comparisons always to the end would slow
// down this code rapidly. It doesn't use a quicksort function specially
// suited to this or anything else, so it will be REALLY slow (about 5 mins
// for a 500kb block on my Cyrix P166+). This was also the reason why I
// changed blocksize from 500kb to 100kb. This will hit compression ratio,
// but this code is only meant as example anyway. If you want to see a faster
// implementation, look at bzip2 by Julian Seward, which source is freely
// available.

// Oh, and I output in a format readable by Mark Nelsons decoders. I don't
// do this because I'm not able to create a new one, I do it because this
// helps me much with debugging.

// Also I want you to notice that this code was optimized for readability,
// not for speed.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <iostream.h>

// Number of input bytes handled per block (100k in this example). The
// replacement list is parenthesized so the macro behaves as a single value
// inside any expression (e.g. "x % BLOCKSIZE" or "x / BLOCKSIZE").
#define BLOCKSIZE  (100L*1024L)

typedef unsigned long ulong;           // shorthand; not used in the code
                                       // visible in this file

char *inbuffer, *outbuffer;            // input/output buffers, allocated in
                                       // main(); inbuffer is also read by
                                       // compare_func() and transformBWT()
unsigned int inbuflen;                 // number of valid bytes in inbuffer
                                       // (set from fread() in main())
unsigned int *index;                   // read in the encoder examples about
                                       // the index array trick; (re)allocated
                                       // by transformBWT() for every block

int   timer;                           // used to view progress: counts calls
                                       // to compare_func(), reset per block

char  order[256];                      // the MTF order table: order[p] is the
                                       // byte currently at position p
char  revorder[256];                   // and a "reverse" order table, which
                                       // makes MTF encoding faster:
                                       // revorder[b] is the position of byte b

// This is my comparison function, mainly copy-and-pasted from Mark Nelson's
// code.

// Sort callback for the index array. i1 and i2 point to two entries of the
// index array; each entry is the start offset of a suffix of inbuffer.
// Returns <0, 0 or >0 like memcmp. When one suffix is a prefix of the other,
// the one that reaches the end of the buffer first (the shorter one) is
// considered greater, which models the imaginary "end of buffer" character.
// Side effects: bumps the global timer and prints a dot every 8192 calls.
int compare_func(const int *i1, const int *i2)
{
  timer++;
  if ((timer & 8191)==0) cerr << '.';  // progress indicator

  if (i1==i2) return 0;                // same array slot: equal by definition

  const char *const bufend=inbuffer+inbuflen;
  const char *s1=inbuffer+*i1;         // start of first suffix
  const char *s2=inbuffer+*i2;         // start of second suffix

  unsigned int rem1=(unsigned int) (bufend-s1); // bytes left in suffix 1
  unsigned int rem2=(unsigned int) (bufend-s2); // bytes left in suffix 2

  unsigned int common=rem1;            // only compare the part both have
  if (rem2<common) common=rem2;

  int diff=memcmp(s1, s2, common);
  if (diff!=0) return diff;

  return rem2-rem1;                    // tie: the shorter suffix sorts last
}

// the MTF initialization function

// Reset both MTF tables to the identity mapping: byte b sits at position b,
// and position p holds byte p.
void initMTF()
{
  int sym=0;

  while (sym<256)
  {
    revorder[sym]=sym;
    order[sym]=revorder[sym];
    sym++;
  }
}

// the MTF encoding "put" function

// Move-to-front encode one byte.
//   value - the byte to encode
//   where - output cursor; the MTF code is stored at *where and the cursor
//           is advanced by one
// The code written is the current position of value in the order table;
// afterwards value is moved to position 0 and everything that was in front
// of it is shifted back by one.
//
// All table indices go through unsigned char, so bytes >= 0x80 index the
// tables correctly even on platforms where plain char is signed.
void putMTF(char value, char *&where)
{
  const unsigned char sym=(unsigned char) value;
  int pos=(unsigned char) revorder[sym]; // current position of the byte

  *where++=(char) pos;                 // write code to buffer

  for (; pos>0; pos--)
  {
    order[pos]=order[pos-1];           // shift order table
    revorder[(unsigned char) order[pos]]=(char) pos; // keep reverse table in sync
  }

  order[0]=value;                      // move our character to front
  revorder[sym]=0;                     // update reverse order table again
}

// encode a dword using mtf and write it to buffer (little endian byte order)

// MTF-encode the four bytes of "what", least significant byte first, and
// append the codes at "to" (the cursor is advanced by putMTF).
void putDWord(int what, char *&to)
{
  for (int byteno=0; byteno<4; byteno++)
  {
    putMTF(what & 0xff, to);           // emit the current low byte
    what>>=8;                          // and move on to the next one
  }
}

// the BWT transformer
// This will produce results 100% compatible to Mark Nelson's encoder. This
// makes debugging much easier as I can test the results with his decoders.
// His format isn't bad also.

// Transform and MTF-encode one block.
//   indata  - not read here: the block is taken from the global inbuffer,
//             which is also what compare_func sorts on (with inbuflen as its
//             length), so callers pass inbuffer/inbuflen
//   outdata - receives inlen+13 bytes, every one of them passed through
//             putMTF: 4 bytes (inlen+1), inlen+1 transformed bytes, 4 bytes
//             primary index, 4 bytes index of the imaginary EOF character
//   inlen   - number of input bytes in the block
// The global index table is (re)allocated here and stays allocated after the
// call.
void transformBWT(char *indata, char *outdata, int inlen)
{
  int i;                               // counter
  int primind=0, lastind=0;            // primary/eof index; primind keeps its
                                       // initial 0 for an empty block, where
                                       // no index entry equals 1

  putDWord(inlen+1, outdata);          // put length+1, as mark nelson does

  delete [] index;                     // drop the previous table (deleting a
                                       // null pointer is a no-op)
  index=new unsigned int[inlen+1];     // then create us a new one

  for (i=0; i<=inlen; i++) index[i]=i; // initialize index array

  qsort(index, inlen+1, sizeof *index, // then quicksort it
        (int(*)(const void*, const void*)) compare_func);

  for (i=0; i<=inlen; i++)             // write our data
  {
    if (index[i]==1) primind=i;        // set primary ind. when we find it

    if (index[i]==0)                   // handle our imaginary EOF character
    {                                  // specially
      lastind=i;                       // remember its index
      putMTF('?', outdata);            // then put our '?' character
    }
    else
      putMTF(inbuffer[index[i]-1], outdata);  // otherwise just put the code
  }

  putDWord(primind, outdata);          // put primary index
  putDWord(lastind, outdata);          // put eof character index

  // You will notice that this function looks different from the one used in
  // our example encoders. This is because of our imaginary end-of-buffer
  // character handling. So don't worry about it.
}

int main(int argc, char *argv[])
{
  FILE *in, *out;                      // input/output files
  int   block;                         // simple block counter

  cerr << "The \"Final\" BWT/MTF Encoder" << endl;

  if (argc!=3)                         // if wrong parameter count, exit
  {
    cerr << endl << "Use: FBWTENC <Input filename> <Output filename>" << endl;
    return 1;
  };

  in=fopen(argv[1], "rb");             // open input file

  if (!in)                             // if open failed, exit
  {
    cerr << endl << "cannot open input file!" << endl;
    return 1;
  };

  out=fopen(argv[2], "wb");            // open output file

  if (!out)                            // if open failed, exit
  {
    cerr << endl << "cannot open output file!" << endl;
    return 1;
  };

  initMTF();                           // initialize MTF encoder

  inbuffer=new char[BLOCKSIZE];        // allocate our buffers
  outbuffer=new char[BLOCKSIZE+13];

  block=1;                             // block init value

  do                                   // the encoding loop (is easy, so no
  {                                    // comments)
    cerr << "Encoding block " << block++ << "..." << endl;
    timer=0;

    inbuflen=fread(inbuffer, 1, BLOCKSIZE, in);

    transformBWT(inbuffer, outbuffer, inbuflen);

    fwrite(outbuffer, 1, inbuflen+13, out);

    cerr << endl;                      // linefeed (because of our dots)
  } while (inbuflen==BLOCKSIZE);

  delete[] inbuffer;                   // free our memory
  delete[] outbuffer;

  fclose(in);                          // close our files
  fclose(out);

  return 0;                            // and we're done
};
