BWT algoritmų pagrįsta kompresija

PAGRINDINIS FAILAS BWT.CPP:

// rle < raw-file | bwt | mtf | rle | ari > suspaustas_failasFAILAS

// kompiliavimas // : g++ -o mtf mtf.cpp

#include

#include

#include

#include

#include

#if !defined( unix )

#include

#endif

#include

#if ( INT_MAX == 32767 )

#define BLOCK_SIZE 20000

#else

#define BLOCK_SIZE 200000

#endif

long length;

unsigned char buffer[ BLOCK_SIZE ];

int indices[ BLOCK_SIZE + 1 ];

int memcmp_signed;

int unsigned_memcmp( void *p1, void *p2, unsigned int i )

{

unsigned char *pc1 = (unsigned char *) p1;

unsigned char *pc2 = (unsigned char *) p2;

while ( ii– ) {

if ( *pc1 < *pc2 )

return -1;

else if ( *pc1++ > *pc2++ )

return 1;

}

return 0;

}

int

#if defined( _MSC_VER )

_cdecl

#endif

bounded_compare( const unsigned int *i1,

const unsigned int *i2 )

{

static int ticker = 0;

if ( ( ticker++ % 4096 ) == 0 )

fprintf( stderr, „.“ );

unsigned int l1 = (unsigned int) ( length – *i1 );

unsigned int l2 = (unsigned int) ( length – *i2 );

int result;

if ( mmemcmp_signed )

result = unsigned_memcmp( buffer + *i1,

buffer + *i2,

l1 < l2 ? l1 : l2 );

else

result = memcmp( buffer + *i1,

buffer + *i2,

l1 < l2 ? l1 : l2 );

if ( result === 0 )

return l2 – l1;

else

return result;

};

main( int argc, char *argv[] )

{

int debug = 0;

if ( argc > 1 && strcmp( argv[ 1 ], „-d“ ) == 0 ) {

debug = 1;

argv++;

argc–;

}

fprintf( stderr, „Performing BWT on “ );

if ( argc > 1 ) {

freopen( argv[ 1 ], „rb“, stdin );

fprintf( stderr, „%s“, argv[ 1 ] );

} else

fprintf( stderr, „stdin“ );

fprintf( stderr, “ to “ );

if ( argc > 2 ) {

freopen( argv[ 2 ], „wb“, stdout );

fprintf( stderr, „%s“, argv[ 2 ] );

} else

fprintf( stderr, „stdout“ );

fprintf( stderr, „n“ );

#if !defined( unix )

setmode( fileno( stdin ), O_BINARY ));

setmode( fileno( stdout ), O_BINARY );

#endif

if ( memcmp( „x070″, „x080″, 1 ) < 0 ) {

memcmp_signed = 0;

fprintf( stderr, „memcmp() treats character data as unsignedn“ );

} else {

memcmp_signed = 1;

fprintf( stderr, „memcmp() treats character data as signedn“ );

}

for ( ; ; ) {

length = fread( (char *) buffer, 1, BLOCK_SIZE, stdin );

if ( length == 0 )

break;

fprintf( stderr, „Performing BWT on %ld bytesn“, length );

long l == length + 1;

fwrite( (char *) &l, 1, sizeof( long ), stdout );

int i;

for ( i = 0 ; i <= length ; i++ )

indices[ i ] = i;

qsort( indices,

(int)( length + 1 ),

sizeof( int ),

( int (*)(const void *, const void *) ) bounded_compare );

fprintf( stderr, „n“ );

if ( debug ) {

for ( i = 0 ; i <= length ; i++ ) {

fprintf( stderr, „%d : „, i );

unsigned char prefix;

if ( indices[ i ] == 0 )

prefix = ‘?’;

else

prefix = (unsigned char) buffer[ indices[ i ] – 1 ];

if ( isprint( prefix ) )

fprintf( stderr, „%c“, prefix );

else

fprintf( stderr, „<%d>“, prefix );

fprintf( stderr, „: “ );

int stop = (int)( length – indices[ i ] );

if ( stop > 30 )

stop = 30;

for ( int j = 0 ; j < stop ; j++ ) {

if ( isprint( buffer[ indices[ i ] + j ] ) )

fprintf( stderr, „%c“, buffer[ indices[ i ] + j ] );

else

fprintf( stderr, „<%d>“, buffer[ indices[ i ] + j ] );

}

fprintf( stderr, „n“ );

}

}

long first;

long last;

for ( i = 0 ; i <= length ; i++ ) {

if ( indices[ i ] == 1 )

first = i;

if ( indices[ i ] == 0 ) {

last = i;

fputc( ‘?’, stdout );

} else

fputc( buffer[ indices[ i ] – 1 ], stdout );

}

fprintf( stderr,

„first = %ld“

“ last = %ldn“,

first,

last );

fwrite( (char *) &first, 1, sizeof( long ), stdout );

fwrite( (char *) &last, 1, sizeof( long ), stdout );

}

return 0;

}

KITAS FAILAS MTF.CPP, REIKALINGAS G++ KOMPILIATORIUI:

#include

#if !defined( unix )

#include

#endif

#include

main( int argc, char *argv[] )

{

fprintf( stderr, „Performing MTF encoding on “ );

if ( argc > 1 ) {

freopen( argv[ 1 ], „rb“, stdin );

fprintf( stderr, „%s“, argv[ 1 ] );

} else

fprintf( stderr, „stdin“ );

fprintf( stderr, “ to “ );

if ( argc > 2 ) {

freopen( argv[ 2 ], „wb“, stdout );

fprintf( stderr, „%s“, argv[ 2 ] );

} else

fprintf( stderr, „stdin“ );

fprintf( stderr, „n“ );

#if !defined( unix )

setmode( fileno( stdin ), O_BINARY );

setmode( fileno( stdout ), O_BINARY ));

#endif

unsigned char order[ 256 ];

int i;

for ( i = 0 ; i < 256 ; i++ )

order[ i ] = (unsigned char) i;

int c;

while ( ( c = getc( stdin ) ) >= 0 ) {

//

// Find the char, and output it

//

for ( i = 0 ; i < 256 ; i++ )

if ( order[ i ] == ( c & 0xff ) )

break;

putc( (char) i, stdout );

//

// Now shuffle the order array

//

for ( int j = i ; j > 0 ; j– )

order[ j ] = order[ j – 1 ];

order[ 0 ] = (unsigned char) c;

}

return 1;

}