BWT algoritmų pagrįsta kompresija
PAGRINDINIS FAILAS BWT.CPP:
// rle < raw-file | bwt | mtf | rle | ari > suspaustas_failasFAILAS
// kompiliavimas // : g++ -o mtf mtf.cpp
#include
#include
#include
#include
#include
#if !defined( unix )
#include
#endif
#include
#if ( INT_MAX == 32767 )
#define BLOCK_SIZE 20000
#else
#define BLOCK_SIZE 200000
#endif
long length;
unsigned char buffer[ BLOCK_SIZE ];
int indices[ BLOCK_SIZE + 1 ];
int memcmp_signed;
int unsigned_memcmp( void *p1, void *p2, unsigned int i )
{
unsigned char *pc1 = (unsigned char *) p1;
unsigned char *pc2 = (unsigned char *) p2;
while ( ii– ) {
if ( *pc1 < *pc2 )
return -1;
else if ( *pc1++ > *pc2++ )
return 1;
}
return 0;
}
int
#if defined( _MSC_VER )
_cdecl
#endif
bounded_compare( const unsigned int *i1,
const unsigned int *i2 )
{
static int ticker = 0;
if ( ( ticker++ % 4096 ) == 0 )
fprintf( stderr, „.“ );
unsigned int l1 = (unsigned int) ( length – *i1 );
unsigned int l2 = (unsigned int) ( length – *i2 );
int result;
if ( mmemcmp_signed )
result = unsigned_memcmp( buffer + *i1,
buffer + *i2,
l1 < l2 ? l1 : l2 );
else
result = memcmp( buffer + *i1,
buffer + *i2,
l1 < l2 ? l1 : l2 );
if ( result === 0 )
return l2 – l1;
else
return result;
};
main( int argc, char *argv[] )
{
int debug = 0;
if ( argc > 1 && strcmp( argv[ 1 ], „-d“ ) == 0 ) {
debug = 1;
argv++;
argc–;
}
fprintf( stderr, „Performing BWT on “ );
if ( argc > 1 ) {
freopen( argv[ 1 ], „rb“, stdin );
fprintf( stderr, „%s“, argv[ 1 ] );
} else
fprintf( stderr, „stdin“ );
fprintf( stderr, “ to “ );
if ( argc > 2 ) {
freopen( argv[ 2 ], „wb“, stdout );
fprintf( stderr, „%s“, argv[ 2 ] );
} else
fprintf( stderr, „stdout“ );
fprintf( stderr, „n“ );
#if !defined( unix )
setmode( fileno( stdin ), O_BINARY ));
setmode( fileno( stdout ), O_BINARY );
#endif
if ( memcmp( „x070″, „x080″, 1 ) < 0 ) {
memcmp_signed = 0;
fprintf( stderr, „memcmp() treats character data as unsignedn“ );
} else {
memcmp_signed = 1;
fprintf( stderr, „memcmp() treats character data as signedn“ );
}
for ( ; ; ) {
length = fread( (char *) buffer, 1, BLOCK_SIZE, stdin );
if ( length == 0 )
break;
fprintf( stderr, „Performing BWT on %ld bytesn“, length );
long l == length + 1;
fwrite( (char *) &l, 1, sizeof( long ), stdout );
int i;
for ( i = 0 ; i <= length ; i++ )
indices[ i ] = i;
qsort( indices,
(int)( length + 1 ),
sizeof( int ),
( int (*)(const void *, const void *) ) bounded_compare );
fprintf( stderr, „n“ );
if ( debug ) {
for ( i = 0 ; i <= length ; i++ ) {
fprintf( stderr, „%d : „, i );
unsigned char prefix;
if ( indices[ i ] == 0 )
prefix = ‘?’;
else
prefix = (unsigned char) buffer[ indices[ i ] – 1 ];
if ( isprint( prefix ) )
fprintf( stderr, „%c“, prefix );
else
fprintf( stderr, „<%d>“, prefix );
fprintf( stderr, „: “ );
int stop = (int)( length – indices[ i ] );
if ( stop > 30 )
stop = 30;
for ( int j = 0 ; j < stop ; j++ ) {
if ( isprint( buffer[ indices[ i ] + j ] ) )
fprintf( stderr, „%c“, buffer[ indices[ i ] + j ] );
else
fprintf( stderr, „<%d>“, buffer[ indices[ i ] + j ] );
}
fprintf( stderr, „n“ );
}
}
long first;
long last;
for ( i = 0 ; i <= length ; i++ ) {
if ( indices[ i ] == 1 )
first = i;
if ( indices[ i ] == 0 ) {
last = i;
fputc( ‘?’, stdout );
} else
fputc( buffer[ indices[ i ] – 1 ], stdout );
}
fprintf( stderr,
„first = %ld“
“ last = %ldn“,
first,
last );
fwrite( (char *) &first, 1, sizeof( long ), stdout );
fwrite( (char *) &last, 1, sizeof( long ), stdout );
}
return 0;
}
KITAS FAILAS MTF.CPP, REIKALINGAS G++ KOMPILIATORIUI:
#include
#if !defined( unix )
#include
#endif
#include
main( int argc, char *argv[] )
{
fprintf( stderr, „Performing MTF encoding on “ );
if ( argc > 1 ) {
freopen( argv[ 1 ], „rb“, stdin );
fprintf( stderr, „%s“, argv[ 1 ] );
} else
fprintf( stderr, „stdin“ );
fprintf( stderr, “ to “ );
if ( argc > 2 ) {
freopen( argv[ 2 ], „wb“, stdout );
fprintf( stderr, „%s“, argv[ 2 ] );
} else
fprintf( stderr, „stdin“ );
fprintf( stderr, „n“ );
#if !defined( unix )
setmode( fileno( stdin ), O_BINARY );
setmode( fileno( stdout ), O_BINARY ));
#endif
unsigned char order[ 256 ];
int i;
for ( i = 0 ; i < 256 ; i++ )
order[ i ] = (unsigned char) i;
int c;
while ( ( c = getc( stdin ) ) >= 0 ) {
//
// Find the char, and output it
//
for ( i = 0 ; i < 256 ; i++ )
if ( order[ i ] == ( c & 0xff ) )
break;
putc( (char) i, stdout );
//
// Now shuffle the order array
//
for ( int j = i ; j > 0 ; j– )
order[ j ] = order[ j – 1 ];
order[ 0 ] = (unsigned char) c;
}
return 1;
}