LZW算法的C语言实现
- 格式:doc
- 大小:68.50 KB
- 文档页数:25
将源文件拷贝到当前目录下,打开源程序lzw.cbp,在main.cpp中更改源文件、压缩文件、解压文件的名称、路径:#define S() (strcpy(sourfile, "sourfile.jpg")) //源文件名及路径#define C() (strcpy(codefile, "codefile.lzw")) //压缩文件名及路径#define D() (strcpy(destfile, "destfile.jpg")) //解压文件名及路径下面是具体实现,总共四个文件://头文件模块:lzw.h#ifndef LZW_H_INCLUDED#define LZW_H_INCLUDED#define N 90000#define M 100typedef unsigned int Uint;/*函数功能: 将sourfile文件用LZW压缩到codefile文件中函数参数:待压缩文件/压缩目标文件函数返回值:压缩成功(TRUE)/压缩失败(FALSE)*/void LzwEncoding(char sourfile[], char codefile[]);/*函数功能: 将codefile文件用LZW解压到destfilefile文件中函数参数:压缩目标文件/解压文件函数返回值:解压成功(TRUE)/解压失败(FALSE)*/void LzwDecoding(char codefile[], char destfile[]);/*函数功能:判断前缀数组与当前字符的结合是否在字典中存在索引函数参数:当前字符(cbuff)函数返回值:在字典中(TRUE)/不在字典中(F ALSE)*/bool IsFindDictKey(Uint buff);/*函数功能:判断前缀数组与当前字符的结合是否在字典中存在索引函数参数:前缀数组(preFix)/前缀数组的有效长度(preCount)/当前字符(cbuff)函数返回值:在字典中(TRUE)/不在字典中(F ALSE)*/bool IsDictValue(Uint preFix[], Uint preCount, Uint cbuff);/*函数功能:查找前缀数组在字典中的索引函数参数:前缀数组(preFix)/前缀数组的有效长度(preCount)函数返回值:前缀数组在字典中的索引*/Uint FindDictKey(Uint preFix[], Uint preCount);extern Uint dict[N][M]; //全局字典extern char sourfile[20]; //全局带压缩文件extern char codefile[20]; //全局压缩文件extern char destfile[20]; //全局解压文件#endif // LZW_H_INCLUDED运行源程序将执行压缩机解压缩两个过程:LzwEncoding(sourfile, codefile); //压缩文件sourfile到codefile LzwDecoding(codefile, destfile); //解压文件codefile到destfile//主函数模块:main.cpp#include <stdio.h>#include <stdlib.h>#include <string.h>#include "lzw.h"#define S() (strcpy(sourfile, "sourfile.pdf"))#define C() (strcpy(codefile, "codefile.lzw"))#define D() (strcpy(destfile, "destfile.pdf"))using namespace std;Uint dict[N][M]; //全局字典int main(void){char sourfile[20];char codefile[20];char destfile[20];S();C();D();LzwEncoding(sourfile, codefile);LzwDecoding(codefile, destfile);return 0;}//压缩模块:LzwEncoding.cpp#include <stdio.h>#include <stdlib.h>#include <windows.h>#include "lzw.h"using namespace std;/*函数功能:查找前缀数组在字典中的索引函数参数:前缀数组(preFix)/前缀数组的有效长度(preCount) 函数返回值:前缀数组在字典中的索引*/Uint DictKey(Uint preFix[], Uint preCount){Uint i;int flag = 0;for (i = 0; i < N; i++){Uint j = preCount;if (dict[i][0] == preCount){flag = 1;while ( j-- ){if (dict[i][j+1] != preFix[j]){flag = 0;break;}}}if ( flag ) break;}return i;}/*函数功能:判断前缀数组与当前字符的结合是否在字典中存在索引函数参数:前缀数组(preFix)/前缀数组的有效长度(preCount)/当前字符((int)sbuff[count]) 函数返回值:在字典中(TRUE)/不在字典中(F ALSE)*/bool IsDictValue(Uint preFix[], Uint preCount, Uint cbuff){int flag = 0;for (Uint i = 0; i < N; i++){if ((dict[i][0] == preCount+1) && (dict[i][preCount+1] == cbuff)){flag = 1;for (Uint j = 0; j < preCount; j++){if (dict[i][j+1] != preFix[j]){flag = 0;break;}}}if ( flag ) break;}if ( flag )return true;elsereturn false;}/*O(n*n)*//*函数功能: 将sourfile文件用LZW压缩到codefile文件中函数参数:待压缩文件/压缩目标文件函数返回值:压缩成功(TRUE)/压缩失败(FALSE)*/void LzwEncoding(char sourfile[], char codefile[]){FILE *sfp; //源文件句柄FILE *cfp; //编码文件句柄BYTE sbuff[N]; //源文件字节缓冲区Uint cbuff[N]; //压缩文件字节缓冲区Uint preCount; //前缀数组小标Uint preFix[N]; //前缀数组Uint dictCount = 255; //字典索引位置Uint cCount = 0; //压缩字节缓冲区下标Uint count = 0; //计数器Uint code; //临时编码if ((sfp = fopen(sourfile, "rb")) == NULL){printf("Read source file error!");exit( 0 );}if ((cfp = fopen(codefile, "wb")) == NULL){printf("Write code file error!");fclose(sfp);exit( 0 );}//初始化字典for (int i = 0; i < N; i++){if (i < 256){dict[i][0] = 1; //第i个索引处有一个字符dict[i][1] = i; //第i个索引处的字符}else{dict[i][0] = 0; //第i个索引处没有字符}}fseek(sfp, 0, SEEK_END);long fl = ftell(sfp); //获取原文件的字节数fseek(sfp, 0, SEEK_SET); //将文件指针移到文件开头处fread(sbuff, sizeof(sbuff[0]), fl, sfp); //将文件一次性读到缓冲区preCount = 0; //初始化前缀数组下标while ( fl-- ) //读取源文件内容并进行编码{if (IsDictValue(preFix, preCount, (int)sbuff[count])) //当前字符串在字典中preFix[preCount++] = (int)sbuff[count]; //将当前字符复制给前缀数组else{ //当前字符串不在字典中dictCount++; //字典增长//将前缀数组对应的索引写入编码文件code = DictKey(preFix, preCount);cbuff[cCount++] = code;//将前缀数组的字符及当前字符添加到字典中Uint icount = preCount; //记录前缀数组的有效长度dict[dictCount][0] = icount+1; //将dictCount索引处的字符数记录到字典while ( icount-- ){dict[dictCount][preCount-icount] = preFix[preCount-icount-1];}dict[dictCount][preCount-icount] = (int)sbuff[count];preCount = 0;preFix[preCount++] = (int)sbuff[count]; //将当前字符复制给前缀数组}count++;}code = DictKey(preFix, preCount);cbuff[cCount++] = code;fwrite(cbuff, sizeof(Uint), cCount, cfp); //将压缩文件整块写入文件fclose(sfp);fclose(cfp);}/* O(n^2*m) *///解压模块:LzwDecoding.cpp#include <stdio.h>#include <stdlib.h>#include <windows.h>#include "lzw.h"using namespace std;/*函数功能:判断buff是否为字典中的一个索引函数参数:无符号整形数buff函数返回值:是(TRUE)/否(FALSE)*/bool IsDictKey(Uint buff){if (dict[buff][0] == 0)return false;return true;}/*函数功能: 将codefile文件用LZW解压到destfilefile文件中函数参数:压缩目标文件/解压文件函数返回值:解压成功(TRUE)/解压失败(FALSE)*/void LzwDecoding(char codefile[], char destfile[]){FILE *cfp; //编码文件句柄FILE *dfp; //源文件句柄Uint oldCount; //old数组下标Uint oldCode[N] = {0}; //解码过的索引值??Uint cbuff[N/4]; //压缩文件缓冲区Uint cCount = 0; //压缩文件缓冲区下标BYTE dbuff[N] = {0}; //解压文件缓冲区Uint dCount = 0; //解压文件缓冲区下标Uint dictCount = 255; //字典长度Uint i, j; //循环变量if ((cfp = fopen(codefile, "rb")) == NULL){printf("Read coding file error!");exit( 0 );}if ((dfp = fopen(destfile, "wb")) == NULL){printf("Write decoding file error!");fclose(cfp);exit( 0 );}//初始化字典for (i = 0; i < N; i++){if (i < 256){dict[i][0] = 1; //第i个索引处有一个字符dict[i][1] = i; //第i个索引处的字符}else{dict[i][0] = 0; //第i个索引处没有字符}}fseek(cfp, 0, SEEK_END);long fl = ftell(cfp)/4; //获取原文件的编码数fseek(cfp, 0, SEEK_SET); //将文件指针移到文件开头处oldCount = 0; //初始化前缀数组下标、处理第一个编码fread(cbuff, sizeof(Uint), fl, cfp);//将压缩文件整块读入dbuff[dCount++]=cbuff[cCount];oldCode[oldCount++]=cbuff[cCount];fl--;while ( fl-- ) //读取源文件内容并进行编码{cCount++; //处理下一编码dictCount++; //字典增长if (IsDictKey(cbuff[cCount])) //字节在字典中{j = oldCount;dict[dictCount][0] = oldCount+1;while ( j-- ) //更新字典dict[dictCount][oldCount-j] = oldCode[oldCount-j-1];dict[dictCount][oldCount-j] = dict[cbuff[cCount]][1];i = dict[cbuff[cCount]][0];while ( i-- ) //将当前索引对应的字典值加入解压文件缓冲区dbuff[dCount++] = dict[cbuff[cCount]][dict[cbuff[cCount]][0]-i];//更新前缀数组oldCount = 0;i = dict[cbuff[cCount]][0];while ( i-- ){oldCode[oldCount]=dict[cbuff[cCount]][oldCount+1];oldCount++;}}else{i = oldCount;while ( i-- ) //前缀数组中的字典值加入解压文件缓冲区dbuff[dCount++]=oldCode[oldCount-i-1];dbuff[dCount++]=oldCode[0];dict[cbuff[cCount]][0] = oldCount+1;j = oldCount;while ( j-- ) //将前缀数组及前缀数组的第一个字符更新到字典dict[dictCount][oldCount-j+1] = oldCode[oldCount-j];dict[dictCount][oldCount-j]=oldCode[0];oldCode[oldCount++]=oldCode[0];}}fwrite(dbuff, sizeof(BYTE), dCount, dfp); //将解压文件缓冲区整块写入解压文件fclose(cfp);fclose(dfp);}改进方案:用结构体代替数组实现全局字典字典!其实现很简单,留给读者自行解决!。
实验三LZW编码一、实验目的1、加深对LZW编码的理解;2、掌握LZW编码的程序设计。
二、原理与说明LZW编码的步骤:a ab aa bc ba aab ab bc1 12 4 23 6 7 5 8三、实验内容利用C、VB或VC语言实现LZW编码的算法,掌握LZW编码的程序设计并调试通过。
四、实验设备计算机程序如下:#include<>#include""#include""#include""#include""#define BIRS 12#define HASHING_SHIFT BITS-8#define MAX_V ALUE(1<<BITS)-1#define MAX_CODE MAX_V ALUE-1 #if BITS==14#define TABLE_SIZE 18041#endif#if BITS==13#define TABLE_SIZE 9029#endif#if BITS<=12#define TABLE_SIZE 5021int *code_value;unsigned int *prefix_code;unsigned char *append_character; unsigned char decode_stack[4000];char ok;find match(int hash_perfix,unsigned int hash_character){int index;int offset;index=(hash_character<<HASHING_SHIFT)^hash_prefix;if(index==0)offset=1;elseoffset=TABLE_SIZE-index;while(1){if(code_value[index]==-1)return(index);if(prefix_code[index]==hash_prefix&&append_character[index]==hash_character) return(index);index-=offset;if(index<0)index+=TABLE_SIZE;}}input_code(FILE*input){unsigned int return_value;static int input_bit_count=0;static unsigned long input_bit_buffer=0L;while(input_bit_count<=24){input_bit_buffer|=(unsigned long)getc(input)<<(24-input_bit_count);input_bit_count+=8;}return_value=input_bit_buffer>>(32-BITS);input_bit_buffer<<=BITS;input_bit_count-=BITS;return(return_value);}void output_code(FILE*output,unsigned int code){static int output_bit_count=0;static unsigned long output_bit_buffer=0L;output_bit_buffer|=(unsigned long)code<<(32-BITS-output_bit_count);output_bit_count+=BITS;while(output_bit_count>=8)putc(output_bit_buffer>>24,output);output_bit_buffer<<=8;output_bit_count-=8;}}void compress(FILE *input,FILE *output){unsigned int next_code;unsigned int character;unsigned int string_code;unsigned int index;int i;next_code=256;for(i=0;i<TABLE_SIZE;i++)code_value[i]=-1;i=0;printf("\n\nCompressing...\n);string_code=getc(input);while((character=getc(input))!=(unsigned)EOF) {index=find_match(string_code,character);if(code_value[index]!=-1)string_code=code_value[index];{if(next_code<=MAX_CODE){code_value[index]=next_code++;prefix_code[index]=string_code;append_character[index]=character;}output_code(output,string_code);string_code=character;}}output_code(output,string_code);output_code(output,MAX_V ALUE);output_code(output,0);printf("\n");getchar();void expand(FILE*input,FILE*output){unsigned int next_code;unsigned int nex_code;unsigned int old_code;int character;unsigned char*string;char*decode_string(unsigned char*buffer,unsigned int code)lnext_code=256;counter=0;printf("\n\nExpanding...\n");old_code=input_code(input);character=old_code;putc(old_code,output);while((nex_code=input_code(input))!=(MAX_V ALUE)){if(new_code>=next_code){*decode_stack=character;string=(unsigned char*)decode_string(decode_stcak+1,old_code);}elsestring=(unsigned char*)decode_string(decode_stck,nex_code);character=*string;while(string>=decode_stack)putc(*string--,output);if(next_code<=MAX_CODE){append_character[next_code]=character;next_code++;}old_code=nex_code;}printf("\n");getchar();}char *decode_string(unsigned char*buffer,unsigned int code) {int i;i=0;while(code>255){*buffer++=append_character[code];code=prefix_code[code];if(i++>=4094){printf("Fatal error during code expansion.\n");exit(0);}}*buffer=code;int main(int argc,char*argv[]){FILE*input_file;FILE*output_file;FILE*lzw_file;char input_file_name[81];int select;character=*string;while(string>=decode_stack)putc(*string--,output);if(next_code<=MAX_CODE){prefix_code[next_code]=old_code;append_character[next_code]=character;next_code++;}old_code=new_code;}printf("\n");getchar();}printf("**\n");printf("**********************\n");scanf("%d",&select);if(select==1){if(argc>1)strcpy(input_file_name,argv[1]);else{printf("\nInput file name?");scanf("%s",input_file_name);printf("\nCompressed file name?");scanf("%s",compressed_file_name);}input_file=fopen(input_file_name,"rb");lzw_file=fopen(compressed_filename,"wb");while(input_file==NULL||lzw_file==NULL){printf("Fatal error opening files!\n");printf("\nInput file names?");scanf("%s",input_file_name);printf("\nCompressed file name?");scanf("%s",compressed_file_name);input_file=fopen(input_file_name,"rb");};compress(input_file,lzw_file);fclose(input_file);fclode(lzw_file);free(code_value);else if(select==2){printf("\nOnput file names?");scanf("%s",onput_filename);printf("\nExpanded file name?");scanf("%s",expanded_filename);input_file=fopen(onput_filename,"rb");lzw_file=fopen(expanded_filename,"wb");while(lzw_file==NULL||output_file==NULL){printf("Fatal error opening files!\n");printf("\nOnput file names?");scanf("%s",onput_filename);printf("\nExpanded file name?");scanf("%s",expanded_filename);input_file=fopen(onput_filename,"rb");lzw_file=fopen(expanded_filename,"wb");};expand(lzw_file,output_file);fclose(lzw_file);-11fclose(output_file);}else{exit(0);}printf("\nContinue or not(y/n)?");scanf("%c",&ok);getchar();if(ok=='y'){goto loop;}else{printf("Complete......\n\nPress any key to continue");getchar();free(prefix_code);free(append_character);}return 0;}}-12。
LZW压缩和解压黄陂一中盘龙校区张兴才LZW压缩是由Lemple、Zip和Welch共同创造,用他们的名字命名的压缩方法。
下面结合C语言的实现方法,介绍LZW压缩和解压的原理。
一、码表被压缩的字符系列称为数据流,压缩后的代码称为编码流,将数据流压缩成编码流要依据码表。
什么是码表?我们先看看码表的结构和初始化吧。
typedef struct{char used ;UINT prev; //typedef UINT unsigned intBYTE c;}ENTRY;void InitTable(){int i;for(i = 0 ; i < 4096;i++){string_tab[i].used = FALSE ;//#define FALSE 0string_tab[i].prev = NO_PREV;//#define NO_PREV 0xFFFF表示没有前缀string_tab[i].c= 0;for(i = 0 ; i< 258; i++){string_tab[i].used = TRUE; //#define TRUE !FALSEstring_tab[i].c = i;}}从上面的代码可知,码表共有4096行,每行有3列。
used表示该行是否被使用,使用了其值为TRUE,否则为FALSE。
prev表示前缀,主要存储的是该码表的索引值(行号),用以指示该表的不同行,取值范围是0——4095。
c表示后缀,存储一个字符。
该码表的0——257行的prev域被初始化,其中的值表示的意义是:0—255用来表示单个字符,256表示开始新的码表,257表示压缩结束。
二、压缩过程以下程序段将infp中的字符系列压缩到outfp中。
putcode(outfp, CC);//outfp是存储编码流的文件,CC的值为256,表示开始新的码表,outfp是存放编码流的文件,以上函数表示将码表开始代码放在outfp文件的开始位置InitTable();c = readc(infp);//从输入文件读取一个字符,infp表示输入文件,c是后缀变量prevcode = QueryTable(NO_PREV , c);//在码表中查询前缀为“NO_PREV”,后缀为“c”(即刚刚读入的字符)的行,并将行号赋给前缀变量prevcode,其实相当于prevcode=c;++total;while(UEOF != (c = readc(infp)))//UEOF为文件结束标志{++total;if(NOT_FIND!=(localcode = QueryTable(prevcode , c)))// NOT_FIND表示在码表中查询指定的前缀和后缀对,没有找到{prevcode = localcode;//找到指定的前缀后缀对后,将找到行的行号赋给前缀变量,接着从输入文件读入下一个字符作为后缀,进行下一轮的查询continue;}putcode(outfp , prevcode);// 指定的前缀和后缀对没有找到,则将前缀变量prevcode的值输出到存储编码流的输出文件outfp中if(count)//count为码表中空白行的行数{UpdateTable(prevcode, c);//将指定的前缀和后缀分别填入码表的第一个空白行的相应位置--count;//码表的空白行减1}if(count == 0)//如果码表没有空白行了,则重新建一个码表,并在编码流中放入代码CC(256),表示开始新的码表。
标准的LZW压缩原理:~~~~~~~~~~~~~~~~~~先来解释一下几个基本概念:LZW压缩有三个重要的对象:数据流(CharStream)、编码流(CodeStream)和编译表(String Table)。
在编码时,数据流是输入对象(图象的光栅数据序列),编码流就是输出对象(经过压缩运算的编码数据);在解码时,编码流则是输入对象,数据流是输出对象;而编译表是在编码和解码时都须要用借助的对象。
字符(Character):最基础的数据元素,在文本文件中就是一个字节,在光栅数据中就是一个像素的颜色在指定的颜色列表中的索引值;字符串(String):由几个连续的字符组成;前缀(Prefix):也是一个字符串,不过通常用在另一个字符的前面,而且它的长度可以为0;根(Root):单个长度的字符串;编码(Code):一个数字,按照固定长度(编码长度)从编码流中取出,编译表的映射值;图案:一个字符串,按不定长度从数据流中读出,映射到编译表条目.LZW压缩的原理:提取原始图象数据中的不同图案,基于这些图案创建一个编译表,然后用编译表中的图案索引来替代原始光栅数据中的相应图案,减少原始数据大小。
看起来和调色板图象的实现原理差不多,但是应该注意到的是,我们这里的编译表不是事先创建好的,而是根据原始图象数据动态创建的,解码时还要从已编码的数据中还原出原来的编译表(GIF文件中是不携带编译表信息的),为了更好理解编解码原理,我们来看看具体的处理过程:编码器(Compressor)~~~~~~~~~~~~~~~~编码数据,第一步,初始化一个编译表,假设这个编译表的大小是12位的,也就是最多有4096个单位,另外假设我们有32个不同的字符(也可以认为图象的每个像素最多有32种颜色),表示为a,b,c,d,e...,初始化编译表:第0项为a,第1项为b,第2项为c...一直到第31项,我们把这32项就称为根。
开始编译,先定义一个前缀对象Current Prefix,记为[.c.],现在它是空的,然后定义一个当前字符串Current String,标记为[.c.]k,[.c.]就为Current Prefix,k就为当前读取字符。
lzw压缩算法的c语言实现1 程序由五个模块组成。
(1)lzw.h定义了一些基本的数据结构,常量,还有变量的初始化等。
#ifndef __LZW_H__#define __LZW_H__//------------------------------------------------------------------------------#in c lude#in c lude#in c lude#in c lude//------------------------------------------------------------------------------#define LZW_BASE0x102//The c ode base#define C ODE_LEN12//Max c ode length#define TABLE_LEN4099 // It must be prime number and bigger than 2^C OD E_LEN=4096.// Su c h as 5051 is also ok.#define BUFFERSIZE1024//------------------------------------------------------------------------------typedef stru c t{HANDLEh_sour;// Sour c e file handle.HANDLEh_dest;// Destination file handle.HANDLEh_suffix; // Suffix table handle.HANDLEh_prefix; // Prefix table handle.HANDLEh_c ode;// C ode table handle.LPWORDlp_prefix; // Prefix table head pointer.LPBYTElp_suffix; // Suffix table head pointer.LPWORDlp_c ode; // C ode table head pointer.WORD c ode;WORDprefix;BYTEsuffix;BYTE c ur_c ode_len; // C urrent c ode length.[ used in Dynami c-C ode-Length mode ]}LZW_DATA,*PLZW_DATA;typedef stru c t{WORDtop;WORDindex;LPBYTElp_buffer;HANDLEh_buffer;BYTEby_left;DWORDdw_buffer;BOOLend_flag;}BUFFER_DATA,*PBUFFER_DATA;typedef stru c t//Sta c k used in de c ode{WORDindex;HANDLEh_sta c k;LPBYTElp_sta c k;}STA C K_DATA,*PSTA C K_DATA;//------------------------------------------------------------------------------VOID sta c k_c reate( PSTA C K_DATA sta c k ){sta c k->h_sta c k= GlobalAllo c( GHND , TABLE_LEN*sizeof(BYTE) );sta c k->lp_sta c k = GlobalLo c k( sta c k->h_sta c k );sta c k->index = 0;}//------------------------------------------------------------------------------VOID sta c k_destory( PSTA C K_DATA sta c k ){GlobalUnlo c k( sta c k->h_sta c k );GlobalFree( sta c k->h_sta c k );}//------------------------------------------------------------------------------VOID buffer_c reate( PBUFFER_DATAbuffer ){buffer->h_buffer= GlobalAllo c(GHND,BUFFERSIZE*sizeof(BYTE));buffer->lp_buffer= GlobalLo c k( buffer->h_buffer );buffer->top= 0;buffer->index= 0;buffer->by_left= 0;buffer->dw_buffer= 0;buffer->end_flag= FALSE;}//------------------------------------------------------------------------------VOID buffer_destory( PBUFFER_DATAbuffer ){GlobalUnlo c k( buffer->h_buffer );GlobalFree( buffer->h_buffer );}//------------------------------------------------------------------------------VOID re_init_lzw( PLZW_DATA lzw )//When c ode table rea c hed its top it should{//be reinitialized.memset( lzw->lp_c ode, 0xFFFF, TABLE_LEN*sizeof(WORD) );lzw->c ode= LZW_BASE;lzw->c ur_c ode_len= 9;}//------------------------------------------------------------------------------VOID lzw_c reate(PLZW_DATAlzw,HANDLE h_sour,HANDLE h_dest){WORD i;lzw->h_c ode= GlobalAllo c( GHND, TABLE_LEN*sizeof(WORD) );lzw->h_prefix= GlobalAllo c( GHND, TABLE_LEN*sizeof(WORD) );lzw->h_suffix= GlobalAllo c( GHND, TABLE_LEN*sizeof(BYTE) );lzw->lp_c ode= GlobalLo c k( lzw->h_c ode);lzw->lp_prefix= GlobalLo c k( lzw->h_prefix );lzw->lp_suffix= GlobalLo c k( lzw->h_suffix );lzw->c ode= LZW_BASE;lzw->c ur_c ode_len= 9;lzw->h_sour= h_sour;lzw->h_dest= h_dest;memset( lzw->lp_c ode, 0xFFFF, TABLE_LEN*sizeof(WORD) );}//------------------------------------------------------------------------------VOID lzw_destory(PLZW_DATAlzw){GlobalUnlo c k( lzw->h_c ode);GlobalUnlo c k( lzw->h_prefix );GlobalUnlo c k( lzw->h_suffix );GlobalFree( lzw->h_c ode);GlobalFree( lzw->h_prefix );GlobalFree( lzw->h_suffix );}//------------------------------------------------------------------------------#endif(2) fileio.h定义了一些文件操作#ifndef __FILEIO_H__#define __FILEIO_H__//------------------------------------------------------------------------------#in c lude#in c lude#in c lude//------------------------------------------------------------------------------HANDLEfile_handle(C HAR* file_name){HANDLE h_file;h_file = C reateFile(file_name,GENERI C_READ|GENERI C_WRITE,FILE_SHARE_READ|FILE_SHARE_WRITE,NULL,OPEN_ALWAYS,0,NULL);return h_file;}//------------------------------------------------------------------------------WORD load_buffer(HANDLE h_sour, PBUFFER_DATA buffer)// Load file to bu ffer{DWORD ret;ReadFile(h_sour,buffer->lp_buffer,BUFFERSIZE,&ret,NULL);buffer->index = 0;buffer->top = (WORD)ret;return (WORD)ret;}//------------------------------------------------------------------------------WORD empty_buffer( PLZW_DATA lzw, PBUFFER_DATA buffer)// Output buffer to file{DWORD ret;if(buffer->end_flag) // The flag mark the end of de c ode{if( buffer->by_left ){buffer->lp_buffer[ buffer->index++ ] = (BYTE)( buffer->dw_buffer >> 32 -buffer->by_left )<<(8-buffer->by_left);}}WriteFile(lzw->h_dest, buffer->lp_buffer,buffer->index,&ret,NULL); buffer->index = 0;buffer->top = ret;return (WORD)ret;}//------------------------------------------------------------------------------#endif(3) hash.h定义了压缩时所用的码表操作函数,为了快速查找使用了hash算法,还有处理hash冲突的函数#ifndef __HASH_H__#define __HASH_H__//------------------------------------------------------------------------------#in c lude#in c lude#in c lude//------------------------------------------------------------------------------#defineDIVTABLE_LEN#defineHASHSTEP13// It should bigger than 0.//------------------------------------------------------------------------------WORD get_hash_index( PLZW_DATA lzw ){DWORD tmp;WORD result;DWORD prefix;DWORD suffix;prefix = lzw->prefix;suffix = lzw->suffix;tmp = prefix<<8 | suffix;result = tmp % DIV;return result;}//------------------------------------------------------------------------------WORD re_hash_index( WORD hash ) // If hash c onfli c t o cc ured we must re c al c ulate{// hash index .WORD result;result = hash + HASHSTEP;result = result % DIV;return result;}//------------------------------------------------------------------------------BOOL in_table( PLZW_DATA lzw ) // To find whether c urrent c ode is alre ady in table.{BOOL result;WORD hash;hash = get_hash_index( lzw );if( lzw->lp_c ode[ hash ] == 0xFFFF ){result = FALSE;}else{if( lzw->lp_prefix[ hash ] == lzw->prefix &&lzw->lp_suffix[ hash ] == lzw->suffix ){result = TRUE;}else{result = FALSE;while( lzw->lp_c ode[ hash ] != 0xFFFF ){if( lzw->lp_prefix[ hash ] == lzw->prefix &&lzw->lp_suffix[ hash ] == lzw->suffix ){result = TRUE;break;}hash = re_hash_index( hash );}}}return result;}//------------------------------------------------------------------------------WORD get_c ode( PLZW_DATA lzw ){WORD hash;WORD c ode;hash = get_hash_index( lzw );if( lzw->lp_prefix[ hash ] == lzw->prefix &&lzw->lp_suffix[ hash ] == lzw->suffix ){c ode = lzw->lp_c ode[ hash ];}else{while( lzw->lp_prefix[ hash ] != lzw->prefix ||lzw->lp_suffix[ hash ] != lzw->suffix ){hash = re_hash_index( hash );}c ode = lzw->lp_c ode[ hash ];}return c ode;}//------------------------------------------------------------------------------VOID insert_table( PLZW_DATA lzw ){WORD hash;hash = get_hash_index( lzw );if( lzw->lp_c ode[ hash ] == 0xFFFF ){lzw->lp_prefix[ hash ] = lzw->prefix;lzw->lp_suffix[ hash ] = lzw->suffix;lzw->lp_c ode[ hash ]= lzw->c ode;}else{while( lzw->lp_c ode[ hash ] != 0xFFFF ){hash = re_hash_index( hash );}lzw->lp_prefix[ hash ] = lzw->prefix;lzw->lp_suffix[ hash ] = lzw->suffix;lzw->lp_c ode[ hash ]= lzw->c ode;}}//------------------------------------------------------------------------------#endif(4) en c ode.h压缩程序主函数#ifndef __EN C ODE_H__#define __EN C ODE_H__//------------------------------------------------------------------------------#in c lude#in c lude#in c lude//------------------------------------------------------------------------------VOID output_c ode( DWORD c ode ,PBUFFER_DATA out, PLZW_DATA lzw){out->dw_buffer |= c ode << ( 32 - out->by_left - lzw->c ur_c ode_len ); out->by_left += lzw->c ur_c ode_len;while( out->by_left >= 8 ){if( out->index == BUFFERSIZE ){empty_buffer( lzw,out);}out->lp_buffer[ out->index++ ] = (BYTE)( out->dw_buffer >> 24 );out->dw_buffer <<= 8;out->by_left -= 8;}}//------------------------------------------------------------------------------VOID do_en c ode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw) {WORD prefix;while( in->index != in->top ){if( !in_table(lzw) ){// c urrent c ode not in c ode table// then add it to table and output prefixinsert_table(lzw);prefix = lzw->suffix;output_c ode( lzw->prefix ,out ,lzw );lzw->c ode++;if( lzw->c ode == (WORD)1<< lzw->c ur_c ode_len ){// c ode rea c hed c urrent c ode top(1<<CUR_CODE_LEN)// then c urrent c ode length add onelzw->c ur_c ode_len++;if( lzw->c ur_c ode_len == C ODE_LEN + 1 ){re_init_lzw( lzw );}}}else{// c urrent c ode already in c ode table// then output nothingprefix = get_c ode(lzw);}lzw->prefix = prefix;lzw->suffix = in->lp_buffer[ in->index++ ];}}//------------------------------------------------------------------------------VOID en c ode(HANDLE h_sour,HANDLE h_dest){LZW_DATAlzw;BUFFER_DATAin ;BUFFER_DATAout;BOOL first_run = TRUE;lzw_c reate( &lzw ,h_sour,h_dest );buffer_c reate( &in );buffer_c reate( &out );while( load_buffer( h_sour, &in ) ){if( first_run ){// File length should be c onsideredbut here we simply// believe file length bigger than 2 bytes.lzw.prefix = in.lp_buffer[ in.index++ ];lzw.suffix = in.lp_buffer[ in.index++ ];first_run = FALSE;}do_en c ode(&in , &out, &lzw);}output_c ode(lzw.prefix, &out , &lzw);output_c ode(lzw.suffix, &out , &lzw);out.end_flag = TRUE;empty_buffer( &lzw,&out);lzw_destory( &lzw );buffer_destory( &in );buffer_destory( &out );}//------------------------------------------------------------------------------#endif(5) de c ode.h解压函数主函数#ifndef __DE C ODE_H__#define __DE C ODE_H__//------------------------------------------------------------------------------#in c lude#in c lude#in c lude//------------------------------------------------------------------------------VOID out_c ode( WORD c ode ,PBUFFER_DATA buffer,PLZW_DATA lzw,PSTA C K_DAT A sta c k){WORD tmp;if( c ode < 0x100 ){sta c k->lp_sta c k[ sta c k->index++ ] = c ode;}else{sta c k->lp_sta c k[ sta c k->index++ ] = lzw->lp_suffix[ c ode ];tmp = lzw->lp_prefix[ c ode ];while( tmp > 0x100 ){sta c k->lp_sta c k[ sta c k->index++ ] = lzw->lp_suffix[ tmp ];tmp = lzw->lp_prefix[ tmp ];}sta c k->lp_sta c k[ sta c k->index++ ] = (BYTE)tmp;}while( sta c k->index ){if( buffer->index == BUFFERSIZE ){empty_buffer(lzw,buffer);}buffer->lp_buffer[ buffer->index++ ] = sta c k->lp_sta c k[ --sta c k->index ] ;}}//------------------------------------------------------------------------------VOID insert_2_table(PLZW_DATA lzw ){lzw->lp_c ode[ lzw->c ode ]= lzw->c ode;lzw->lp_prefix[ lzw->c ode ] = lzw->prefix;lzw->lp_suffix[ lzw->c ode ] = lzw->suffix;lzw->c ode++;if( lzw->c ode == ((WORD)1<c ur_c ode_len)-1 ){lzw->c ur_c ode_len++;if( lzw->c ur_c ode_len == C ODE_LEN+1 )lzw->c ur_c ode_len = 9;}if(lzw->c ode >= 1<{re_init_lzw(lzw);}}//------------------------------------------------------------------------------WORD get_next_c ode( PBUFFER_DATA buffer , PLZW_DATA lzw ){BYTE next;WORD c ode;while( buffer->by_left < lzw->c ur_c ode_len ){if( buffer->index == BUFFERSIZE ){load_buffer( lzw->h_sour, buffer );}next = buffer->lp_buffer[ buffer->index++ ];buffer->dw_buffer |= (DWORD)next << (24-buffer->by_left);buffer->by_left+= 8;}c ode = buffer->dw_buffer >> ( 32 - lzw->c ur_c ode_len );buffer->dw_buffer <<= lzw->c ur_c ode_len;buffer->by_left-= lzw->c ur_c ode_len;return c ode;}//------------------------------------------------------------------------------VOID do_de c ode( PBUFFER_DATA in, PBUFFER_DATA out, PLZW_DATA lzw, PSTA C K_DATA sta c k){WORD c ode;WORD tmp;while( in->index != in->top){c ode = get_next_c ode( in ,lzw );if( c ode < 0x100 ){// c ode already in table// then simply output the c odelzw->suffix = (BYTE)c ode;}else{if( c ode < lzw->c ode){// c ode also in table// then output c ode c haintmp = lzw->lp_prefix[ c ode ];while( tmp > 0x100 ){tmp = lzw->lp_prefix[ tmp ];}lzw->suffix = (BYTE)tmp;}else{// c ode == lzw->c ode// c ode not in table// add c ode into table// and out put c odetmp = lzw->prefix;while( tmp > 0x100 ){tmp = lzw->lp_prefix[ tmp ];}lzw->suffix = (BYTE)tmp;}}insert_2_table( lzw );out_c ode(c ode,out,lzw,sta c k);lzw->prefix = c ode;}}//------------------------------------------------------------------------------VOID de c ode( HANDLE h_sour, HANDLE h_dest ){LZW_DATAlzw;BUFFER_DATAin ;BUFFER_DATAout;STA C K_DATAsta c k;BOOLfirst_run;first_run = TRUE;lzw_c reate( &lzw ,h_sour,h_dest );buffer_c reate( &in );buffer_c reate( &out );sta c k_c reate(&sta c k );while( load_buffer( h_sour, &in ) ){if( first_run ){lzw.prefix = get_next_c ode( &in, &lzw );lzw.suffix = lzw.prefix;out_c ode(lzw.prefix, &out, &lzw , &sta c k);first_run = FALSE;}do_de c ode(&in , &out, &lzw, &sta c k);}empty_buffer( &lzw,&out);lzw_destory( &lzw );buffer_destory( &in );buffer_destory( &out );sta c k_destory( &sta c k);}#endif2下面给出一个应用上面模块的简单例子#in c lude#in c lude//------------------------------------------------------------------------------#in c lude "lzw.h"#in c lude "hash.h"#in c lude "fileio.h"#in c lude "en c ode.h"#in c lude "de c ode.h"//------------------------------------------------------------------------------HANDLE h_file_sour;HANDLE h_file_dest;HANDLE h_file;C HAR*file_name_in = "d:\\c ode.c";C HAR*file_name_out= "d:\\en c ode.e";C HAR*file_name= "d:\\de c ode.d";//------------------------------------------------------------------------------int main(int arg c, c har *argv[]){h_file_sour = file_handle(file_name_in);h_file_dest = file_handle(file_name_out);h_file= file_handle(file_name);en c ode(h_file_sour, h_file_dest); // de c ode(h_file_dest,h_file);C loseHandle(h_file_sour);C loseHandle(h_file_dest);C loseHandle(h_file);return 0;}。
用C++实现数据无损压缩、解压(使用LZW算法)小俊发表于 2008-9-10 14:50:00推荐LZW压缩算法由Lemple-Ziv-Welch三人共同创造,用他们的名字命名。
LZW就是通过建立一个字符串表,用较短的代码来表示较长的字符串来实现压缩。
LZW压缩算法是Unisys的专利,有效期到2003年,所以对它的使用是有限制的。
字符串和编码的对应关系是在压缩过程中动态生成的,并且隐含在压缩数据中,解压的时候根据表来进行恢复,算是一种无损压缩。
个人认为LZW很适用于嵌入式系统上。
因为:1、压缩和解压速度比较快,尤其是解压速度;2、占用资源少;3、压缩比也比较理想;4、适用于文本和图像等出现连续重复字节串的数据流。
LZW算法有一点比较特别,就是压缩过程中产生的字符串对应表,不需要保存到压缩数据中,因为这个表在解压过程中能自动生成回来。
LZW算法比较简单,我是按照这本书上写的算法来编程的:以下是源代码:class LZWCoder{private:struct TStr{char *string;unsigned int len;};TStr StrTable[4097];unsigned int ItemPt;unsigned int BytePt;unsigned char BitPt;unsigned char Bit[8];unsigned char Bits;unsigned int OutBytes;void InitStrTable();void CopyStr(TStr *d, TStr s);void StrJoinChar(TStr *s, char c);unsigned int InStrTable(TStr s);void AddTableEntry(TStr s);void WriteCode(char *dest, unsigned int b);unsigned int GetNextCode(char *src);void StrFromCode(TStr *s, unsigned int c);void WriteString(char *dest, TStr s);public:unsigned int Encode(char *src, unsigned int len, char *dest);unsigned int Decode(char *src, unsigned int *len, char *dest);LZWCoder();~LZWCoder();};void LZWCoder::InitStrTable(){unsigned int i;for(i = 0; i < 256; i ++){StrTable[i].string = (char *)realloc(StrTable[i].string, 1);StrTable[i].string[0] = i;StrTable[i].len = 1;}StrTable[256].string = NULL;StrTable[256].len = 0;StrTable[257].string = NULL;StrTable[257].len = 0;ItemPt = 257;Bits = 9;}void LZWCoder::CopyStr(TStr *d, TStr s){unsigned int i;d->string = (char *)realloc(d->string, s.len);for(i = 0; i < s.len; i ++)d->string[i] = s.string[i];d->len = s.len;}void LZWCoder::StrJoinChar(TStr *s, char c){s->string = (char *)realloc(s->string, s->len + 1);s->string[s->len ++] = c;}unsigned int LZWCoder::InStrTable(TStr s){unsigned int i,j;bool b;for(i = 0; i <= ItemPt; i ++){if(StrTable[i].len == s.len){b = true;for(j = 0; j < s.len; j ++)if(StrTable[i].string[j] != s.string[j]){b = false;break;}if(b) return i;}}return 65535;}void LZWCoder::AddTableEntry(TStr s){CopyStr(&StrTable[++ItemPt], s);void LZWCoder::WriteCode(char *dest, unsigned int b){unsigned char i;for(i = 0; i < Bits; i++){Bit[BitPt ++] = (b & (1 << (Bits - i - 1))) != 0;if(BitPt == 8){BitPt = 0;dest[BytePt ++] = (Bit[0] << 7)+ (Bit[1] << 6)+ (Bit[2] << 5)+ (Bit[3] << 4)+ (Bit[4] << 3)+ (Bit[5] << 2)+ (Bit[6] << 1)+ Bit[7];}}}unsigned int LZWCoder::GetNextCode(char *src){unsigned char i;unsigned int c = 0;for(i = 0; i < Bits; i ++){c = (c << 1) + ((src[BytePt] & (1 << (8 - (BitPt ++) - 1))) ! = 0);if(BitPt == 8){BitPt = 0;BytePt ++;}}return c;void LZWCoder::StrFromCode(TStr *s, unsigned int c){CopyStr(s, StrTable[c]);}void LZWCoder::WriteString(char *dest, TStr s){unsigned int i;for(i = 0; i < s.len; i++)dest[OutBytes ++] = s.string[i];}unsigned int LZWCoder::Encode(char *src, unsigned int len, char *dest){TStr Omega, t;char k;unsigned int i;unsigned int p;BytePt = 0;BitPt = 0;InitStrTable();WriteCode(dest, 256);Omega.string = NULL;Omega.len = 0;t.string = NULL;t.len = 0;for(i = 0; i < len; i ++){k = src[i];CopyStr(&t, Omega);StrJoinChar(&t, k);if(InStrTable(t) != 65535)CopyStr(&Omega, t);else{WriteCode(dest, InStrTable(Omega));AddTableEntry(t);switch(ItemPt){case 512: Bits = 10; break;case 1024: Bits = 11; break;case 2048: Bits = 12; break;case 4096: WriteCode(dest, 256); InitStrTable();}Omega.string = (char *)realloc(Omega.string, 1);Omega.string[0] = k;Omega.len = 1;}}WriteCode(dest, InStrTable(Omega));WriteCode(dest, 257);Bits = 7;WriteCode(dest, 0);free(Omega.string);free(t.string);return BytePt;}unsigned int LZWCoder::Decode(char *src, unsigned int *len, char *dest){unsigned int code, oldcode;TStr t, s;BytePt = 0;BitPt = 0;OutBytes = 0;t.string = NULL;t.len = 0;s.string = NULL;s.len = 0;InitStrTable();while((code = GetNextCode(src)) != 257){if(code == 256){InitStrTable();code = GetNextCode(src);if(code == 257) break;StrFromCode(&s, code);WriteString(dest, s);oldcode = code;}else{if(code <= ItemPt){StrFromCode(&s, code);WriteString(dest, s);StrFromCode(&t, oldcode);StrJoinChar(&t, s.string[0]);AddTableEntry(t);switch(ItemPt){case 511: Bit s = 10; break;case 1023: Bi ts = 11; break;case 2047: Bi ts = 12; break;}oldcode = code;}else{StrFromCode(&s, oldcode);StrJoinChar(&s, s.string[0]);WriteString(dest, s);AddTableEntry(s);switch(ItemPt){case 511: Bit s = 10; break;case 1023: Bi ts = 11; break;case 2047: Bi ts = 12; break;}oldcode = code;}}}free(t.string);free(s.string);*len = BytePt + (BitPt != 0);return OutBytes;}LZWCoder::LZWCoder(){unsigned int i;for(i = 0; i < 4097; i ++){StrTable[i].string = NULL;StrTable[i].len = 0;}}LZWCoder::~LZWCoder(){unsigned int i;for(i = 0; i < 4097; i ++)free(StrTable[i].string);}用法:LZWCoder *Coder;Coder = new LZWCoder();然后用Coder->Encode(char *src, unsigned int len, char *dest);Coder->Decode(char *src, unsigned int *len, char *dest);进行压缩或解压。
LZW编解码的VB界面化实现通俗的讲,LZW就是通过建立一个字符串表,用较短的代码来表示较长的字符串来实现压缩。
LZW压缩算法的基本原理:提取原始文本文件数据中的不同字符,基于这些字符创建一个编译表,然后用编译表中的字符的索引来替代原始文本文件数据中的相应字符,减少原始数据大小。
LZW编码算法的具体执行步骤如下:1. 开始时的字典包含所有可能的根,而当前前缀P是空的2. 当前的字符(C)=字符流中的下一个字符3. 判断缀-字符串P+C是否在字典中(1)如果在字典中:P=P+C(2)如果不在字典中,则:a. 把当前前缀P的码字输出到码字流b. 把缀-符串P+C添加到字典c. 令P=C4. 判断码字流中是否还有码字要译(1)如果有,返回到步骤2(2)如果没有,则:a. 把代表当前前缀P的码字输出到码字流b. 结束伪码描述:dic[j]<-all n single character ,j=1,2,3…nj=n+1prefix<-read first character in charstreamwhile ((C<-nextcharacter)!=null)beginif prefix.c is in dicprefix<-prefix.celsecodestream<-cW for Prefixdic[j]<-prefix.cj=n+1Prefix<-CEndcodestream <-cW for PrefixLZW译码算法的具体执行步骤如下:1.在开始译码时,词典包含所有可能的前缀根2.当前码字cW=码字流中的第一个码字3.输出当前缀-符串 .cW到字符流中4.先前码字pW=当前码字cW5.当前码字cW=码字流中的下一个码字6.判断当前缀-符串sting.cW是否在词典中:(1)如果是,则:a.当前缀-符串string.cW输出到字符流b.当前前缀P=先前缀-符串string.pWc.当前字符C=当前前缀-符串string.pW的第一个字符d.把缀-符串P+C添加到词典(2)如果否,则:a.当前前缀P=先前缀-符串string.pWb.当前字符C=当前缀-符串string.pW的第一个字符c.输出缀-符串P+C到字符流,然后把它添加到词典中7.判断码字流中是否还有码字要译(1)如果是,返回到第四步(2)否,结束伪码描述:Dic[j]<-all n single-character,j=1,2,3…nJ=n+1cW<-first code from codestreamcharstream<-dic[cW]pW<-cWwhile ((cW<-next code word)!=null)beginif cW is in diccharstream<-dic[cW]prefix<-dic[pW]cW<-first character of dic[cW]dic[j]<-prefix.cWj=n+1pW=cWelseprefix<-dic[pW]cW<-first character of prefixcharstream<-prefix.cWdic[j]<-prefix.CpW<-cWj<-n+1endend按照上述伪码描述,将其转为VB语言,并加上简单界面。
实验2 用C语言实现LZW编码1.实验目的1)通过实验进一步掌握LZW编码的原理2)能正确C语言实现LZW编、解码2.实验要求给出字符,能正确输出编码,并能进行译码3.实验内容1)编码过程LZW编码是围绕称为词典的转换表来完成的。
这张转换表用来存放称为前缀(Prefix)的字符序列,并且为每个表项分配一个码字(Code word),或者叫做序号,如表6所示。
这张转换表实际上是把8位ASCII字符集进行扩充,增加的符号用来表示在文本或图像中出现的可变长度ASCII字符串。
扩充后的代码可用9位、10位、11位、12位甚至更多的位来表示。
Welch的论文中用了12位,12位可以有4096个不同的12位代码,这就是说,转换表有4096个表项,其中256个表项用来存放已定义的字符,剩下3840个表项用来存放前缀(Prefix)。
表6 词典LZW编码器(软件编码器或硬件编码器)就是通过管理这个词典完成输入与输出之间的转换。
LZW编码器的输入是字符流(Charstream),字符流可以是用8位ASCII字符组成的字符串,而输出是用n位(例如12位)表示的码字流(Codestream),码字代表单个字符或多个字符组成的字符串。
LZW编码器使用了一种很实用的分析(parsing)算法,称为贪婪分析算法(greedy parsing algorithm)。
在贪婪分析算法中,每一次分析都要串行地检查来自字符流(Charstream)的字符串,从中分解出已经识别的最长的字符串,也就是已经在词典中出现的最长的前缀(Prefix)。
用已知的前缀(Prefix)加上下一个输入字符C也就是当前字符(Current character)作为该前缀的扩展字符,形成新的扩展字符串——缀-符串(String):Prefix.C。
这个新的缀-符串(String)是否要加到词典中,还要看词典中是否存有和它相同的缀-符串String。
如果有,那么这个缀-符串(String)就变成前缀(Prefix),继续输入新的字符,否则就把这个缀-符串(String)写到词典中生成一个新的前缀(Prefix),并给一个代码。
毕业设计(论文)题目LZW压缩算法的软件实现学生姓名学号专业计算机科学与技术班级指导教师评阅教师完成日期2008 年5 月18 日LZW压缩算法的研究与实现摘要随着计算机技术和网络技术的发展,人们需要存储和传播的数据越来越多,因此我们可以通过压缩数据来提高数据存储与传播的效率。
本系统按照LZW算法压缩原理,使用Microsoft Visual C++ 6.0 进行了开发,完成了压缩函数和解压缩函数的编写,实现了文件的无损压缩和解压缩。
关键字无损压缩解压缩LZWResearching and Implementing of LosslessCompression AlgorithmsABSTRACTAlong with the development of computer technical and network technology, the data that people need to stock and propagate is more and more. These datas have occupied plenty of disk spaces and network bandwidthes. However in data, there is also a lot of redundancies, therefore we may decrease the disk space of data occupation and the bandwidth with network occupied on transmit through compress data to stock. Data compression divides into loss compression and lossless compression, data reconciles before reducing to reduce rear content the compression that does not occur any change is called as lossless compression.Through long development has arisen a lot of lossless data compressed algorithms, we have compare various relatively lossless compression algorithm, has reached the advantage and disadvantage of each kind of algorithm.This system is used Microsoft Visual C++ 6.0 developed. According to LZW algorithms, we have accomplished the Compresses function and Decompresses function, and realizes lossless compression and decompress file.Keyword: Lossless compression Decompress LZW目录摘要 (II)ABSTRACT ...................................................................................................... I II 引言 (1)第1章系统需求分析 (3)1.1功能需求 (3)1.2性能需求 (3)1.3无损压缩算法的简介和比较 (4)1.3.1 LZ77算法 (4)1.3.2 LZSS算法 (6)1.3.3 LZ78算法 (8)1.3.4 LZW算法 (11)1.3.5 各种算法的比较 (17)1.4本课题的目标 (19)1.5系统开发环境 (19)第2章系统设计 (20)2.1系统结构 (2)2.2压缩文件格式的设计 (22)2.3开发方法的说明 (22)2.4各模块设计 (23)2.5算法分析 (26)第3章系统的实现 (29)3.1系统界面和主要功能 (29)3.2测试 (35)第4章结论 (37)致谢 (39)参考文献 (39)附录:主要源程序 (40)引言21世纪是一个属于网络信息高速发展的世纪,随着人们对网络的使用频率迅速提升,网络所负载的信息压力也越来越大,这主要体现在人们需要存储和传输的数据会越来越多。
程序由五个模块组成lzw.h 定义了一些基本的数据结构,常量,还有变量的初始化等。
#ifndef __LZW_H__#define __LZW_H__//------------------------------------------------------------------------------#include <stdio.h>#include <stdlib.h>#include <windows.h>#include <memory.h>//------------------------------------------------------------------------------#define LZW_BASE 0x102// The code base#define CODE_LEN 12 // Max code length#define TABLE_LEN 4099 // It must be prime number and bigger than 2^CODE_LEN=4096.// Such as 5051 is also ok.#define BUFFERSIZE 1024//------------------------------------------------------------------------------typedef struct{HANDLE h_sour; // Source file handle.HANDLE h_dest; // Destination file handle.HANDLE h_suffix; // Suffix table handle.HANDLE h_prefix; // Prefix table handle.HANDLE h_code; // Code table handle.LPWORD lp_prefix; // Prefix table head pointer.LPBYTE lp_suffix; // Suffix table head pointer.LPWORD lp_code; // Code table head pointer.WORD code;WORD prefix;BYTE suffix;BYTE cur_code_len; // Current code length.[ used in Dynamic-Code-Length mode ] }LZW_DATA,*PLZW_DATA;typedef struct{WORD top;WORD index;LPBYTE lp_buffer;HANDLE h_buffer;BYTE by_left;DWORD dw_buffer;BOOL end_flag;}BUFFER_DATA,*PBUFFER_DATA;typedef struct //Stack used in decode{WORD index;HANDLE h_stack;LPBYTE lp_stack;}STACK_DATA,*PSTACK_DATA;//------------------------------------------------------------------------------ VOID stack_create( PSTACK_DATA stack ){stack->h_stack = GlobalAlloc( GHND , TABLE_LEN*sizeof(BYTE) ); stack->lp_stack = GlobalLock( stack->h_stack );stack->index = 0;}//------------------------------------------------------------------------------ VOID stack_destory( PSTACK_DATA stack ){GlobalUnlock( stack->h_stack );GlobalFree ( stack->h_stack );}//------------------------------------------------------------------------------ VOID buffer_create( PBUFFER_DATA buffer ){buffer->h_buffer = GlobalAlloc( GHND, BUFFERSIZE*sizeof(BYTE) );buffer->lp_buffer = GlobalLock( buffer->h_buffer );buffer->top = 0;buffer->index = 0;buffer->by_left = 0;buffer->dw_buffer = 0;buffer->end_flag = FALSE;}//------------------------------------------------------------------------------VOID buffer_destory( PBUFFER_DATA buffer ){GlobalUnlock( buffer->h_buffer );GlobalFree ( buffer->h_buffer );}//------------------------------------------------------------------------------VOID re_init_lzw( PLZW_DATA lzw ) //When code table reached its top it should { //be reinitialized.memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );lzw->code = LZW_BASE;lzw->cur_code_len = 9;}//------------------------------------------------------------------------------VOID lzw_create(PLZW_DATA lzw, HANDLE h_sour, HANDLE h_dest) {WORD i;lzw->h_code = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) );lzw->h_prefix = GlobalAlloc( GHND, TABLE_LEN*sizeof(WORD) );lzw->h_suffix = GlobalAlloc( GHND, TABLE_LEN*sizeof(BYTE) );lzw->lp_code = GlobalLock( lzw->h_code );lzw->lp_prefix = GlobalLock( lzw->h_prefix );lzw->lp_suffix = GlobalLock( lzw->h_suffix );lzw->code = LZW_BASE;lzw->cur_code_len = 9;lzw->h_sour = h_sour;lzw->h_dest = h_dest;memset( lzw->lp_code, 0xFFFF, TABLE_LEN*sizeof(WORD) );}//------------------------------------------------------------------------------VOID lzw_destory(PLZW_DATA lzw){GlobalUnlock( lzw->h_code );GlobalUnlock( lzw->h_prefix );GlobalUnlock( lzw->h_suffix );GlobalFree( lzw->h_code );GlobalFree( lzw->h_prefix );GlobalFree( lzw->h_suffix );}//------------------------------------------------------------------------------#endiffileio.h 定义了一些文件操作#ifndef __FILEIO_H__#define __FILEIO_H__//------------------------------------------------------------------------------#include <stdio.h>#include <stdlib.h>#include <windows.h>//------------------------------------------------------------------------------HANDLE file_handle(CHAR* file_name){HANDLE h_file;h_file = CreateFile(file_name,GENERIC_READ|GENERIC_WRITE,FILE_SHARE_READ|FILE_SHARE_WRITE,NULL,OPEN_ALWAYS,0,NULL);return h_file;}//------------------------------------------------------------------------------WORD load_buffer(HANDLE h_sour, PBUFFER_DATA buffer) // Load file to buffer {DWORD ret;ReadFile(h_sour,buffer->lp_buffer,BUFFERSIZE,&ret,NULL);buffer->index = 0;buffer->top = (WORD)ret;return (WORD)ret;}//------------------------------------------------------------------------------WORD empty_buffer( PLZW_DATA lzw, PBUFFER_DATA buffer)// Output buffer to file{DWORD ret;if(buffer->end_flag) // The flag mark the end of decode{if( buffer->by_left ){buffer->lp_buffer[ buffer->index++ ] = (BYTE)( buffer->dw_buffer >> 32-buffer->by_left )<<(8-buffer->by_left); }}WriteFile(lzw->h_dest, buffer->lp_buffer,buffer->index,&ret,NULL);buffer->index = 0;buffer->top = ret;return (WORD)ret;}//------------------------------------------------------------------------------#endifhash.h 定义了压缩时所用的码表操作函数,为了快速查找使用了hash算法,还有处理hash冲突的函数#ifndef __HASH_H__#define __HASH_H__//------------------------------------------------------------------------------#include <stdio.h>#include <stdlib.h>#include <windows.h>//------------------------------------------------------------------------------#define DIV TABLE_LEN#define HASHSTEP 13 // It should bigger than 0.//------------------------------------------------------------------------------WORD get_hash_index( PLZW_DATA lzw ){DWORD tmp;WORD result;DWORD prefix;DWORD suffix;prefix = lzw->prefix;suffix = lzw->suffix;tmp = prefix<<8 | suffix;result = tmp % DIV;return result;}//------------------------------------------------------------------------------WORD re_hash_index( WORD hash ) // If hash conflict occured we must recalculate{ // hash index .WORD result;result = hash + HASHSTEP;result = result % DIV;return result;}//------------------------------------------------------------------------------BOOL in_table( PLZW_DATA lzw ) // To find whether current code is already in table. {BOOL result;WORD hash;hash = get_hash_index( lzw );if( lzw->lp_code[ hash ] == 0xFFFF ){result = FALSE;}else{if( lzw->lp_prefix[ hash ] == lzw->prefix &&lzw->lp_suffix[ hash ] == lzw->suffix ){result = TRUE;}else{result = FALSE;while( lzw->lp_code[ hash ] != 0xFFFF ){if( lzw->lp_prefix[ hash ] == lzw->prefix &&lzw->lp_suffix[ hash ] == lzw->suffix ){result = TRUE;break;}hash = re_hash_index( hash );}}}return result;}//------------------------------------------------------------------------------ WORD get_code( PLZW_DATA lzw ){WORD hash;WORD code;hash = get_hash_index( lzw );if( lzw->lp_prefix[ hash ] == lzw->prefix &&lzw->lp_suffix[ hash ] == lzw->suffix ){code = lzw->lp_code[ hash ];}else{while( lzw->lp_prefix[ hash ] != lzw->prefix ||lzw->lp_suffix[ hash ] != lzw->suffix ){hash = re_hash_index( hash );}code = lzw->lp_code[ hash ];}return code;}//------------------------------------------------------------------------------ VOID insert_table( PLZW_DA TA lzw ){WORD hash;hash = get_hash_index( lzw );if( lzw->lp_code[ hash ] == 0xFFFF ){lzw->lp_prefix[ hash ] = lzw->prefix;lzw->lp_suffix[ hash ] = lzw->suffix;lzw->lp_code[ hash ] = lzw->code;}else{while( lzw->lp_code[ hash ] != 0xFFFF ){hash = re_hash_index( hash );}lzw->lp_prefix[ hash ] = lzw->prefix;lzw->lp_suffix[ hash ] = lzw->suffix;lzw->lp_code[ hash ] = lzw->code;}}//------------------------------------------------------------------------------ #endifencode.h 压缩程序主函数#ifndef __ENCODE_H__#define __ENCODE_H__//------------------------------------------------------------------------------#include <stdio.h>#include <stdlib.h>#include <windows.h>//------------------------------------------------------------------------------VOID output_code( DWORD code ,PBUFFER_DA TA out, PLZW_DATA lzw) {out->dw_buffer |= code << ( 32 - out->by_left - lzw->cur_code_len );out->by_left += lzw->cur_code_len;while( out->by_left >= 8 ){if( out->index == BUFFERSIZE ){empty_buffer( lzw,out);}out->lp_buffer[ out->index++ ] = (BYTE)( out->dw_buffer >> 24 );out->dw_buffer <<= 8;out->by_left -= 8;}}//------------------------------------------------------------------------------VOID do_encode( PBUFFER_DA TA in, PBUFFER_DA TA out, PLZW_DA TA lzw) {WORD prefix;while( in->index != in->top ){if( !in_table(lzw) ){// current code not in code table// then add it to table and output prefixinsert_table(lzw);prefix = lzw->suffix;output_code( lzw->prefix ,out ,lzw );lzw->code++;if( lzw->code == (WORD)1<< lzw->cur_code_len ){// code reached current code top(1<<cur_code_len)// then current code length add onelzw->cur_code_len++;if( lzw->cur_code_len == CODE_LEN + 1 ){re_init_lzw( lzw );}}}else{// current code already in code table// then output nothingprefix = get_code(lzw);}lzw->prefix = prefix;lzw->suffix = in->lp_buffer[ in->index++ ];}}//------------------------------------------------------------------------------ VOID encode(HANDLE h_sour,HANDLE h_dest){LZW_DATA lzw;BUFFER_DATA in ;BUFFER_DATA out;BOOL first_run = TRUE;lzw_create( &lzw ,h_sour,h_dest );buffer_create( &in );buffer_create( &out );while( load_buffer( h_sour, &in ) ){if( first_run ){// File length should be considered but here we simply// believe file length bigger than 2 bytes.lzw.prefix = in.lp_buffer[ in.index++ ];lzw.suffix = in.lp_buffer[ in.index++ ];first_run = FALSE;}do_encode(&in , &out, &lzw);}output_code(lzw.prefix, &out , &lzw);output_code(lzw.suffix, &out , &lzw);out.end_flag = TRUE;empty_buffer( &lzw,&out);lzw_destory( &lzw );buffer_destory( &in );buffer_destory( &out );}//------------------------------------------------------------------------------ #endifdecode.h 解压函数主函数#ifndef __DECODE_H__#define __DECODE_H__//------------------------------------------------------------------------------#include <stdio.h>#include <stdlib.h>#include <windows.h>//------------------------------------------------------------------------------VOID out_code( WORD code ,PBUFFER_DATA buffer,PLZW_DATA lzw,PSTACK_DATA stack) {WORD tmp;if( code < 0x100 ){stack->lp_stack[ stack->index++ ] = code;}else{stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ code ];tmp = lzw->lp_prefix[ code ];while( tmp > 0x100 ){stack->lp_stack[ stack->index++ ] = lzw->lp_suffix[ tmp ];tmp = lzw->lp_prefix[ tmp ];}stack->lp_stack[ stack->index++ ] = (BYTE)tmp;}while( stack->index ){if( buffer->index == BUFFERSIZE ){empty_buffer(lzw,buffer);}buffer->lp_buffer[ buffer->index++ ] = stack->lp_stack[ --stack->index ] ; }}//------------------------------------------------------------------------------VOID insert_2_table(PLZW_DATA lzw ){lzw->lp_code[ lzw->code ] = lzw->code;lzw->lp_prefix[ lzw->code ] = lzw->prefix;lzw->lp_suffix[ lzw->code ] = lzw->suffix;lzw->code++;if( lzw->code == ((WORD)1<<lzw->cur_code_len)-1 ){lzw->cur_code_len++;if( lzw->cur_code_len == CODE_LEN+1 )lzw->cur_code_len = 9;}if(lzw->code >= 1<<CODE_LEN ){re_init_lzw(lzw);}}//------------------------------------------------------------------------------ WORD get_next_code( PBUFFER_DA TA buffer , PLZW_DATA lzw ) {BYTE next;WORD code;while( buffer->by_left < lzw->cur_code_len ){if( buffer->index == BUFFERSIZE ){load_buffer( lzw->h_sour, buffer );}next = buffer->lp_buffer[ buffer->index++ ];buffer->dw_buffer |= (DWORD)next << (24-buffer->by_left);buffer->by_left += 8;}code = buffer->dw_buffer >> ( 32 - lzw->cur_code_len );buffer->dw_buffer <<= lzw->cur_code_len;buffer->by_left -= lzw->cur_code_len;return code;}//------------------------------------------------------------------------------VOID do_decode( PBUFFER_DA TA in, PBUFFER_DA TA out, PLZW_DATA lzw, PSTACK_DATA stack) {WORD code;WORD tmp;while( in->index != in->top ){code = get_next_code( in ,lzw );if( code < 0x100 ){// code already in table// then simply output the codelzw->suffix = (BYTE)code;}else{if( code < lzw->code ){// code also in table// then output code chaintmp = lzw->lp_prefix[ code ];while( tmp > 0x100 ){tmp = lzw->lp_prefix[ tmp ];}lzw->suffix = (BYTE)tmp;}else{// code == lzw->code// code not in table// add code into table// and out put codetmp = lzw->prefix;while( tmp > 0x100 ){tmp = lzw->lp_prefix[ tmp ];}lzw->suffix = (BYTE)tmp;}}insert_2_table( lzw );out_code(code,out,lzw,stack);lzw->prefix = code;}}//------------------------------------------------------------------------------ VOID decode( HANDLE h_sour, HANDLE h_dest ){LZW_DATA lzw;BUFFER_DATA in ;BUFFER_DATA out;STACK_DATA stack;BOOL first_run;first_run = TRUE;lzw_create( &lzw ,h_sour,h_dest );buffer_create( &in );buffer_create( &out );stack_create(&stack );while( load_buffer( h_sour, &in ) ){if( first_run ){lzw.prefix = get_next_code( &in, &lzw );lzw.suffix = lzw.prefix;out_code(lzw.prefix, &out, &lzw , &stack);first_run = FALSE;}do_decode(&in , &out, &lzw, &stack);}empty_buffer( &lzw,&out);lzw_destory( &lzw );buffer_destory( &in );buffer_destory( &out );stack_destory( &stack);}#endif一个应用上面模块的简单例子#include <stdio.h>#include <stdlib.h>//------------------------------------------------------------------------------#include "lzw.h"#include "hash.h"#include "fileio.h"#include "encode.h"#include "decode.h"//------------------------------------------------------------------------------ HANDLE h_file_sour;HANDLE h_file_dest;HANDLE h_file;CHAR* file_name_in = "d:\\code.c";CHAR* file_name_out= "d:\\encode.e";CHAR* file_name = "d:\\decode.d";//------------------------------------------------------------------------------ int main(int argc, char *argv[]){h_file_sour = file_handle(file_name_in);h_file_dest = file_handle(file_name_out);h_file = file_handle(file_name);encode(h_file_sour, h_file_dest); // decode(h_file_dest,h_file);CloseHandle(h_file_sour);CloseHandle(h_file_dest);CloseHandle(h_file);return 0;}。