Title TEXT COMPRESSION USING HUFFMAN CODES. Author: rajarathnam.l Visitor Submitted Source Code Author Email: rajara2004@yahoo.co.in Description: The project provides the way of compressing the repeated text size by using concept of huffman encoding technique. Hits: 3891 Since 25th November, 2003 Code: Select and Copy the Code /* TEXT COMPRESSION - HUFFMAN CODING */ #include<stdio.h> #include<string.h> #include<conio.h> #define MAXSUM 50 #define MAXNODES 100 #define MAXLEN 15 enum direction{left,right}; enum boolean{false,true}; typedef class node*nodeptr; typedef short int symbolindex; typedef short int listptr; nodeptr root; class symbol { public: char c; int count; nodeptr ptr; operator int() { return(count); } }; class node { public: int weight; nodeptr lc,rc; nodeptr parent; direction dir; boolean isleaf; symbolindex index; node() { lc=NULL; rc=NULL; parent=NULL; weight=0; isleaf=false; index=0; } node(symbolindex i,symbol &s) { index=i; weight=s.count; lc=NULL; rc=NULL; parent=NULL; isleaf=true; s.ptr=this; } int getindex() { if(isleaf) return(index); else return(0); } void insertright(nodeptr n) { n->parent=this; n->dir=right; rc=n; isleaf=false; } void insertleft(nodeptr n) { n->parent=this; n->dir=left; lc=n; isleaf=false; } }; class nodelist { private: listptr start,avail,prev; int cnt; nodeptr info[MAXNODES]; listptr link[MAXNODES]; public: nodelist(); void add(nodeptr n); nodeptr least(); boolean isempty() { return(cnt==0)?true:false; } }; nodelist::nodelist() { int i; start=1; link[start]=0; info[start]=NULL; avail=2; cnt=0; for(i=2;i<MAXNODES;i++) { link[i]=i+1; } } void nodelist::add(nodeptr n) { listptr ptr=link[start]; prev=start; listptr temp=avail; avail=link[avail]; info[temp]=n; while((ptr!=0 && (info[ptr]->weight<info[temp]->weight))) { prev=ptr; ptr=link[ptr]; } link[prev]=temp; link[temp]=ptr; cnt++; } nodeptr nodelist::least() { listptr temp=link[start]; cnt--; if(temp==0) return(NULL); link[start]=link[temp]; link[temp]=avail; avail=temp; return(info[temp]); } class symtab { private: symbol sym[MAXSUM]; public: int count; symtab() { count=0; } int find(char s); symbol & operator[](int index) { return(sym[index]); } void add(char c,int n); char*getcode(int index); }; int symtab::find(char s) { int i; for(i=0;i<count;i++) if(sym[i].c==s) return i; return(-1); } void symtab::add(char c,int n) { symbol s; s.count=n; s.c=c; sym[count++]=s; } char *symtab::getcode(int index) { int pos=MAXLEN; static char buffer[MAXLEN+1]; buffer[pos]='\0'; nodeptr ptr=sym[index].ptr; while(ptr!=root) { if(ptr->dir==left) buffer[--pos]='0'; else buffer[--pos]='1'; ptr=ptr->parent; } return(&buffer[pos]); } nodelist l; symtab stab; void initlist() { symbolindex i; nodeptr ptr; for(i=0;i<stab.count;i++) { ptr=new node(i,stab[i]); l.add(ptr); } }; void huffman() { nodeptr temp,lch,rch; for(;;) { lch=l.least(); rch=l.least(); temp=new node(); if(rch==NULL) { temp=lch; break; } temp->weight=lch -> weight+rch->weight; temp->insertleft(lch); temp->insertright(rch); l.add(temp); } root=temp; } void decode(char *encode) { char ch; int pos=0,i; nodeptr ptr; for(;;) { ptr=root; while(!ptr->isleaf) { ch=encode[pos++]; if(!ch) return; if(ch=='1') ptr=ptr->rc; else if(ch=='0') ptr=ptr->lc; else printf("unknown:%c",ch); } i=ptr->index; printf("%c",stab[i].c); } } void main(void) { symbolindex i; int pos=0,len; char symbol,message[256]; char encoded[1000]="\0"; clrscr(); printf("\n\t\t TEXT COMPRESSION USING HUFFMAN CODES \n\n"); printf("Enter Any Message : "); gets(message); while ( symbol = message[pos++]) { if((i=stab.find(symbol))<0) stab.add(symbol,1); else stab[i].count++; } len=pos-1; initlist(); huffman(); printf("\nCoding :\n"); printf("\n\t\tCharacter Occurence Code \n"); for(i=0;i<stab.count;i++) printf("\t\t %c = %d = %s \n",stab[i].c,stab[i].count,stab.getcode(i)); for(pos=0;pos<len;pos++) { symbol=message[pos]; strcat(encoded,stab.getcode(stab.find(symbol))); } printf("\nReport :\n\n"); printf(" The Length Of Uncompressed Text : %d\n",strlen(message)*8); printf(" The Length Of compressed text : %d\n",strlen(encoded)); printf(" The Encoded Version : %s\n",encoded); printf(" The Decoded Version : "); decode(encoded); getch(); } Output: TEXT COMPRESSION USING HUFFMAN CODES Enter Any Message : she sells sea shells on the sea shore Coding : Character Occurence Code s = 8 = 10 h = 4 = 011 e = 7 = 00 = 7 = 111 l = 4 = 010 a = 2 = 11011 o = 2 = 11010 n = 1 = 11000 t = 1 = 110011 r = 1 = 110010 Report : The Length Of Uncompressed Text : 296 The Length Of compressed text : 112 The Encoded Version : 1001100111100001001010111100011011111100110 001001010111110101100011111001101100111100011011111100111101011001000 The Decoded Version : she sells sea shells on the sea shore