之前看到cppblog一篇關(guān)于huffman的文,和我今早的一個(gè)夢(mèng)不謀而合。我記得似乎曾經(jīng)給前女友寫(xiě)過(guò)一個(gè)Huffman的課程大作業(yè),花了當(dāng)天晚上的一些時(shí)間,只是為了完成任務(wù)而寫(xiě)的,草草的回憶了一下huffman的原理,然后就開(kāi)始寫(xiě)了,當(dāng)時(shí)因?yàn)樗淖鳂I(yè)并沒(méi)要求規(guī)模,我只把控制臺(tái)輸入端作為文件輸入,先壓縮再解壓,并且把所有中間過(guò)程輸出。
我知道自己有個(gè)弱點(diǎn),當(dāng)自己不把模塊劃分得很細(xì),那么DEBUG的時(shí)間就會(huì)變得相對(duì)長(zhǎng)一些,但是那一次寫(xiě)哈夫曼編碼就完全是認(rèn)為這種作業(yè)就不必劃分得太細(xì)也能很容易實(shí)現(xiàn)。結(jié)果今早由于夢(mèng)見(jiàn)一個(gè)詭異的夢(mèng)(夢(mèng)見(jiàn)自己得了絕癥,她那弱不禁風(fēng)的身影在夢(mèng)里不斷浮現(xiàn)...),又看到有人寫(xiě)了篇哈夫曼編碼的隨筆,我就重新打開(kāi)那個(gè)我寫(xiě)的工程運(yùn)行了一次。
嗯,運(yùn)行正確,然后我找來(lái)一篇英文小說(shuō)復(fù)制了一大片進(jìn)去,結(jié)果bug出現(xiàn)了。接著是我不斷找bug的過(guò)程(比如當(dāng)時(shí)規(guī)模設(shè)置得太小,只是為了給她應(yīng)付作業(yè);再比如bit位批量處理類(lèi)型等等細(xì)節(jié)...)
在搞定兩個(gè)bug,最終還有一個(gè)詭異的bug沒(méi)解決,I give up. 我知道這個(gè)錯(cuò)誤之所以難以找到,是因?yàn)樽约簽榱穗S意發(fā)揮違背自己歷來(lái)遵從的編程原則,就算找到那個(gè)bug也無(wú)濟(jì)于事,不如以后不再任性,把模塊按down-top、top-down劃分(這種方法幾乎能讓我不出任何錯(cuò)),并且寫(xiě)代碼的時(shí)候,為每一次項(xiàng)目的測(cè)試單元和調(diào)試代碼做準(zhǔn)備。
當(dāng)時(shí)自己任性寫(xiě)的代碼如下(注釋是很全面,因?yàn)楫?dāng)時(shí)要給她去交作業(yè),但是卻發(fā)現(xiàn)那些注釋只是給她老師看的,而非給我自己看的....因?yàn)槲覜](méi)有在注釋里面詳細(xì)定義哈夫曼編碼壓縮后的壓縮文件格式信息):
(以下代碼的input不能直接處理Unicode,寫(xiě)成重定向到文件可以)
嗯嗯,我還把自己這段代碼復(fù)制到百度知道回答別人的問(wèn)題過(guò)~~真是貽害四方瓦。
1 #include <iostream> 2 3 using namespace std; 4 5 #define MAX_FILE 50000//假設(shè)的文件最大長(zhǎng)度 6 #define MAXLIST 256//最大MAP值 7 #define MAX_HUFFMAN_LENGTH 100//哈夫曼編碼長(zhǎng)度 8 unsigned char dictionary[MAXLIST][2]= {0};//Hash映射,[][0]為權(quán)值,[][1]為字符 9 unsigned char fileContent[MAX_FILE];//處理的字符串大小 10 int Huffman[MAXLIST][MAX_HUFFMAN_LENGTH]= {2};//哈夫曼編碼序列 11 unsigned char HuffmanList[MAXLIST]= {0};//哈夫曼編碼對(duì)應(yīng)的字符有序序列 12 unsigned char HuffFileCode[MAX_FILE]= {0};//哈夫曼編碼字符串序列 13 unsigned char HuffFile[MAX_FILE]= {0}; 14 //編碼到假設(shè)的文件的哈夫曼壓縮格式: 依次存儲(chǔ) 原字符串長(zhǎng)度(1字節(jié)存儲(chǔ):可擴(kuò)展到2字節(jié))、哈夫曼編碼數(shù)(1字節(jié))、每個(gè)哈夫曼編碼的長(zhǎng)度序列、每個(gè)哈夫曼編碼對(duì)應(yīng)的字符序列、編碼過(guò)的哈夫曼字符串 15 16 unsigned char GetFile[MAX_FILE]= {0};//解碼序列 17 18 void ShellSort(unsigned char pData[MAXLIST][2],int Count)//Shell排序,用于準(zhǔn)備有序化要構(gòu)造的編碼權(quán)值構(gòu)造哈夫曼樹(shù)做準(zhǔn)備 19  { 20 int step[4]= {9,5,3,1};//增量序列 21 22 int iTemp,cTemp; 23 int k,s,w; 24 for(int i=0;i<4;i++) 25 { 26 k=step[i]; 27 s=-k; 28 for(int j=k;j<Count;j++) 29 {iTemp=pData[j][0]; 30 cTemp=pData[j][1]; 31 w=j-k; 32 if(s==0) 33 { 34 s=-k; 35 s++; 36 pData[s][0]=iTemp; 37 pData[s][1]=cTemp; 38 } 39 while((iTemp<pData[w][0])&&(w>=0)&&(w<=Count)) 40 { 41 pData[w+k][0]=pData[w][0];//權(quán)值交換 42 pData[w+k][1]=pData[w][1];//字符交換 43 w=w-k; 44 } 45 pData[w+k][0]=iTemp; 46 pData[w+k][1]=cTemp; 47 } 48 } 49 } 50 51 52 struct TNode//哈夫曼樹(shù)結(jié)點(diǎn) 53  { 54 TNode* pNode; 55 TNode* lNode; 56 TNode* rNode; 57 unsigned char dictionary; 58 unsigned char weight; 59 TNode(unsigned char dic,unsigned char wei) 60 { 61 pNode=0; 62 lNode=0; 63 rNode=0; 64 dictionary=dic; 65 weight=wei; 66 } 67 }; 68 69 struct LNode//鏈表結(jié)點(diǎn),用于存儲(chǔ)哈夫曼樹(shù)結(jié)點(diǎn),進(jìn)而構(gòu)造哈夫曼樹(shù)(保證每一步鏈表結(jié)點(diǎn)包含的哈夫曼結(jié)點(diǎn)都是有序的) 70  { 71 LNode* prev; 72 LNode* next; 73 TNode* tnode; 74 LNode() 75 { 76 prev=next=0; 77 tnode=0; 78 } 79 }; 80 81 int len=0;//哈夫曼編碼數(shù) 82 int deep=-1;//深度 83 void Preorder(TNode * p);//前序遍歷 84 void byLeft(TNode*p)//經(jīng)由左結(jié)點(diǎn) 85  { 86 deep++; 87 Huffman[len][deep]=0; 88 Preorder(p); 89 90 Huffman[len][deep]=2; 91 deep--; 92 } 93 void byRight(TNode*p)//經(jīng)由右結(jié)點(diǎn) 94  { 95 96 deep++; 97 Huffman[len][deep]=1; 98 Preorder(p); 99 100 Huffman[len][deep]=2; 101 deep--; 102 } 103 void Preorder(TNode * p) 104  { 105 106 if(p->lNode!=0)//當(dāng)左子結(jié)點(diǎn)非空則遍歷 107 { 108 109 byLeft(p->lNode); 110 } 111 if(p->rNode!=0)//當(dāng)右子結(jié)點(diǎn)非空則遍歷 112 { 113 byRight(p->rNode); 114 } 115 116 117 118 if((p->lNode==0)&&(p->rNode==0))//當(dāng)左右結(jié)點(diǎn)都為空,則增加哈夫曼編碼數(shù)到另一個(gè)記錄 119 { 120 121 Huffman[len][deep+1]=2; 122 int i=0; 123 for(;Huffman[len][i]!=2;i++) 124 { 125 Huffman[len+1][i]=Huffman[len][i]; 126 } 127 Huffman[len+1][i]=2; 128 129 HuffmanList[len]=p->dictionary; 130 131 132 133 len++; 134 } 135 136 } 137 138 139 unsigned char generateOne(int k)//產(chǎn)生k個(gè)連續(xù)1的二進(jìn)制串,比如111,1111,111111,用于編碼進(jìn)假設(shè)的文件 140  { 141 unsigned char c=0; 142 for(;k!=0;k--) 143 { 144 c|=(1<<(k-1)); 145 146 } 147 return c; 148 } 149 150 int compareBits(unsigned char b1,unsigned char b2,unsigned char c,int l,int d)//判斷由 [b1,b2] 組成的16位二進(jìn)制數(shù)以d為起點(diǎn),是否是長(zhǎng)度為l的c二進(jìn)制串(哈夫曼編碼)的前綴 151  { 152 unsigned __int8 t=(((((0x00ff&b1)<<8)|(0x00ff&b2))>>(8-d))&0x00ff); 153 return (((t)&((generateOne(l)<<(8-l))&0xff))==((c<<(8-l))&0xff)); 154 } 155 void input(unsigned char* _list,unsigned char end) 156  { 157 158 int i=0; 159 for(;(_list[i]=cin.get())!=end;i++); 160 _list[i]='\0'; 161 162 } 163 164 int main() 165  { 166 167 /**//* 或許假定的文件字符串向量中的字符串 */ 168 cout<<"請(qǐng)輸入要壓縮的字符串:"; 169 input(fileContent,'$'); 170 171 unsigned int fileLen=0; 172 173 174 /**//* Hash進(jìn)dictionary */ 175 for(int i=0;fileContent[i]!='\0';i++,fileLen++) 176 { 177 178 ++dictionary[fileContent[i]][0]; 179 dictionary[fileContent[i]][1]=fileContent[i]; 180 } 181 unsigned int len=0; 182 183 /**//* 把Hash了的dictionary向前靠攏 */ 184 185 186 { 187 for(int i=0;i!=MAXLIST;i++) 188 { 189 190 if(dictionary[i][0]!=0) 191 { 192 dictionary[len][0]=dictionary[i][0]; 193 dictionary[len][1]=dictionary[i][1]; 194 len++; 195 } 196 } 197 } 198 cout<<"哈夫曼編碼個(gè)數(shù):"<<len<<endl; 199 /**//* 對(duì)dictionary按權(quán)值進(jìn)行排序 */ 200 201 ShellSort(dictionary,len); 202 203 /**//* 構(gòu)造鏈表,鏈表中放有序dictionary權(quán)值的樹(shù)結(jié)點(diǎn) */ 204 LNode* head=new LNode,*p=head; 205 head->next=new LNode; 206 TNode *tempTNode=new TNode(dictionary[0][1],dictionary[0][0]); 207 head->tnode=tempTNode; 208 209 210 { 211 for(int i=0;i!=len-1;i++) 212 { 213 p->next->prev=p->next; 214 p=p->next; 215 216 p->next=new LNode; 217 tempTNode=new TNode(dictionary[i+1][1],dictionary[i+1][0]); 218 p->tnode=tempTNode; 219 } 220 } 221 delete p->next; 222 p->next=0; 223 224 /**//* 每次最小權(quán)值的前面兩個(gè)鏈表結(jié)點(diǎn)中的樹(shù)結(jié)點(diǎn)組成一個(gè)子樹(shù),子樹(shù)有合權(quán)值,子數(shù)的根按權(quán)值排序進(jìn)鏈表*/ 225 226 for(p=head;p->next!=0;) 227 { 228 229 p->tnode->pNode=new TNode('\0',(p->tnode->weight)+(p->next->tnode->weight)); 230 p->next->tnode->pNode=p->tnode->pNode; 231 p->tnode->pNode->lNode=p->tnode; 232 p->tnode->pNode->rNode=p->next->tnode; 233 head=p->next; 234 delete p; 235 p=head; 236 p->tnode=p->tnode->pNode; 237 for(LNode* t=head;t->next!=0;t=t->next) 238 { 239 if(t->tnode->weight>t->next->tnode->weight) 240 { 241 TNode* k=t->tnode; 242 t->tnode=t->next->tnode; 243 t->next->tnode=k; 244 } 245 } 246 247 } 248 249 250 int code[MAX_FILE],h=0; 251 252 253 254 /**//* 前序遍歷構(gòu)造哈夫曼編碼 */ 255 Preorder(p->tnode); 256 257 { 258 for(int i=0;i!=len;i++) 259 dictionary[HuffmanList[i]][0]=i; 260 } 261 262 int codeLen=0,total=0; 263 { 264 265 for(int i=0;i!=fileLen;i++) 266 { 267 268 unsigned int j=dictionary[fileContent[i]][0]; 269 270 271 for(int k=0;Huffman[j][k]!=2;k++) 272 { 273 274 HuffFileCode[codeLen]|=(Huffman[j][k]<<(7-total%8)); 275 276 code[h++]=Huffman[j][k]; 277 278 if(((total+1)%8)==0) 279 { 280 281 HuffFile[4+len*3+2+codeLen]=HuffFileCode[codeLen]; 282 codeLen++; 283 } 284 total++; 285 } 286 287 288 289 } 290 } 291 292 HuffFile[4+len*3+2+codeLen]=HuffFileCode[codeLen]; 293 294 295 HuffFile[0]=((fileLen>>24)); 296 HuffFile[1]=((fileLen>>16)); 297 HuffFile[2]=((fileLen>>8)); 298 HuffFile[3]=((fileLen)); 299 300 301 302 /**//* 解壓縮假定的文件HuffFile成為原字符串序列 */ 303 cout<<"哈夫曼編碼序列:\n"; 304 305 /**//* 哈夫曼編碼表長(zhǎng)度 */ 306 HuffFile[4]=(len&0xff00)>>8; 307 HuffFile[5]=len&0x00ff; 308 309 310 311 { 312 for(int i=0,j=0;i!=len;i++,j=0) 313 { 314 315 for(;Huffman[i][j]!=2;j++); 316 317 HuffFile[3+i+2+1]=j; 318 HuffFile[3+i+2+2*len+1]=HuffmanList[i]; 319 320 321 for(int k=0;k!=j;k++) 322 { 323 324 cout<<Huffman[i][k]; 325 HuffFile[3+i+2+len+1]|=(Huffman[i][k]<<(j-1-k)); 326 327 } 328 cout<<":"<<HuffmanList[i]<<endl; 329 330 331 } 332 } 333 334 335 336 unsigned int packFileLen=(HuffFile[0]<<24)|(HuffFile[1]<<16)|(HuffFile[2]<<8)|(HuffFile[3]<<0); 337 unsigned int packCodeLen=((unsigned int)HuffFile[4]<<8)|((unsigned int)HuffFile[5]); 338 339 340 { 341 for(int i=0,j=0;i!=(packFileLen);i++) 342 { 343 for(int k=0;k!=packCodeLen;k++) 344 { 345 346 unsigned char l=HuffFile[3+2+k+1],d=j%8,b1=HuffFile[3+j/8+2+packCodeLen*3+1],b2=HuffFile[3+j/8+1+2+packCodeLen*3+1]; 347 348 unsigned char c=HuffFile[3+packCodeLen+2+k+1]; 349 350 if(compareBits(b1,b2,c,l,d)) 351 { 352 353 j+=HuffFile[3+2+k+1]; 354 355 GetFile[i]=HuffFile[3+2+packCodeLen*2+k+1]; 356 cout<<GetFile[i]; 357 358 break; 359 360 } 361 } 362 363 } 364 } 365 366 { 367 cout<<"哈夫曼壓縮后二進(jìn)制序列:"<<endl; 368 for(int i=0;i!=h;i++) 369 { 370 cout<<code[i]; 371 if((i+1)%8==0) 372 cout<<" "; 373 } 374 } 375 cout<<endl; 376 377 { 378 cout<<"哈夫曼壓縮打包假定文件格式二進(jìn)制的文本體現(xiàn):"; 379 cout<<endl; 380 for(int i=0;i!=packFileLen+packCodeLen*3;i++) 381 { 382 cout<<HuffFile[i]; 383 } 384 cout<<endl; 385 } 386 387 cout<<"原字節(jié)數(shù)為:"<<fileLen<<endl; 388 cout<<"壓縮后字節(jié)數(shù)為:"<<(h)/8+1<<endl; 389 cout<<"壓縮率為"<<((h/8.0+1)/fileLen)*100<<"%"<<endl; 390 391 392 { 393 394 cout<<"字符串字節(jié)數(shù)為:"<<(packFileLen)<<endl; 395 cout<<"字符串解壓序列為:"; 396 unsigned char buffer[MAX_FILE]; 397 int i; 398 for(i=0;i!=(packFileLen);i++) 399 { 400 buffer[i]=GetFile[i]; 401 } 402 buffer[i]=0; 403 cout<<buffer<<endl; 404 405 406 407 408 cout<<endl; 409 410 } 411 return 1; 412 }
執(zhí)行的case如下:
1 請(qǐng)輸入要壓縮的字符串:I ask the indulgence of the children who may read this boo 2 k for dedicating it to a grown-up. I have a serious reason: he is the best frie 3 nd I have in the world. I have another reason: this grown-up understands everyth 4 ing 5 even books about children. I have a third reason: he lives in France where he is 6 hungry and cold. He needs cheering up.$ 7 哈夫曼編碼個(gè)數(shù):30 8 哈夫曼編碼序列: 9 0000:t 10 00010000:F 11 00010001:H 12 00010010:<回車(chē)> 13 00010011:m 14 000101:b 15 00011:u 16 0010:s 17 0011:o 18 0100:i 19 0101:a 20 011:e 21 100000:I 22 100001:. 23 1000100:- 24 1000101:: 25 100011:w 26 1001:r 27 1010:h 28 1011:n 29 1100000:f 30 1100001:y 31 1100010:k 32 1100011:p 33 110010:l 34 110011:g 35 110100:c 36 110101:v 37 11011:d 38 111: 39 I ask the indulgence of the children who may read this book for dedicating it to 40 a grown-up. I have a serious reason: he is the best friend I have in the world 41 . I have another reason: this grown-up understands everything 42 even books about children. I have a third reason: he lives in France where he is 43 hungry and cold. He needs cheering up.哈夫曼壓縮后二進(jìn)制序列: 44 10000011 10101001 01100010 11100001 01001111 10100101 11101100 01111001 01100110 45 11101111 01000111 11001111 00000111 00001010 01111111 01001010 01001100 1011011 46 1 00101110 11111100 01110100 01111100 01001101 01110000 11111001 01101011 101111 47 10 00010100 10000101 11000101 00110011 11000101 11110000 00011100 11111101 10111 48 101 10100110 10001010 00001001 01111001 11110100 00001110 00000111 11010111 1110 49 0111 00100111 00011101 11000100 00011110 00111000 01111111 10000011 11010010 111 50 01010 11111010 11110010 01110010 10000110 00110010 11110010 11010100 10001110 11 51 100010 11111010 01111101 00001011 10000101 00111110 00101011 00100000 11111000 0 52 0100101 00011101 11101111 11000001 11101001 01110101 01111101 00101111 10000101 53 00111111 00011001 11001110 01011011 10000111 11000001 11101001 01110101 01111101 54 01101100 11000010 10011100 11111001 01101010 01000111 01110001 01111000 0101001 55 0 00010111 11001110 01001110 00111011 10001000 00111100 01111100 01110111 101101 56 11 00100100 00001011 01111011 00101110 11110101 01110011 10000100 00101001 00101 57 111 00110001 00100111 10101011 10111110 00101001 10011110 00100010 11101010 0010 58 1001 10001100 00111110 10010100 10011001 01101110 01011101 11000011 11100000 111 59 10100 10111010 10111110 10111100 00101001 00100111 01111110 01011010 10010001 11 60 011100 01011111 01001111 11100100 10011010 10110010 11101001 01111100 01000010 0 61 1010110 11110100 01111110 00111010 01110010 11111101 00111110 10000101 11101000 62 01110111 10011100 11100001 11101011 01111011 11111010 00011110 01011011 10000111 63 10001000 10111111 01101101 11101100 10111110 10010100 11011100 10100101 1110011 64 1 11000111 10001110 0001 65 哈夫曼壓縮打包假定文件格式二進(jìn)制的文本體現(xiàn): 66 67 68 69 (此處代碼被截?cái)?無(wú)法copy到剪貼板) 70 71 原字節(jié)數(shù)為:341 72 壓縮后字節(jié)數(shù)為:181 73 壓縮率為53.2258% 74 字符串字節(jié)數(shù)為:341 75 字符串解壓序列為:I ask the indulgence of the children who may read this book for 76 dedicating it to a grown-up. I have a serious reason: he is the best friend I 77 have in the world. I have another reason: this grown-up understands everything 78 even books about children. I have a third reason: he lives in France where he is 79 hungry and cold. He needs cheering up. 80
寫(xiě)到此處,我突然發(fā)現(xiàn)在進(jìn)行位處理的時(shí)候,我并沒(méi)有用一個(gè)更簡(jiǎn)化的方法(100行內(nèi)可以解決)- -bnr ,好吧 下次寫(xiě)一個(gè)通用的壓縮軟件。
|
|
隨筆:30
文章:0
評(píng)論:146
引用:0
| 日 | 一 | 二 | 三 | 四 | 五 | 六 |
---|
27 | 28 | 29 | 30 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 1 | 2 | 3 | 4 | 5 | 6 | 7 |
|
常用鏈接
留言簿(11)
隨筆分類(lèi)
隨筆檔案
文章分類(lèi)
相冊(cè)
搜索
最新評(píng)論

閱讀排行榜
評(píng)論排行榜
|
|