?? huffman_h.cpp
字號(hào):
#include "huffman_h.h"#include <math.h>void huffman_h::generate_codes(int num, const unsigned long* weights){ if (num <= 1 || weights == NULL) throw new huffman_exception("參數(shù)非法"); int heap_num, i, nonzero_count; // 分配生成Huffman樹(shù)時(shí)使用的堆結(jié)構(gòu),其大小是元素?cái)?shù)目的2倍 unsigned long* heap = new unsigned long[2*num]; if (heap == NULL) throw new huffman_exception("內(nèi)存不足"); // 將所有元素權(quán)值值(葉子結(jié)點(diǎn))復(fù)制到堆的后半部分,堆的前半部分置0 memcpy(heap + num, weights, sizeof(unsigned long)*num); memset((char *)heap, 0, num * sizeof (*heap)); // 將堆的前半部分視作指針,按順序指向后半部分的葉子結(jié)點(diǎn) // 同時(shí)統(tǒng)計(jì)權(quán)值非0的葉子結(jié)點(diǎn)數(shù)目 for (nonzero_count = i = 0; i < num; i++) if (heap[num + i]) heap[nonzero_count++] = num + i; if (pow(2.0, (double)max_code_len) < (double)nonzero_count) throw new huffman_exception("元素個(gè)數(shù)超出了預(yù)設(shè)的最大碼長(zhǎng)限制"); /* 將堆的前半部分視作指針,按照指針?biāo)傅臋?quán)值大小,把堆的前半部分組織成為 堆排序(Heap Sort)算法中定義的"堆",即:堆對(duì)應(yīng)一棵完全二叉樹(shù),且所有非葉 結(jié)點(diǎn)的值均不大于其子女的值,根結(jié)點(diǎn)的值是最小的.在這里,根結(jié)點(diǎn)是heap[0] 參見(jiàn)數(shù)據(jù)結(jié)構(gòu)或算法書(shū)籍中的堆排序(Heap Sort)算法介紹 */ heap_num = nonzero_count; for (i = heap_num / 2; i > 0; i--) { register int curr, child; curr = i; child = curr * 2; while (child <= heap_num) { if (child < heap_num && heap[heap[child]] < heap[heap[child - 1]]) child++; if (heap[heap[curr - 1]] > heap[heap[child - 1]]) { register unsigned long temp; temp = heap[child - 1]; heap[child - 1] = heap[curr - 1]; heap[curr - 1] = temp; curr = child; child = 2 * curr; } else break; } } /* 創(chuàng)建Huffman樹(shù) 這里,創(chuàng)建Huffman樹(shù)的過(guò)程利用了堆排序(Heap Sort)算法的基本原理,即根結(jié)點(diǎn)是 最小的元素,樹(shù)中最后一個(gè)元素是最大的元素.選出根結(jié)點(diǎn)后,把最后的元素調(diào)到根 結(jié)點(diǎn),從樹(shù)根到樹(shù)葉,讓最后的元素移動(dòng)到合適的位置,重新建成堆.這樣,總是可以 找出2個(gè)最小的子樹(shù),將這兩個(gè)子樹(shù)合并后,再作為新元素放到堆中.所有子樹(shù)都合并 后,Huffman樹(shù)就建成了.(參見(jiàn)數(shù)據(jù)結(jié)構(gòu)或算法書(shū)籍中的堆排序算法介紹) 這一段代碼的運(yùn)行結(jié)果是整個(gè)heap數(shù)組成了一棵Huffman樹(shù),heap[0]未用,樹(shù)根是 heap[1],其中保存所有權(quán)值值的總和, heap[2]..heap[num-1]對(duì)應(yīng)于所有根以外 的分支結(jié)點(diǎn),其中保存的是雙親結(jié)點(diǎn)的索引值, heap[num]..heap[num*2-1]對(duì)應(yīng)于所 有葉子結(jié)點(diǎn)(即所有要編碼的元素),其中保存的是雙親結(jié)點(diǎn)的索引值 */ // 為了建樹(shù)之后,對(duì)二叉樹(shù)進(jìn)行限制碼長(zhǎng)的調(diào)整,有必要在建樹(shù)過(guò)程中,每選出一個(gè) // 葉子結(jié)點(diǎn),就用數(shù)組index按順序記錄下元素位置 int* index = new int[num]; int index_i = 0; if (index == NULL) throw new huffman_exception("內(nèi)存不足"); /* 當(dāng)用于堆排序的二叉樹(shù)中還有結(jié)點(diǎn)時(shí)循環(huán) */ while (heap_num > 1) { int pos[2]; /* 循環(huán)2次,找出2個(gè)最小的子樹(shù),存入pos中 */ for (i = 0; i < 2; i++) { register int curr, child; /* 根結(jié)點(diǎn)就是最小的結(jié)點(diǎn) */ pos[i] = heap[0]; if (pos[i] >= num) // 如果是葉子結(jié)點(diǎn),就記錄 index[index_i++] = pos[i]; /* 將最后的結(jié)點(diǎn)移動(dòng)到根結(jié)點(diǎn)處,總結(jié)點(diǎn)數(shù)目減1 */ heap[0] = heap[--heap_num]; /* 以下是重建堆的過(guò)程 */ curr = 1; child = 2; while (child <= heap_num) { if (child < heap_num && heap[heap[child]] < heap[heap[child - 1]]) child++; if (heap[heap[curr - 1]] > heap[heap[child - 1]]) { register int temp; temp = heap[child - 1]; heap[child - 1] = heap[curr - 1]; heap[curr - 1] = temp; curr = child; child = 2 * curr; } else break; } } /* 合并子樹(shù),其結(jié)果作為新的結(jié)點(diǎn)放入堆中(但不在堆排序的二叉樹(shù)內(nèi),實(shí)際 上,新加入的結(jié)點(diǎn)是和堆的后半段一起構(gòu)成了Huffman樹(shù)) */ heap[heap_num + 1] = heap[pos[0]] + heap[pos[1]]; /* 子樹(shù)的左,右分支都指向子樹(shù)的根結(jié)點(diǎn) */ heap[pos[0]] = heap[pos[1]] = heap_num + 1; /* 把子樹(shù)根結(jié)點(diǎn)作為葉子結(jié)點(diǎn),放到堆排序中的二叉樹(shù)內(nèi) */ heap[heap_num] = heap_num + 1; { /* 在堆中,讓新加入的葉子結(jié)點(diǎn)上升到合適的位置,不破壞堆的秩序 */ register int parent, curr; heap_num++; curr = heap_num; parent = curr >> 1; while (parent && heap[heap[parent - 1]] > heap[heap[curr - 1]]) { register int temp; temp = heap[parent - 1]; heap[parent - 1] = heap[curr - 1]; heap[curr - 1] = temp; curr = parent; parent = curr >> 1; } } } // 從根出發(fā),求每個(gè)編碼的碼長(zhǎng) int overflow = 0; // 記錄有多少個(gè)編碼超長(zhǎng) const int tmp = sizeof(unsigned long)*8 + 1; int len_count[tmp]; memset(len_count, 0, sizeof(int)*tmp); heap[0] = (unsigned long)(-1l); // 雙親結(jié)點(diǎn)為0的葉子,可由此算得碼長(zhǎng)0 heap[1] = 0; // 根結(jié)點(diǎn)碼長(zhǎng)為0 for (i = 2; i < 2*num; i++) { heap[i] = heap[heap[i]] + 1; // 結(jié)點(diǎn)碼長(zhǎng)等于雙親結(jié)點(diǎn)碼長(zhǎng)加1 if (i >= num) { if (heap[i] <= (unsigned long)max_code_len) len_count[heap[i]]++; else // 統(tǒng)計(jì)超長(zhǎng)葉子結(jié)點(diǎn)數(shù)量 { heap[i] = max_code_len; overflow++; } } } // 如果有編碼被限制了碼長(zhǎng),就意味著碼長(zhǎng)為max_code_len的編碼 // 數(shù)量已經(jīng)超過(guò)了二叉樹(shù)的容量,必須做二叉樹(shù)的重整 if (overflow > 0) { // 假設(shè)把超長(zhǎng)葉子結(jié)點(diǎn)都去掉,計(jì)算碼長(zhǎng)max_code_len處還可騰出幾個(gè)位置 // 計(jì)算方法是,從根開(kāi)始,每層的分支結(jié)點(diǎn)數(shù)目等于上層分支結(jié)點(diǎn)數(shù)乘2減 // 本層葉子結(jié)點(diǎn)數(shù);最后一層的分支結(jié)點(diǎn)數(shù),就是可騰出的空位數(shù)量 int nonleaf = 1, bits; for(i = 1; i <= max_code_len; i++) nonleaf = nonleaf * 2 - len_count[i]; // 先把nonleaf個(gè)超長(zhǎng)結(jié)點(diǎn)放到max_code_len這一層 overflow -= nonleaf; len_count[max_code_len] += nonleaf; // 調(diào)整剩下的超長(zhǎng)結(jié)點(diǎn),這個(gè)循環(huán)只調(diào)整碼長(zhǎng)的計(jì)數(shù)數(shù)組len_count while(overflow > 0) { bits = max_code_len - 1; while(len_count[bits] == 0) bits--; // 向上找到一個(gè)有葉子的層次 len_count[bits]--; // 把葉子移下一層 len_count[bits+1] += 2; // 把移下來(lái)的葉子和超長(zhǎng)的一個(gè)結(jié)點(diǎn)合并為一個(gè)子樹(shù) overflow --; }; // 下面這個(gè)循環(huán)真正對(duì)碼長(zhǎng)進(jìn)行調(diào)整 // 從碼長(zhǎng)最長(zhǎng)的結(jié)點(diǎn)開(kāi)始,在堆中倒著數(shù),如果發(fā)現(xiàn)實(shí)際數(shù)目 // 和len_count中記錄的不一致,就調(diào)整碼長(zhǎng),使之和len_count一致 int m, n; index_i = 0; for(bits = max_code_len; bits != 0; bits--) { n = len_count[bits]; while(n != 0) { m = index[index_i++]; if (heap[m] != (unsigned long)bits) heap[m] = bits; n--; } } } code_lens.clear(); for (i = num; i < 2*num; i++) code_lens.push_back(heap[i]); // 由碼長(zhǎng)得到canonical huffman編碼 generate_canonical_codes(); delete[] heap; delete[] index;}
?? 快捷鍵說(shuō)明
復(fù)制代碼
Ctrl + C
搜索代碼
Ctrl + F
全屏模式
F11
切換主題
Ctrl + Shift + D
顯示快捷鍵
?
增大字號(hào)
Ctrl + =
減小字號(hào)
Ctrl + -