û 【文件压缩解压项目】 【项目技术:】 模版堆,哈夫曼树,哈夫曼编码,函数对象,feof函数,文件读写 【项目准备:】 文件运行于Visual Studio 平台(VS), 数据准备一份待压缩文件,本程序运行可以不做准备。 【项目过程:】 压缩过程 1.从待压缩原文件中统计字符个数,利用堆建立哈夫曼树。 2.依据哈弗曼树写入每个字符相应的哈夫曼编码。将这里的字符 和统计出现的次数写入配置文件。 3.打开一个新文件,按照哈夫曼编码用较短编码代替原文件较长的编码,实现压缩。 //配置文件丢失后 压缩的文件也就失去了意义这两个文件是相互对应互相起作用的。 解压过程: 1.根据配置文件中的次数再次建立哈夫曼树,哈夫曼字符编码集。 2.根据哈夫曼字符编码集,翻译程序生成的短的编码文件。翻译得到的内容写入新文件 //可以对比原文件与翻译后的文件 //时间上的考虑,在调试版本与发行版本中压缩世界总是大于解压时间。 【项目思考:】对于少量信息的文本,压缩所能遇见的问题 对于中文字符 二次压缩 为什么配置文件里不直接写构造好的编码 ,虽然这样可以做到 因为人的思想总是快一点 以为对照字符与哈夫曼编码进行翻译能够快 而实际计算机在运行中 总是对 待翻译的编码 在配置文件中比对编码和寻找对应的字符。 对于项目运行后的警告 文件读写复习。 324 #include using namespace std; #include #include #include #include #include template struct Less { bool operator() (const T& l, const T& r) { return l < r; } }; template struct Greater { bool operator() (const T& l, const T& r) { return l > r; } }; template class Heap { public: Heap() {} Heap(const T* a, size_t size) { _a.reserve(size); for (size_t i = 0; i < size; ++i) { _a.push_back(a[i]); } for (int i = (_a.size() - 2) / 2; i >= 0; --i) { _AdjustDown(i); } } Heap(vector& a) { _a.swap(a); for (int i = (_a.size() - 2) / 2; i >= 0; --i) { _AdjustDown(i); } } void Push(const T& x) { _a.push_back(x); _AdjustUp(_a.size() - 1); } void Pop() { size_t size = _a.size(); assert(size > 0); swap(_a[0], _a[size - 1]); _a.pop_back(); _AdjustDown(0); } T& Top() { assert(!_a.empty()); return _a[0]; } size_t size() { assert(!_a.empty()); return (_a.size()); } protected: void _AdjustUp(int child) { int parent = (child - 1) / 2; while (child > 0) { Compare com; if (com(_a[child], _a[parent])) { swap(_a[child], _a[parent]); child = parent; parent = (child - 1) / 2; } else { break; } } } void _AdjustDown(size_t parent) { size_t child = parent * 2 + 1; while (child < _a.size()) { Compare com; if (child + 1 < _a.size() && com(_a[child + 1], _a[child])) { ++child; } if (com(_a[child], _a[parent])) { swap(_a[parent], _a[child]); parent = child; child = 2 * parent + 1; } else { break; } } } protected: vector _a; }; /*/ ------文件可拆分线--------#include "Heap.h" ------- 构建哈夫曼树 (利用小堆构建哈夫曼树) /*/ template struct HuffmanNode { HuffmanNode*_left; HuffmanNode*_right; T _weight; HuffmanNode(const T&weight) :_left(NULL) , _right(NULL) , _weight(weight) {} }; template class HuffmanTree { typedef HuffmanNode Node; public: HuffmanTree(const T*a, size_t size, const T& invalid) /*/invalid代表非法值,若为非法值,则不构建哈夫曼树/*/ { _root = CreateTree(a, size, invalid); } Node* GetRootNode() { return _root; } protected: Node* CreateTree(const T*a, size_t size, const T& invalid) { struct Compare { bool operator()(const Node*l, const Node*r) { return (l->_weight < r->_weight); } }; Heap minHeap; for (size_t i = 0; i < size; ++i) { if (a[i] != invalid) { minHeap.Push(new Node(a[i])); } } /*/小堆的top结点的权值必是最小的,每次选出小堆的top构造哈夫曼树的结点/*/ while (minHeap.size()>1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_weight + right->_weight); /*/哈夫曼树特点,父结点是两个子结点和/*/ parent->_left = left; parent->_right = right; minHeap.Push(parent); } return minHeap.Top(); } protected: HuffmanNode* _root; }; /*/ ----文件可分割线--#include "HuffmanTree.h"------ 统计字符次数 构建哈夫曼树(堆) 生成哈夫曼编码 读取源文件字符压缩 哈夫曼树根结点的权值就是源文件读入的个数 统计字符次数 /*/ typedef unsigned long long LongType; struct CharInfo { unsigned char _ch; /*/字符/*/ LongType _count; /*/出现次数/*/ string _code; /*/Huffman code/*/ CharInfo() :_ch(0) , _count(0) {} CharInfo(LongType count) :_ch(0) , _count(count) {} bool operator!=(const CharInfo&info) const { return _count != info._count; } CharInfo operator+(const CharInfo&info) const { return CharInfo(_count + info._count); } bool operator<(const CharInfo&info) const { return _count < info._count; } }; template class FileCompress { typedef HuffmanNode Node; public: FileCompress() { for (size_t i = 0; i < 256; ++i) { _infos[i]._ch = i; _infos[i]._count = 0; } } public: void Compress(const char* filename) { assert(filename); /*/统计文件中字符出现的次数/*/ FILE* fout = fopen(filename, "rb"); assert(fout); char ch = fgetc(fout); while (!feof(fout)) { _infos[(unsigned char)ch]._count++; ch = fgetc(fout); } /*/构建哈夫曼树/*/ CharInfo invalid(0); HuffmanTreetree(_infos, 256, invalid); /*/生成哈夫曼编码/*/ string code; GenerateHuffmanCode(tree.GetRootNode(), code); /*/读取源文件字符压缩,将哈夫曼编码写进对应的位/*/ string Compressfilename = filename; Compressfilename += ".compress"; FILE* fin = fopen(Compressfilename.c_str(), "wb"); /*/用二进制读写文件/*/ fseek(fout, 0, SEEK_SET); /*/定位到文件起始位置/*/ ch = fgetc(fout); char value = 0; int pos = 0; while (!feof(fout)) { string &code = _infos[(unsigned char)ch]._code; /*/注意code要为引用/*/ for (size_t i = 0; i < code.size(); ++i) /*/利用位存储哈夫曼编码,减少内存/*/ { value <<= 1; if (code[i] == '1') { value |= 1; } if (++pos == 8) /*/ 满8位(1字节),将哈夫曼编码写进对应的文件, 然后继续读取这个字符的后续编码 /*/ { fputc(value, fin); value = 0; pos = 0; } } ch = fgetc(fout); /*/继续读取下一个字符的哈夫曼编码/*/ } if (pos != 0) /*/如果最后几位哈夫曼编码不满8位, 则需要补充记录, 后续补充(配置文件)/*/ { value <<= (8 - pos); fputc(value, fin); } /*/写配置文件,方便解压缩的时候重建哈夫曼树/*/ string configfilename = filename; configfilename += ".config"; FILE* finconfig = fopen(configfilename.c_str(), "wb"); assert(finconfig); char buffer[128]; /*/设置写入缓冲区/*/ string str; for (size_t i = 0; i < 256; ++i) { if (_infos[i]._count>0) { str += _infos[i]._ch; str += ","; str += _itoa(_infos[i]._count, buffer, 10); /*/itoa将整数_count转换成字符存入buffer中,10为进制/*/ str += '\n'; } fputs(str.c_str(), finconfig); str.clear(); } fclose(fout); fclose(fin); fclose(finconfig); } /*/解压缩/*/ void UnCompress(const char* filename) { string configfilename = filename; configfilename += ".config"; FILE* foutconfig = fopen(configfilename.c_str(), "rb"); assert(foutconfig); string str; LongType charCount = 0; while (Readline(foutconfig, str)) { if (str.empty()) { str += '\n'; } else { _infos[(unsigned char)str[0]]._count = atoi(str.substr(2).c_str()); /*/将配置文件中保存的字符格式转换为次数, (如a,4所以从第2个字符开始)/*/ str.clear(); } } /*/重建哈夫曼树/*/ CharInfo invalid(0); HuffmanTreetree(_infos, 256, invalid); charCount = tree.GetRootNode()->_weight._count; string Compressfilename = filename; Compressfilename += ".compress"; FILE* fout = fopen(Compressfilename.c_str(), "rb"); assert(fout); string UnCompressfilename = filename; UnCompressfilename += ".uncompress"; FILE* fin = fopen(UnCompressfilename.c_str(), "wb"); assert(fin); char ch = fgetc(fout); HuffmanNode* root = tree.GetRootNode(); HuffmanNode* cur = root; int pos = 7; while (1) { if (ch & (1 << pos)) { cur = cur->_right; } else { cur = cur->_left; } if (pos-- == 0) { pos = 7; ch = fgetc(fout); /*/继续读取字符/*/ } if (cur->_left == NULL&&cur->_right == NULL) { fputc(cur->_weight._ch, fin); if (--charCount == 0) { break; } cur = root; } } fclose(fin); } protected: void GenerateHuffmanCode(HuffmanNode* root, string code) { if (root == NULL) { return; } if (root->_left) { GenerateHuffmanCode(root->_left, code + '0'); } if (root->_right) { GenerateHuffmanCode(root->_right, code + '1'); } if ((root->_left == NULL) && (root->_right == NULL)) { _infos[root->_weight._ch]._code = code; } } bool Readline(FILE* fout, string& str) { char ch = fgetc(fout); if (feof(fout)) { return false; } while (!feof(fout) && ch != '\n') { str += ch; ch = fgetc(fout); } return true; } protected: CharInfo _infos[256]; }; void TestCompress() { FileCompress fc; double start_compress = clock(); fc.Compress("Input.txt"); double finish_compress = clock(); fc.UnCompress("Input.txt"); double finish_uncompress = clock(); cout << "压缩时间是:" << finish_compress - start_compress << "ms" << endl; cout << "解压缩时间是:" << finish_uncompress - finish_compress << "ms" << endl; } void wztest(char* s) { FileCompress fc; double start_compress = clock(); fc.Compress(s); double finish_compress = clock(); fc.UnCompress(s); double finish_uncompress = clock(); cout << "压缩时间是:" << finish_compress - start_compress << "ms" << endl; cout << "解压缩时间是:" << finish_uncompress - finish_compress << "ms" << endl; } #include #include using namespace std; void testfile() { char *s="WZXXXXXXXXXXXX.TXT"; ofstream fout(s); int ss=100; while(ss--){ fout <<"123abcdefghijklmnopqrstuvwxyz"; } wztest(s); } int main0() { TestCompress(); system("pause"); return 0; } int main() { FILE *pFile = fopen("xxx.txt", "w"); char x[]="wzzx sts bit "; fwrite(x, 6,12,pFile); fclose(pFile); fflush(pFile); wztest("xxx.txt"); system("pause"); return 0; } ogr pgspt %^&*( 程序运行可以进行微设计 可以用用户输入绝对路径 然后将对应的文件压缩 同文件目录中产生新文件 注意转义操作 ABCDEFGH LMOIJK PQRSD
324
ogr
pgspt
%^&*(
LMOIJK
PQRSD