菜鸟说：哈夫曼压缩的问题

jcs130

浏览: 132741 次
性别:
来自: Ottawa

最近访客更多访客>>

dasongshu

WangJiaX

a380816700

马帅0001

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

博客分类：

学习小结

搞了一天终于把哈夫曼压缩搞好了，自己想了很多，也参照了别人的代码，终于把自己的做出来了，

关于代码，就不多做说明了，因为思路都是差不多的，代码会在文章最后面贴出来，那我就讲讲几个我遇到的几个问题吧:

1.static尽量不要用：以前我编写什么程序，静态变量太好用了~加个点一引用就好了，不需要传来传去~但是这次我算得到了教训，为了方便，一开始我爸很多变量都设成了静态的，但是邓树构造完成以后，获得叶节点编码的时候，就是一串000000000……找了半天错误不知道错在哪，就把所有的静态变量都设为了private 来回传，结果这样就对了……所以静态变量要少用，尤其是在需要进行递归的场合，更要多加注意！

2.缓冲流可以加快写入速度，但是一定要记着flush：在不用缓冲流的时候，压缩小文件的时候速度还是可以接受的，但是要压缩一个3兆的图片的时候就要等很长时间了，这是把文件输出流变成缓冲流就能节省时间，但是我在一开始用的时候，忘记了在每次写入缓冲流的时候强制刷新（flush），结果导致压缩后的文件异常的小，本来几百K的文件压缩完了只剩下一百多K，一开始我还挺高兴~想这个压缩的效率还挺高~但是当我压缩一个只有4Kb的小文件的时候……压缩出来的文件竟然是0Kb……囧……经过同学的提醒意识到了应该强制刷新一下，这下就正常了。

3.关于压缩的效率：哈夫曼压缩是无损压缩，所以当我压缩一个色彩比较丰富的BMP位图（3M）

压缩完以后，大小没变多少

但是当我压缩一个我自己建的一个重复数据很多的文件的时候，大小的改变就很明显了：

压缩前的大小：

压缩后的大小：

4.最后补0的个数：因为不一定哈弗曼编码的总长度是8的倍数，所以要在后面补0，这样就需要一个用来记录补了多少个0的数据，方便再解压缩的时候读取。

解压缩就是压缩的逆过程（说起来简单啊……╮(╯▽╰)╭），下一步就是要把我压缩的文件还原出来~加油~~~

压缩的代码很长……如下：

package 哈夫曼压缩;

/**
 * 哈夫曼树节点类
 * 
 * @author Micro
 * 
 */
public class HafNode implements Comparable<HafNode> {
	private HafNode father;
	private HafNode left;
	private HafNode right;
	// 字节
	private int num;
	// 出现的频率
	private int pl;

	// 重载构造器
	public HafNode(int num, int pl) {
		this.num = num;
		this.pl = pl;
	}

	// get set方法
	public int getPl() {
		return pl;
	}

	public void setPl(int pl) {
		this.pl = pl;
	}

	public HafNode getFather() {
		return father;
	}

	public void setFather(HafNode father) {
		this.father = father;
	}

	public HafNode getLeft() {
		return left;
	}

	public void setLeft(HafNode left) {
		this.left = left;
	}

	public HafNode getRight() {
		return right;
	}

	public void setRight(HafNode right) {
		this.right = right;
	}

	public int getNum() {
		return num;
	}

	public void setNum(int num) {
		this.num = num;
	}

	//设置按照什么顺序排序
	@Override
	public int compareTo(HafNode o) {
		// 按照频率的自然顺序排序
		int P = o.getPl();
		return pl - P;
	}

}

记录编码的类

package 哈夫曼压缩;

/**
 * 存储每一个字节的哈弗曼编码和哈夫曼编码的长度
 * 
 * @author Micro
 * 
 */
public class Code {
	// 哈夫曼编码
	private String node;
	// 编码长度
	private int n;

	// 重载构造器
	public Code(int n, String node) {
		this.n = n;
		this.node = node;
	}

	public String getNode() {
		return node;
	}

	public void setNode(String node) {
		this.node = node;
	}

	public int getN() {
		return n;
	}

	public void setN(int n) {
		this.n = n;
	}

}

创建哈夫曼树，获得叶节点编码及读取文件写入文件的方法等：

package 哈夫曼压缩;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.PriorityQueue;

/**
 * 读取文件，统计每个字节出现频率
 * 
 * @author Micro
 * 
 */
public class readFile {
	private static int[] by = new int[256];
	private Code SaveCode[] = new Code[256];
	private static HafNode root;

	/**
	 * 1.统计各字节频率，创建节点，并存入优先队列中 2.把优先队列创建成哈夫曼树
	 * 
	 * @param path
	 *            ：路径
	 * @throws IOException
	 */
	public void readF(String path) throws IOException {
		// 创建文件输入流——用来获取频率
		FileInputStream ins = new FileInputStream(path);
		// 创建缓冲流
		BufferedInputStream bis = new BufferedInputStream(ins);
		while (bis.available() > 0) {
			int t = bis.read();
			by[t]++;
		}
		// 创建优先队列
		PriorityQueue<HafNode> queue = new PriorityQueue<HafNode>();
		// 把所有节点都放入队列中
		for (int i = 0; i < by.length; i++) {
			if (by[i] != 0) {
				HafNode node = new HafNode(i, by[i]);
				queue.add(node);
			}
		}
		// 构建哈夫曼树
		HafTree(queue);
	}

	// 创建哈夫曼树
	public void HafTree(PriorityQueue<HafNode> queue) {
		while (queue.size() > 1) {
			HafNode min1, min2;
			min1 = queue.poll();
			min2 = queue.poll();
			// 创建合并的节点
			HafNode result = new HafNode(min1.getNum() + min2.getNum(),
					min1.getPl() + min2.getPl());
			result.setLeft(min1);
			result.setRight(min2);
			min1.setFather(result);
			min2.setFather(result);
			queue.add(result);
		}
		root = queue.peek();
	}

	// 获得各叶节点的哈弗曼编码
	public void getCode(HafNode a, String Code) {

		if (a.getLeft() == null && a.getRight() == null) {
			System.out.println("字节" + a.getNum() + "的哈夫曼编码为：\t" + Code);
			Code b = new Code(Code.length(), Code);
			SaveCode[a.getNum()] = b;
		}
		if (a.getLeft() != null) {
			getCode(a.getLeft(), Code + '0');
		}
		if (a.getRight() != null) {
			getCode(a.getRight(), Code + '1');
		}
	}

	/**
	 * 将读取的文件的字节转换成哈夫曼编码写入压缩文件 1.把每个字节对应的编码长度写入文件，长度256
	 * 2.从Code数组中依次读取代表字节的哈弗曼编码，并八位八位的转化成int写入文件
	 * 
	 * @param path1
	 *            :要压缩的文件
	 * @param path2
	 *            :压缩后的文件路径
	 * @throws IOException
	 */
	public void writeFile(String path1, String path2) throws IOException {

		// 新建文件输出流
		FileOutputStream fos = new FileOutputStream(path2);
		// 包装为缓冲流
		BufferedOutputStream bos = new BufferedOutputStream(fos);
		// 先把每个字节的编码的长度写入文件（长度为256）
		for (int i = 0; i < SaveCode.length; i++) {
			if (SaveCode[i] == null) {
				bos.write(0);
				bos.flush();
			} else {
				bos.write(SaveCode[i].getN());
				bos.flush();
			}
		}
		// 写出每一个字节对应的编码
		int count = 0;// 记录中专的字符个数
		int i = 0;// 第i个字节
		String writes = "";
		String tranString = "";// 中转的字符串
		String waiteString;// 保存所转化的所有字符串
		while ((i < 256) || (count >= 8)) {
			// 如果缓冲区的字节数大于等于8
			if (count >= 8) {
				waiteString = "";// 清空要转化的码
				for (int t = 0; t < 8; t++) {
					waiteString = waiteString + writes.charAt(t);
				}
				// 将writes前八位删掉
				if (writes.length() > 8) {
					tranString = "";
					for (int t = 8; t < writes.length(); t++) {
						tranString = tranString + writes.charAt(t);
					}
					writes = "";
					writes = tranString;
				} else {
					writes = "";
				}
				count = count - 8;// 写出一个8位的字节
				int intw = changeString(waiteString);// 转化为int
				// 写入文件
				bos.write(intw);
				bos.flush();
				// System.out.println("写入了->"+waiteString);
			} else {
				// 得到第i个字节的编码信息，等待写入
				if (SaveCode[i] != null) {
					count = count + SaveCode[i].getN();
					writes = writes + SaveCode[i].getNode();
				}
				i++;
			}
		}
		// 把编码没有足够8的整数倍的String补0凑8，再写入
		if (count > 0) {
			// 编码最后补的0的个数
			int endz = 0;
			waiteString = "";// 清空要转化的代码
			for (int t = 0; t < 8; t++) {
				if (t < writes.length()) {
					waiteString = waiteString + writes.charAt(t);
				} else {
					waiteString = waiteString + '0';
					endz++;
				}
			}
			bos.write(changeString(waiteString));
			bos.flush();
			bos.write(endz);
			bos.flush();
			System.out.println("编码区写入了->" + endz + "个0");
		} else {
			System.out.println("编码区写入了->" + 0 + "个0");
			bos.write(0);
			bos.flush();
		}
		/************************ 再次读入文件信息，对应每一个字节写入编码 *********************/

		// 再次读入文件信息，对应每一个字节写入编码
		// 用来读取数据的文件输入流
		FileInputStream ins = new FileInputStream(path1);
		// 包装成缓冲流
		BufferedInputStream bis = new BufferedInputStream(ins);

		count = 0;
		writes = "";
		tranString = "";
		int idata = bis.read();
		// 文件没有读完的时候
		while ((bis.available() > 0) || (count >= 8)) {
			// 如果缓冲区等待字符大于等于8
			// System.out.println(count+"<<>>"+writes.length());
			if (count >= 8) {
				waiteString = "";// 清空要转化的码
				for (int t = 0; t < 8; t++) {
					waiteString = waiteString + writes.charAt(t);
				}
				// 删除前八位
				writes = writes.substring(8);

				count -= 8;// 写出一个8位的字节
				int intw = changeString(waiteString);
				bos.write(intw);// 写入到文件中
				bos.flush();
			} else {
				// 读入idata字节 ，对应编码写出信息
				count += SaveCode[idata].getN();
				writes += SaveCode[idata].getNode();
				// System.out.println(SaveCode[idata].getN()+"<<>>"+SaveCode[idata].getNode().length());
				idata = bis.read();
			}
		}
		count += SaveCode[idata].getN();
		writes += SaveCode[idata].getNode();
		// 把count 剩下的写入
		int endsint = 0;
		if (count > 0) {
			waiteString = "";// 清空要转化的码
			for (int t = 0; t < 8; t++) {
				if (t < writes.length()) {
					waiteString += writes.charAt(t);
				} else {
					waiteString += '0';
					endsint++;
				}
			}
			// System.out.println(waiteString.length());
			bos.write(changeString(waiteString));// 写入最后补的0
			bos.flush();
			bos.write(endsint);// 写入最后补的0的个数
			bos.flush();
			System.out.println("压缩区写入了->" + endsint + "个0");
		}
	}

	/**
	 * 将八位字符串转化为一个整数
	 * 
	 * @param s
	 * @return
	 */
	public int changeString(String s) {
		return ((int) s.charAt(0) - 48) * 128 + ((int) s.charAt(1) - 48) * 64
				+ ((int) s.charAt(2) - 48) * 32 + ((int) s.charAt(3) - 48) * 16
				+ ((int) s.charAt(4) - 48) * 8 + ((int) s.charAt(5) - 48) * 4
				+ ((int) s.charAt(6) - 48) * 2 + ((int) s.charAt(7) - 48);
	}

	// 程序入口
	public static void main(String[] args) throws IOException {
		readFile rf = new readFile();
		// 要压缩的文件路径
		String path = "D:\\新建文本文档.txt";
		// 压缩后的文件路径
		String path1 = "D:\\123.txt";
		rf.readF(path);
		// 获得各叶节点的哈弗曼编码
		String Code = "";
		rf.getCode(root, Code);
		// 写入压缩文件
		rf.writeFile(path, path1);
	}
}