哈弗曼编码/译码器

enefry

浏览: 36836 次
性别:
来自: 广州

最近访客更多访客>>

雪糕Java

fasmot

lkf35210

lupingzi

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

算法数据结构 J#

在离散数学中就看到了哈弗曼编码这个东西....但是当时没有比较具体的思考....当数据结构到来时才选择了这个课程设计...(采用 java 实现)
    实现这个程序有几个核心(实现算法方法有多种):
        1 : 建树
        2 : 查找数据节点(分编码、译码两种)
        3 : 将编码用二进制数据写入
    这个我总共修改了 n 遍, 第一版本是采用 char (java 的char是 16位 ) 是为了压缩文本数据.但是这个的速度确实有点慢(因为全部都是无序链表形式),但是文本压缩率约 58%,之后为了可以压缩所有文件采用了 byte (压缩率有所下降) 进行压缩.这样也减少了内存使用后来使用.后来进行了新的统计查找算法(采用哈希表),极大提高了压缩执行速度..
    压缩char 的程序做了比较美观的界面,可以显示文本内容,界面比较完善.(课程设计嘛).byte 的界面十分简单 ,只是为了实现我的想法.提高可应用性.但是一开始采用的还是链表的方法,执行速度还是有点慢.暑假回家后没事就顺便修改了一下.某天突然来了灵感就改成哈希表形式(压缩速度大大提高).但是这个已经不可能再作为课程设计上交了(小小郁闷一下) ...后来也完善了大文件(2G内)的压缩算法(其实没什么.这里不介绍,下面是界面)

/* 先介绍一下节点类*/
class ByteHfmNode {
    byte data;    int weight;    long code;    int deep;    ByteHfmNode rchild;    ByteHfmNode lchild;    ByteHfmNode next;    private boolean isleaf;
     ByteHfmNode();                               // 无参数构造器   由于构造树的根
     ByteHfmNode(byte);                           //通过数据        构造节点
     ByteHfmNode(byte,int);                       //通过数据和其权构造叶子节点
     ByteHfmNdoe(ByteHfmNode,ByteHfmNode);        //通过两个孩子节点构造新节点
     boolean isLeaf();                            // 检验叶子节点
     boolean isRoot();                            // 检验根节点
}
/*********************** 在介绍主要实现功能的方法*************************************/
/************************ 1：用于统计字符的方法:************************************/
    /**
     * 编码过程添加数据记录(用于统计)
     * @param data      字符
     */
    void append(byte data){
        pos = data+128;
        if(appendArray[pos] == null){
            appendArray[pos] = new ByteHfmNode(data);
            this.LEAFS ++;
        }else{
            appendArray[pos].weight ++;
        }
    }
    /**
     * 解码过程添加数据(用于统计)
     * @param data      字符
     * @param weight    权
     */
    void append(byte data,int weight){
        unCodeappendTmp.next = new ByteHfmNode(data,weight);
        unCodeappendTmp = unCodeappendTmp.next;
    }
/*********************** 2 用于建树的方法   ***************************/
/**
     * 建立哈弗曼编码树
     * @throws Exception
     */
    void createTree()throws Exception {
        /*根据编码译码进行建树前初始化*/
        if(Coding){
            this.initSortArray();       //更加原来的统计数组生成可以排序的数据
            this.sort();               // 排序
            this.arrrayToLink();        //将数组转换为链表(这样方便于建树)
        }else{
            //译码不需要进行初始化;
        }
        ByteHfmNode tmp = head;
ByteHfmNode stmp = head;
while(head.next.next != null){
stmp = head;
tmp = new ByteHfmNode(head.next,head.next.next);
head.next = head.next.next.next;
while(stmp.next != null&&stmp.next.weight < tmp.weight){
   stmp = stmp.next;
}
if(stmp.next == null){
   stmp.next = tmp;
    //stmp = head;
}else{
                 tmp.next = stmp.next;
   stmp.next = tmp;
}
}
head.rchild = head.lchild = head.next;
        /* 解码时候需要先进行初始化搜索缓存节点*/
        if(!Coding){
           searchTmp = head.lchild;
        }
    }
/****** 建树使用到的方法 *****/
    /**
     * 对编码树进行编码
     * @throws Exception
     */
    void codeTree()throws Exception{
        if(head.lchild.isRoot()){
            this.CODETREE(head.lchild, 0, 0);
        }else{
            CODETREE(head.lchild,1,1);
        }

    }
    /**
     * 对哈弗曼数组进行排序（按权值升序排序）
     */
    private void sort(){
        int i=0,j=0;
        int length = sortArray.length;
ByteHfmNode temp;
for(int gap = length>>1; gap>0;gap = gap==2?1:(int )(gap/2.2)){
                  for( i = gap;i<length;i++){
                     temp = sortArray[i];
                     j=i;
                     for(;j>= gap && temp.weight < sortArray[j - gap].weight ;j-= gap){
                         sortArray[j] = sortArray[j - gap];
                     }
                     sortArray[j] = temp;
                  }
                }
                        for( i=0;i<sortArray.length;i++){

        }
    }
    /**
     * 将排好序的数组转换为链表同时记录下字符及其权值
     */
    private void arrrayToLink(){
        head.next = sortArray[0];
        datas = null;
        weights = null;
        datas = new byte[sortArray.length];
        weights = new int[sortArray.length];
        datas[0] = sortArray[0].data;
        weights[0] = sortArray[0].weight;
        //System.out.println("Length:"+sortArray.length);
/******** 现在需要先将排序号的数据保存下来这样解码才有数据 ****/
        for(int i = sortArray.length - 1;i>0;i--){
            sortArray[ i-1 ].next = sortArray[i];
            datas[i] = sortArray[i].data;
            weights[i] = sortArray[i].weight;
        }
    }

/******************************** 移位写入文件 ****************************************/
/**                    在这里必须先介绍我的文件结构:
     * 文件结构：
     *      文件头： ‘B’0xff    2 byte
     *      文件长度： count       4 byte
     *      字符种类： data.length 1 byte
     *      最大权值： weightSize 1 byte
     *      间隔符： 0xff 0xff   2 byte
     *      权表：{ 字符，对应的权，下一字符，对应权 ..... }
     *      间隔符　： 0xff 0xff   2 byte
     *      编码数据：二进制文件
     */
public byte[] getHuffmanByteArray()throws Exception{
        if (lastCount == count) {
                return hfmOutput.toByteArray();
            }
            int lastSize = 0;
            if(hfmOutput!=null){
                lastSize = hfmOutput.size();
            }

            hfmOutput = null;
            hfmOutput = new ByteArrayOutputStream();
            //******************************************************************
            bhm.createTree();
            bhm.codeTree();
            byte [] datas = bhm.getDatas();
            int [] weights = bhm.getWeight();
            /* 文件头标志位 byte 哈弗曼编码文件 */
            byte[] fileHead = {'B',(byte)0xff};
            /* 文件内部间隔符 */
            byte[] mark = {(byte)0xff,(byte)0xff};
            int weightSize = weights[weights.length-1];
            /* 根据本次编码最大权值占用 byte 数控制写入权值的位数 */
            if(weightSize<0xff){
                weightSize = 1;
            }else{
                if(weightSize<0xffff){
                    weightSize = 2;
                }else{
                    if(weightSize<0xffffff){
                        weightSize = 3;
                    }else{
                        weightSize = 4;
                    }
                }
            }

            hfmOutput.write(fileHead);
            hfmOutput.write(intToBytes(count));
            hfmOutput.write(datas.length);
            hfmOutput.write(weightSize);
            hfmOutput.write(mark);
            byte wtmp[] = new byte[4];
            for(int i = 0 ;i<datas.length;i++){
                hfmOutput.write(datas[i]);
                wtmp = intToBytes(weights[i]);
                hfmOutput.write(wtmp,0,weightSize);
            }
            hfmOutput.write(mark);
            /* 权表保存完毕,下面写入数据*/
            /* 临时缓存变量*/
            byte ct =0;
            /* 记录上次写入剩余数据*/
            byte buffer = 0;
            /* 记录本次写入开始位*/
            int nowPos = 0;
            /* 记录本字符编码*/
            long code = 0;
            /* 记录本字符编码长度*/
            int deep = 0;
            /* 计数器*/
            int k = 0 ;
            ByteHfmNode hfmTmp;
           // System.out.println(count);
            int bit =0;
            int i = 0;
            int writeTime = 0;
            for( i=0;i<count;i++){
                hfmTmp = bhm.getNode(buf[i]);
                code = hfmTmp.code;
                deep = hfmTmp.deep;
                bit+=deep;
                if(nowPos+deep>8){
                /* 编码已经超过一个字节先写入*/
                ct = (byte)((code>>(deep - ( 8 - nowPos)))&0xff);
                ct = (byte)((ct)|(buffer));
                hfmOutput.write(ct);
                writeTime++;
                for(k = 1; deep -(k<<3) + nowPos >8; ){
                    ct = (byte) ( (code>>( deep - ((++k)<<3) +nowPos ) )& 0xff );
                    hfmOutput.write(ct);
                    //writeTime++;
                }
                nowPos = (deep+nowPos)- (k<<3) ;
                buffer = (byte)((code<<(8-nowPos ))&0xff);
            }else{
                /*添加后编码不够一个字节长度 */
               if(nowPos+deep =={
                    ct = (byte)code;
                    ct = (byte)((ct)|(buffer));
                    hfmOutput.write(ct);
                    writeTime++;
                    nowPos = 0;
                    buffer =0;
         }else{
                    nowPos += deep;
                    ct = (byte)((code<<(8 - nowPos))& 0xff);
                    buffer =(byte)(ct|buffer);
         }
            }

     }
            /*如果还有剩余数据写入*/
            if(nowPos >0){
                  hfmOutput.write(buffer);
          }
            //System.out.println("\nOK!"+hfmOutput.size()+" bit"+bit + " i:"+i+" writeTimt:"+writeTime);
        return hfmOutput.toByteArray();
}

/*************************   最后介绍一下解码过程吧   *************************/
     public byte[] getunCodeBytes()throws Exception{
        if(this.buf[0] != 'B'||this.buf[1]!=-1 ||this.count<12||buf[8] != -1 || buf[9] != -1 ){
            throw new UnsupportedEncodingException();
        }
        int nowPos = 10;
        int contentLength;
        int dataLength = buf[6]&0x000000ff;
        int weightSize = buf[7];
        if(dataLength == 0& weightSize!= 0){
            dataLength = 256;
        }
        int i,j,k;
        int intTmp;
        byte byteTmp;
        byte[] bytesTmp = new byte[4];
        bytesTmp[0] = buf[2];
        bytesTmp[1] = buf[3];
        bytesTmp[2] = buf[4];
        bytesTmp[3] = buf[5];
        contentLength = bytesToInt(bytesTmp);

        for(i=0;i<4;i++){
            bytesTmp[i] = 0;
        }
        /*读取权表*/
        for(i=0;i<dataLength;i++){
            byteTmp = buf[nowPos++];
            for(j =0 ;j<weightSize;j++){
                bytesTmp[j] = buf[nowPos++];
            }
            intTmp = bytesToInt(bytesTmp);
            bhm.append(byteTmp, intTmp);
        }
        if(buf[nowPos++] != -1|| buf[nowPos++]!= -1){
            throw new UnsupportedEncodingException();
        }
        /* 建树编码*/
        bhm.createTree();
        //bhm.codeTree();
        boolean rchild;
        int time = 0;
        ByteHfmNode tmp;
        /*权表读取完毕，开始解码*/
while(nowPos<count){
byteTmp = buf[nowPos++];
for(j = 7;j>=0;j--){
   intTmp = (byteTmp>>j);
                        intTmp = intTmp&0x1;
   rchild = (intTmp == 1 );
   if(bhm.searchNode(rchild)){
    time++;
                                tmp =bhm.getSearchNode();
    bos.write(tmp.data);
    if(time >= contentLength){
     //System.out.println(time+" "+contentLength);
     break;
    }
   }
}
                byteTmp = 0;
}
        return bos.toByteArray();
}

使用大文件压缩器时间
(压缩一个 2.09m 的exe文件时间: 273 ms 压缩后 1.71m ,
一个 61.3m 的exe 时间:2723ms 压缩后文件大小基本没变 )
压缩率跟哈弗曼算法有关