Range Coding 是算术编码的变种,二者的效率几乎没有差别,Range Coding 速度更快,且没有专利问题。下面的程序移植和改进自一个非常清晰简洁的C++实现。当然,直接使用下面的代码去压缩文件效果并不好,速度慢压缩率也低,Range Coding 更适合作为其他算法的后端,比如 LZ77、Block Sorting。
如果你看到这里一头雾水的话,可以上 wikipedia 参考“算术编码”,不过更好的选择是找一篇名为《笨笨数据压缩教程》的系列文章来入门。
D1.0 Code
-
-
-
-
-
-
-
-
-
-
-
import std.stdio;
-
-
template
RangeCoding64Base()
-
{
-
const
ulong Top = 1UL << 56UL;
-
const
ulong Bottom = 1UL << 48UL;
-
const
ulong MaxRange = Bottom;
-
ulong m_low = 0;
-
ulong m_range = ulong.max;
-
}
-
-
struct
RangeEncoding64
-
{
-
mixin RangeCoding64Base;
-
-
private
bool
m_flushed =
false
;
-
private
void
delegate(ubyte) m_sink = null;
-
-
void
init(
void
delegate(ubyte) sink)
-
{
-
assert(sink !is null);
-
m_sink = sink;
-
}
-
-
void
close()
-
{
-
if
(!m_flushed)
-
flush();
-
}
-
-
void
encode(ulong symbolLow, ulong symbolHigh, ulong totalRange)
-
{
-
m_range /= totalRange;
-
m_low += symbolLow * m_range;
-
m_range *= symbolHigh - symbolLow;
-
-
while
((m_low ^ (m_low + m_range)) < Top || m_range < Bottom && ((m_range = -m_low & (Bottom - 1)),
true
))
-
{
-
ubyte b = m_low >> (m_low.sizeof
* 8 - 8);
-
m_sink(b);
-
m_range <<= 8;
-
m_low <<= 8;
-
}
-
}
-
-
void
flush()
-
{
-
if
(!m_flushed)
-
{
-
for
(
int
i = 0; i < m_low.
sizeof
; i++)
-
{
-
ubyte b = m_low >> (m_low.sizeof
* 8 - 8);
-
m_sink(b);
-
m_low <<= 8;
-
}
-
m_flushed = true
;
-
}
-
}
-
}
-
-
struct
RangeDecoding64
-
{
-
mixin RangeCoding64Base;
-
-
private
ulong m_code;
-
private
ubyte delegate() m_emitter;
-
-
void
init(ubyte delegate() emitter)
-
{
-
assert(emitter !is null);
-
m_emitter = emitter;
-
for
(
size_t
i = 0; i < m_code.
sizeof
; i++)
-
{
-
m_code = (m_code << 8) | emitter();
-
}
-
}
-
-
ulong currentCount(ulong totalRange)
-
{
-
return
(m_code - m_low) / (m_range /= totalRange);
-
}
-
-
void
decode(ulong symbolLow, ulong symbolHigh, ulong totalRange)
-
{
-
m_low += symbolLow * m_range;
-
m_range *= symbolHigh - symbolLow;
-
-
while
((m_low ^ m_low + m_range) < Top || m_range < Bottom && ((m_range = -m_low & Bottom - 1),
true
))
-
{
-
m_code= m_code << 8 | m_emitter(), m_range <<= 8, m_low <<= 8;
-
}
-
-
}
-
}
-
-
-
struct
OrderZeroModel(uint SymMax = 255)
-
{
-
public
const
uint SymbolMax = SymMax;
-
public
const
uint NoOfSymbols = SymbolMax + 2;
-
-
-
private
ulong[SymbolMax + 2] m_freq;
-
-
void
init()
-
{
-
for
(
size_t
i = 0; i < m_freq.length; i++)
-
{
-
m_freq[i] = i;
-
}
-
}
-
-
private
void
rescale()
-
{
-
ulong newTotal = 0;
-
for
(
size_t
i = 1; i < m_freq.length - 1; i++)
-
{
-
newTotal += ((m_freq[i] - m_freq[i - 1]) / 2) + 1;
-
-
m_freq[i] = m_freq[i - 1] + ((m_freq[i] - m_freq[i - 1]) / 2) + 1;
-
}
-
m_freq[m_freq.length - 1] = newTotal;
-
}
-
-
void
update(uint sym)
-
{
-
for
(
size_t
i = sym + 1; i < m_freq.length; i++) {
-
m_freq[i]++;
-
}
-
-
if
(total() >= RangeCoding64Base!().MaxRange)
-
rescale();
-
}
-
-
uint getSymbol(ulong n)
-
{
-
uint sym = SymbolMax;
-
while
(m_freq[sym] > n) --sym;
-
-
return
sym;
-
}
-
-
uint total()
-
{
-
return
m_freq[m_freq.length - 1];
-
}
-
-
ulong low(uint sym)
-
{
-
return
m_freq[sym];
-
}
-
-
ulong high(uint sym)
-
{
-
return
m_freq[sym + 1];
-
}
-
-
ulong opIndex(uint rhs)
-
{
-
return
m_freq[rhs];
-
}
-
}
-
-
-
-
int
main(string[] args)
-
{
-
const
uint EndOfStream = 256;
-
-
ubyte[] compressed;
-
void
sink(ubyte u)
-
{
-
compressed ~= u;
-
}
-
-
ubyte[] origin;
-
for
(
int
i = 0; i < 10000; i++)
-
origin ~= ['A', 'B', 'C', 'D', 'E', 'F', 'G'];
-
-
RangeEncoding64 encoder;
-
encoder.init(&sink);
-
OrderZeroModel!(EndOfStream) model;
-
model.init();
-
-
writefln("compression started..."
);
-
foreach(ubyte b; origin)
-
{
-
encoder.encode(model.low(b), model.high(b), model.total);
-
model.update(b);
-
}
-
encoder.encode(model.low(EndOfStream), model.high(EndOfStream), model.total);
-
model.update(EndOfStream);
-
encoder.close();
-
-
writefln("originial size: %d"
, origin.length);
-
writefln("compressed size: %d (%d%%)"
, compressed.length,
-
(origin.length - compressed.length) * 100 / origin.length);
-
-
writefln("decoding...."
);
-
-
size_t
pos = 0;
-
-
ubyte delegate() emitter = {
-
return
compressed[pos++];
-
};
-
-
model.init();
-
RangeDecoding64 dec;
-
dec.init(emitter);
-
-
ubyte[] decompressed;
-
-
while
(
true
)
-
{
-
ulong count = dec.currentCount(model.total);
-
uint sym = model.getSymbol(count);
-
if
(sym == 256)
break
;
-
decompressed ~= sym;
-
dec.decode(model.low(sym), model.high(sym), model.total);
-
model.update(sym);
-
}
-
-
writefln(decompressed.length);
-
-
assert(decompressed[] == origin[]);
-
-
return
0;
-
}
分享到:
相关推荐
print '%d %d\t' % (col,row), print '' if __name__ == '__main__': gen(4) ``` 知识点: * 函数定义:使用 `def` 关键字定义一个函数,函数名为 `gen`,参数为 `line_cnt`。 * 函数调用:使用函数名加括号 `...
本文实例讲述了Python实现的rsa加密算法。分享给大家供大家参考,具体如下: 算法过程 1. 随意选择两个大的质数p和q,p不等于q,计算N=pq。 2. 根据欧拉函数,不大于N且与N互质的整数個数為(p-1)(q-1)。 3. 选择一个...
在Python编程语言中,编码测试(Coding Test)是评估开发者技能和能力的一种常见方式,它通常涉及编写代码来解决特定的问题或实现特定的功能。在这个名为“coding_test:python编码测试”的压缩包中,我们可能找到了...
#coding=utf-8 from threading import Thread import time g_num = 100 def work1(): global g_num for i in range(3): g_num += 1 print("----in work1, g_num is %d---"%g_num) def work2(): global g_num ...
#coding:utf8 import random,wx def password(event): a = [chr(i) for i in range(97,123)] b = [chr(i) for i in range(65,91)] c = ['0','1','2','3','4','5','6','7','8','9'] d = ['!','@','#','$','%','^'...
python 实现 PC端剪映字幕转换SRT格式工具代码-Python 实现,# -*- coding: utf-8 -*- import getpass import os import json import re def get_time(time_int): # 使用正则表达式处理时间格式化问题 if time_...
result2 = [p for p in range(2, N) if 0 not in [p % d for d in range(2, int(sqrt(p)) + 1)]] print(result2) ``` ### 扩展知识点 #### os模块的功能 `os`模块是Python的一个标准库,提供了许多与操作系统交互...
# -*- coding: gbk -*- import sys print sys.argv if __name__=='__main__': print Program name, sys.argv[0] for i in range(1, len(sys.argv)): print arg%d%i,sys.argv[i] 测试: python test.py 1 2 3 4 5...
### Python 实现带百分比的进度条 #### 第一种方法:基本进度条 在 Python 中,我们可以使用标准库 `sys` 和 `time` 来创建一个简单的带百分比的进度条。这种方法适用于需要显示某个过程进度的情况,例如数据处理...
# -*- coding: utf-8 -*- import RPi.GPIO as GPIO import time Clock = 16 Address = 20 DataOut = 21 DOUT = 17 def ADC_Read(channel): # ADC读取函数 value = 0 for i in range(0, 4): if ((channel >> (3...
时间码A、B、E和G通常采用曼彻斯特II编码(Manchester II Coding),这是一种自时钟编码方法,即数据位的中间位置有一个跳变,通过这种跳变来表示数据“0”或“1”,同时还能提供接收端的时钟恢复功能。曼彻斯特II...
KMP 算法(Knuth-Morris-Pratt 算法)是由 D.E.Knuth、J.H.Morris 和 V.R.Pratt 三位计算机科学家共同提出的一种高效的字符串匹配算法。它在模式匹配过程中充分利用了已经比较过的部分信息,从而避免了不必要的比较...
# coding=gbk from time import sleep, ctime import threading ``` 接下来,定义两个函数`muisc`和`move`,它们分别代表两个不同的`while True`循环。这两个函数内部包含了一个打印语句和一个`sleep`调用,模拟了...
# -*- coding: utf-8 -*- ``` 然后可以正常打印中文: ```python print(u'中文') ``` #### 5. 布尔运算 - Python中,布尔类型可以与其他数据类型进行逻辑运算,包括`and`、`or`和`not`。 #### 6. 列表...
#coding=utf-8 import itchat import xlrd from apscheduler.schedulers.background import BlockingScheduler import os ``` 这里导入了必要的库。 ```python def SentChatRoomsMsg(name, context): itchat.get_...
# -*- coding:utf-8 -*- import requests import threading import argparse import sys import time from queue import Queue url_list = [] queue = Queue() headers = { 'Connection': 'keep-alive', 'Accept'...