This commit is contained in:
parent
7321c3c9e2
commit
74cd548adb
1 changed files with 226 additions and 150 deletions
320
ac_encode.py
320
ac_encode.py
|
@ -1,177 +1,253 @@
|
||||||
## Arithmetic coding compressor and uncompressor for binary source.
|
# Arithmetic coding compressor and decompressor for binary strings.
|
||||||
## This is a cleaned-up version of AEncode.py
|
# via: http://www.inference.org.uk/mackay/python/compress/ac/ac_encode.py
|
||||||
|
# main page: http://www.inference.org.uk/mackay/python/compress/
|
||||||
|
# this has been cleaned up (passes pycodestyle) and ported to python 3.
|
||||||
|
|
||||||
|
# Default prior distribution: initial pseudo-counts for the source
# symbols '0' (BETA0) and '1' (BETA1).
BETA0 = 1
BETA1 = 1

# The coder does its interval arithmetic on integers between 0 and ONE,
# where ONE = 2**M.  HALF, QUARTER and THREEQU are the rescaling thresholds.
M = 30
ONE = 1 << M
HALF = 1 << (M - 1)
QUARTER = 1 << (M - 2)
THREEQU = HALF + QUARTER
|
|
||||||
def clear(c, charstack):
    """Return bit ``c`` followed by all queued opposite bits.

    Args:
        c: the bit to emit, the integer 0 or 1.
        charstack: one-element list whose item counts the undecided bits
            still waiting to be emitted.  It is reset to 0 in place.

    Returns:
        A string made of ``repr(c)`` followed by ``charstack[0]`` copies of
        ``repr(1 - c)``.
    """
    pending = charstack[0]
    # The queued bits are the complement of the bit that resolves them.
    emitted = repr(c) + repr(1 - c) * pending
    charstack[0] = 0
    return emitted
|
|
||||||
|
|
||||||
def encode(string, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
    """Arithmetic-encode a string of '0' and '1' characters.

    Args:
        string: iterable of characters.  Only '0' and '1' narrow the coding
            interval; any other character leaves it unchanged.
        c0: pseudo-count for symbol '0'; must be > 0.
        c1: pseudo-count for symbol '1'; must be > 0.
        adaptive: when true, the count of each symbol seen is incremented so
            the probability of '0' is re-estimated before every symbol; when
            0, the probability stays fixed at c0 / (c0 + c1).
        verbose: accepted for interface compatibility; not used here.

    Returns:
        The encoded bits as a string of '0' and '1' characters.

    Prints "warningA" / "warningB" when the model's boundary had to be
    nudged off an interval end point, i.e. a requested probability was too
    small for the integer resolution and was rounded up.
    """
    assert c0 > 0
    assert c1 > 0

    b = ONE
    a = 0
    if adaptive == 0:
        p0 = c0 * 1.0 / (c0 + c1)
    out = []
    charstack = [0]  # how many undecided characters remain to print

    for c in string:
        w = b - a
        if adaptive:
            p0 = c0 * 1.0 / (c0 + c1)

        boundary = a + int(p0 * w)
        # Keep the boundary strictly inside (a, b) so both symbols keep a
        # non-empty sub-interval.
        if boundary == a:
            boundary += 1
            print("warningA")
        if boundary == b:
            boundary -= 1
            print("warningB")

        if c == '1':
            a = boundary
            if adaptive:
                c1 += 1.0
        elif c == '0':
            b = boundary
            if adaptive:
                c0 += 1.0
        # any other character is ignored

        # Output bits while the interval lies entirely in one half.
        while a >= HALF or b <= HALF:
            if a >= HALF:
                out.append(clear(1, charstack))
                a -= HALF
                b -= HALF
            else:
                out.append(clear(0, charstack))
            a *= 2
            b *= 2

        assert a <= HALF
        assert b >= HALF
        assert a >= 0
        assert b <= ONE

        # If the interval straddles HALF but is getting small, rescale it
        # and remember one more undecided bit.
        while a > QUARTER and b < THREEQU:
            charstack[0] += 1
            a = 2 * a - HALF
            b = 2 * b - HALF

        assert a <= HALF
        assert b >= HALF
        assert a >= 0
        assert b <= ONE

    # Terminate: emit bits on the side of HALF where the interval is wider.
    if HALF - a > b - HALF:
        w = HALF - a
        out.append(clear(0, charstack))
        while w < HALF:
            out.append(clear(1, charstack))
            w *= 2
    else:
        w = b - HALF
        out.append(clear(1, charstack))
        while w < HALF:
            out.append(clear(0, charstack))
            w *= 2

    return "".join(out)
|
||||||
|
|
||||||
|
|
||||||
def decode(string, N=10000, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
    """Decode a bit string produced by ``encode`` back into source symbols.

    Args:
        string: iterable of encoded characters.  Only '0' and '1' narrow the
            encoded interval; other characters leave it unchanged.
        N: number of source characters still to be recovered.  Decoding
            stops once N symbols have been produced or the input runs out.
        c0: pseudo-count for symbol '0'; must be > 0.
        c1: pseudo-count for symbol '1'; must be > 0.
        adaptive: when true, counts are updated after every decoded symbol,
            mirroring the encoder; when 0, the probability of '0' stays
            fixed at c0 / (c0 + c1).
        verbose: accepted for interface compatibility; not used here.

    Returns:
        The decoded source symbols as a string of '0' and '1' characters.

    Prints "warningA" / "warningB" under the same conditions as the encoder.
    """
    assert c0 > 0
    assert c1 > 0

    b = ONE
    a = 0
    model_needs_updating = 1
    if adaptive == 0:
        p0 = c0 * 1.0 / (c0 + c1)

    out = []

    u = 0
    v = ONE
    for c in string:
        if N <= 0:
            break  # out of the string-reading loop

        # (u, v) is the current "encoded alphabet" binary interval,
        # and halfway is its midpoint.
        # (a, b) is the current "source alphabet" interval,
        # and boundary is the "midpoint".
        assert u >= 0
        assert v <= ONE
        # True division: under Python 3, u and v become floats from here on.
        halfway = u + (v - u) / 2
        if c == '1':
            u = halfway
        elif c == '0':
            v = halfway

        # Read bits until we can decide what the source symbol was.
        # Then emulate the encoder's computations,
        # and tie (u, v) to tag along for the ride.
        while True:
            if model_needs_updating:
                w = b - a
                if adaptive:
                    p0 = c0 * 1.0 / (c0 + c1)
                boundary = a + int(p0 * w)
                if boundary == a:
                    boundary += 1
                    print("warningA")
                if boundary == b:
                    boundary -= 1
                    print("warningB")
                model_needs_updating = 0

            if boundary <= u:
                # The encoded interval lies wholly in the '1' region.
                out.append("1")
                if adaptive:
                    c1 += 1.0
                a = boundary
                model_needs_updating = 1
                N -= 1
            elif boundary >= v:
                # The encoded interval lies wholly in the '0' region.
                out.append("0")
                if adaptive:
                    c0 += 1.0
                b = boundary
                model_needs_updating = 1
                N -= 1
            # otherwise: not enough bits have been read yet to decide.

            # Emulate outputting of bits by the encoder,
            # and tie (u, v) to tag along for the ride.
            while a >= HALF or b <= HALF:
                if a >= HALF:
                    a -= HALF
                    b -= HALF
                    u -= HALF
                    v -= HALF
                a *= 2
                b *= 2
                u *= 2
                v *= 2
                model_needs_updating = 1

            assert a <= HALF
            assert b >= HALF
            assert a >= 0
            assert b <= ONE

            # If the interval straddles HALF but is getting small,
            # rescale it (and (u, v) with it).
            while a > QUARTER and b < THREEQU:
                a = 2 * a - HALF
                b = 2 * b - HALF
                u = 2 * u - HALF
                v = 2 * v - HALF

            # This is the condition of the do-while loop: keep going only
            # while symbols remain and a symbol or rescale changed the state.
            if not (N > 0 and model_needs_updating):
                break

    return "".join(out)
|
|
||||||
|
|
||||||
|
|
||||||
def test():
    """Round-trip a set of sample bit strings through encode and decode.

    For each sample, prints the original, the encoded string and the decoded
    string, followed by "PASS" if the decoded string equals the original and
    "FAIL" otherwise.  Both directions use the pseudo-counts c0=10, c1=1.
    """
    tests = [
        "1010",
        "111",
        "00001000000000000000",
        "1",
        "10",
        "01",
        "0",
        "0000000",
        # One long sample, split over several literals for readability.
        (
            "00000000000000010000000000000000"
            "00000000000000001000000000000000"
            "00011000000"
        ),
    ]

    for s in tests:
        N = len(s)  # required for decoding later.
        print("original:", s)

        e = encode(s, 10, 1)
        print("encoded: ", e)

        ds = decode(e, N, 10, 1)
        print("decoded: ", ds)

        if ds != s:
            print("FAIL")
        else:
            print("PASS")

        print()


if __name__ == '__main__':
    test()
|
||||||
|
|
Loading…
Reference in a new issue