This commit is contained in:
Connor 2018-09-10 12:08:36 -07:00 committed by GitHub
parent 7321c3c9e2
commit 74cd548adb

View file

@ -1,177 +1,253 @@
# Arithmetic coding compressor and decompressor for binary strings.
# This is a cleaned-up version of AEncode.py,
# via: http://www.inference.org.uk/mackay/python/compress/ac/ac_encode.py
# main page: http://www.inference.org.uk/mackay/python/compress/
# It has been cleaned up (passes pycodestyle) and ported to Python 3.
# Default prior distribution: initial counts for the symbols '0' and '1'.
# encode() and decode() use them as the defaults for c0 and c1.
BETA0 = 1
BETA1 = 1

# The coders work on an integer interval [0, ONE) that is M bits wide.
M = 30
ONE = 1 << M
HALF = 1 << (M - 1)
QUARTER = 1 << (M - 2)
THREEQU = HALF + QUARTER
def clear(c, charstack):
    """Return bit ``c`` followed by the queued opposite bits; empty the queue.

    ``charstack`` is a one-element list whose only entry counts the
    undecided output characters still waiting to be printed.  Each of
    them is emitted as the complement of ``c`` (``1 - c``), and the counter
    is reset to zero in place.

    Args:
        c: the bit to emit, 0 or 1.
        charstack: one-element list holding the pending-character count.

    Returns:
        The text ``repr(c)`` followed by ``charstack[0]`` copies of
        ``repr(1 - c)``.
    """
    pending = charstack[0]
    charstack[0] = 0
    return repr(c) + repr(1 - c) * pending
def encode(string, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
    """Arithmetic-encode a string of '0'/'1' characters into a bit string.

    Args:
        string: source symbols.  Only '0' and '1' move the coding interval;
            any other character leaves it unchanged.
        c0: positive count for symbol '0'.
        c1: positive count for symbol '1'.
        adaptive: when truthy, the count of each encoded symbol is
            incremented by 1.0 and the probability of '0' is recomputed
            before every symbol; when equal to 0, the probability stays
            fixed at ``c0 / (c0 + c1)``.
        verbose: accepted for interface compatibility; not used.

    Returns:
        The encoded bits as a string of '0' and '1' characters.

    Side effects:
        Prints "warningA" / "warningB" whenever the boundary had to be
        nudged off an end of the current interval.
    """
    assert c0 > 0
    assert c1 > 0
    a = 0
    b = ONE
    if adaptive == 0:
        p0 = c0 * 1 / (c0 + c1)
    pieces = []
    charstack = [0]  # how many undecided characters remain to print
    for c in string:
        w = b - a
        if adaptive:
            cT = c0 + c1
            p0 = c0 * 1.0 / cT
        boundary = a + int(p0 * w)
        # these warnings mean that some of the probabilities
        # requested by the probabilistic model are so small
        # (compared to our integers) that we had to round them up
        # to bigger values.
        if boundary == a:
            boundary += 1
            print("warningA")
        if boundary == b:
            boundary -= 1
            print("warningB")
        if c == '1':
            a = boundary
            if adaptive:
                c1 += 1.0
        elif c == '0':
            b = boundary
            if adaptive:
                c0 += 1.0
        # ignore other characters

        # output bits while the interval lies entirely in one half
        while a >= HALF or b <= HALF:
            if a >= HALF:
                pieces.append(clear(1, charstack))
                a -= HALF
                b -= HALF
            else:
                pieces.append(clear(0, charstack))
            a *= 2
            b *= 2
        assert a <= HALF
        assert b >= HALF
        assert a >= 0
        assert b <= ONE
        # if the gap a-b is getting small, rescale it
        while a > QUARTER and b < THREEQU:
            charstack[0] += 1
            a = 2 * a - HALF
            b = 2 * b - HALF
        assert a <= HALF
        assert b >= HALF
        assert a >= 0
        assert b <= ONE

    # terminate: pick the side of HALF with the larger remaining part of
    # the interval and pad with the opposite bit until w reaches HALF
    if HALF - a > b - HALF:
        w = HALF - a
        pieces.append(clear(0, charstack))
        while w < HALF:
            pieces.append(clear(1, charstack))
            w *= 2
    else:
        w = b - HALF
        pieces.append(clear(1, charstack))
        while w < HALF:
            pieces.append(clear(0, charstack))
            w *= 2
    return "".join(pieces)
def decode(string, N=10000, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
    """Decode a bit string back into at most ``N`` source symbols.

    (u, v) is the current "encoded alphabet" binary interval, narrowed by
    one half for every '1' or '0' read from ``string``.  (a, b) is the
    current "source alphabet" interval and ``boundary`` splits it between
    the two source symbols.  Whenever (u, v) falls entirely on one side of
    ``boundary`` a source symbol is emitted, and the interval rescaling is
    then replayed on (a, b) with (u, v) rescaled alongside.

    Args:
        string: encoded bits as '0'/'1' characters.  Other characters do
            not narrow (u, v).
        N: number of source characters to recover; must be supplied, since
            decoding stops once this many symbols have been produced (or
            when ``string`` is exhausted).
        c0: positive count for symbol '0'.
        c1: positive count for symbol '1'.
        adaptive: when truthy, the count of each decoded symbol is
            incremented by 1.0 and the probability of '0' is recomputed
            before each decision; when equal to 0, it stays fixed at
            ``c0 / (c0 + c1)``.
        verbose: accepted for interface compatibility; not used.

    Returns:
        The decoded source symbols as a string of '0' and '1' characters.

    Side effects:
        Prints "warningA" / "warningB" whenever the boundary had to be
        nudged off an end of the current interval.
    """
    assert c0 > 0
    assert c1 > 0
    a = 0
    b = ONE
    u = 0
    v = ONE
    model_needs_updating = 1
    if adaptive == 0:
        p0 = c0 * 1 / (c0 + c1)
    decoded = []
    for c in string:
        if N <= 0:
            break  # out of the string-reading loop
        assert u >= 0
        assert v <= ONE
        # True division: u and v become floats from the first bit onwards.
        # NOTE(review): the pre-port Python 2 code used integer division
        # here - confirm that float midpoints are intended.
        halfway = u + (v - u) / 2
        if c == '1':
            u = halfway
        elif c == '0':
            v = halfway
        # Read bits until we can decide what the source symbol was.
        # Then emulate the encoder's computations,
        # and tie (u,v) to tag along for the ride.
        while True:
            if model_needs_updating:
                w = b - a
                if adaptive:
                    cT = c0 + c1
                    p0 = c0 * 1 / cT
                boundary = a + int(p0 * w)
                if boundary == a:
                    boundary += 1
                    print("warningA")
                if boundary == b:
                    boundary -= 1
                    print("warningB")
                model_needs_updating = 0
            if boundary <= u:
                decoded.append("1")
                if adaptive:
                    c1 += 1.0
                a = boundary
                model_needs_updating = 1
                N -= 1
            elif boundary >= v:
                decoded.append("0")
                if adaptive:
                    c0 += 1.0
                b = boundary
                model_needs_updating = 1
                N -= 1
            # otherwise not enough bits have been read yet to decide.

            # emulate outputting of bits by the encoder,
            # and tie (u,v) to tag along for the ride.
            while a >= HALF or b <= HALF:
                if a >= HALF:
                    a = a - HALF
                    b = b - HALF
                    u = u - HALF
                    v = v - HALF
                a *= 2
                b *= 2
                u *= 2
                v *= 2
                model_needs_updating = 1
            assert a <= HALF
            assert b >= HALF
            assert a >= 0
            assert b <= ONE
            # if the gap a-b is getting small, rescale it
            while a > QUARTER and b < THREEQU:
                a = 2 * a - HALF
                b = 2 * b - HALF
                u = 2 * u - HALF
                v = 2 * v - HALF
            # this is the condition for this do-while loop
            if not (N > 0 and model_needs_updating):
                break
    return "".join(decoded)
def test():
    """Round-trip sample bit strings through encode() and decode().

    For every sample the original, encoded and decoded strings are
    printed, followed by "PASS" when the decoded text equals the original
    and "FAIL" otherwise.  Both coders are called with counts c0=10, c1=1.
    """
    tests = [
        "1010",
        "111",
        "00001000000000000000",
        "1",
        "10",
        "01",
        "0",
        "0000000",
        """
        00000000000000010000000000000000
        00000000000000001000000000000000
        00011000000
        """,
    ]

    for s in tests:
        # an ugly way to remove whitespace and newlines from the test strings:
        s = "".join(s.split())
        N = len(s)  # required for decoding later.
        print("original:", s)
        e = encode(s, 10, 1)
        print("encoded: ", e)
        ds = decode(e, N, 10, 1)
        print("decoded: ", ds)
        if ds != s:
            print("FAIL")
        else:
            print("PASS")
        print()


if __name__ == '__main__':
    test()