From 74cd548adb06bcc6ae595b3b0e2e14e97532253a Mon Sep 17 00:00:00 2001 From: Connor Date: Mon, 10 Sep 2018 12:08:36 -0700 Subject: [PATCH] --- ac_encode.py | 376 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 226 insertions(+), 150 deletions(-) diff --git a/ac_encode.py b/ac_encode.py index cad56a8..9bf93d4 100644 --- a/ac_encode.py +++ b/ac_encode.py @@ -1,177 +1,253 @@ -## Arithmetic coding compressor and uncompressor for binary source. -## This is a cleaned-up version of AEncode.py +# Arithmetic coding compressor and decompressor for binary strings. +# via: http://www.inference.org.uk/mackay/python/compress/ac/ac_encode.py +# main page: http://www.inference.org.uk/mackay/python/compress/ +# this has been cleaned up (passes pycodestyle) and ported to python 3. -BETA0=1;BETA1=1 ## default prior distribution -M = 30 ; ONE = (1<0; assert c1>0 - if adaptive==0: - p0 = c0*1.0/(c0+c1) - pass - ans=""; - charstack=[0] ## how many undecided characters remain to print + +def encode(string, c0=BETA0, c1=BETA1, adaptive=1, verbose=0): + assert c0 > 0 + assert c1 > 0 + + b = ONE + a = 0 + tot0 = 0 + tot1 = 0 + if adaptive == 0: + p0 = c0 * 1 / (c0 + c1) + ans = "" + charstack = [0] # how many undecided characters remain to print + for c in string: - w=b-a - if adaptive : - cT = c0+c1 - p0 = c0*1.0/cT - pass - boundary = a + int(p0*w) - if (boundary == a): boundary += 1; print "warningA"; pass # these warnings mean that some of the probabilities - if (boundary == b): boundary -= 1; print "warningB"; pass # requested by the probabilistic model - ## are so small (compared to our integers) that we had to round them up to bigger values - if (c=='1') : + w = b - a + if adaptive: + cT = c0 + c1 + p0 = c0 * 1.0 / cT + boundary = a + int(p0 * w) + + # these warnings mean that some of the probabilities + # requested by the probabilistic model are so small + # (compared to our integers) that we had to round them up + # to bigger values. + if boundary == a: + boundary += 1 + print("warningA") + if boundary == b: + boundary -= 1 + print("warningB") + + if c == '1': a = boundary - tot1 += 1 ; if adaptive: c1 += 1.0 ; pass - elif (c=='0'): - b = boundary - tot0 +=1 ; if adaptive: c0 += 1.0 ; pass - pass ## ignore other characters + tot1 += 1 + if adaptive: + c1 += 1.0 + elif c == '0': + b = boundary + tot0 += 1 + if adaptive: + c0 += 1.0 + # ignore other characters - while ( (a>=HALF) or (b<=HALF) ) : ## output bits - if (a>=HALF) : - ans = ans + clear(1,charstack) - a = a-HALF ; - b = b-HALF ; - else : - ans = ans + clear(0,charstack) - pass - a *= 2 ; b *= 2 - pass + while a >= HALF or b <= HALF: # output bits + if a >= HALF: + ans += clear(1, charstack) + a -= HALF + b -= HALF + else: + ans += clear(0, charstack) + a *= 2 + b *= 2 - assert a<=HALF; assert b>=HALF; assert a>=0; assert b<=ONE - ## if the gap a-b is getting small, rescale it - while ( (a>QUARTER) and (b= HALF + assert a >= 0 + assert b <= ONE + + # if the gap a-b is getting small, rescale it + while a > QUARTER and b < THREEQU: charstack[0] += 1 a = 2*a-HALF b = 2*b-HALF - pass - - assert a<=HALF; assert b>=HALF; assert a>=0; assert b<=ONE - pass + + assert a <= HALF + assert b >= HALF + assert a >= 0 + assert b <= ONE # terminate - if ( (HALF-a) > (b-HALF) ) : - w = (HALF-a) ; - ans = ans + clear(0,charstack) - while ( w < HALF ) : - ans = ans + clear(1,charstack) - w *=2 - pass - pass - else : - w = (b-HALF) ; - ans = ans + clear(1,charstack) - while ( w < HALF ) : - ans = ans + clear(0,charstack) - w *=2 - pass - pass + if HALF - a > b - HALF: + w = HALF - a + ans += clear(0, charstack) + while w < HALF: + ans += clear(1, charstack) + w *= 2 + else: + w = b - HALF + ans += clear(1, charstack) + while w < HALF: + ans += clear(0, charstack) + w *= 2 + return ans - pass +def decode(string, N=10000, c0=BETA0, c1=BETA1, adaptive=1, verbose=0): + # must supply N, the number of source characters remaining. + assert c0 > 0 + assert c1 > 0 -def decode (string, N=10000, c0=BETA0, c1=BETA1, adaptive=1,verbose=0): - ## must supply N, the number of source characters remaining. - b=ONE ; a=0 ; tot0=0;tot1=0 ; assert c0>0 ; assert c1>0 + b = ONE + a = 0 + tot0 = 0 + tot1 = 0 model_needs_updating = 1 - if adaptive==0: - p0 = c0*1.0/(c0+c1) - pass - ans="" - u=0 ; v=ONE - for c in string : - if N<=0 : - break ## break out of the string-reading loop - assert N>0 -## // (u,v) is the current "encoded alphabet" binary interval, and halfway is its midpoint. -## // (a,b) is the current "source alphabet" interval, and boundary is the "midpoint" - assert u>=0 ; assert v<=ONE - halfway = u + (v-u)/2 - if( c == '1' ) : - u = halfway - elif ( c=='0' ): + if adaptive == 0: + p0 = c0 * 1 / (c0 + c1) + ans = "" + + u = 0 + v = ONE + for c in string: + if N <= 0: + break # out of the string-reading loop + assert N > 0 + # (u,v) is the current "encoded alphabet" binary interval, + # and halfway is its midpoint. + # (a,b) is the current "source alphabet" interval, + # and boundary is the "midpoint" + assert u >= 0 + assert v <= ONE + halfway = u + (v - u) / 2 + if c == '1': + u = halfway + elif c == '0': v = halfway - else: - pass -## // Read bits until we can decide what the source symbol was. -## // Then emulate the encoder's computations, and tie (u,v) to tag along for the ride. - while (1): ## condition at end + + # Read bits until we can decide what the source symbol was. + # Then emulate the encoder's computations, + # and tie (u,v) to tag along for the ride. + while 1: firsttime = 0 - if(model_needs_updating): - w = b-a - if adaptive : - cT = c0 + c1 ; p0 = c0 *1.0/cT - pass - boundary = a + int(p0*w) - if (boundary == a): boundary += 1; print "warningA"; pass - if (boundary == b): boundary -= 1; print "warningB"; pass + if model_needs_updating: + w = b - a + if adaptive: + cT = c0 + c1 + p0 = c0 * 1 / cT + boundary = a + int(p0*w) + if boundary == a: + boundary += 1 + print("warningA") + if boundary == b: + boundary -= 1 + print("warningB") model_needs_updating = 0 - pass - if ( boundary <= u ) : - ans = ans + "1"; tot1 +=1 ; if adaptive: c1 += 1.0 ; pass - a = boundary ; model_needs_updating = 1 ; N-=1 - elif ( boundary >= v ) : - ans = ans + "0"; tot0 +=1 ; if adaptive: c0 += 1.0 ; pass - b = boundary ; model_needs_updating = 1 ; N-=1 -## // every time we discover a source bit, implement exactly the -## // computations that were done by the encoder (below). - else : -## // not enough bits have yet been read to know the decision. + + if boundary <= u: + ans += "1" + tot1 += 1 + if adaptive: + c1 += 1.0 + a = boundary + model_needs_updating = 1 + N -= 1 + elif boundary >= v: + ans += "0" + tot0 += 1 + if adaptive: + c0 += 1.0 + b = boundary + model_needs_updating = 1 + N -= 1 + else: + # not enough bits have yet been read to know the decision. pass -## // emulate outputting of bits by the encoder, and tie (u,v) to tag along for the ride. - while ( (a>=HALF) or (b<=HALF) ) : - if (a>=HALF) : - a = a-HALF ; b = b-HALF ; u = u-HALF ; v = v-HALF - pass - else : - pass - a *= 2 ; b *= 2 ; u *= 2 ; v *= 2 ; - model_needs_updating = 1 - pass + # emulate outputting of bits by the encoder, + # and tie (u,v) to tag along for the ride. + while a >= HALF or b <= HALF: + if a >= HALF: + a = a - HALF + b = b - HALF + u = u - HALF + v = v - HALF + a *= 2 + b *= 2 + u *= 2 + v *= 2 + model_needs_updating = 1 - assert a<=HALF; assert b>=HALF; assert a>=0; assert b<=ONE - ## if the gap a-b is getting small, rescale it - while ( (a>QUARTER) and (b0 and model_needs_updating) : ## this is the "while" for this "do" loop + assert a <= HALF + assert b >= HALF + assert a >= 0 + assert b <= ONE + + # if the gap a-b is getting small, rescale it + while a > QUARTER and b < THREEQU: + a = 2 * a - HALF + b = 2 * b - HALF + u = 2 * u - HALF + v = 2 * v - HALF + + # this is the condition for this do-while loop + if not (N > 0 and model_needs_updating): break - pass - pass - return ans - pass - + return ans + + def test(): - sl=["1010"] - sl=["1010", "111", "00001000000000000000",\ - "1", "10" , "01" , "0" ,"0000000", \ - "000000000000000100000000000000000000000000000000100000000000000000011000000" ] - for s in sl: - print "encoding", s - N=len(s) - e = encode(s,10,1) - print "decoding", e - ds = decode(e,N,10,1) - print ds - if (ds != s) : - print s - print "ERR@" - pass + tests = [ + "1010", + "111", + "00001000000000000000", + "1", + "10", + "01", + "0", + "0000000", + """ + 00000000000000010000000000000000 + 00000000000000001000000000000000 + 00011000000 + """, + ] + + for s in tests: + # an ugly way to remove whitespace and newlines from the test strings: + s = "".join(s.split()) + + N = len(s) # required for decoding later. + print("original:", s) + + e = encode(s, 10, 1) + print("encoded: ", e) + + ds = decode(e, N, 10, 1) + print("decoded: ", ds) + + if ds != s: + print("FAIL") else: - print "ok ---------- " - pass - pass - -if __name__ == '__main__': test() \ No newline at end of file + print("PASS") + + print() + + +if __name__ == '__main__': + test()