## Arithmetic coding compressor and uncompressor for binary source.
## This is a cleaned-up version of AEncode.py
##
## Recovered from the diff of commit 7321c3c9e2a0d08113b92d33fe9f711bf25bf7d2
## (Author: Connor, Mon Sep 10 12:08:23 2018 -0700), which added ac_encode.py.
##
## NOTE(review): the recovered text had every span between a '<' and the next
## '>' stripped out.  The following pieces were therefore reconstructed from
## their call sites and from the surviving fragments, and should be confirmed
## against the original file if it is available:
##   * the shift expressions defining ONE / HALF / QUARTER, and THREEQU
##   * the clear() helper
##   * the signature of encode() (mirrors decode() without N)
##   * the bodies of the two "rescale" loops

BETA0 = 1
BETA1 = 1  ## default prior distribution (pseudo-counts for '0' and '1')

M = 30                       ## number of bits of integer precision
ONE = 1 << M                 ## integer that represents probability 1
HALF = 1 << (M - 1)
QUARTER = 1 << (M - 2)
THREEQU = HALF + QUARTER     ## three quarters (name reconstructed)


def clear(c, charstack):
    """Return bit ``c`` followed by all queued "undecided" bits.

    ``charstack`` is a one-element list used as a mutable counter of bits
    whose value was deferred by the rescaling step.  Each deferred bit is
    the complement of ``c``.  The counter is reset to zero.
    """
    bits = str(c) + str(1 - c) * charstack[0]
    charstack[0] = 0
    return bits


def _boundary(a, b, p0):
    """Split the integer interval [a, b) at probability ``p0`` for symbol '0'.

    Encoder and decoder must compute exactly the same split point, so both
    call this helper.  The result is forced strictly inside (a, b); the
    warnings mean that a probability requested by the model is so small
    (compared to our integers) that it had to be rounded up.
    """
    boundary = a + int(p0 * (b - a))
    if boundary == a:
        boundary += 1
        print("warningA")
    if boundary == b:
        boundary -= 1
        print("warningB")
    return boundary


def encode(string, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
    """Arithmetic-encode a string of '0'/'1' characters.

    Parameters:
        string:   source symbols; characters other than '0' and '1' are
                  ignored.
        c0, c1:   positive (pseudo-)counts; P('0') = c0 / (c0 + c1).
        adaptive: if true, the count of each symbol is incremented after it
                  is coded, so the model adapts to the source.
        verbose:  accepted for interface compatibility; unused.

    Returns:
        The encoded bit string (characters '0' and '1').
    """
    assert c0 > 0
    assert c1 > 0
    a = 0
    b = ONE
    if not adaptive:
        p0 = c0 * 1.0 / (c0 + c1)
    pieces = []        ## output fragments, joined once at the end
    charstack = [0]    ## how many undecided characters remain to print

    for c in string:
        if adaptive:
            p0 = c0 * 1.0 / (c0 + c1)
        boundary = _boundary(a, b, p0)
        if c == '1':
            a = boundary
            if adaptive:
                c1 += 1.0
        elif c == '0':
            b = boundary
            if adaptive:
                c0 += 1.0
        ## any other character is ignored: the interval is left unchanged

        ## output bits: the interval lies entirely in one half
        while a >= HALF or b <= HALF:
            if a >= HALF:
                pieces.append(clear(1, charstack))
                a -= HALF
                b -= HALF
            else:
                pieces.append(clear(0, charstack))
            a *= 2
            b *= 2

        assert a <= HALF
        assert b >= HALF
        assert a >= 0
        assert b <= ONE
        ## if the gap b-a is getting small around HALF, rescale it and
        ## remember that one more bit is still undecided
        while a > QUARTER and b < THREEQU:
            charstack[0] += 1
            a = 2 * a - HALF
            b = 2 * b - HALF

        assert a <= HALF
        assert b >= HALF
        assert a >= 0
        assert b <= ONE

    ## terminate: emit enough bits to pin the codeword inside [a, b),
    ## heading for whichever side of HALF has more room
    if (HALF - a) > (b - HALF):
        w = HALF - a
        first, filler = 0, 1
    else:
        w = b - HALF
        first, filler = 1, 0
    pieces.append(clear(first, charstack))
    while w < HALF:
        pieces.append(clear(filler, charstack))
        w *= 2
    return "".join(pieces)


def decode(string, N=10000, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
    """Decode a bit string produced by :func:`encode`.

    Parameters:
        string:   encoded bits; characters other than '0' and '1' are
                  ignored.
        N:        must be supplied: the number of source characters
                  remaining to be decoded.
        c0, c1:   the same positive (pseudo-)counts given to the encoder.
        adaptive: the same flag given to the encoder.
        verbose:  accepted for interface compatibility; unused.

    Returns:
        The decoded source string; at most ``N`` characters.
    """
    assert c0 > 0
    assert c1 > 0
    ## (a,b) is the current "source alphabet" interval, and boundary is its
    ## "midpoint".  (u,v) is the current "encoded alphabet" binary interval,
    ## and halfway is its midpoint.
    a = 0
    b = ONE
    u = 0
    v = ONE
    model_needs_updating = True
    boundary = None
    if not adaptive:
        p0 = c0 * 1.0 / (c0 + c1)
    decoded = []

    for c in string:
        if N <= 0:
            break  ## all requested source characters have been produced
        assert u >= 0
        assert v <= ONE
        ## integer (floor) division keeps every quantity an exact integer
        halfway = u + (v - u) // 2
        if c == '1':
            u = halfway
        elif c == '0':
            v = halfway

        ## Read bits until we can decide what the source symbol was.
        ## Then emulate the encoder's computations, and tie (u,v) to tag
        ## along for the ride.
        while True:
            if model_needs_updating:
                if adaptive:
                    p0 = c0 * 1.0 / (c0 + c1)
                boundary = _boundary(a, b, p0)
                model_needs_updating = False

            if boundary <= u:
                decoded.append("1")
                if adaptive:
                    c1 += 1.0
                a = boundary
                model_needs_updating = True
                N -= 1
            elif boundary >= v:
                decoded.append("0")
                if adaptive:
                    c0 += 1.0
                b = boundary
                model_needs_updating = True
                N -= 1
            ## otherwise not enough bits have yet been read to know the
            ## decision

            ## emulate outputting of bits by the encoder
            while a >= HALF or b <= HALF:
                if a >= HALF:
                    a -= HALF
                    b -= HALF
                    u -= HALF
                    v -= HALF
                a *= 2
                b *= 2
                u *= 2
                v *= 2
                model_needs_updating = True

            assert a <= HALF
            assert b >= HALF
            assert a >= 0
            assert b <= ONE
            ## if the gap b-a is getting small, rescale it
            while a > QUARTER and b < THREEQU:
                a = 2 * a - HALF
                b = 2 * b - HALF
                u = 2 * u - HALF
                v = 2 * v - HALF

            ## keep going only while a symbol was just decoded and more
            ## symbols are wanted; otherwise read the next encoded bit
            if not (N > 0 and model_needs_updating):
                break
    return "".join(decoded)


def test():
    """Encode and decode a few sample strings, printing the outcome."""
    samples = [
        "1010", "111", "00001000000000000000",
        "1", "10", "01", "0", "0000000",
        "000000000000000100000000000000000000000000000000100000000000000000011000000",
    ]
    for s in samples:
        print("encoding %s" % s)
        e = encode(s, 10, 1)
        print("decoding %s" % e)
        ds = decode(e, len(s), 10, 1)
        print(ds)
        if ds != s:
            print(s)
            print("ERR@")
        else:
            print("ok ---------- ")


if __name__ == '__main__':
    test()