2018-09-10 12:08:36 -07:00 · 2018-09-10 12:08:36 -07:00 · 74cd548adb
commit 74cd548adb
parent 7321c3c9e2
1 changed files with 226 additions and 150 deletions
--- a/ac_encode.py
+++ b/ac_encode.py
@@ -1,177 +1,253 @@
-## Arithmetic coding compressor and uncompressor for binary source.
-## This is a cleaned-up version of AEncode.py 
+# Arithmetic coding compressor and decompressor for binary strings.
+# via: http://www.inference.org.uk/mackay/python/compress/ac/ac_encode.py
+# main page: http://www.inference.org.uk/mackay/python/compress/
+# This has been cleaned up (passes pycodestyle) and ported to Python 3.

-BETA0=1;BETA1=1 ## default prior distribution
-M = 30 ; ONE = (1<<M) ; HALF = (1<<(M-1))
-QUARTER = (1<<(M-2)) ; THREEQU = HALF+QUARTER
-def clear (c,charstack):
-    ## print out character c, and other queued characters 
-    a = `c`+`(1-c)`*charstack[0]
-    charstack[0]=0
+# default prior distribution
+BETA0 = 1
+BETA1 = 1
+
+M = 30
+ONE = 1 << M
+HALF = 1 << (M - 1)
+QUARTER = 1 << (M - 2)
+THREEQU = HALF + QUARTER
+
+
+def clear(c, charstack):
+    # return c followed by the queued (undecided) opposite characters
+    a = repr(c) + repr(1 - c) * charstack[0]
+    charstack[0] = 0
    return a
-    pass

-def encode (string, c0=BETA0, c1=BETA1, adaptive=1,verbose=0):
-    b=ONE; a=0;  tot0=0;tot1=0;     assert c0>0; assert c1>0
-    if adaptive==0:
-        p0 = c0*1.0/(c0+c1)
-        pass
-    ans="";
-    charstack=[0] ## how many undecided characters remain to print
+
+def encode(string, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
+    assert c0 > 0
+    assert c1 > 0
+
+    b = ONE
+    a = 0
+    tot0 = 0
+    tot1 = 0
+    if adaptive == 0:
+        p0 = c0 * 1 / (c0 + c1)
+    ans = ""
+    charstack = [0]  # how many undecided characters remain to print
+
    for c in string:
-        w=b-a
-        if adaptive :
-            cT = c0+c1
-            p0 = c0*1.0/cT
-            pass
-        boundary = a + int(p0*w)
-        if (boundary == a): boundary += 1; print "warningA"; pass # these warnings mean that some of the probabilities 
-        if (boundary == b): boundary -= 1; print "warningB"; pass # requested by the probabilistic model
-        ## are so small (compared to our integers) that we had to round them up to bigger values
-        if (c=='1') :
+        w = b - a
+        if adaptive:
+            cT = c0 + c1
+            p0 = c0 * 1.0 / cT
+        boundary = a + int(p0 * w)
+
+        # these warnings mean that some of the probabilities
+        # requested by the probabilistic model are so small
+        # (compared to our integers) that we had to round them up
+        # to bigger values.
+        if boundary == a:
+            boundary += 1
+            print("warningA")
+        if boundary == b:
+            boundary -= 1
+            print("warningB")
+
+        if c == '1':
            a = boundary
-            tot1 += 1 ; if adaptive: c1 += 1.0 ; pass
-        elif (c=='0'):
-            b = boundary 
-            tot0 +=1 ; if adaptive: c0 += 1.0 ; pass
-            pass ## ignore other characters
+            tot1 += 1
+            if adaptive:
+                c1 += 1.0
+        elif c == '0':
+            b = boundary
+            tot0 += 1
+            if adaptive:
+                c0 += 1.0
+            # ignore other characters

-        while ( (a>=HALF) or (b<=HALF) ) :   ## output bits 
-            if (a>=HALF) :
-                ans = ans + clear(1,charstack)
-                a = a-HALF ;
-                b = b-HALF ;
-            else :
-                ans = ans + clear(0,charstack)
-                pass
-            a *= 2 ;      b *= 2
-            pass
+        while a >= HALF or b <= HALF:  # output bits
+            if a >= HALF:
+                ans += clear(1, charstack)
+                a -= HALF
+                b -= HALF
+            else:
+                ans += clear(0, charstack)
+            a *= 2
+            b *= 2

-        assert a<=HALF; assert b>=HALF; assert a>=0; assert b<=ONE
-        ## if the gap a-b is getting small, rescale it
-        while ( (a>QUARTER) and (b<THREEQU) ):
+        assert a <= HALF
+        assert b >= HALF
+        assert a >= 0
+        assert b <= ONE
+
+        # while (a, b) lies inside (QUARTER, THREEQU), rescale it
+        while a > QUARTER and b < THREEQU:
            charstack[0] += 1
            a = 2*a-HALF
            b = 2*b-HALF
-            pass
-        
-        assert a<=HALF; assert b>=HALF; assert a>=0; assert b<=ONE
-        pass
+
+        assert a <= HALF
+        assert b >= HALF
+        assert a >= 0
+        assert b <= ONE

    # terminate
-    if ( (HALF-a) > (b-HALF) ) :
-        w = (HALF-a) ;
-        ans = ans + clear(0,charstack)
-        while ( w < HALF ) :
-            ans = ans + clear(1,charstack)
-            w *=2
-            pass
-        pass
-    else :
-        w = (b-HALF) ;
-        ans = ans + clear(1,charstack)
-        while ( w < HALF ) :
-            ans = ans + clear(0,charstack)
-            w *=2
-            pass
-        pass
+    if HALF - a > b - HALF:
+        w = HALF - a
+        ans += clear(0, charstack)
+        while w < HALF:
+            ans += clear(1, charstack)
+            w *= 2
+    else:
+        w = b - HALF
+        ans += clear(1, charstack)
+        while w < HALF:
+            ans += clear(0, charstack)
+            w *= 2
+
    return ans
-    pass


+def decode(string, N=10000, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
+    # caller must supply N, the number of source characters left to decode.
+    assert c0 > 0
+    assert c1 > 0

-def decode (string, N=10000, c0=BETA0, c1=BETA1, adaptive=1,verbose=0):
-    ## must supply N, the number of source characters remaining.
-    b=ONE ; a=0 ;      tot0=0;tot1=0  ;     assert c0>0 ; assert c1>0
+    b = ONE
+    a = 0
+    tot0 = 0
+    tot1 = 0
    model_needs_updating = 1
-    if adaptive==0:
-        p0 = c0*1.0/(c0+c1)
-        pass
-    ans=""
-    u=0 ; v=ONE
-    for c in string :
-        if N<=0 :
-            break ## break out of the string-reading loop
-        assert N>0
-##    // (u,v) is the current "encoded alphabet" binary interval, and halfway is its midpoint.
-##    // (a,b) is the current "source alphabet" interval, and boundary is the "midpoint"
-        assert u>=0 ; assert v<=ONE
-        halfway = u + (v-u)/2 
-        if( c == '1' ) :
-            u = halfway 
-        elif ( c=='0' ):
+    if adaptive == 0:
+        p0 = c0 * 1 / (c0 + c1)
+    ans = ""
+
+    u = 0
+    v = ONE
+    for c in string:
+        if N <= 0:
+            break  # out of the string-reading loop
+        assert N > 0
+        # (u,v) is the current "encoded alphabet" binary interval,
+        # and halfway is its midpoint.
+        # (a,b) is the current "source alphabet" interval,
+        # and boundary is the "midpoint"
+        assert u >= 0
+        assert v <= ONE
+        halfway = u + (v - u) / 2
+        if c == '1':
+            u = halfway
+        elif c == '0':
            v = halfway
-        else:
-            pass
-##    // Read bits until we can decide what the source symbol was.
-##    // Then emulate the encoder's computations, and tie (u,v) to tag along for the ride.
-        while (1): ## condition at end 
+
+        # Read bits until we can decide what the source symbol was.
+        # Then emulate the encoder's computations,
+        # and have (u,v) tag along for the ride.
+        while 1:
            firsttime = 0
-            if(model_needs_updating):
-                w = b-a
-                if adaptive :
-                    cT = c0 + c1 ;   p0 = c0 *1.0/cT 
-                    pass
-                boundary = a + int(p0*w)  
-                if (boundary == a): boundary += 1; print "warningA"; pass
-                if (boundary == b): boundary -= 1; print "warningB"; pass
+            if model_needs_updating:
+                w = b - a
+                if adaptive:
+                    cT = c0 + c1
+                    p0 = c0 * 1 / cT
+                boundary = a + int(p0*w)
+                if boundary == a:
+                    boundary += 1
+                    print("warningA")
+                if boundary == b:
+                    boundary -= 1
+                    print("warningB")
                model_needs_updating = 0
-                pass
-            if  ( boundary <= u ) :
-                ans = ans + "1";             tot1 +=1 ; if adaptive: c1 += 1.0 ; pass
-                a = boundary ;	model_needs_updating = 1 ; 	N-=1 
-            elif ( boundary >= v )  :
-                ans = ans + "0";             tot0 +=1 ; if adaptive: c0 += 1.0 ; pass
-                b = boundary ;	model_needs_updating = 1 ; 	N-=1 
-##	// every time we discover a source bit, implement exactly the 
-##	// computations that were done by the encoder (below). 
-            else :
-##	// not enough bits have yet been read to know the decision.
+
+            if boundary <= u:
+                ans += "1"
+                tot1 += 1
+                if adaptive:
+                    c1 += 1.0
+                a = boundary
+                model_needs_updating = 1
+                N -= 1
+            elif boundary >= v:
+                ans += "0"
+                tot0 += 1
+                if adaptive:
+                    c0 += 1.0
+                b = boundary
+                model_needs_updating = 1
+                N -= 1
+            else:
+                # not enough bits have yet been read to know the decision.
                pass

-##      // emulate outputting of bits by the encoder, and tie (u,v) to tag along for the ride.
-            while ( (a>=HALF) or (b<=HALF) ) :
-                if (a>=HALF) :
-                    a = a-HALF ;  b = b-HALF ;    u = u-HALF ;     v = v-HALF 
-                    pass
-                else :
-                    pass
-                a *= 2 ;      b *= 2 ;      u *= 2 ;      v *= 2 ;
-                model_needs_updating = 1 
-                pass
+            # emulate outputting of bits by the encoder,
+            # and have (u,v) tag along for the ride.
+            while a >= HALF or b <= HALF:
+                if a >= HALF:
+                    a = a - HALF
+                    b = b - HALF
+                    u = u - HALF
+                    v = v - HALF
+                a *= 2
+                b *= 2
+                u *= 2
+                v *= 2
+                model_needs_updating = 1

-            assert a<=HALF;            assert b>=HALF;            assert a>=0;            assert b<=ONE
-        ## if the gap a-b is getting small, rescale it
-            while ( (a>QUARTER) and (b<THREEQU) ):
-                a = 2*a-HALF;  b = 2*b-HALF ; u = 2*u-HALF ;  v = 2*v-HALF
-                pass
-            if not (N>0 and model_needs_updating) : ## this is the "while" for this "do" loop
+            assert a <= HALF
+            assert b >= HALF
+            assert a >= 0
+            assert b <= ONE
+
+            # while (a, b) lies inside (QUARTER, THREEQU), rescale it
+            while a > QUARTER and b < THREEQU:
+                a = 2 * a - HALF
+                b = 2 * b - HALF
+                u = 2 * u - HALF
+                v = 2 * v - HALF
+
+            # this is the condition for this do-while loop
+            if not (N > 0 and model_needs_updating):
                break
-            pass
-        pass
-    return ans
-    pass        

-    
+    return ans
+
+
 def test():
-    sl=["1010"] 
-    sl=["1010", "111", "00001000000000000000",\
-        "1", "10" , "01" , "0" ,"0000000", \
-        "000000000000000100000000000000000000000000000000100000000000000000011000000" ]
-    for s in sl:
-        print "encoding", s
-        N=len(s)
-        e = encode(s,10,1)
-        print "decoding", e
-        ds = decode(e,N,10,1)
-        print ds
-        if  (ds != s) :
-            print s 
-            print "ERR@"
-            pass
+    tests = [
+        "1010",
+        "111",
+        "00001000000000000000",
+        "1",
+        "10",
+        "01",
+        "0",
+        "0000000",
+        """
+        00000000000000010000000000000000
+        00000000000000001000000000000000
+        00011000000
+        """,
+    ]
+
+    for s in tests:
+        # an ugly way to remove whitespace and newlines from the test strings:
+        s = "".join(s.split())
+
+        N = len(s)  # required for decoding later.
+        print("original:", s)
+
+        e = encode(s, 10, 1)
+        print("encoded: ", e)
+
+        ds = decode(e, N, 10, 1)
+        print("decoded: ", ds)
+
+        if ds != s:
+            print("FAIL")
        else:
-            print "ok ---------- "
-        pass
-    pass
-    
-if __name__ == '__main__': test()
+            print("PASS")
+
+        print()
+
+
+if __name__ == '__main__':
+    test()