2018-09-10 12:08:36 -07:00 · 2018-09-10 12:08:36 -07:00 · 74cd548adb
commit 74cd548adb
parent 7321c3c9e2
1 changed files with 226 additions and 150 deletions
--- a/ac_encode.py
+++ b/ac_encode.py
@ -1,177 +1,253 @@
-## Arithmetic coding compressor and uncompressor for binary source.
+# Arithmetic coding compressor and decompressor for binary strings.
-## This is a cleaned-up version of AEncode.py 
+# via: http://www.inference.org.uk/mackay/python/compress/ac/ac_encode.py
 # main page: http://www.inference.org.uk/mackay/python/compress/
 # this has been cleaned up (passes pycodestyle) and ported to python 3.
 # Default prior distribution: pseudo-counts for the symbols '0' and '1'.
 BETA0 = 1
 BETA1 = 1

 # The coder works on integers in the range [0, ONE], where ONE = 2**M.
 M = 30
 ONE = 1 << M
 HALF = 1 << (M - 1)
 QUARTER = 1 << (M - 2)
 THREEQU = HALF + QUARTER

 def clear(c, charstack):
     """Return bit ``c`` followed by all queued opposite bits.

     ``charstack`` is a one-element list whose single entry counts the
     undecided bits still waiting to be emitted.  Each of them is emitted
     as ``1 - c``, and the counter is reset to zero in place.

     Args:
         c: the decided bit, 0 or 1.
         charstack: one-element list holding the pending-bit count.

     Returns:
         The emitted bits as a string, e.g. ``clear(1, [2]) == "100"``.
     """
     pending = charstack[0]
     charstack[0] = 0
     return repr(c) + repr(1 - c) * pending

 def encode(string, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
     """Arithmetic-encode a string of '0' and '1' characters.

     The current source interval is tracked as integers ``(a, b)`` inside
     ``[0, ONE]``.  For each symbol the interval is split at ``boundary``:
     '0' keeps the lower part and '1' keeps the upper part.  Characters
     other than '0' and '1' are ignored.

     Args:
         string: the source symbols.
         c0: positive pseudo-count for '0'.
         c1: positive pseudo-count for '1'.
         adaptive: if true, the count of each symbol seen is incremented,
             so the probability of '0' is re-estimated before every
             symbol; otherwise it stays fixed at ``c0 / (c0 + c1)``.
         verbose: unused; kept for interface compatibility.

     Returns:
         The encoded bits as a string of '0' and '1' characters.
     """
     assert c0 > 0
     assert c1 > 0
     a = 0
     b = ONE
     if not adaptive:
         p0 = c0 / (c0 + c1)
     out = []
     charstack = [0]  # how many undecided characters remain to print

     for c in string:
         w = b - a
         if adaptive:
             p0 = c0 / (c0 + c1)
         boundary = a + int(p0 * w)

         # These warnings mean that some of the probabilities requested by
         # the probabilistic model are so small (compared to our integers)
         # that we had to round them up to bigger values.
         if boundary == a:
             boundary += 1
             print("warningA")
         if boundary == b:
             boundary -= 1
             print("warningB")

         if c == '1':
             a = boundary
             if adaptive:
                 c1 += 1.0
         elif c == '0':
             b = boundary
             if adaptive:
                 c0 += 1.0
         # any other character is ignored

         # Output bits while the interval lies entirely in one half.
         while a >= HALF or b <= HALF:
             if a >= HALF:
                 out.append(clear(1, charstack))
                 a -= HALF
                 b -= HALF
             else:
                 out.append(clear(0, charstack))
             a *= 2
             b *= 2

         assert a <= HALF
         assert b >= HALF
         assert a >= 0
         assert b <= ONE

         # If the gap between a and b is getting small, rescale it and
         # remember one more undecided bit.
         while a > QUARTER and b < THREEQU:
             charstack[0] += 1
             a = 2 * a - HALF
             b = 2 * b - HALF

         assert a <= HALF
         assert b >= HALF
         assert a >= 0
         assert b <= ONE

     # Terminate: emit enough bits to pin down a point inside (a, b).
     if HALF - a > b - HALF:
         w = HALF - a
         out.append(clear(0, charstack))
         while w < HALF:
             out.append(clear(1, charstack))
             w *= 2
     else:
         w = b - HALF
         out.append(clear(1, charstack))
         while w < HALF:
             out.append(clear(0, charstack))
             w *= 2
     return "".join(out)

 def decode(string, N=10000, c0=BETA0, c1=BETA1, adaptive=1, verbose=0):
     """Decode a bit string produced by ``encode`` back into source symbols.

     ``(u, v)`` is the current "encoded alphabet" binary interval and
     ``halfway`` is its midpoint.  ``(a, b)`` is the current "source
     alphabet" interval and ``boundary`` is its "midpoint".  Bits are read
     until the source symbol can be decided; then the encoder's
     computations are emulated, with ``(u, v)`` tagging along for the ride.

     Args:
         string: the encoded bits; characters other than '0' and '1' do
             not narrow the interval.
         N: the number of source characters remaining to decode.  It must
             be supplied; decoding stops once N symbols have been produced.
         c0: positive pseudo-count for '0' (must match the encoder).
         c1: positive pseudo-count for '1' (must match the encoder).
         adaptive: if true, the count of each decoded symbol is
             incremented, mirroring the adaptive encoder.
         verbose: unused; kept for interface compatibility.

     Returns:
         The decoded source symbols as a string of '0' and '1' characters.
     """
     assert c0 > 0
     assert c1 > 0
     a = 0
     b = ONE
     model_needs_updating = True
     if not adaptive:
         p0 = c0 / (c0 + c1)
     out = []

     u = 0
     v = ONE
     for c in string:
         if N <= 0:
             break  # out of the string-reading loop
         assert u >= 0
         assert v <= ONE
         # Floor division keeps (u, v) as integers, like (a, b).
         halfway = u + (v - u) // 2
         if c == '1':
             u = halfway
         elif c == '0':
             v = halfway

         while True:  # do-while loop; the exit condition is at the end
             if model_needs_updating:
                 w = b - a
                 if adaptive:
                     p0 = c0 / (c0 + c1)
                 boundary = a + int(p0 * w)
                 if boundary == a:
                     boundary += 1
                     print("warningA")
                 if boundary == b:
                     boundary -= 1
                     print("warningB")
                 model_needs_updating = False

             # Every time we discover a source bit, implement exactly the
             # computations that were done by the encoder.
             if boundary <= u:
                 out.append("1")
                 if adaptive:
                     c1 += 1.0
                 a = boundary
                 model_needs_updating = True
                 N -= 1
             elif boundary >= v:
                 out.append("0")
                 if adaptive:
                     c0 += 1.0
                 b = boundary
                 model_needs_updating = True
                 N -= 1
             # otherwise not enough bits have been read to know the decision

             # Emulate outputting of bits by the encoder, and tie (u, v) to
             # tag along for the ride.
             while a >= HALF or b <= HALF:
                 if a >= HALF:
                     a -= HALF
                     b -= HALF
                     u -= HALF
                     v -= HALF
                 a *= 2
                 b *= 2
                 u *= 2
                 v *= 2
                 model_needs_updating = True

             assert a <= HALF
             assert b >= HALF
             assert a >= 0
             assert b <= ONE

             # If the gap between a and b is getting small, rescale it.
             while a > QUARTER and b < THREEQU:
                 a = 2 * a - HALF
                 b = 2 * b - HALF
                 u = 2 * u - HALF
                 v = 2 * v - HALF

             # This is the condition for this do-while loop.
             if not (N > 0 and model_needs_updating):
                 break
     return "".join(out)

 def test():
     """Round-trip sample bit strings through encode() and decode().

     For each sample the original, encoded and decoded strings are
     printed, followed by "PASS" if the decoded string equals the original
     and "FAIL" otherwise.
     """
     tests = [
         "1010",
         "111",
         "00001000000000000000",
         "1",
         "10",
         "01",
         "0",
         "0000000",
         (
             "00000000000000010000000000000000"
             "00000000000000001000000000000000"
             "00011000000"
         ),
     ]
     for s in tests:
         N = len(s)  # required for decoding later.
         print("original:", s)
         e = encode(s, 10, 1)
         print("encoded: ", e)
         ds = decode(e, N, 10, 1)
         print("decoded: ", ds)
         if ds != s:
             print("FAIL")
         else:
             print("PASS")
         print()

 # Run the encode/decode round-trip self-test when executed as a script.
 if __name__ == '__main__':
    test()