diff --git a/idup.py b/idup.py
index 9fadad9..5e78699 100644
--- a/idup.py
+++ b/idup.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # find duplicate images given a hamming distance threshold.
 # employs dhash to do the heavy lifting.
-# does not recurse into "./_duplicate" so you can dump things there if you wish.
+# doesn't recurse into "./_duplicate/" so you can dump things there if you wish.
 # dependencies: pillow, dhash
 
 import sys, os, os.path, pickle
@@ -11,12 +11,15 @@ import dhash
 def lament(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
 
-def result(diff, ours, theirs): # TODO: rename
-    print("{}\t{}\t{}".format(diff, ours, theirs))
+def result(diff, p1, p2): # TODO: rename
+    print("{}\t{}\t{}".format(diff, p1, p2))
 
 dbname = "idup.db"
 exts = ".jpeg .jpg .png".split()
 
+rootpath = "."
+ignore_dir = os.path.join(rootpath, "_duplicate")
+
 """verbosity:
 -1: only unrecoverable errors.
 0: include failures.
@@ -34,9 +37,7 @@ args = sys.argv[1:]
 
 threshold = int(args[0])
 
-ignore_dir = os.path.join(rootpath, "_duplicate")
-
-paths = {}
+paths = {} # path to hash mapping.
 
 if os.path.exists(dbname) and os.path.getsize(dbname) > 0:
     with open(dbname, "rb") as f:
@@ -55,7 +56,7 @@ for path in paths.keys():
 paths = existing
 
 def compare_hash(h1, h2):
-    # hashes are in byte format, so we have to convert them to integers.
+    # hashes are in byte strings, so we have to convert them to integers.
     i1 = int.from_bytes(h1, byteorder="big")
     i2 = int.from_bytes(h2, byteorder="big")
     # return the hamming distance.
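
Aside, not part of the patch: the compare_hash hunk cuts off at the "return the hamming distance" comment, so the return statement itself isn't shown here. A minimal sketch of what the full function presumably looks like, assuming the usual xor-and-popcount implementation (only the conversion lines and comments above are taken from the diff):

def compare_hash(h1, h2):
    # hashes are byte strings, so convert them to integers first.
    i1 = int.from_bytes(h1, byteorder="big")
    i2 = int.from_bytes(h2, byteorder="big")
    # hamming distance: the number of differing bits,
    # i.e. the popcount of the xor of the two integers.
    return bin(i1 ^ i2).count("1")

Counting the set bits of the xor gives the number of bits on which the two dhash values disagree, which is the quantity the script compares against the threshold argument.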