This commit is contained in:
parent
7ccb616fee
commit
12a9b1640d
1 changed file with 8 additions and 7 deletions
15
idup.py
15
idup.py
|
@@ -1,7 +1,7 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
# find duplicate images given a hamming distance threshold.
|
# find duplicate images given a hamming distance threshold.
|
||||||
# employs dhash to do the heavy lifting.
|
# employs dhash to do the heavy lifting.
|
||||||
# does not recurse into "./_duplicate" so you can dump things there if you wish.
|
# doesn't recurse into "./_duplicate/" so you can dump things there if you wish.
|
||||||
# dependencies: pillow, dhash
|
# dependencies: pillow, dhash
|
||||||
|
|
||||||
import sys, os, os.path, pickle
|
import sys, os, os.path, pickle
|
||||||
|
@@ -11,12 +11,15 @@ import dhash
|
||||||
def lament(*args, **kwargs):
|
def lament(*args, **kwargs):
|
||||||
print(*args, file=sys.stderr, **kwargs)
|
print(*args, file=sys.stderr, **kwargs)
|
||||||
|
|
||||||
def result(diff, ours, theirs): # TODO: rename
|
def result(diff, p1, p2): # TODO: rename
|
||||||
print("{}\t{}\t{}".format(diff, ours, theirs))
|
print("{}\t{}\t{}".format(diff, p1, p2))
|
||||||
|
|
||||||
dbname = "idup.db"
|
dbname = "idup.db"
|
||||||
exts = ".jpeg .jpg .png".split()
|
exts = ".jpeg .jpg .png".split()
|
||||||
|
|
||||||
|
rootpath = "."
|
||||||
|
ignore_dir = os.path.join(rootpath, "_duplicate")
|
||||||
|
|
||||||
"""verbosity:
|
"""verbosity:
|
||||||
-1: only unrecoverable errors.
|
-1: only unrecoverable errors.
|
||||||
0: include failures.
|
0: include failures.
|
||||||
|
@@ -34,9 +37,7 @@ args = sys.argv[1:]
|
||||||
|
|
||||||
threshold = int(args[0])
|
threshold = int(args[0])
|
||||||
|
|
||||||
ignore_dir = os.path.join(rootpath, "_duplicate")
|
paths = {} # path to hash mapping.
|
||||||
|
|
||||||
paths = {}
|
|
||||||
|
|
||||||
if os.path.exists(dbname) and os.path.getsize(dbname) > 0:
|
if os.path.exists(dbname) and os.path.getsize(dbname) > 0:
|
||||||
with open(dbname, "rb") as f:
|
with open(dbname, "rb") as f:
|
||||||
|
@@ -55,7 +56,7 @@ for path in paths.keys():
|
||||||
paths = existing
|
paths = existing
|
||||||
|
|
||||||
def compare_hash(h1, h2):
|
def compare_hash(h1, h2):
|
||||||
# hashes are in byte format, so we have to convert them to integers.
|
# hashes are in byte strings, so we have to convert them to integers.
|
||||||
i1 = int.from_bytes(h1, byteorder="big")
|
i1 = int.from_bytes(h1, byteorder="big")
|
||||||
i2 = int.from_bytes(h2, byteorder="big")
|
i2 = int.from_bytes(h2, byteorder="big")
|
||||||
# return the hamming distance.
|
# return the hamming distance.
|
||||||
|
|
Loading…
Reference in a new issue