merge arrays of Checks and partition by failures

This commit is contained in:
Connor Olding 2020-09-04 16:06:37 +02:00
parent 788648aeb0
commit 2b8f1dacc7
2 changed files with 28 additions and 21 deletions

View file

@@ -7,6 +7,16 @@ def order_by_failures(checks): # descending
return sorted(checks, key=lambda check: -check.failures)
def partition_checks(checks, n):
    """Split *checks* into two lists by failure count.

    Returns a ``(more, less)`` tuple: checks with at least ``n`` failures,
    then the rest. Input order is preserved within each list, and the
    input is consumed in a single pass.
    """
    buckets = ([], [])
    for check in checks:
        buckets[0 if check.failures >= n else 1].append(check)
    return buckets[0], buckets[1]
first = [
# checking this first this avoids issues with censorship in China. see:
# https://www.bortzmeyer.org/sichuan-pepper.html
@@ -16,54 +26,50 @@ first = [
_sinkhole = "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"
_weirdsub = concat_nonsense("javarevisited.blogspot.com")
likely = order_by_failures([
checks = order_by_failures([
# these checks are, in practice, the most likely
# to weed out unwanted DNS servers.
# Check("common", "naver.com", 57),
# Check("weird", "archive.is", 0),
Check("adtrack", "ad.doubleclick.net", 81),
Check("adtrack", "google-analytics.com", 75),
Check("adtrack", "media.fastclick.net", 116),
Check("adware", rot13("nqf789.pbz"), 168),
Check("bad", concat_nonsense("com"), 153),
Check("badsub", concat_nonsense("google.com"), 63),
Check("common", "aliexpress.com", 2),
Check("common", "archive.org", 98),
Check("common", "duckduckgo.com", 78),
Check("common", "ebay.com", 4),
Check("common", "en.wikipedia.org", 75),
Check("common", "facebook.com", 94),
Check("common", "google.com", 69),
# Check("common", "naver.com", 57),
Check("common", "paypal.com", 74),
Check("common", "qq.com", 15),
Check("common", "stackoverflow.com", 1),
Check("common", "tmall.com", 8),
Check("common", "wikileaks.com", 86),
Check("common", "xn--b1aew.xn--p1ai", 85),
Check("gambling", "bet365.com", 157),
Check("gambling", "betonline.ag", 168),
Check("gambling", "unibet.com", 137),
Check("infosec", _sinkhole, 98),
Check("infosec", "scan.shadowserver.org", 73),
Check("infosec", _sinkhole, 98),
Check("news", "huanqiu.com", 435),
Check("news", "nytimes.com", 6),
Check("news", "scmp.com", 3),
Check("news", "telegram.com", 71),
Check("parking", "scmp.org", 132),
Check("piracy", "thehiddenbay.org", 77),
Check("piracy", "thepiratebay.org", 24),
Check("porn", "chaturbate.com", 18),
Check("porn", "pornhub.com", 151),
Check("porn", "xvideos.com", 23),
Check("shock", rot13("tbng.pk"), 209),
Check("uncommon", "cybre.space", 88),
Check("uncommon", "react.uni-saarland.de", 74),
Check("usercontent", "4chan.org", 116),
# Check("weird", "archive.is", 0),
Check("weirdsub", _weirdsub, 126),
])
unlikely = order_by_failures([
Check("common", "aliexpress.com", 2),
Check("common", "ebay.com", 4),
Check("common", "qq.com", 15),
Check("common", "stackoverflow.com", 1),
Check("common", "tmall.com", 8),
Check("news", "nytimes.com", 6),
Check("news", "scmp.com", 3),
Check("piracy", "thepiratebay.org", 24),
Check("porn", "chaturbate.com", 18),
Check("porn", "xvideos.com", 23),
Check("usercontent", "facebook.com", 12),
Check("usercontent", "flickr.com", 5),
Check("usercontent", "github.com", 19),
@@ -78,8 +84,11 @@ unlikely = order_by_failures([
Check("video", "netflix.com", 14),
Check("video", "twitch.tv", 16),
Check("video", "youtube.com", 11),
Check("weirdsub", _weirdsub, 126),
])
likely, unlikely = partition_checks(checks, 50)
# Domains that no longer resolve and are excluded from active checks.
# NOTE(review): presumably consumed elsewhere in this module — confirm against callers.
defunct = [
    "panda.tv",  # imochen.github.io
]

View file

@@ -21,9 +21,7 @@ def ui(program, args):
a = parser.parse_args(args)
checks = []
checks += chk.first
checks += chk.likely
checks = chk.first + chk.likely
opts = Options()
opts.dry = a.database is None