From f6e3a28aff0c5740999586f2ee3a8f7cc60616f7 Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Fri, 4 Sep 2020 15:39:08 +0200
Subject: [PATCH] add failures field to Check and sort by them

this commit removes the comments on individual checks.
these comments should be restored at a later point.
---
 respodns/checks.py  | 159 ++++++++++++++++++--------------------------
 respodns/structs.py |   2 +-
 2 files changed, 65 insertions(+), 96 deletions(-)

diff --git a/respodns/checks.py b/respodns/checks.py
index 0b82911..889d20c 100644
--- a/respodns/checks.py
+++ b/respodns/checks.py
@@ -2,109 +2,78 @@ from .top1m import retrieve_top1m_entries
 from .util import concat_nonsense, rot13, head
 from .structs import Check
 
+def order_by_failures(checks): # descending
+    return sorted(checks, key=lambda check: -check.failures)
+
 first = [
     # checking this first avoids issues with censorship in China. see:
     # https://www.bortzmeyer.org/sichuan-pepper.html
-    Check("common", "baidu.com"),
+    Check("common", "baidu.com", 491),
 ]
 
-likely = [
+likely = order_by_failures([
     # these checks are, in practice, the most likely
     # to weed out unwanted DNS servers.
-    Check("news", "huanqiu.com"),
-    Check("adware", rot13("nqf789.pbz")),
+    Check("adtrack", "ad.doubleclick.net", 81),
+    Check("adtrack", "google-analytics.com", 75),
+    Check("adtrack", "media.fastclick.net", 116),
+    Check("adware", rot13("nqf789.pbz"), 168),
+    Check("bad", concat_nonsense("com"), 153),
+    Check("badsub", concat_nonsense("google.com"), 63),
+    Check("common", "archive.org", 98),
+    Check("common", "duckduckgo.com", 78),
+    Check("common", "en.wikipedia.org", 75),
+    Check("common", "facebook.com", 94),
+    Check("common", "google.com", 69),
+    # Check("common", "naver.com", 57),
+    Check("common", "paypal.com", 74),
+    Check("common", "wikileaks.com", 86),
+    Check("common", "xn--b1aew.xn--p1ai", 85),
+    Check("gambling", "bet365.com", 157),
+    Check("gambling", "betonline.ag", 168),
+    Check("gambling", "unibet.com", 137),
+    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com", 98),
+    Check("infosec", "scan.shadowserver.org", 73),
+    Check("news", "huanqiu.com", 435),
+    Check("news", "telegram.com", 71),
+    Check("parking", "scmp.org", 132),
+    Check("piracy", "thehiddenbay.org", 77),
+    Check("porn", "pornhub.com", 151),
+    Check("shock", rot13("tbng.pk"), 209),
+    Check("uncommon", "cybre.space", 88),
+    Check("uncommon", "react.uni-saarland.de", 74),
+    Check("usercontent", "4chan.org", 116),
+    # Check("weird", "archive.is", 0),
+    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com"), 126),
+])
 
-    # some servers block shock sites, which isn't a terrible idea,
-    # but it's inaccurate.
-    Check("shock", rot13("tbng.pk")), # actually parked at this time
-
-    # some servers block piracy and porn sites for being taboo, or whatever.
-    Check("porn", "pornhub.com"),
-
-    Check("adtrack", "media.fastclick.net"),
-
-    # dns.watch fails here: domain parking is evil, but servers must abide.
-    Check("parking", "scmp.org"),
-
-    # some servers block sites driven by their chaotic user-created content.
-    Check("usercontent", "4chan.org"),
-
-    # some servers like to redirect nonexistent domains. see:
-    # https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
-    Check("bad", concat_nonsense("com")),
-
-    # blogspot handles these strangely; DNS servers likewise
-    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")),
-
-    # NOTE: disabled for being wildly inconsistent:
-    # Cloudflare fails here. see:
-    # https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
-    # Check("weird", "archive.is"),
-
-    Check("common", "archive.org"),
-
-    # this is one of the WannaCry sinkholes, it's kinda important.
-    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),
-
-    # try out internationalized domains.
-    Check("common", "xn--b1aew.xn--p1ai"),
-
-    Check("common", "wikileaks.com"),
-
-    # i suppose this doubles as a check for the new TLDs.
-    Check("uncommon", "cybre.space"),
-
-    # some servers block piracy and porn sites for being taboo, or whatever
-    Check("piracy", "thehiddenbay.org"),
-
-    # some servers block this. see:
-    # https://scan.shadowserver.org/dns/
-    Check("infosec", "scan.shadowserver.org"),
-
-    # a few servers block this for some reason.
-    Check("common", "duckduckgo.com"),
-
-    # DNS poisoning may yield an unwanted result here.
-    Check("badsub", concat_nonsense("google.com")),
-
-    Check("common", "en.wikipedia.org"),
-    Check("adtrack", "google-analytics.com"),
-    Check("adtrack", "ad.doubleclick.net"),
-    # baidu goes here...?
-    Check("common", "naver.com"),
-
-    # surely a fully-functional server would resolve
-    # the most popular domain in existence, right?
-    Check("common", "google.com"),
-]
-
-unlikely = [
-    Check("piracy", "thepiratebay.org"),
-    Check("porn", "xvideos.com"),
-    Check("usercontent", "imgur.com"),
-    Check("usercontent", "twitter.com"),
-    Check("usercontent", "weibo.com"),
-    Check("usercontent", "github.com"),
-    Check("porn", "chaturbate.com"),
-    Check("video", "bilibili.com"),
-    Check("video", "twitch.tv"),
-    Check("common", "qq.com"),
-    Check("video", "netflix.com"),
-    Check("usercontent", "reddit.com"),
-    Check("usercontent", "facebook.com"),
-    Check("video", "youtube.com"),
-    Check("usercontent", "tumblr.com"),
-    Check("usercontent", "wordpress.com"),
-    Check("common", "tmall.com"),
-    Check("usercontent", "instagram.com"),
-    Check("news", "nytimes.com"),
-    Check("usercontent", "flickr.com"),
-    Check("common", "ebay.com"),
-    Check("news", "scmp.com"),
-    Check("common", "aliexpress.com"),
-    Check("common", "stackoverflow.com"),
-]
+unlikely = order_by_failures([
+    Check("common", "aliexpress.com", 2),
+    Check("common", "ebay.com", 4),
+    Check("common", "qq.com", 15),
+    Check("common", "stackoverflow.com", 1),
+    Check("common", "tmall.com", 8),
+    Check("news", "nytimes.com", 6),
+    Check("news", "scmp.com", 3),
+    Check("piracy", "thepiratebay.org", 24),
+    Check("porn", "chaturbate.com", 18),
+    Check("porn", "xvideos.com", 23),
+    Check("usercontent", "facebook.com", 12),
+    Check("usercontent", "flickr.com", 5),
+    Check("usercontent", "github.com", 19),
+    Check("usercontent", "imgur.com", 22),
+    Check("usercontent", "instagram.com", 7),
+    Check("usercontent", "reddit.com", 13),
+    Check("usercontent", "tumblr.com", 10),
+    Check("usercontent", "twitter.com", 21),
+    Check("usercontent", "weibo.com", 20),
+    Check("usercontent", "wordpress.com", 9),
+    Check("video", "bilibili.com", 17),
+    Check("video", "netflix.com", 14),
+    Check("video", "twitch.tv", 16),
+    Check("video", "youtube.com", 11),
+])
 
 defunct = [
     "panda.tv", # imochen.github.io
@@ -112,7 +81,7 @@ defunct = [
 
 def _top1m_gen():
-    return (Check("top", entry)
+    return (Check("top", entry, 0)
             for i, entry in retrieve_top1m_entries()
             if entry not in defunct)
 
diff --git a/respodns/structs.py b/respodns/structs.py
index cd541cd..bf8f937 100644
--- a/respodns/structs.py
+++ b/respodns/structs.py
@@ -35,4 +35,4 @@ class Entry:
     execution: object
 
 
-Check = namedtuple("Check", ("kind", "domain"))
+Check = namedtuple("Check", ("kind", "domain", "failures"))
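
A note for review: below is a minimal, self-contained sketch of how the new
three-field Check and order_by_failures() fit together. The Check values are
copied from the patch above; the sample list and the print loop are
illustrative scaffolding, not part of the change.

    from collections import namedtuple

    # mirrors respodns/structs.py after this patch
    Check = namedtuple("Check", ("kind", "domain", "failures"))

    # mirrors respodns/checks.py after this patch: highest failures first
    def order_by_failures(checks):
        return sorted(checks, key=lambda check: -check.failures)

    sample = [
        Check("common", "google.com", 69),
        Check("common", "baidu.com", 491),
        Check("news", "huanqiu.com", 435),
    ]

    for check in order_by_failures(sample):
        print(check.failures, check.kind, check.domain)
    # 491 common baidu.com
    # 435 news huanqiu.com
    # 69 common google.com

Since failures is a plain int, the negated key behaves the same as
sorted(checks, key=lambda c: c.failures, reverse=True); the lambda form above
matches what the patch adds.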
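
The patch hard-codes each failure count but doesn't show how the numbers were
produced. One plausible way to regenerate them from recorded probe results is
sketched below; tally_failures and the (domain, ok) result format are
assumptions for illustration, not functions in this codebase.

    from collections import Counter, namedtuple

    Check = namedtuple("Check", ("kind", "domain", "failures"))

    def tally_failures(results, checks):
        # results: iterable of (domain, ok) pairs, where ok is False
        # whenever a DNS server mishandled that domain (assumed format).
        failed = Counter(domain for domain, ok in results if not ok)
        # namedtuple._replace returns a copy with the given field updated.
        return [check._replace(failures=failed[check.domain])
                for check in checks]

Counts produced this way could then be pasted back into checks.py, or the
checks could be rebuilt at runtime and passed through order_by_failures()
directly.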