# respodns/respodns/checks.py

from .top1m import retrieve_top1m_entries
from .util import concat_nonsense, rot13, head
from .structs import Check

# baidu.com is kept in a list of its own. Per the original note, this avoids
# issues with Chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html
first = [Check("common", "baidu.com")]

# Checks taken via dnsvalidator, written as (category, domain) pairs and
# turned into Check objects in the order listed.
new = [
    Check(category, domain)
    for category, domain in (
        ("adtrack", "bet365.com"),
        ("common", "facebook.com"),
        ("common", "google.com"),
        ("common", "paypal.com"),
        ("common", "wikileaks.com"),
        ("news", "telegram.com"),
    )
]

# Ordered list of Check(category, domain) entries.
# Some domains are not written literally: they are produced by the rot13() and
# concat_nonsense() helpers imported from .util.
# NOTE(review): presumably rot13() decodes literals kept obfuscated in this
# file, and concat_nonsense() attaches a nonsense label to its argument so the
# resulting name should not exist -- confirm against .util.
likely = [
    # these checks are, in practice, the most likely to weed out unwanted DNS servers.
    Check("news",       "huanqiu.com"),
    Check("adware",     rot13("nqf789.pbz")),
    Check("shock",      rot13("tbng.pk")),  # some servers block shock sites, which isn't a terrible idea, but it's inaccurate
    Check("porn",       "pornhub.com"),  # some servers block piracy and porn sites for being taboo, or whatever
    Check("adtrack",    "media.fastclick.net"),
    Check("parking",    "scmp.org"),  # dns.watch fails here: domain parking is evil, but servers must abide
    Check("usercontent","4chan.org"),  # some servers block sites driven by their chaotic user-created content
    Check("bad",        concat_nonsense("com")),  # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
    Check("weirdsub",   concat_nonsense("javarevisited.blogspot.com")),  # blogspot handles these strangely; DNS servers likewise
    # NOTE: disabled for being wildly inconsistent:
#   Check("weird",      "archive.is"),  # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
    Check("common",     "archive.org"),
    Check("infosec",    "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),  # one of the WannaCry sinkholes, kinda important that it resolves
    Check("common",     "xn--b1aew.xn--p1ai"),  # just to test internationalized domains
    Check("common",     "wikileaks.com"),
    Check("uncommon",   "cybre.space"),  # also doubles as a check for the new TLDs
    Check("piracy",     "thehiddenbay.org"),  # some servers block piracy and porn sites for being taboo, or whatever
    Check("infosec",    "scan.shadowserver.org"),  # some servers block this: https://scan.shadowserver.org/dns/
    Check("common",     "duckduckgo.com"),  # a few servers block this for some reason?
    Check("badsub",     concat_nonsense("google.com")),  # poisoning may yield an unwanted result here
    Check("common",     "en.wikipedia.org"),
    Check("adtrack",    "google-analytics.com"),
    Check("adtrack",    "ad.doubleclick.net"),
    # baidu goes here...?
    Check("common",     "naver.com"),
    Check("common",     "google.com"),  # surely a fully-functional server would resolve the most popular domain in existence
]

# (category, domain) pairs, turned into Check objects in the order listed.
unlikely = [
    Check(category, domain)
    for category, domain in (
        ("piracy", "thepiratebay.org"),
        ("porn", "xvideos.com"),
        ("usercontent", "imgur.com"),
        ("usercontent", "twitter.com"),
        ("usercontent", "weibo.com"),
        ("usercontent", "github.com"),
        ("porn", "chaturbate.com"),
        ("video", "bilibili.com"),
        ("video", "twitch.tv"),
        ("common", "qq.com"),
        ("video", "netflix.com"),
        ("usercontent", "reddit.com"),
        ("usercontent", "facebook.com"),
        ("video", "youtube.com"),
        ("usercontent", "tumblr.com"),
        ("usercontent", "wordpress.com"),
        ("common", "tmall.com"),
        ("usercontent", "instagram.com"),
        ("news", "nytimes.com"),
        ("usercontent", "flickr.com"),
        ("common", "ebay.com"),
        ("news", "scmp.com"),
        ("common", "aliexpress.com"),
        ("common", "stackoverflow.com"),
    )
]

# Domains that _top1m_gen() leaves out of the checks it generates.
defunct = ["panda.tv"]  # imochen.github.io

def _top1m_gen():
    """Return a generator of ``Check("top", domain)`` objects.

    Every item from ``retrieve_top1m_entries()`` is unpacked as a pair, of
    which only the second element is used. Entries found in ``defunct`` are
    skipped.
    """
    # Fetch the source iterable right away; only the filtering and the Check
    # construction are deferred until the generator is consumed.
    entries = retrieve_top1m_entries()
    return (
        Check("top", domain)
        for _, domain in entries
        if domain not in defunct
    )

# Module-level collections built at import time from two separate
# _top1m_gen() generators, so retrieve_top1m_entries() is invoked once for
# each of them.
# NOTE(review): head(n, iterable) comes from .util and presumably keeps the
# first n items -- confirm there, including what type it returns.
top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())
init 2020-08-29 01:16:06 -07:00			`from .top1m import retrieve_top1m_entries`
begin reorganizing code 2020-08-29 05:54:07 -07:00			`from .util import concat_nonsense, rot13, head`
			`from .structs import Check`
init 2020-08-29 01:16:06 -07:00
reorganize checks 2020-08-29 01:28:26 -07:00			`first = [`
init 2020-08-29 01:16:06 -07:00			`Check("common", "baidu.com"), # this avoids issues with chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html`
			`]`

reorganize checks 2020-08-29 01:28:26 -07:00			`new = [`
init 2020-08-29 01:16:06 -07:00			`# via dnsvalidator`
			`Check("adtrack", "bet365.com"),`
			`Check("common", "facebook.com"),`
			`Check("common", "google.com"),`
			`Check("common", "paypal.com"),`
			`Check("common", "wikileaks.com"),`
			`Check("news", "telegram.com"),`
			`]`

reorganize checks 2020-08-29 01:28:26 -07:00			`likely = [`
init 2020-08-29 01:16:06 -07:00			`# these checks are, in practice, the most likely to weed out unwanted DNS servers.`
			`Check("news", "huanqiu.com"),`
			`Check("adware", rot13("nqf789.pbz")),`
			`Check("shock", rot13("tbng.pk")), # some servers block shock sites, which isn't a terrible idea, but it's inaccurate`
			`Check("porn", "pornhub.com"), # some servers block piracy and porn sites for being taboo, or whatever`
			`Check("adtrack", "media.fastclick.net"),`
			`Check("parking", "scmp.org"), # dns.watch fails here: domain parking is evil, but servers must abide`
			`Check("usercontent","4chan.org"), # some servers block sites driven by their chaotic user-created content`
			`Check("bad", concat_nonsense("com")), # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/`
			`Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")), # blogspot handles these strangely; DNS servers likewise`
			`# NOTE: disabled for being wildly inconsistent:`
			`# Check("weird", "archive.is"), # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/`
			`Check("common", "archive.org"),`
			`Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"), # one of the WannaCry sinkholes, kinda important that it resolves`
			`Check("common", "xn--b1aew.xn--p1ai"), # just to test internationalized domains`
			`Check("common", "wikileaks.com"),`
			`Check("uncommon", "cybre.space"), # also doubles as a check for the new TLDs`
			`Check("piracy", "thehiddenbay.org"), # some servers block piracy and porn sites for being taboo, or whatever`
			`Check("infosec", "scan.shadowserver.org"), # some servers block this: https://scan.shadowserver.org/dns/`
			`Check("common", "duckduckgo.com"), # a few servers block this for some reason?`
			`Check("badsub", concat_nonsense("google.com")), # poisoning may yield an unwanted result here`
			`Check("common", "en.wikipedia.org"),`
			`Check("adtrack", "google-analytics.com"),`
			`Check("adtrack", "ad.doubleclick.net"),`
			`# baidu goes here...?`
			`Check("common", "naver.com"),`
			`Check("common", "google.com"), # surely a fully-functional server would resolve the most popular domain in existence`
			`]`

reorganize checks 2020-08-29 01:28:26 -07:00			`unlikely = [`
init 2020-08-29 01:16:06 -07:00			`Check("piracy", "thepiratebay.org"),`
			`Check("porn", "xvideos.com"),`
			`Check("usercontent","imgur.com"),`
			`Check("usercontent","twitter.com"),`
			`Check("usercontent","weibo.com"),`
			`Check("usercontent","github.com"),`
			`Check("porn", "chaturbate.com"),`
			`Check("video", "bilibili.com"),`
			`Check("video", "twitch.tv"),`
			`Check("common", "qq.com"),`
			`Check("video", "netflix.com"),`
			`Check("usercontent","reddit.com"),`
			`Check("usercontent","facebook.com"),`
			`Check("video", "youtube.com"),`
			`Check("usercontent","tumblr.com"),`
			`Check("usercontent","wordpress.com"),`
			`Check("common", "tmall.com"),`
			`Check("usercontent","instagram.com"),`
			`Check("news", "nytimes.com"),`
			`Check("usercontent","flickr.com"),`
			`Check("common", "ebay.com"),`
			`Check("news", "scmp.com"),`
			`Check("common", "aliexpress.com"),`
			`Check("common", "stackoverflow.com"),`
			`]`

			`defunct = [`
			`"panda.tv", # imochen.github.io`
			`]`

			`def _top1m_gen():`
			`return (Check("top", entry)`
			`for i, entry in retrieve_top1m_entries()`
			`if entry not in defunct)`

			`top100 = head(100, _top1m_gen())`
			`top1000 = head(1000, _top1m_gen())`