# respodns/respodns/checks.py
from collections import namedtuple
from .nonsense import nonsense_consistent
from .top1m import retrieve_top1m_entries

# Build a character map that rotates letters by 13 places and digits by 5.
rot13_mapping = {}
for a, b, c, d in zip("anAN05", "mzMZ49", "naNA50", "zmZM94"):
    rot13_mapping.update(dict((chr(k), chr(v))
                              for k, v in zip(range(ord(a), ord(b) + 1),
                                              range(ord(c), ord(d) + 1))))


def rot13(s):
    """Rotate letters by 13 and digits by 5; leave other characters unchanged."""
    return "".join(rot13_mapping.get(c, c) for c in s)


def concat_nonsense(domain):
    """Prepend a consistent nonsense label to the given domain."""
    return nonsense_consistent(domain) + "." + domain
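
# Illustrative sketch only: the label comes from nonsense_consistent(), so
# concat_nonsense("com") yields something like "<nonsense-label>.com". Such a
# name should not exist, which is what the "bad" and "badsub" checks below
# rely on.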


def head(n, it):
    """Collect at most the first n items from the iterator it."""
    res = []
    try:
        while len(res) < n:
            res.append(next(it))
    except StopIteration:
        pass
    return res
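
# Usage sketch: head() stops early if the iterator runs dry, so it never
# raises StopIteration itself. Note that it expects an iterator (something
# next() accepts), such as the generator returned by _top1m_gen() below.
#     head(3, iter("abcdef")) == ["a", "b", "c"]
#     head(3, iter("ab"))     == ["a", "b"]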


# A single check: the kind of site being tested and the domain to resolve.
Check = namedtuple("Check", ("kind", "domain"))

first_checks = [
    Check("common", "baidu.com"),  # this avoids issues with Chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html
]

new_checks = [
    # via dnsvalidator
    Check("adtrack", "bet365.com"),
    Check("common", "facebook.com"),
    Check("common", "google.com"),
    Check("common", "paypal.com"),
    Check("common", "wikileaks.com"),
    Check("news", "telegram.com"),
]

likely_checks = [
    # these checks are, in practice, the most likely to weed out unwanted DNS servers.
    Check("news", "huanqiu.com"),
    Check("adware", rot13("nqf789.pbz")),
    Check("shock", rot13("tbng.pk")),  # some servers block shock sites, which isn't a terrible idea, but it's inaccurate
    Check("porn", "pornhub.com"),  # some servers block piracy and porn sites for being taboo, or whatever
    Check("adtrack", "media.fastclick.net"),
    Check("parking", "scmp.org"),  # dns.watch fails here: domain parking is evil, but servers must abide
    Check("usercontent", "4chan.org"),  # some servers block sites driven by their chaotic user-created content
    Check("bad", concat_nonsense("com")),  # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")),  # blogspot handles these strangely; DNS servers likewise
    # NOTE: disabled for being wildly inconsistent:
    # Check("weird", "archive.is"),  # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
    Check("common", "archive.org"),
    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),  # one of the WannaCry sinkholes, kinda important that it resolves
    Check("common", "xn--b1aew.xn--p1ai"),  # just to test internationalized domains
    Check("common", "wikileaks.com"),
    Check("uncommon", "cybre.space"),  # also doubles as a check for the new TLDs
    Check("piracy", "thehiddenbay.org"),  # some servers block piracy and porn sites for being taboo, or whatever
    Check("infosec", "scan.shadowserver.org"),  # some servers block this: https://scan.shadowserver.org/dns/
    Check("common", "duckduckgo.com"),  # a few servers block this for some reason?
    Check("badsub", concat_nonsense("google.com")),  # poisoning may yield an unwanted result here
    Check("common", "en.wikipedia.org"),
    Check("adtrack", "google-analytics.com"),
    Check("adtrack", "ad.doubleclick.net"),
    # baidu goes here...?
    Check("common", "naver.com"),
    Check("common", "google.com"),  # surely a fully-functional server would resolve the most popular domain in existence
]

unlikely_checks = [
    Check("piracy", "thepiratebay.org"),
    Check("porn", "xvideos.com"),
    Check("usercontent", "imgur.com"),
    Check("usercontent", "twitter.com"),
    Check("usercontent", "weibo.com"),
    Check("usercontent", "github.com"),
    Check("porn", "chaturbate.com"),
    Check("video", "bilibili.com"),
    Check("video", "twitch.tv"),
    Check("common", "qq.com"),
    Check("video", "netflix.com"),
    Check("usercontent", "reddit.com"),
    Check("usercontent", "facebook.com"),
    Check("video", "youtube.com"),
    Check("usercontent", "tumblr.com"),
    Check("usercontent", "wordpress.com"),
    Check("common", "tmall.com"),
    Check("usercontent", "instagram.com"),
    Check("news", "nytimes.com"),
    Check("usercontent", "flickr.com"),
    Check("common", "ebay.com"),
    Check("news", "scmp.com"),
    Check("common", "aliexpress.com"),
    Check("common", "stackoverflow.com"),
]

defunct = [
    "panda.tv",  # imochen.github.io
]


def _top1m_gen():
    """Yield Check entries for the top-1M list, skipping known-defunct domains."""
    return (Check("top", entry)
            for i, entry in retrieve_top1m_entries()
            if entry not in defunct)


top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())
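
# Note: top100 and top1000 each consume a fresh generator, so
# retrieve_top1m_entries() is iterated twice at import time. If that ever
# proves expensive, a single pass would do (sketch, assuming entries arrive
# in rank order; the _top name is illustrative):
#     _top = head(1000, _top1m_gen())
#     top1000 = _top
#     top100 = _top[:100]
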
#__all__ = [
# "first_checks", "new_checks", "likely_checks", "unlikely_checks", "top100",
# "defunct",
#]