respodns/respodns/checks.py

122 lines
4.2 KiB
Python
Raw Normal View History

2020-08-29 01:16:06 -07:00
from .top1m import retrieve_top1m_entries
2020-08-29 05:54:07 -07:00
from .util import concat_nonsense, rot13, head
from .structs import Check
2020-08-29 01:16:06 -07:00
2020-08-29 01:28:26 -07:00
# checks kept in their own list, separate from `likely` and `unlikely`.
first = [
    # checking this first avoids issues with censorship in China. see:
    # https://www.bortzmeyer.org/sichuan-pepper.html
    Check("common", "baidu.com"),
]
2020-08-29 01:28:26 -07:00
# NOTE(review): a few domains are passed through rot13(), presumably so the
# literal names do not appear in this file; concat_nonsense() presumably
# builds a nonexistent name under the given suffix -- confirm in .util.
likely = [
    # these checks are, in practice, the most likely
    # to weed out unwanted DNS servers.

    Check("news", "huanqiu.com"),
    Check("adware", rot13("nqf789.pbz")),
    # some servers block shock sites, which isn't a terrible idea,
    # but it's inaccurate.
    Check("shock", rot13("tbng.pk")),  # actually parked at this time
    # some servers block piracy and porn sites for being taboo, or whatever.
    Check("porn", "pornhub.com"),

    Check("adtrack", "media.fastclick.net"),
    # dns.watch fails here: domain parking is evil, but servers must abide.
    Check("parking", "scmp.org"),
    # some servers block sites driven by their chaotic user-created content.
    Check("usercontent", "4chan.org"),
    # some servers like to redirect nonexistent domains. see:
    # https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
    Check("bad", concat_nonsense("com")),
    # blogspot handles these strangely; DNS servers likewise
    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")),
    # NOTE: disabled for being wildly inconsistent:
    # Cloudflare fails here. see:
    # https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
    # Check("weird", "archive.is"),
    Check("common", "archive.org"),
    # this is one of the WannaCry sinkholes, it's kinda important.
    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),
    # try out internationalized domains.
    Check("common", "xn--b1aew.xn--p1ai"),
    Check("common", "wikileaks.com"),
    # i suppose this doubles as a check for the new TLDs.
    Check("uncommon", "cybre.space"),
    # some servers block piracy and porn sites for being taboo, or whatever
    Check("piracy", "thehiddenbay.org"),
    # some servers block this. see:
    # https://scan.shadowserver.org/dns/
    Check("infosec", "scan.shadowserver.org"),
    # a few servers block this for some reason.
    Check("common", "duckduckgo.com"),
    # DNS poisoning may yield an unwanted result here.
    Check("badsub", concat_nonsense("google.com")),
    Check("common", "en.wikipedia.org"),
    Check("adtrack", "google-analytics.com"),
    Check("adtrack", "ad.doubleclick.net"),
    # baidu goes here...?
    Check("common", "naver.com"),
    # surely a fully-functional server would resolve
    # the most popular domain in existence, right?
    Check("common", "google.com"),
]
2020-08-29 01:28:26 -07:00
# NOTE(review): by contrast with `likely`, these checks presumably seldom
# weed out a DNS server -- confirm against how the lists are consumed.
unlikely = [
    Check("piracy", "thepiratebay.org"),
    Check("porn", "xvideos.com"),
    Check("usercontent", "imgur.com"),
    Check("usercontent", "twitter.com"),
    Check("usercontent", "weibo.com"),
    Check("usercontent", "github.com"),
    Check("porn", "chaturbate.com"),
    Check("video", "bilibili.com"),
    Check("video", "twitch.tv"),
    Check("common", "qq.com"),
    Check("video", "netflix.com"),
    Check("usercontent", "reddit.com"),
    Check("usercontent", "facebook.com"),
    Check("video", "youtube.com"),
    Check("usercontent", "tumblr.com"),
    Check("usercontent", "wordpress.com"),
    Check("common", "tmall.com"),
    Check("usercontent", "instagram.com"),
    Check("news", "nytimes.com"),
    Check("usercontent", "flickr.com"),
    Check("common", "ebay.com"),
    Check("news", "scmp.com"),
    Check("common", "aliexpress.com"),
    Check("common", "stackoverflow.com"),
]
# entries that are left out of the generated "top" checks.
defunct = ["panda.tv"]  # imochen.github.io
2020-08-29 06:34:46 -07:00
2020-08-29 01:16:06 -07:00
def _top1m_gen():
    """Return a generator of ``Check("top", entry)`` objects.

    Entries come from ``retrieve_top1m_entries()``, which is iterated as
    pairs; only the second element of each pair is used. Entries listed
    in ``defunct`` are skipped.
    """
    # the first element of each pair is unused here
    # (presumably the entry's rank -- TODO confirm).
    return (Check("top", entry)
            for _, entry in retrieve_top1m_entries()
            if entry not in defunct)
2020-08-29 06:34:46 -07:00
2020-08-29 01:16:06 -07:00
# check lists built from the top-1m entries; each call to _top1m_gen()
# hands head() its own fresh generator.
# NOTE(review): head(n, it) presumably keeps the first n items -- confirm in .util.
top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())