Add a failures field to Check and sort checks by it.
This commit removes the comments on individual checks; these comments should be restored at a later point.
This commit is contained in:
parent
35f2ce9206
commit
f6e3a28aff
|
@ -2,109 +2,78 @@ from .top1m import retrieve_top1m_entries
|
||||||
from .util import concat_nonsense, rot13, head
|
from .util import concat_nonsense, rot13, head
|
||||||
from .structs import Check
|
from .structs import Check
|
||||||
|
|
||||||
|
def order_by_failures(checks):
    """Return *checks* sorted by their recorded failure count, most failures first.

    Uses ``reverse=True`` rather than negating the key, which reads more
    clearly and preserves the original order of ties (sorted() is stable).
    """
    return sorted(checks, key=lambda check: check.failures, reverse=True)
|
||||||
|
|
||||||
# Checks that must run before everything else.
first = [
    # checking this first this avoids issues with censorship in China. see:
    # https://www.bortzmeyer.org/sichuan-pepper.html
    Check("common", "baidu.com", 491),
]
|
||||||
|
|
||||||
likely = order_by_failures([
    # these checks are, in practice, the most likely
    # to weed out unwanted DNS servers.

    Check("adtrack", "ad.doubleclick.net", 81),
    Check("adtrack", "google-analytics.com", 75),
    Check("adtrack", "media.fastclick.net", 116),
    Check("adware", rot13("nqf789.pbz"), 168),
    # some servers like to redirect nonexistent domains. see:
    # https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
    Check("bad", concat_nonsense("com"), 153),
    # DNS poisoning may yield an unwanted result here.
    Check("badsub", concat_nonsense("google.com"), 63),
    Check("common", "archive.org", 98),
    # a few servers block this for some reason.
    Check("common", "duckduckgo.com", 78),
    Check("common", "en.wikipedia.org", 75),
    Check("common", "facebook.com", 94),
    # surely a fully-functional server would resolve
    # the most popular domain in existence, right?
    Check("common", "google.com", 69),
    # Check("common", "naver.com", 57),
    Check("common", "paypal.com", 74),
    Check("common", "wikileaks.com", 86),
    # try out internationalized domains.
    Check("common", "xn--b1aew.xn--p1ai", 85),
    Check("gambling", "bet365.com", 157),
    Check("gambling", "betonline.ag", 168),
    Check("gambling", "unibet.com", 137),
    # this is one of the WannaCry sinkholes, it's kinda important.
    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com", 98),
    # some servers block this. see:
    # https://scan.shadowserver.org/dns/
    Check("infosec", "scan.shadowserver.org", 73),
    Check("news", "huanqiu.com", 435),
    Check("news", "telegram.com", 71),
    # dns.watch fails here: domain parking is evil, but servers must abide.
    Check("parking", "scmp.org", 132),
    # some servers block piracy and porn sites for being taboo, or whatever.
    Check("piracy", "thehiddenbay.org", 77),
    Check("porn", "pornhub.com", 151),
    # some servers block shock sites, which isn't a terrible idea,
    # but it's inaccurate.
    Check("shock", rot13("tbng.pk"), 209),  # actually parked at this time
    # i suppose this doubles as a check for the new TLDs.
    Check("uncommon", "cybre.space", 88),
    Check("uncommon", "react.uni-saarland.de", 74),
    # some servers block sites driven by their chaotic user-created content.
    Check("usercontent", "4chan.org", 116),
    # NOTE: disabled for being wildly inconsistent:
    # Cloudflare fails here. see:
    # https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
    # Check("weird", "archive.is", 0),
    # blogspot handles these strangely; DNS servers likewise
    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com"), 126),
])
|
||||||
|
|
||||||
# these checks have comparatively few recorded failures,
# so they are kept in a separate, lower-priority group.
unlikely = order_by_failures([
    Check("common", "aliexpress.com", 2),
    Check("common", "ebay.com", 4),
    Check("common", "qq.com", 15),
    Check("common", "stackoverflow.com", 1),
    Check("common", "tmall.com", 8),
    Check("news", "nytimes.com", 6),
    Check("news", "scmp.com", 3),
    Check("piracy", "thepiratebay.org", 24),
    Check("porn", "chaturbate.com", 18),
    Check("porn", "xvideos.com", 23),
    Check("usercontent", "facebook.com", 12),
    Check("usercontent", "flickr.com", 5),
    Check("usercontent", "github.com", 19),
    Check("usercontent", "imgur.com", 22),
    Check("usercontent", "instagram.com", 7),
    Check("usercontent", "reddit.com", 13),
    Check("usercontent", "tumblr.com", 10),
    Check("usercontent", "twitter.com", 21),
    Check("usercontent", "weibo.com", 20),
    Check("usercontent", "wordpress.com", 9),
    Check("video", "bilibili.com", 17),
    Check("video", "netflix.com", 14),
    Check("video", "twitch.tv", 16),
    Check("video", "youtube.com", 11),
])
|
||||||
|
|
||||||
Check("common", "archive.org"),
|
|
||||||
|
|
||||||
# this is one of the WannaCry sinkholes, it's kinda important.
|
|
||||||
Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),
|
|
||||||
|
|
||||||
# try out internationalized domains.
|
|
||||||
Check("common", "xn--b1aew.xn--p1ai"),
|
|
||||||
|
|
||||||
Check("common", "wikileaks.com"),
|
|
||||||
|
|
||||||
# i suppose this doubles as a check for the new TLDs.
|
|
||||||
Check("uncommon", "cybre.space"),
|
|
||||||
|
|
||||||
# some servers block piracy and porn sites for being taboo, or whatever
|
|
||||||
Check("piracy", "thehiddenbay.org"),
|
|
||||||
|
|
||||||
# some servers block this. see:
|
|
||||||
# https://scan.shadowserver.org/dns/
|
|
||||||
Check("infosec", "scan.shadowserver.org"),
|
|
||||||
|
|
||||||
# a few servers block this for some reason.
|
|
||||||
Check("common", "duckduckgo.com"),
|
|
||||||
|
|
||||||
# DNS poisoning may yield an unwanted result here.
|
|
||||||
Check("badsub", concat_nonsense("google.com")),
|
|
||||||
|
|
||||||
Check("common", "en.wikipedia.org"),
|
|
||||||
Check("adtrack", "google-analytics.com"),
|
|
||||||
Check("adtrack", "ad.doubleclick.net"),
|
|
||||||
# baidu goes here...?
|
|
||||||
Check("common", "naver.com"),
|
|
||||||
|
|
||||||
# surely a fully-functional server would resolve
|
|
||||||
# the most popular domain in existence, right?
|
|
||||||
Check("common", "google.com"),
|
|
||||||
]
|
|
||||||
|
|
||||||
unlikely = [
|
|
||||||
Check("piracy", "thepiratebay.org"),
|
|
||||||
Check("porn", "xvideos.com"),
|
|
||||||
Check("usercontent", "imgur.com"),
|
|
||||||
Check("usercontent", "twitter.com"),
|
|
||||||
Check("usercontent", "weibo.com"),
|
|
||||||
Check("usercontent", "github.com"),
|
|
||||||
Check("porn", "chaturbate.com"),
|
|
||||||
Check("video", "bilibili.com"),
|
|
||||||
Check("video", "twitch.tv"),
|
|
||||||
Check("common", "qq.com"),
|
|
||||||
Check("video", "netflix.com"),
|
|
||||||
Check("usercontent", "reddit.com"),
|
|
||||||
Check("usercontent", "facebook.com"),
|
|
||||||
Check("video", "youtube.com"),
|
|
||||||
Check("usercontent", "tumblr.com"),
|
|
||||||
Check("usercontent", "wordpress.com"),
|
|
||||||
Check("common", "tmall.com"),
|
|
||||||
Check("usercontent", "instagram.com"),
|
|
||||||
Check("news", "nytimes.com"),
|
|
||||||
Check("usercontent", "flickr.com"),
|
|
||||||
Check("common", "ebay.com"),
|
|
||||||
Check("news", "scmp.com"),
|
|
||||||
Check("common", "aliexpress.com"),
|
|
||||||
Check("common", "stackoverflow.com"),
|
|
||||||
]
|
|
||||||
|
|
||||||
defunct = [
|
defunct = [
|
||||||
"panda.tv", # imochen.github.io
|
"panda.tv", # imochen.github.io
|
||||||
|
@ -112,7 +81,7 @@ defunct = [
|
||||||
|
|
||||||
|
|
||||||
def _top1m_gen():
    """Lazily yield a Check for every top-1M entry not known to be defunct.

    Top-1M entries have no locally recorded failure history, so their
    failure count starts at 0. The rank yielded alongside each entry by
    retrieve_top1m_entries() is unused here.
    """
    return (Check("top", entry, 0)
            for _rank, entry in retrieve_top1m_entries()
            if entry not in defunct)
|
||||||
|
|
||||||
|
|
|
@ -35,4 +35,4 @@ class Entry:
|
||||||
execution: object
|
execution: object
|
||||||
|
|
||||||
|
|
||||||
# A single DNS probe: the category of domain being checked, the domain
# itself, and the number of recorded failures for this check (used to
# order checks so the most discriminating ones run first). `failures`
# defaults to 0 so two-argument call sites remain valid.
Check = namedtuple("Check", ("kind", "domain", "failures"), defaults=(0,))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user