abide by PEP 8 (passes pycodestyle)

This commit is contained in:
Connor Olding 2020-08-29 15:34:46 +02:00
parent 2a00288a9c
commit bdb2a88f40
11 changed files with 193 additions and 105 deletions

View File

@ -3,84 +3,127 @@ from .util import concat_nonsense, rot13, head
from .structs import Check
first = [
Check("common", "baidu.com"), # this avoids issues with chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html
# this avoids issues with chinese censorship. see:
# https://www.bortzmeyer.org/sichuan-pepper.html
Check("common", "baidu.com"),
]
new = [
# via dnsvalidator
Check("adtrack", "bet365.com"),
Check("common", "facebook.com"),
Check("common", "google.com"),
Check("common", "paypal.com"),
Check("common", "wikileaks.com"),
Check("news", "telegram.com"),
Check("adtrack", "bet365.com"),
Check("common", "facebook.com"),
Check("common", "google.com"),
Check("common", "paypal.com"),
Check("common", "wikileaks.com"),
Check("news", "telegram.com"),
]
likely = [
# these checks are, in practice, the most likely to weed out unwanted DNS servers.
Check("news", "huanqiu.com"),
Check("adware", rot13("nqf789.pbz")),
Check("shock", rot13("tbng.pk")), # some servers block shock sites, which isn't a terrible idea, but it's inaccurate
Check("porn", "pornhub.com"), # some servers block piracy and porn sites for being taboo, or whatever
Check("adtrack", "media.fastclick.net"),
Check("parking", "scmp.org"), # dns.watch fails here: domain parking is evil, but servers must abide
Check("usercontent","4chan.org"), # some servers block sites driven by their chaotic user-created content
Check("bad", concat_nonsense("com")), # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")), # blogspot handles these strangely; DNS servers likewise
# these checks are, in practice, the most likely
# to weed out unwanted DNS servers.
Check("news", "huanqiu.com"),
Check("adware", rot13("nqf789.pbz")),
# some servers block shock sites, which isn't a terrible idea,
# but it's inaccurate.
Check("shock", rot13("tbng.pk")),
# some servers block piracy and porn sites for being taboo, or whatever.
Check("porn", "pornhub.com"),
Check("adtrack", "media.fastclick.net"),
# dns.watch fails here: domain parking is evil, but servers must abide.
Check("parking", "scmp.org"),
# some servers block sites driven by their chaotic user-created content.
Check("usercontent", "4chan.org"),
# some servers like to redirect nonexistent domains. see:
# https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
Check("bad", concat_nonsense("com")),
# blogspot handles these strangely; DNS servers likewise
Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")),
# NOTE: disabled for being wildly inconsistent:
# Check("weird", "archive.is"), # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
Check("common", "archive.org"),
Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"), # one of the WannaCry sinkholes, kinda important that it resolves
Check("common", "xn--b1aew.xn--p1ai"), # just to test internationalized domains
Check("common", "wikileaks.com"),
Check("uncommon", "cybre.space"), # also doubles as a check for the new TLDs
Check("piracy", "thehiddenbay.org"), # some servers block piracy and porn sites for being taboo, or whatever
Check("infosec", "scan.shadowserver.org"), # some servers block this: https://scan.shadowserver.org/dns/
Check("common", "duckduckgo.com"), # a few servers block this for some reason?
Check("badsub", concat_nonsense("google.com")), # poisoning may yield an unwanted result here
Check("common", "en.wikipedia.org"),
Check("adtrack", "google-analytics.com"),
Check("adtrack", "ad.doubleclick.net"),
# Cloudflare fails here. see:
# https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
Check("weird", "archive.is"),
Check("common", "archive.org"),
# this is one of the WannaCry sinkholes, it's kinda important.
Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),
# try out internationalized domains.
Check("common", "xn--b1aew.xn--p1ai"),
Check("common", "wikileaks.com"),
# i suppose this doubles as a check for the new TLDs.
Check("uncommon", "cybre.space"),
# some servers block piracy and porn sites for being taboo, or whatever
Check("piracy", "thehiddenbay.org"),
# some servers block this. see:
# https://scan.shadowserver.org/dns/
Check("infosec", "scan.shadowserver.org"),
# a few servers block this for some reason.
Check("common", "duckduckgo.com"),
# DNS poisoning may yield an unwanted result here.
Check("badsub", concat_nonsense("google.com")),
Check("common", "en.wikipedia.org"),
Check("adtrack", "google-analytics.com"),
Check("adtrack", "ad.doubleclick.net"),
# baidu goes here...?
Check("common", "naver.com"),
Check("common", "google.com"), # surely a fully-functional server would resolve the most popular domain in existence
Check("common", "naver.com"),
# surely a fully-functional server would resolve
# the most popular domain in existence, right?
Check("common", "google.com"),
]
unlikely = [
Check("piracy", "thepiratebay.org"),
Check("porn", "xvideos.com"),
Check("usercontent","imgur.com"),
Check("usercontent","twitter.com"),
Check("usercontent","weibo.com"),
Check("usercontent","github.com"),
Check("porn", "chaturbate.com"),
Check("video", "bilibili.com"),
Check("video", "twitch.tv"),
Check("common", "qq.com"),
Check("video", "netflix.com"),
Check("usercontent","reddit.com"),
Check("usercontent","facebook.com"),
Check("video", "youtube.com"),
Check("usercontent","tumblr.com"),
Check("usercontent","wordpress.com"),
Check("common", "tmall.com"),
Check("usercontent","instagram.com"),
Check("news", "nytimes.com"),
Check("usercontent","flickr.com"),
Check("common", "ebay.com"),
Check("news", "scmp.com"),
Check("common", "aliexpress.com"),
Check("common", "stackoverflow.com"),
Check("piracy", "thepiratebay.org"),
Check("porn", "xvideos.com"),
Check("usercontent", "imgur.com"),
Check("usercontent", "twitter.com"),
Check("usercontent", "weibo.com"),
Check("usercontent", "github.com"),
Check("porn", "chaturbate.com"),
Check("video", "bilibili.com"),
Check("video", "twitch.tv"),
Check("common", "qq.com"),
Check("video", "netflix.com"),
Check("usercontent", "reddit.com"),
Check("usercontent", "facebook.com"),
Check("video", "youtube.com"),
Check("usercontent", "tumblr.com"),
Check("usercontent", "wordpress.com"),
Check("common", "tmall.com"),
Check("usercontent", "instagram.com"),
Check("news", "nytimes.com"),
Check("usercontent", "flickr.com"),
Check("common", "ebay.com"),
Check("news", "scmp.com"),
Check("common", "aliexpress.com"),
Check("common", "stackoverflow.com"),
]
# domains that used to be worth probing but no longer resolve usefully.
# source: imochen.github.io
defunct = [
    "panda.tv",
]
def _top1m_gen():
    """Generate Check("top", domain) entries from the Top-1M list,
    skipping domains known to be defunct."""
    # entries are unpacked as pairs; presumably (rank, domain) from the
    # top-1m CSV — the first element is unused here, so name it _rank.
    return (Check("top", entry)
            for _rank, entry in retrieve_top1m_entries()
            if entry not in defunct)


top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())

View File

@ -5,6 +5,7 @@ from .tables import TKind, TDomain, TRecord, TMessage
from .ip_util import addr_to_int
import storm.locals as rain
class Execution:
def __init__(self, db):
self.db = db
@ -20,16 +21,21 @@ class Execution:
completed = exc_type is None
self.db.finish_execution(self.execution, right_now(), completed)
def is_column(ref):
    """Return True when *ref* is a storm column property or a Reference.

    NOTE(review): PropertyColumn was only imported locally inside
    apply_properties, so referencing it here at module scope would raise
    NameError unless a top-level import exists elsewhere; import it locally
    to be safe.
    """
    from storm.properties import PropertyColumn
    return isinstance(ref, (PropertyColumn, rain.Reference))
def apply_properties(obj, d):
    """Assign each key/value in *d* as an attribute on *obj*.

    Every key must name an existing storm column (or Reference) on the
    object's class; asserts otherwise. Returns *obj* for chaining.
    """
    for k, v in d.items():
        ref = getattr(obj.__class__, k)
        # the attribute must exist and be a mapped column/reference,
        # otherwise the caller passed a bogus keyword.
        assert ref is not None, (type(obj), k)
        assert is_column(ref), (type(obj), k)
        setattr(obj, k, v)
    return obj
class RespoDB:
def __init__(self, uri, setup=False, create=False):
self.uri = uri
@ -48,7 +54,7 @@ class RespoDB:
self.setup_messages()
for q in create_view_statements:
self._conn.execute(q, noresult=True)
self._fire(q)
assert setup or create or db_exists, "database was never setup"
self.execution = Execution(self)
@ -106,12 +112,16 @@ class RespoDB:
assert self._conn is not None
return self._conn.add(apply_properties(TMessage(), kwargs))
def _fire(self, statement):
    """Execute *statement* on the active connection, discarding any result."""
    conn = self._conn
    assert conn is not None
    conn.execute(statement, noresult=True)
def setup_executions(self):
self._conn.execute(create_table_statements["executions"], noresult=True)
self._fire(create_table_statements["executions"])
def setup_exceptions(self):
# careful not to call them "errors" since NXDOMAIN is not an error.
self._conn.execute(create_table_statements["exceptions"], noresult=True)
self._fire(create_table_statements["exceptions"])
# TODO: upsert?
@ -123,7 +133,7 @@ class RespoDB:
def setup_ips(self):
from .ips import china, blocks
self._conn.execute(create_table_statements["ips"], noresult=True)
self._fire(create_table_statements["ips"])
# TODO: upsert?
@ -135,22 +145,23 @@ class RespoDB:
self.new_address(ip=addr_to_int(ip), block_target=True)
def setup_kinds(self):
self._conn.execute(create_table_statements["kinds"], noresult=True)
self._fire(create_table_statements["kinds"])
# TODO: upsert?
#NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN")
#self.new_kind(name="bad", exception=NXDOMAIN)
#self.new_kind(name="badsub", exception=NXDOMAIN)
if 0:
NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN")
self.new_kind(name="bad", exception=NXDOMAIN)
self.new_kind(name="badsub", exception=NXDOMAIN)
def setup_domains(self):
self._conn.execute(create_table_statements["domains"], noresult=True)
self._fire(create_table_statements["domains"])
def setup_records(self):
self._conn.execute(create_table_statements["records"], noresult=True)
self._fire(create_table_statements["records"])
def setup_messages(self):
self._conn.execute(create_table_statements["messages"], noresult=True)
self._fire(create_table_statements["messages"])
for trig in table_triggers["messages"]:
self._conn.execute(trig)
@ -173,7 +184,8 @@ class RespoDB:
def find_record_id(self, addresses):
address_ids = list(address.address_id for address in addresses)
record_ids = list(self._conn.find(TRecord, TRecord.address_id.is_in(address_ids)).values(TRecord.record_id))
temp = self._conn.find(TRecord, TRecord.address_id.is_in(address_ids))
record_ids = list(temp.values(TRecord.record_id))
if not record_ids:
return None
unique_ids = sorted(set(record_ids))
@ -188,7 +200,8 @@ class RespoDB:
if not kind:
kind = self.new_kind(name=entry.kind)
if entry.kind.startswith("bad"):
exception = self.find_one(TException, TException.name == "NXDOMAIN")
exception = self.find_one(TException,
TException.name == "NXDOMAIN")
assert exception is not None
kind.exception = exception
@ -230,7 +243,8 @@ class RespoDB:
server.server = True
if entry.exception:
exception = self.find_one(TException, TException.name == entry.exception)
exception = self.find_one(TException,
TException.name == entry.exception)
assert exception is not None
else:
exception = None

View File

@ -1,23 +1,28 @@
from .structs import Options
def detect_gfw(r, ip, check):
# attempt to detect interference from the Great Firewall of China.
#from .ips import china
#if r in china: return True
# class D or class E, neither of which are correct for a (public?) DNS.
#if int(r.partition(".")[0]) >= 224: return True
def rs(prefix):
return r.startswith(prefix)
def de(suffix):
return check.domain.endswith(suffix)
rs = lambda prefix: r.startswith(prefix)
de = lambda suffix: check.domain.endswith(suffix)
hosted = de("facebook.com") or de("instagram.com") or de("whatsapp.com")
if rs("31.13.") and not hosted: return True
if rs("66.220."): return True
if rs("69.63."): return True
if rs("69.171.") and not rs("69.171.250."): return True
if rs("74.86."): return True
if rs("75.126."): return True
if r == "64.13.192.74": return True
if (
(rs("31.13.") and not hosted) or
(rs("66.220.")) or
(rs("69.63.")) or
(rs("69.171.") and not rs("69.171.250.")) or
(rs("74.86.")) or
(rs("75.126.")) or
(r == "64.13.192.74")
):
return True
# more non-facebook GFW stuff:
# 31.13.64.33
# 31.13.70.1
@ -31,6 +36,7 @@ def detect_gfw(r, ip, check):
return False
async def getaddrs(server, domain, opts):
from .ip_util import ipkey
from dns.asyncresolver import Resolver
@ -54,6 +60,7 @@ async def getaddrs(server, domain, opts):
return ["Timeout"]
return sorted(set(rr.address for rr in ans.rrset), key=ipkey)
def process_result(res, ip, check, opts: Options):
from .ips import is_bogon, blocks
from .util import right_now
@ -106,6 +113,7 @@ def process_result(res, ip, check, opts: Options):
execution=opts.execution,
)
async def try_ip(db, server_ip, checks, opts: Options):
from .util import make_pooler
from asyncio import sleep
@ -113,6 +121,7 @@ async def try_ip(db, server_ip, checks, opts: Options):
entries = []
success = True
def finisher(done, pending):
nonlocal success
for task in done:
@ -122,8 +131,8 @@ async def try_ip(db, server_ip, checks, opts: Options):
if not entry.success:
if opts.early_stopping and success: # only cancel once
for pend in pending:
#print("CANCEL", file=stderr)
# FIXME: this can still, somehow, cancel the main function.
# FIXME: this can still, somehow,
# cancel the main function.
pend.cancel()
success = False
@ -159,7 +168,6 @@ async def try_ip(db, server_ip, checks, opts: Options):
first_failure = None
assert len(entries) > 0
for entry in entries:
#print(entry, file=stderr)
if not entry.success:
first_failure = entry
break
@ -168,6 +176,7 @@ async def try_ip(db, server_ip, checks, opts: Options):
return server_ip, first_failure
return server_ip, None
async def main(db, filepath, checks, opts: Options):
from .ip_util import read_ips
from .util import make_pooler

View File

@ -1,5 +1,6 @@
import re
ipv4_pattern = re.compile("(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII)
ipv4_pattern = re.compile(r"(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII)
def read_ips(f):
# TODO: make async and more robust. (regex pls)
@ -12,6 +13,7 @@ def read_ips(f):
continue
yield ip
def addr_to_int(ip):
match = ipv4_pattern.fullmatch(ip)
assert match is not None, row
@ -20,6 +22,7 @@ def addr_to_int(ip):
numeric = segs[0] << 24 | segs[1] << 16 | segs[2] << 8 | segs[3]
return numeric
def ipkey(ip_string):
# this is more lenient than addr_to_int.
segs = [int(s) for s in ip_string.replace(":", ".").split(".")]

View File

@ -59,5 +59,6 @@ bogon_checks = [
"{}.".format(i) for i in range(224, 256)
]
def is_bogon(ip):
    """Return True when *ip* begins with one of the known bogon prefixes."""
    for prefix in bogon_checks:
        if ip.startswith(prefix):
            return True
    return False

View File

@ -106,7 +106,8 @@ BEFORE INSERT
ON Messages
BEGIN
SELECT CASE
WHEN NEW.RecordId NOTNULL AND NOT EXISTS(SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId)
WHEN NEW.RecordId NOTNULL AND NOT EXISTS(
SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId)
THEN raise(FAIL, "RecordId does not exist")
END;
END

View File

@ -1,6 +1,7 @@
from collections import namedtuple
from dataclasses import dataclass
@dataclass
class Options:
execution: object = None
@ -12,10 +13,11 @@ class Options:
domain_wait: float = 0.25
impatient: bool = False # reduce retries and times for timeouts
early_stopping: bool = True # stop at the first invalid domain (best with dry)
early_stopping: bool = True # stop at the first invalid domain
dry: bool = True # don't write anything to database
progress: bool = False
@dataclass
class Entry:
from datetime import datetime
@ -30,4 +32,5 @@ class Entry:
reason: str
execution: object
# a single probe: the category a domain belongs to ("kind") and the
# domain name to resolve against a candidate DNS server.
Check = namedtuple("Check", ("kind", "domain"))

View File

@ -1,12 +1,14 @@
from .util import AttrCheck
import storm.locals as rain
class TException(rain.Storm, AttrCheck):
    """Storm ORM row for the Exceptions table: a named DNS resolution
    outcome such as NXDOMAIN or Timeout."""
    __storm_table__ = "Exceptions"
    exception_id = rain.Int("ExceptionId", primary=True)  # primary key
    name = rain.Unicode("Name")
    # presumably marks whether this outcome counts as a failure — TODO confirm
    fail = rain.Bool("Fail")
class TExecution(rain.Storm, AttrCheck):
__storm_table__ = "Executions"
execution_id = rain.Int("ExecutionId", primary=True)
@ -14,6 +16,7 @@ class TExecution(rain.Storm, AttrCheck):
finish_date = rain.DateTime("FinishDate")
completed = rain.Bool("Completed")
class TAddress(rain.Storm, AttrCheck):
__storm_table__ = "Ips"
address_id = rain.Int("IpId", primary=True)
@ -25,6 +28,7 @@ class TAddress(rain.Storm, AttrCheck):
redirect_target = rain.Bool("RedirectTarget")
gfw_target = rain.Bool("GfwTarget")
class TKind(rain.Storm, AttrCheck):
__storm_table__ = "Kinds"
kind_id = rain.Int("KindId", primary=True)
@ -32,6 +36,7 @@ class TKind(rain.Storm, AttrCheck):
xxid = rain.Int("ExpectExceptionId")
exception = rain.Reference(xxid, "TException.exception_id")
class TDomain(rain.Storm, AttrCheck):
__storm_table__ = "Domains"
domain_id = rain.Int("DomainId", primary=True)
@ -39,6 +44,7 @@ class TDomain(rain.Storm, AttrCheck):
kind_id = rain.Int("KindId")
kind = rain.Reference(kind_id, "TKind.kind_id")
class TRecord(rain.Storm, AttrCheck):
__storm_table__ = "Records"
row_id = rain.Int("rowid", primary=True)
@ -46,6 +52,7 @@ class TRecord(rain.Storm, AttrCheck):
address_id = rain.Int("IpId")
address = rain.Reference(address_id, "TAddress.address_id")
class TMessage(rain.Storm, AttrCheck):
__storm_table__ = "Messages"
message_id = rain.Int("MessageId", primary=True)
@ -57,5 +64,4 @@ class TMessage(rain.Storm, AttrCheck):
execution = rain.Reference(execution_id, "TExecution.execution_id")
server = rain.Reference(server_id, "TAddress.address_id")
domain = rain.Reference(domain_id, "TDomain.domain_id")
#record = rain.Reference(record_id, "TRecord.record_id")
exception = rain.Reference(exception_id, "TException.exception_id")

View File

@ -3,12 +3,14 @@ csvfn_default = "top-1m.csv"
one_week = 7 * 24 * 60 * 60 # in seconds
def alive(fp, expiry):
    """Return True when the cache file at *fp* exists, is younger than
    *expiry* seconds, and is more than two bytes long."""
    from os.path import exists, getmtime, getsize
    from time import time
    if not exists(fp):
        return False
    fresh = time() < getmtime(fp) + expiry
    return fresh and getsize(fp) > 2
def download_top1m(urltop=None, csvfn=None):
from io import BytesIO
from urllib.request import urlopen
@ -30,6 +32,7 @@ def download_top1m(urltop=None, csvfn=None):
return uncomp.decode("utf-8")
def retrieve_top1m_entries(csv_fp="top-1m.csv"):
from sys import stderr

View File

@ -7,26 +7,20 @@ def ui(program, args):
import respodns.checks as chk
name = "respodns6"
parser = ArgumentParser(name,
description=name + ": test and log DNS records")
desc = name + ": test and log DNS records"
parser = ArgumentParser(name, description=desc)
# TODO: support multiple paths. nargs="+", iterate with pooling?
parser.add_argument(
"path", metavar="file-path",
help="a path to a file containing IPv4 addresses which host DNS servers")
desc = "a path to a file containing IPv4 addresses which host DNS servers"
parser.add_argument("path", metavar="file-path", help=desc)
parser.add_argument(
"--database",
help="specify database for logging")
parser.add_argument("--database", help="specify database for logging")
a = parser.parse_args(args)
checks = []
checks += chk.first
#checks += chk.new
checks += chk.likely
#checks += chk.unlikely
#checks += chk.top100
opts = Options()
opts.dry = a.database is None

View File

@ -4,10 +4,12 @@ for a, b, c, d in zip("anAN05", "mzMZ49", "naNA50", "zmZM94"):
for k, v in zip(range(ord(a), ord(b) + 1),
range(ord(c), ord(d) + 1))))
def right_now():
    """Return the current moment as a timezone-aware UTC datetime."""
    from datetime import datetime, timezone
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now
def nonsense_consistent(domain):
from random import Random
from string import ascii_lowercase
@ -16,12 +18,15 @@ def nonsense_consistent(domain):
length = rng.choices((9, 10, 11, 12), (4, 5, 3, 2))[0]
return "".join(rng.choice(ascii_lowercase) for i in range(length))
def rot13(s):
    """ROT13-encode *s*; characters without a mapping pass through unchanged."""
    shifted = (rot13_mapping.get(ch, ch) for ch in s)
    return "".join(shifted)
def concat_nonsense(domain):
    """Prepend a deterministic gibberish label to *domain*."""
    label = nonsense_consistent(domain)
    return label + "." + domain
def head(n, it):
res = []
try:
@ -31,6 +36,7 @@ def head(n, it):
pass
return res
def taskize(item):
from types import CoroutineType
from asyncio import Task, create_task
@ -40,12 +46,15 @@ def taskize(item):
item = create_task(item)
return item
def make_pooler(pool_size, finisher=None):
# TODO: write a less confusing interface that allows the code to be written more flatly.
# maybe like: async for done in apply(doit, [tuple_of_args]):
# TODO: write a less confusing interface
# that allows the code to be written more flatly.
# maybe like: async for done in apply(doit, [tuple_of_args]):
from asyncio import wait, FIRST_COMPLETED
pending = set()
async def pooler(item=None):
nonlocal pending
finish = item is None
@ -55,8 +64,10 @@ def make_pooler(pool_size, finisher=None):
while len(pending) > desired_size:
done, pending = await wait(pending, return_when=FIRST_COMPLETED)
finisher(done, pending)
return pooler
class AttrCheck:
"""
Inheriting AttrCheck prevents accidentally setting attributes