From bdb2a88f40428a73f7f92b652d80de9236582109 Mon Sep 17 00:00:00 2001 From: Connor Olding Date: Sat, 29 Aug 2020 15:34:46 +0200 Subject: [PATCH] abide by PEP 8 (passes pycodestyle) --- respodns/checks.py | 155 ++++++++++++++++++++++++++++---------------- respodns/db.py | 46 ++++++++----- respodns/dns.py | 41 +++++++----- respodns/ip_util.py | 5 +- respodns/ips.py | 1 + respodns/sql.py | 3 +- respodns/structs.py | 5 +- respodns/tables.py | 8 ++- respodns/top1m.py | 3 + respodns/ui.py | 16 ++--- respodns/util.py | 15 ++++- 11 files changed, 193 insertions(+), 105 deletions(-) diff --git a/respodns/checks.py b/respodns/checks.py index 571c6d9..9fa9f37 100644 --- a/respodns/checks.py +++ b/respodns/checks.py @@ -3,84 +3,127 @@ from .util import concat_nonsense, rot13, head from .structs import Check first = [ - Check("common", "baidu.com"), # this avoids issues with chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html + # this avoids issues with chinese censorship. see: + # https://www.bortzmeyer.org/sichuan-pepper.html + Check("common", "baidu.com"), ] new = [ # via dnsvalidator - Check("adtrack", "bet365.com"), - Check("common", "facebook.com"), - Check("common", "google.com"), - Check("common", "paypal.com"), - Check("common", "wikileaks.com"), - Check("news", "telegram.com"), + Check("adtrack", "bet365.com"), + Check("common", "facebook.com"), + Check("common", "google.com"), + Check("common", "paypal.com"), + Check("common", "wikileaks.com"), + Check("news", "telegram.com"), ] likely = [ - # these checks are, in practice, the most likely to weed out unwanted DNS servers. 
- Check("news", "huanqiu.com"), - Check("adware", rot13("nqf789.pbz")), - Check("shock", rot13("tbng.pk")), # some servers block shock sites, which isn't a terrible idea, but it's inaccurate - Check("porn", "pornhub.com"), # some servers block piracy and porn sites for being taboo, or whatever - Check("adtrack", "media.fastclick.net"), - Check("parking", "scmp.org"), # dns.watch fails here: domain parking is evil, but servers must abide - Check("usercontent","4chan.org"), # some servers block sites driven by their chaotic user-created content - Check("bad", concat_nonsense("com")), # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/ - Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")), # blogspot handles these strangely; DNS servers likewise + # these checks are, in practice, the most likely + # to weed out unwanted DNS servers. + Check("news", "huanqiu.com"), + Check("adware", rot13("nqf789.pbz")), + + # some servers block shock sites, which isn't a terrible idea, + # but it's inaccurate. + Check("shock", rot13("tbng.pk")), + + # some servers block piracy and porn sites for being taboo, or whatever. + Check("porn", "pornhub.com"), + Check("adtrack", "media.fastclick.net"), + + # dns.watch fails here: domain parking is evil, but servers must abide. + Check("parking", "scmp.org"), + + # some servers block sites driven by their chaotic user-created content. + Check("usercontent", "4chan.org"), + + # some servers like to redirect nonexistent domains. 
see: + # https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/ + Check("bad", concat_nonsense("com")), + + # blogspot handles these strangely; DNS servers likewise + Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")), + # NOTE: disabled for being wildly inconsistent: -# Check("weird", "archive.is"), # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/ - Check("common", "archive.org"), - Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"), # one of the WannaCry sinkholes, kinda important that it resolves - Check("common", "xn--b1aew.xn--p1ai"), # just to test internationalized domains - Check("common", "wikileaks.com"), - Check("uncommon", "cybre.space"), # also doubles as a check for the new TLDs - Check("piracy", "thehiddenbay.org"), # some servers block piracy and porn sites for being taboo, or whatever - Check("infosec", "scan.shadowserver.org"), # some servers block this: https://scan.shadowserver.org/dns/ - Check("common", "duckduckgo.com"), # a few servers block this for some reason? - Check("badsub", concat_nonsense("google.com")), # poisoning may yield an unwanted result here - Check("common", "en.wikipedia.org"), - Check("adtrack", "google-analytics.com"), - Check("adtrack", "ad.doubleclick.net"), + # Cloudflare fails here. see: + # https://jarv.is/notes/cloudflare-dns-archive-is-blocked/ + Check("weird", "archive.is"), + + Check("common", "archive.org"), + + # this is one of the WannaCry sinkholes, it's kinda important. + Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"), + + # try out internationalized domains. + Check("common", "xn--b1aew.xn--p1ai"), + + Check("common", "wikileaks.com"), + + # i suppose this doubles as a check for the new TLDs. 
+ Check("uncommon", "cybre.space"), + + # some servers block piracy and porn sites for being taboo, or whatever + Check("piracy", "thehiddenbay.org"), + + # some servers block this. see: + # https://scan.shadowserver.org/dns/ + Check("infosec", "scan.shadowserver.org"), + + # a few servers block this for some reason. + Check("common", "duckduckgo.com"), + + # DNS poisoning may yield an unwanted result here. + Check("badsub", concat_nonsense("google.com")), + + Check("common", "en.wikipedia.org"), + Check("adtrack", "google-analytics.com"), + Check("adtrack", "ad.doubleclick.net"), # baidu goes here...? - Check("common", "naver.com"), - Check("common", "google.com"), # surely a fully-functional server would resolve the most popular domain in existence + Check("common", "naver.com"), + + # surely a fully-functional server would resolve + # the most popular domain in existence, right? + Check("common", "google.com"), ] unlikely = [ - Check("piracy", "thepiratebay.org"), - Check("porn", "xvideos.com"), - Check("usercontent","imgur.com"), - Check("usercontent","twitter.com"), - Check("usercontent","weibo.com"), - Check("usercontent","github.com"), - Check("porn", "chaturbate.com"), - Check("video", "bilibili.com"), - Check("video", "twitch.tv"), - Check("common", "qq.com"), - Check("video", "netflix.com"), - Check("usercontent","reddit.com"), - Check("usercontent","facebook.com"), - Check("video", "youtube.com"), - Check("usercontent","tumblr.com"), - Check("usercontent","wordpress.com"), - Check("common", "tmall.com"), - Check("usercontent","instagram.com"), - Check("news", "nytimes.com"), - Check("usercontent","flickr.com"), - Check("common", "ebay.com"), - Check("news", "scmp.com"), - Check("common", "aliexpress.com"), - Check("common", "stackoverflow.com"), + Check("piracy", "thepiratebay.org"), + Check("porn", "xvideos.com"), + Check("usercontent", "imgur.com"), + Check("usercontent", "twitter.com"), + Check("usercontent", "weibo.com"), + Check("usercontent", 
"github.com"), + Check("porn", "chaturbate.com"), + Check("video", "bilibili.com"), + Check("video", "twitch.tv"), + Check("common", "qq.com"), + Check("video", "netflix.com"), + Check("usercontent", "reddit.com"), + Check("usercontent", "facebook.com"), + Check("video", "youtube.com"), + Check("usercontent", "tumblr.com"), + Check("usercontent", "wordpress.com"), + Check("common", "tmall.com"), + Check("usercontent", "instagram.com"), + Check("news", "nytimes.com"), + Check("usercontent", "flickr.com"), + Check("common", "ebay.com"), + Check("news", "scmp.com"), + Check("common", "aliexpress.com"), + Check("common", "stackoverflow.com"), ] defunct = [ "panda.tv", # imochen.github.io ] + def _top1m_gen(): return (Check("top", entry) for i, entry in retrieve_top1m_entries() if entry not in defunct) + top100 = head(100, _top1m_gen()) top1000 = head(1000, _top1m_gen()) diff --git a/respodns/db.py b/respodns/db.py index ebd3631..0b8f573 100644 --- a/respodns/db.py +++ b/respodns/db.py @@ -5,6 +5,7 @@ from .tables import TKind, TDomain, TRecord, TMessage from .ip_util import addr_to_int import storm.locals as rain + class Execution: def __init__(self, db): self.db = db @@ -20,16 +21,21 @@ class Execution: completed = exc_type is None self.db.finish_execution(self.execution, right_now(), completed) + +def is_column(ref): + return isinstance(ref, PropertyColumn) or isinstance(ref, rain.Reference) + + def apply_properties(obj, d): from storm.properties import PropertyColumn for k, v in d.items(): ref = getattr(obj.__class__, k) assert ref is not None, (type(obj), k) - assert isinstance(ref, PropertyColumn) or isinstance(ref, rain.Reference), \ - (type(obj), k) + assert is_column(ref), (type(obj), k) setattr(obj, k, v) return obj + class RespoDB: def __init__(self, uri, setup=False, create=False): self.uri = uri @@ -48,7 +54,7 @@ class RespoDB: self.setup_messages() for q in create_view_statements: - self._conn.execute(q, noresult=True) + self._fire(q) assert setup or 
create or db_exists, "database was never setup" self.execution = Execution(self) @@ -106,12 +112,16 @@ class RespoDB: assert self._conn is not None return self._conn.add(apply_properties(TMessage(), kwargs)) + def _fire(self, statement): + assert self._conn is not None + self._conn.execute(statement, noresult=True) + def setup_executions(self): - self._conn.execute(create_table_statements["executions"], noresult=True) + self._fire(create_table_statements["executions"]) def setup_exceptions(self): # careful not to call them "errors" since NXDOMAIN is not an error. - self._conn.execute(create_table_statements["exceptions"], noresult=True) + self._fire(create_table_statements["exceptions"]) # TODO: upsert? @@ -123,7 +133,7 @@ class RespoDB: def setup_ips(self): from .ips import china, blocks - self._conn.execute(create_table_statements["ips"], noresult=True) + self._fire(create_table_statements["ips"]) # TODO: upsert? @@ -135,22 +145,23 @@ class RespoDB: self.new_address(ip=addr_to_int(ip), block_target=True) def setup_kinds(self): - self._conn.execute(create_table_statements["kinds"], noresult=True) + self._fire(create_table_statements["kinds"]) # TODO: upsert? 
- #NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN") - #self.new_kind(name="bad", exception=NXDOMAIN) - #self.new_kind(name="badsub", exception=NXDOMAIN) + if 0: + NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN") + self.new_kind(name="bad", exception=NXDOMAIN) + self.new_kind(name="badsub", exception=NXDOMAIN) def setup_domains(self): - self._conn.execute(create_table_statements["domains"], noresult=True) + self._fire(create_table_statements["domains"]) def setup_records(self): - self._conn.execute(create_table_statements["records"], noresult=True) + self._fire(create_table_statements["records"]) def setup_messages(self): - self._conn.execute(create_table_statements["messages"], noresult=True) + self._fire(create_table_statements["messages"]) for trig in table_triggers["messages"]: self._conn.execute(trig) @@ -173,7 +184,8 @@ class RespoDB: def find_record_id(self, addresses): address_ids = list(address.address_id for address in addresses) - record_ids = list(self._conn.find(TRecord, TRecord.address_id.is_in(address_ids)).values(TRecord.record_id)) + temp = self._conn.find(TRecord, TRecord.address_id.is_in(address_ids)) + record_ids = list(temp.values(TRecord.record_id)) if not record_ids: return None unique_ids = sorted(set(record_ids)) @@ -188,7 +200,8 @@ class RespoDB: if not kind: kind = self.new_kind(name=entry.kind) if entry.kind.startswith("bad"): - exception = self.find_one(TException, TException.name == "NXDOMAIN") + exception = self.find_one(TException, + TException.name == "NXDOMAIN") assert exception is not None kind.exception = exception @@ -230,7 +243,8 @@ class RespoDB: server.server = True if entry.exception: - exception = self.find_one(TException, TException.name == entry.exception) + exception = self.find_one(TException, + TException.name == entry.exception) assert exception is not None else: exception = None diff --git a/respodns/dns.py b/respodns/dns.py index 7b2d1ab..b7e15cc 100644 --- a/respodns/dns.py +++ 
b/respodns/dns.py @@ -1,23 +1,28 @@ from .structs import Options + def detect_gfw(r, ip, check): # attempt to detect interference from the Great Firewall of China. - #from .ips import china - #if r in china: return True - # class D or class E, neither of which are correct for a (public?) DNS. - #if int(r.partition(".")[0]) >= 224: return True + def rs(prefix): + return r.startswith(prefix) + + def de(suffix): + return check.domain.endswith(suffix) - rs = lambda prefix: r.startswith(prefix) - de = lambda suffix: check.domain.endswith(suffix) hosted = de("facebook.com") or de("instagram.com") or de("whatsapp.com") - if rs("31.13.") and not hosted: return True - if rs("66.220."): return True - if rs("69.63."): return True - if rs("69.171.") and not rs("69.171.250."): return True - if rs("74.86."): return True - if rs("75.126."): return True - if r == "64.13.192.74": return True + + if ( + (rs("31.13.") and not hosted) or + (rs("66.220.")) or + (rs("69.63.")) or + (rs("69.171.") and not rs("69.171.250.")) or + (rs("74.86.")) or + (rs("75.126.")) or + (r == "64.13.192.74") + ): + return True + # more non-facebook GFW stuff: # 31.13.64.33 # 31.13.70.1 @@ -31,6 +36,7 @@ def detect_gfw(r, ip, check): return False + async def getaddrs(server, domain, opts): from .ip_util import ipkey from dns.asyncresolver import Resolver @@ -54,6 +60,7 @@ async def getaddrs(server, domain, opts): return ["Timeout"] return sorted(set(rr.address for rr in ans.rrset), key=ipkey) + def process_result(res, ip, check, opts: Options): from .ips import is_bogon, blocks from .util import right_now @@ -106,6 +113,7 @@ def process_result(res, ip, check, opts: Options): execution=opts.execution, ) + async def try_ip(db, server_ip, checks, opts: Options): from .util import make_pooler from asyncio import sleep @@ -113,6 +121,7 @@ async def try_ip(db, server_ip, checks, opts: Options): entries = [] success = True + def finisher(done, pending): nonlocal success for task in done: @@ -122,8 +131,8 @@ async 
def try_ip(db, server_ip, checks, opts: Options): if not entry.success: if opts.early_stopping and success: # only cancel once for pend in pending: - #print("CANCEL", file=stderr) - # FIXME: this can still, somehow, cancel the main function. + # FIXME: this can still, somehow, + # cancel the main function. pend.cancel() success = False @@ -159,7 +168,6 @@ async def try_ip(db, server_ip, checks, opts: Options): first_failure = None assert len(entries) > 0 for entry in entries: - #print(entry, file=stderr) if not entry.success: first_failure = entry break @@ -168,6 +176,7 @@ async def try_ip(db, server_ip, checks, opts: Options): return server_ip, first_failure return server_ip, None + async def main(db, filepath, checks, opts: Options): from .ip_util import read_ips from .util import make_pooler diff --git a/respodns/ip_util.py b/respodns/ip_util.py index 1318658..e3c8cef 100644 --- a/respodns/ip_util.py +++ b/respodns/ip_util.py @@ -1,5 +1,6 @@ import re -ipv4_pattern = re.compile("(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII) +ipv4_pattern = re.compile(r"(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII) + def read_ips(f): # TODO: make async and more robust. (regex pls) @@ -12,6 +13,7 @@ def read_ips(f): continue yield ip + def addr_to_int(ip): match = ipv4_pattern.fullmatch(ip) assert match is not None, row @@ -20,6 +22,7 @@ def addr_to_int(ip): numeric = segs[0] << 24 | segs[1] << 16 | segs[2] << 8 | segs[3] return numeric + def ipkey(ip_string): # this is more lenient than addr_to_int. 
segs = [int(s) for s in ip_string.replace(":", ".").split(".")] diff --git a/respodns/ips.py b/respodns/ips.py index baeee01..8d82df2 100644 --- a/respodns/ips.py +++ b/respodns/ips.py @@ -59,5 +59,6 @@ bogon_checks = [ "{}.".format(i) for i in range(224, 256) ] + def is_bogon(ip): return any(ip.startswith(check) for check in bogon_checks) diff --git a/respodns/sql.py b/respodns/sql.py index 76faaa2..438117c 100644 --- a/respodns/sql.py +++ b/respodns/sql.py @@ -106,7 +106,8 @@ BEFORE INSERT ON Messages BEGIN SELECT CASE - WHEN NEW.RecordId NOTNULL AND NOT EXISTS(SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId) + WHEN NEW.RecordId NOTNULL AND NOT EXISTS( + SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId) THEN raise(FAIL, "RecordId does not exist") END; END diff --git a/respodns/structs.py b/respodns/structs.py index 7ddf772..c467134 100644 --- a/respodns/structs.py +++ b/respodns/structs.py @@ -1,6 +1,7 @@ from collections import namedtuple from dataclasses import dataclass + @dataclass class Options: execution: object = None @@ -12,10 +13,11 @@ class Options: domain_wait: float = 0.25 impatient: bool = False # reduce retries and times for timeouts - early_stopping: bool = True # stop at the first invalid domain (best with dry) + early_stopping: bool = True # stop at the first invalid domain dry: bool = True # don't write anything to database progress: bool = False + @dataclass class Entry: from datetime import datetime @@ -30,4 +32,5 @@ class Entry: reason: str execution: object + Check = namedtuple("Check", ("kind", "domain")) diff --git a/respodns/tables.py b/respodns/tables.py index 595f11e..b8fab3d 100644 --- a/respodns/tables.py +++ b/respodns/tables.py @@ -1,12 +1,14 @@ from .util import AttrCheck import storm.locals as rain + class TException(rain.Storm, AttrCheck): __storm_table__ = "Exceptions" exception_id = rain.Int("ExceptionId", primary=True) name = rain.Unicode("Name") fail = rain.Bool("Fail") + class TExecution(rain.Storm, 
AttrCheck): __storm_table__ = "Executions" execution_id = rain.Int("ExecutionId", primary=True) @@ -14,6 +16,7 @@ class TExecution(rain.Storm, AttrCheck): finish_date = rain.DateTime("FinishDate") completed = rain.Bool("Completed") + class TAddress(rain.Storm, AttrCheck): __storm_table__ = "Ips" address_id = rain.Int("IpId", primary=True) @@ -25,6 +28,7 @@ class TAddress(rain.Storm, AttrCheck): redirect_target = rain.Bool("RedirectTarget") gfw_target = rain.Bool("GfwTarget") + class TKind(rain.Storm, AttrCheck): __storm_table__ = "Kinds" kind_id = rain.Int("KindId", primary=True) @@ -32,6 +36,7 @@ class TKind(rain.Storm, AttrCheck): xxid = rain.Int("ExpectExceptionId") exception = rain.Reference(xxid, "TException.exception_id") + class TDomain(rain.Storm, AttrCheck): __storm_table__ = "Domains" domain_id = rain.Int("DomainId", primary=True) @@ -39,6 +44,7 @@ class TDomain(rain.Storm, AttrCheck): kind_id = rain.Int("KindId") kind = rain.Reference(kind_id, "TKind.kind_id") + class TRecord(rain.Storm, AttrCheck): __storm_table__ = "Records" row_id = rain.Int("rowid", primary=True) @@ -46,6 +52,7 @@ class TRecord(rain.Storm, AttrCheck): address_id = rain.Int("IpId") address = rain.Reference(address_id, "TAddress.address_id") + class TMessage(rain.Storm, AttrCheck): __storm_table__ = "Messages" message_id = rain.Int("MessageId", primary=True) @@ -57,5 +64,4 @@ class TMessage(rain.Storm, AttrCheck): execution = rain.Reference(execution_id, "TExecution.execution_id") server = rain.Reference(server_id, "TAddress.address_id") domain = rain.Reference(domain_id, "TDomain.domain_id") - #record = rain.Reference(record_id, "TRecord.record_id") exception = rain.Reference(exception_id, "TException.exception_id") diff --git a/respodns/top1m.py b/respodns/top1m.py index ac30a49..6a0db05 100644 --- a/respodns/top1m.py +++ b/respodns/top1m.py @@ -3,12 +3,14 @@ csvfn_default = "top-1m.csv" one_week = 7 * 24 * 60 * 60 # in seconds + def alive(fp, expiry): from os.path import exists, 
getmtime, getsize from time import time return exists(fp) and time() < getmtime(fp) + expiry and getsize(fp) > 2 + def download_top1m(urltop=None, csvfn=None): from io import BytesIO from urllib.request import urlopen @@ -30,6 +32,7 @@ def download_top1m(urltop=None, csvfn=None): return uncomp.decode("utf-8") + def retrieve_top1m_entries(csv_fp="top-1m.csv"): from sys import stderr diff --git a/respodns/ui.py b/respodns/ui.py index 598fa2c..64521e9 100644 --- a/respodns/ui.py +++ b/respodns/ui.py @@ -7,26 +7,20 @@ def ui(program, args): import respodns.checks as chk name = "respodns6" - parser = ArgumentParser(name, - description=name + ": test and log DNS records") + desc = name + ": test and log DNS records" + parser = ArgumentParser(name, description=desc) # TODO: support multiple paths. nargs="+", iterate with pooling? - parser.add_argument( - "path", metavar="file-path", - help="a path to a file containing IPv4 addresses which host DNS servers") + desc = "a path to a file containing IPv4 addresses which host DNS servers" + parser.add_argument("path", metavar="file-path", help=desc) - parser.add_argument( - "--database", - help="specify database for logging") + parser.add_argument("--database", help="specify database for logging") a = parser.parse_args(args) checks = [] checks += chk.first - #checks += chk.new checks += chk.likely - #checks += chk.unlikely - #checks += chk.top100 opts = Options() opts.dry = a.database is None diff --git a/respodns/util.py b/respodns/util.py index 14f1693..6e53ccb 100644 --- a/respodns/util.py +++ b/respodns/util.py @@ -4,10 +4,12 @@ for a, b, c, d in zip("anAN05", "mzMZ49", "naNA50", "zmZM94"): for k, v in zip(range(ord(a), ord(b) + 1), range(ord(c), ord(d) + 1)))) + def right_now(): from datetime import datetime, timezone return datetime.now(timezone.utc) + def nonsense_consistent(domain): from random import Random from string import ascii_lowercase @@ -16,12 +18,15 @@ def nonsense_consistent(domain): length = rng.choices((9, 
10, 11, 12), (4, 5, 3, 2))[0] return "".join(rng.choice(ascii_lowercase) for i in range(length)) + def rot13(s): return "".join(rot13_mapping.get(c, c) for c in s) + def concat_nonsense(domain): return nonsense_consistent(domain) + "." + domain + def head(n, it): res = [] try: @@ -31,6 +36,7 @@ def head(n, it): pass return res + def taskize(item): from types import CoroutineType from asyncio import Task, create_task @@ -40,12 +46,15 @@ def taskize(item): item = create_task(item) return item + def make_pooler(pool_size, finisher=None): - # TODO: write a less confusing interface that allows the code to be written more flatly. - # maybe like: async for done in apply(doit, [tuple_of_args]): + # TODO: write a less confusing interface + # that allows the code to be written more flatly. + # maybe like: async for done in apply(doit, [tuple_of_args]): from asyncio import wait, FIRST_COMPLETED pending = set() + async def pooler(item=None): nonlocal pending finish = item is None @@ -55,8 +64,10 @@ def make_pooler(pool_size, finisher=None): while len(pending) > desired_size: done, pending = await wait(pending, return_when=FIRST_COMPLETED) finisher(done, pending) + return pooler + class AttrCheck: """ Inheriting AttrCheck prevents accidentally setting attributes