from .structs import Options
from sys import stdin, stdout, stderr


def lament(*args, **kwargs):
    """Print to stderr; accepts the same arguments as ``print``."""
    print(*args, file=stderr, **kwargs)


def _looks_like_address(r):
    """Return True when a result string starts with a digit.

    Results from ``getaddrs`` are either addresses or failure names such as
    "NXDOMAIN"; a leading digit is how the two are told apart here.
    """
    return len(r) > 0 and r[0].isdigit()


def detect_gfw(r, ip, check):
    """Attempt to detect interference from the Great Firewall of China.

    r is one address (a string) returned for ``check.domain``.
    ip is accepted for signature compatibility and is not used.
    Returns True when r matches one of the suspicious prefixes below
    or is listed in ``gfw_ips``.
    """
    from .ips import gfw_ips

    def rs(prefix):
        return r.startswith(prefix)

    # 31.13.* is only accepted as an answer for these domains.
    hosted = check.domain.endswith(
        ("facebook.com", "instagram.com", "whatsapp.com")
    )

    if rs("31.13.") and not hosted:
        return True
    if rs("69.171.") and not rs("69.171.250."):
        return True
    if rs("108.160."):
        return True
    return r in gfw_ips


async def getaddrs(server, domain, opts, context=None):
    """Resolve the A records of domain using only the nameserver at server.

    opts.impatient selects the resolver's timeout/lifetime pair.
    context, when not None, is entered with ``async with`` around the query.

    Returns a list of unique addresses sorted by ``ipkey``, or a
    single-element list naming the failure: "NXDOMAIN", "NoAnswer",
    "NoNameservers" or "Timeout".
    """
    from .ip_util import ipkey
    from dns.asyncresolver import Resolver
    from dns.exception import Timeout
    from dns.resolver import NXDOMAIN, NoAnswer, NoNameservers

    res = Resolver(configure=False)
    if opts.impatient:
        # NOTE(review): timeout (5) exceeds lifetime (2) here; confirm intended.
        res.timeout = 5
        res.lifetime = 2
    else:
        res.timeout = 3
        res.lifetime = 9
    res.nameservers = [server]

    try:
        if context is not None:
            async with context:
                ans = await res.resolve(domain, "A", search=False)
        else:
            ans = await res.resolve(domain, "A", search=False)
    except NXDOMAIN:
        return ["NXDOMAIN"]
    except NoAnswer:
        return ["NoAnswer"]
    except NoNameservers:
        return ["NoNameservers"]
    except Timeout:
        return ["Timeout"]

    return sorted({rr.address for rr in ans.rrset}, key=ipkey)


def process_result(res, ip, check, opts: Options):
    """Classify one resolution result and wrap it in an ``Entry``.

    res is the non-empty list returned by ``getaddrs``, ip is the server
    that was queried, and check supplies ``kind`` and ``domain``.

    The reason is one of "timeout", "okay", "redirect", "block", "missing"
    or "gfw". The entry counts as a success when the reason is "okay" or
    the check's kind is "ignore".
    """
    from .ips import is_bogon, is_block_target
    from .util import right_now
    from .structs import Entry

    # TODO: get more accurate times by inserting start-end into getaddrs.
    now = right_now()
    assert len(res) > 0
    reason = None

    if "Timeout" in res:
        reason = "timeout"

    elif check.kind.startswith("bad"):
        # "bad" checks are expected not to resolve at all.
        reason = "okay" if "NXDOMAIN" in res else "redirect"

    elif any(r == ip for r in res):
        # server returned its own IP, must be a block, unless
        # one of our checks actually contains a domain that also hosts a DNS.
        reason = "block"

    elif any(is_bogon(r) or is_block_target(r) for r in res):
        reason = "block"

    elif not any(_looks_like_address(r) for r in res):
        # TODO: check for no alias on common.
        reason = "missing"

    else:
        candidates = (r for r in res if _looks_like_address(r))
        tampered = any(detect_gfw(r, ip, check) for r in candidates)
        reason = "gfw" if tampered else "okay"

    assert reason is not None, (res, ip, check)

    addrs = [r for r in res if _looks_like_address(r)]
    # with no addresses, the first result is the failure name.
    exception = res[0] if not addrs else None

    return Entry(
        date=now,
        success=reason == "okay" or check.kind == "ignore",
        server=ip,
        kind=check.kind,
        domain=check.domain,
        exception=exception,
        addrs=addrs,
        reason=reason,
        execution=opts.execution,
    )


async def try_ip(db, server_ip, checks, context, opts: Options, callback=None):
    """Run every check against one server and record the entries.

    context can be None; it is passed through to ``getaddrs``.
    callback, when given, is called with each address found in a result.
    Unless opts.dry is set, all entries are pushed to db and committed.

    Returns the first failing entry, or None when every check succeeded.
    """
    from .util import make_pooler
    from asyncio import sleep, CancelledError

    entries = []
    success = True

    def finisher(done, pending):
        nonlocal success
        for task in done:
            try:
                res, ip, check = task.result()
            except CancelledError:
                success = False
                break
            entry = process_result(res, ip, check, opts)
            if callback is not None:
                for addr in entry.addrs:
                    callback(addr)
            entries.append(entry)
            if not entry.success:
                if opts.early_stopping and success:  # only cancel once
                    for pend in pending:
                        # FIXME: this can still, somehow,
                        #        cancel the main function.
                        pend.cancel()
                success = False

    pooler = make_pooler(opts.domain_simul, finisher)

    async def getaddrs_wrapper(ip, check):
        # NOTE: could put right_now() stuff here!
        # TODO: add duration field given in milliseconds (integer)
        #       by subtracting start and end datetimes.
        res = await getaddrs(ip, check.domain, opts, context)
        return res, ip, check

    #lament("TESTING", server_ip)
    for i, check in enumerate(checks):
        first = i == 0
        if not first:
            await sleep(opts.domain_wait)
        await pooler(getaddrs_wrapper(server_ip, check))
        if first:
            # limit to one connection for the first check.
            await pooler()
        if not success and (opts.early_stopping or first):
            break
    else:
        # the loop ran to completion: call the pooler once more with no task
        # (presumably this waits for whatever is still pending).
        await pooler()

    if not opts.dry:
        for entry in entries:
            db.push_entry(entry)
        db.commit()
    #lament("TESTED", server_ip)

    if success:
        return None

    assert len(entries) > 0
    first_failure = next((e for e in entries if not e.success), None)
    assert first_failure is not None, ("no failures found:", entries)
    return first_failure


async def try_all_ips(db, try_me, checks, context, opts: Options, callback=None):
    """Consume (ip, total) pairs from the try_me queue and test each server.

    context can be None. A None item on the queue ends the loop.
    At most opts.ip_simul servers are tested concurrently, and
    opts.ip_wait seconds are slept between starting consecutive servers.

    Servers that pass every check are printed to stdout. In dry mode,
    failing servers are printed too, along with the failure's details.
    """
    from asyncio import create_task, sleep, BoundedSemaphore

    seen, total = 0, None

    sem = BoundedSemaphore(opts.ip_simul)

    async def _process(ip):
        nonlocal seen
        seen += 1
        if opts.progress:
            lament(f"#{seen}: {ip}" if total is None else f"#{seen}/{total}: {ip}")
            stderr.flush()

        first_failure = await try_ip(db, ip, checks, context, opts, callback)

        if first_failure is None:
            # all tests for this server passed; pass it along to stdout
            print(ip)
        elif opts.dry:
            # don't save the error anywhere; pass it along to stdout
            ff = first_failure
            if ff.kind in ("shock", "adware"):
                # don't print sketchy domains to console in case they're clicked.
                print(ip, ff.reason, ff.kind, sep="\t")
            else:
                print(ip, ff.reason, ff.kind, ff.domain, sep="\t")

    async def process(ip):
        try:
            await _process(ip)
        finally:
            # the semaphore was acquired by the loop below, before this task.
            sem.release()

    tasks = []
    while (res := await try_me.get()) is not None:
        #lament("TRYING", ip)
        if tasks:
            await sleep(opts.ip_wait)
        ip, total = res
        # acquire now instead of within the task so
        # a ton of tasks aren't created all at once.
        await sem.acquire()
        tasks.append(create_task(process(ip)))

    for task in tasks:
        await task
    #lament("EXITING try_all_ips")


def sync_database(db, callback=None):
    """Flag the addresses from ``ips.china`` and ``ips.blocks`` in the database.

    db can be None, in which case nothing is written.
    callback, when given, is called with every address visited.
    """
    # NOTE: this no longer takes Options.
    # NOTE: this is no longer async.
    from .ips import china, blocks

    # TODO: handle addresses that were removed from respodns.ips.china.
    #       i could probably just do ip.startswith("- ") and remove those.

    for ips, kw in ((china, "china"), (blocks, "block_target")):
        for ip in ips:
            if db is not None:
                db.modify_address(ip, **{kw: True})
            if callback is not None:
                callback(ip)


async def locate_ips(db, locate_me, ipinfo):
    """Look up the country code of every new address on the locate_me queue.

    A None item on the queue ends the loop. Each address is looked up
    at most once. db can be None, in which case codes are not stored.
    ipinfo can be None, in which case the queue is simply drained.
    """
    # NOTE: this no longer takes Options.
    from time import time

    seen = set()
    last_save = time()
    while (ip := await locate_me.get()) is not None:
        if ipinfo is not None and ip not in seen:
            #lament("LOCATE", ip)
            seen.add(ip)
            code = await ipinfo.find_country(ip)
            if db is not None:
                db.modify_address(ip, country_code=code)
            if time() >= last_save + 10.0:  # only flush occasionally
                #lament("FLUSH", time() - last_save)
                ipinfo.flush()
                last_save = time()

    # main() allows ipinfo to be None, so there may be nothing to flush.
    if ipinfo is not None:
        ipinfo.flush()
    #lament("EXITING locate_ips")


async def read_all_ips(filepaths, blocking=False, callback=None):
    """Read addresses from each file path and await callback for each one.

    An empty string as a path means stdin.
    With blocking=True, callback is awaited as ``callback(ip)``;
    otherwise it is awaited as ``callback(ip, reader.total)``.
    callback is required despite its default of None.
    """
    assert callback is not None, "that doesn't make sense!"

    if blocking:
        from .ip_util import read_ips

        for filepath in filepaths:
            f = stdin if filepath == "" else open(filepath, "r")
            try:
                for ip in read_ips(f):
                    #lament("READ", ip)
                    await callback(ip)
            finally:
                # close files we opened even when reading or the callback
                # fails, but never close stdin.
                if f is not stdin:
                    f.close()
    else:
        from .ip_util import IpReader

        fps = [stdin if fp == "" else fp for fp in filepaths]
        with IpReader(*fps) as reader:
            async for ip in reader:
                #lament("READ", ip)
                await callback(ip, reader.total)
    #lament("EXITING read_all_ips")


async def main(db, filepaths, checks, ipinfo, opts: Options):
    """Read servers from filepaths, test them, and locate discovered addresses.

    ipinfo can be None. Three tasks are chained together with queues:
    reading -> trying -> locating. When no addresses were read at all,
    every address already in the database is queued for locating instead.
    """
    from .util import LimitPerSecond
    from asyncio import Queue, QueueFull, create_task
    from queue import SimpleQueue

    deferred = SimpleQueue()
    locate_me = Queue()
    try_me = Queue()

    pps = opts.packets_per_second
    context = LimitPerSecond(pps) if pps > 0 else None

    def locate_later(ip):
        # synchronous callers can't await the queue, so stash any overflow.
        try:
            locate_me.put_nowait(ip)
        except QueueFull:
            deferred.put(ip)

    seen = 0

    async def try_soon(ip, total=None):
        nonlocal seen
        seen += 1
        await try_me.put((ip, total))

    sync_database(db, callback=locate_later)

    reading = create_task(
        read_all_ips(filepaths, opts.blocking_file_io, callback=try_soon)
    )
    trying = create_task(
        try_all_ips(db, try_me, checks, context, opts, callback=locate_later)
    )
    locating = create_task(locate_ips(db, locate_me, ipinfo))

    # these tasks feed each other with queues, so order them as such:
    # reading -> trying -> locating

    #lament("AWAIT reading")
    await reading
    #lament("AWAITED reading")

    # db is treated as optional elsewhere in this file, so guard it here too.
    if seen == 0 and db is not None:
        #lament("UPDATING country codes")
        # no IPs were provided. refresh all the country codes instead.
        all_ips = db.all_ips()
        for i, ip in enumerate(all_ips):
            if opts.progress:
                lament(f"#{i + 1}/{len(all_ips)}: {ip}")
            await locate_me.put(ip)

    await try_me.put(None)

    #lament("AWAIT trying")
    await trying
    #lament("AWAITED trying")

    while not deferred.empty():
        await locate_me.put(deferred.get())

    #lament("STOPPING locating")
    await locate_me.put(None)
    #lament("AWAIT locating")
    await locating
    #lament("AWAITED locating")

    if context is not None and hasattr(context, "finish"):
        await context.finish()