336 lines
9.8 KiB
Python
336 lines
9.8 KiB
Python
from .structs import Options
|
|
from sys import stdin, stdout, stderr
|
|
|
|
|
|
def lament(*args, **kwargs):
    """Print to standard error; accepts the same arguments as print()."""
    print(*args, **kwargs, file=stderr)
|
|
|
|
|
|
def detect_gfw(r, ip, check):
    """Attempt to detect interference from the Great Firewall of China.

    `r` is one address string from a DNS answer and `check.domain` is the
    domain that was queried. `ip` is accepted but not used here.

    Returns True when `r` matches one of the known-suspicious prefixes
    or is listed in `gfw_ips`, and False otherwise.
    """
    from .ips import gfw_ips

    # domains for which a 31.13.x.x answer is considered legitimate.
    expected_hosts = ("facebook.com", "instagram.com", "whatsapp.com")
    hosted = check.domain.endswith(expected_hosts)

    in_31_13 = r.startswith("31.13.")
    if in_31_13 and not hosted:
        return True

    # anything in 69.171.x.x is suspicious, except for 69.171.250.x.
    if r.startswith("69.171.") and not r.startswith("69.171.250."):
        return True

    if r.startswith("108.160."):
        return True

    return r in gfw_ips
|
|
|
|
|
|
async def getaddrs(server, domain, impatient=False, context=None):
    """Ask `server` for the A records of `domain`.

    `server` is used as the resolver's only nameserver. When `context`
    is given, the query is performed inside `async with context`.

    Returns the distinct answer addresses sorted with `ipkey`. When the
    lookup fails, returns a one-element list naming the failure:
    "NXDOMAIN", "NoAnswer", "NoNameservers", or "Timeout".
    """
    from .ip_util import ipkey
    from dns.asyncresolver import Resolver
    from dns.exception import Timeout
    from dns.resolver import NXDOMAIN, NoAnswer, NoNameservers

    resolver = Resolver(configure=False)
    # NOTE(review): the impatient timeout is larger than its lifetime;
    # confirm that this is intended.
    resolver.timeout, resolver.lifetime = (5, 2) if impatient else (3, 9)
    resolver.nameservers = [server]

    async def query():
        return await resolver.resolve(domain, "A", search=False)

    try:
        if context is None:
            answer = await query()
        else:
            async with context:
                answer = await query()
    except NXDOMAIN:
        return ["NXDOMAIN"]
    except NoAnswer:
        return ["NoAnswer"]
    except NoNameservers:
        return ["NoNameservers"]
    except Timeout:
        return ["Timeout"]

    unique = {record.address for record in answer.rrset}
    return sorted(unique, key=ipkey)
|
|
|
|
|
|
def process_result(res, ip, check, opts: Options):
    """Classify the answer a server gave for one check and wrap it in an Entry.

    `res` is the non-empty list returned by getaddrs: either addresses
    or a single failure name. `ip` is the server that was queried.

    The resulting reason is one of "timeout", "okay", "redirect",
    "block", "missing", or "gfw". The entry counts as a success when the
    reason is "okay" or the check's kind is "ignore".
    """
    from .ips import is_bogon, is_block_target
    from .util import right_now
    from .structs import Entry

    def looks_like_address(r):
        # failure names such as "NXDOMAIN" never begin with a digit.
        return len(r) > 0 and r[0].isdigit()

    # TODO: get more accurate times by inserting start-end into getaddrs.
    now = right_now()
    assert len(res) > 0

    addrs = [r for r in res if looks_like_address(r)]

    def classify():
        if "Timeout" in res:
            return "timeout"
        if check.kind.startswith("bad"):
            return "okay" if "NXDOMAIN" in res else "redirect"
        if any(r == ip for r in res):
            # server returned its own IP, must be a block, unless
            # one of our checks actually contains a domain that also hosts a DNS.
            return "block"
        if any(is_bogon(r) or is_block_target(r) for r in res):
            return "block"
        if not addrs:
            # TODO: check for no alias on common.
            return "missing"
        if any(detect_gfw(r, ip, check) for r in addrs):
            return "gfw"
        return "okay"

    reason = classify()
    assert reason is not None, (res, ip, check)

    # with no usable addresses, the first item names what went wrong.
    exception = None if addrs else res[0]
    succeeded = reason == "okay" or check.kind == "ignore"

    return Entry(
        date=now,
        success=succeeded,
        server=ip,
        kind=check.kind,
        domain=check.domain,
        exception=exception,
        addrs=addrs,
        reason=reason,
        execution=opts.execution,
    )
|
|
|
|
|
|
def find_failure(entries):
    """Return the first entry in `entries` whose `success` is falsy.

    `entries` must be non-empty and must contain at least one failure;
    both conditions are checked with assertions.
    """
    assert len(entries) > 0
    first_failed = next((e for e in entries if not e.success), None)
    assert first_failed is not None, ("no failures found:", entries)
    return first_failed
|
|
|
|
|
|
async def try_ip(db, server_ip, checks, context, opts: Options, callback=None):
    """Run every check against one server and record the results.

    The first check runs alone; the remaining checks run as tasks, with
    at most `opts.domain_simul` holding the semaphore at a time and
    `opts.domain_wait` seconds between task launches. `context` can be
    None. `callback`, when given, is called with each address answered.

    Unless `opts.dry` is set, every entry is pushed to `db` and
    committed. Returns None when all checks succeeded, otherwise the
    first failed entry found in the collected results.
    """
    from asyncio import BoundedSemaphore, CancelledError, create_task, sleep

    slots = BoundedSemaphore(opts.domain_simul)
    entries, tasks = [], []
    success = True

    def cancel_unfinished():
        for pending in tasks:
            if not (pending.done() or pending.cancelled()):
                pending.cancel()

    async def run_check(check):
        nonlocal success
        answers = await getaddrs(server_ip, check.domain, opts.impatient, context)
        entry = process_result(answers, server_ip, check, opts)
        if callback is not None:
            for addr in entry.addrs:
                callback(addr)
        entries.append(entry)
        if entry.success:
            return
        if opts.early_stopping and success:  # only cancel once
            cancel_unfinished()
        success = False

    async def run_check_in_slot(check):
        # the slot was acquired by the launching loop; hand it back here.
        try:
            await run_check(check)
        finally:
            slots.release()

    # limit to one connection until the first check completes.
    await run_check(checks[0])

    for check in checks[1:]:
        if tasks:
            await sleep(opts.domain_wait)
        # acquire now instead of within the task so
        # a ton of tasks aren't created all at once.
        await slots.acquire()
        if not success:
            break
        tasks.append(create_task(run_check_in_slot(check)))

    for task in tasks:
        try:
            await task
        except CancelledError:
            pass

    if not opts.dry:
        for entry in entries:
            db.push_entry(entry)
        db.commit()

    if success:
        return None
    return find_failure(entries)
|
|
|
|
|
|
async def try_all_ips(db, try_me, checks, context, opts: Options, callback=None):
    """Consume `(ip, total)` items from `try_me` and test each server.

    A None item ends the input. At most `opts.ip_simul` servers hold the
    semaphore at a time, with `opts.ip_wait` seconds between task
    launches. `context` can be None.

    Servers that pass every check are printed to stdout. In dry mode,
    failing servers are printed as well, along with the failure.
    """
    from asyncio import BoundedSemaphore, create_task, sleep

    seen, total = 0, None
    slots = BoundedSemaphore(opts.ip_simul)

    def report_failure(ip, ff):
        fields = [ip, ff.reason, ff.kind]
        # don't print sketchy domains to console in case they're clicked.
        if ff.kind not in ("shock", "adware"):
            fields.append(ff.domain)
        print(*fields, sep="\t")

    async def check_server(ip):
        nonlocal seen
        seen += 1
        if opts.progress:
            lament(f"#{seen}: {ip}" if total is None else f"#{seen}/{total}: {ip}")
            stderr.flush()

        first_failure = await try_ip(db, ip, checks, context, opts, callback)

        if first_failure is None:
            print(ip)  # all tests for this server passed; pass it along to stdout
        elif opts.dry:  # don't save the error anywhere; pass it along to stdout
            report_failure(ip, first_failure)

    async def check_server_in_slot(ip):
        # the slot was acquired by the launching loop; hand it back here.
        try:
            await check_server(ip)
        finally:
            slots.release()

    tasks = []
    while True:
        item = await try_me.get()
        if item is None:
            break
        if tasks:
            await sleep(opts.ip_wait)
        ip, total = item
        # acquire now instead of within the task so
        # a ton of tasks aren't created all at once.
        await slots.acquire()
        tasks.append(create_task(check_server_in_slot(ip)))

    for task in tasks:
        await task
|
|
|
|
|
|
def sync_database(db, callback=None):
    """Flag the bundled `china` and `blocks` addresses in the database.

    Each address is marked with `china=True` or `block_target=True`
    through `db.modify_address` (skipped when `db` is None), and then
    passed to `callback` when one is given.
    """
    # NOTE: this no longer takes Options.
    # NOTE: this is no longer async.
    from .ips import china, blocks

    # TODO: handle addresses that were removed from respodns.ips.china.
    #       i could probably just do ip.startswith("- ") and remove those.
    flagged = (
        (china, {"china": True}),
        (blocks, {"block_target": True}),
    )
    for ips, flags in flagged:
        for ip in ips:
            if db is not None:
                db.modify_address(ip, **flags)
            if callback is not None:
                callback(ip)
|
|
|
|
|
|
async def locate_ips(db, locate_me, ipinfo):
|
|
# NOTE: this no longer takes Options.
|
|
from time import time
|
|
seen = set()
|
|
last_save = time()
|
|
while (ip := await locate_me.get()) is not None:
|
|
if ipinfo is not None and ip not in seen:
|
|
seen.add(ip)
|
|
code = await ipinfo.find_country(ip)
|
|
if db is not None:
|
|
db.modify_address(ip, country_code=code)
|
|
if time() >= last_save + 10.0: # only flush occasionally
|
|
ipinfo.flush()
|
|
last_save = time()
|
|
ipinfo.flush()
|
|
|
|
|
|
async def read_all_ips(filepaths, blocking=False, callback=None):
    """Read addresses from each path and pass them to `callback`.

    An empty string in `filepaths` stands for standard input. `callback`
    is required and is awaited once per address.

    In blocking mode, files are read one after another with `read_ips`
    and the callback receives just the address. Otherwise, everything is
    read through `IpReader` and the callback receives the address along
    with the reader's current `total`.
    """
    assert callback is not None, "that doesn't make sense!"
    if blocking:
        from .ip_util import read_ips

        for filepath in filepaths:
            f = stdin if filepath == "" else open(filepath, "r")
            try:
                for ip in read_ips(f):
                    await callback(ip)
            finally:
                # close what we opened even when reading or the callback
                # fails, but never close stdin.
                if f is not stdin:
                    f.close()
    else:
        from .ip_util import IpReader

        fps = [stdin if fp == "" else fp for fp in filepaths]
        with IpReader(*fps) as reader:
            async for ip in reader:
                await callback(ip, reader.total)
|
|
|
|
|
|
async def main(db, filepaths, checks, ipinfo, opts: Options):
    """Run the whole pipeline: read addresses, test them, locate them.

    Three tasks are connected by queues: reading feeds `try_me`, trying
    feeds `locate_me`, and locating consumes it. Each queue is ended by
    putting None once its producer has finished.

    When no addresses were read and a database is available, every
    address already in the database is queued for locating instead.
    `ipinfo` can be None.
    """
    from .util import RateLimiter
    from asyncio import Queue, QueueFull, create_task
    from queue import SimpleQueue

    try_me = Queue()
    locate_me = Queue()
    # holds addresses that could not be queued immediately.
    deferred = SimpleQueue()

    pps = opts.packets_per_second
    context = None
    if pps > 0:
        context = RateLimiter(pps)

    def locate_later(ip):
        try:
            locate_me.put_nowait(ip)
        except QueueFull:
            deferred.put(ip)

    seen = 0

    async def try_soon(ip, total=None):
        nonlocal seen
        seen += 1
        await try_me.put((ip, total))

    sync_database(db, callback=locate_later)

    reader = read_all_ips(filepaths, opts.blocking_file_io, callback=try_soon)
    reading = create_task(reader)
    tester = try_all_ips(db, try_me, checks, context, opts, callback=locate_later)
    trying = create_task(tester)
    locating = create_task(locate_ips(db, locate_me, ipinfo))

    # these tasks feed each other with queues, so order them as such:
    # reading -> trying -> locating

    await reading

    nothing_read = seen == 0
    if nothing_read and db is not None:
        # no IPs were provided. refresh all the country codes instead.
        all_ips = db.all_ips()
        for i, ip in enumerate(all_ips):
            if opts.progress:
                lament(f"#{i + 1}/{len(all_ips)}: {ip}")
            await locate_me.put(ip)

    # no more servers to try.
    await try_me.put(None)
    await trying

    while not deferred.empty():
        await locate_me.put(deferred.get())

    # no more addresses to locate.
    await locate_me.put(None)
    await locating
|