This commit is contained in:
Connor Olding 2020-08-29 10:16:06 +02:00
commit 3afeb5a3fd
15 changed files with 1084 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*.csv
*.db
__pycache__/

13
LICENSE Normal file
View File

@ -0,0 +1,13 @@
Copyright (c) 2020, Connor Olding
Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

1
README.md Normal file
View File

@ -0,0 +1 @@
へい

2
requirements.txt Normal file
View File

@ -0,0 +1,2 @@
dnspython >= 2.1.0
storm

0
respodns/__init__.py Normal file
View File

208
respodns/__main__.py Normal file
View File

@ -0,0 +1,208 @@
#!/usr/bin/env python3
from argparse import ArgumentParser
from asyncio import run, sleep
from .checks import (first_checks, new_checks,
likely_checks, unlikely_checks, top100)
from .db import RespoDB
from .ips import blocks, is_bogon
from .pooler import make_simple_pooler
from .structs import Options, Entry
from .util import right_now, read_ips, getaddrs, detect_gfw
from sys import argv, stdin, stderr, exit
def process_result(res, ip, check, opts: Options):
    """Classify a raw getaddrs() result into an Entry.

    *res* is either a list of address strings or a single-element list
    naming a resolver exception (e.g. ["Timeout"], ["NXDOMAIN"]).
    """
    # TODO: get more accurate times by inserting start-end into getaddrs.
    now = right_now()
    assert len(res) > 0

    def is_address(item):
        # exception names start with a letter; addresses with a digit.
        return len(item) > 0 and item[0].isdigit()

    addrs = [item for item in res if is_address(item)]

    reason = None
    if "Timeout" in res:
        reason = "timeout"
    elif check.kind.startswith("bad"):
        # "bad"-kind domains are nonsense and *should* fail to resolve.
        reason = "okay" if "NXDOMAIN" in res else "redirect"
    elif any(is_bogon(addr) for addr in res):
        reason = "block"
    elif any(blocked in res for blocked in blocks):
        reason = "block"
    elif not addrs:
        # TODO: check for no alias on common.
        reason = "missing"
    elif any(detect_gfw(addr, ip, check) for addr in addrs):
        reason = "gfw"
    else:
        reason = "okay"
    assert reason is not None, (res, ip, check)

    # when nothing resolved, the sole element is the exception name.
    exception = res[0] if len(addrs) == 0 else None
    return Entry(
        date=now,
        success=reason == "okay",
        server=ip,
        kind=check.kind,
        domain=check.domain,
        exception=exception,
        addrs=addrs,
        reason=reason,
        execution=opts.execution,
    )
async def try_ip(db, server_ip, checks, opts: Options):
    """Run every check in *checks* against one DNS server.

    Returns (server_ip, None) when all checks passed, otherwise
    (server_ip, first_failing_entry).  Entries are pushed to *db*
    unless opts.dry is set.
    """
    entries = []
    success = True

    def finisher(done, pending):
        # collect finished lookups; on the first failure (with early
        # stopping) cancel everything still in flight.
        nonlocal success
        for task in done:
            res, ip, check = task.result()
            entry = process_result(res, ip, check, opts)
            entries.append(entry)
            if not entry.success:
                if opts.early_stopping and success:  # only cancel once
                    for pend in pending:
                        #print("CANCEL", file=stderr)
                        # FIXME: this can still, somehow, cancel the main function.
                        pend.cancel()
                success = False

    pooler = make_simple_pooler(opts.domain_simul, finisher)

    async def getaddrs_wrapper(ip, check):
        # NOTE: could put right_now() stuff here!
        # TODO: add duration field given in milliseconds (integer)
        #       by subtracting start and end datetimes.
        res = await getaddrs(ip, check.domain, opts)
        return res, ip, check

    for i, check in enumerate(checks):
        first = i == 0
        if not first:
            # pace the requests sent to a single server.
            await sleep(opts.domain_wait)
        await pooler(getaddrs_wrapper(server_ip, check))
        if first:
            # limit to one connection for the first check.
            await pooler()
        if not success:
            if opts.early_stopping or first:
                break
    else:
        # loop ran to completion: drain the remaining in-flight lookups.
        await pooler()

    if not opts.dry:
        for entry in entries:
            db.push_entry(entry)
        db.commit()

    if not success:
        # report the earliest failing entry to the caller.
        first_failure = None
        assert len(entries) > 0
        for entry in entries:
            #print(entry, file=stderr)
            if not entry.success:
                first_failure = entry
                break
        else:
            assert 0, ("no failures found:", entries)
        return server_ip, first_failure
    return server_ip, None
async def main(db, filepath, checks, opts: Options):
    """Check every server listed in *filepath* ("" means stdin).

    *db* may be None when opts.dry is set.  Results are printed as each
    server finishes: bare IP on success, tab-separated failure info
    otherwise.
    """
    def finisher(done, pending):
        # print a verdict for each server whose checks completed.
        for task in done:
            ip, first_failure = task.result()
            if first_failure is None:
                print(ip)
            elif opts.dry:
                ff = first_failure
                if ff.kind in ("shock", "adware"):
                    # don't print the actual domain for distasteful checks.
                    print(ip, ff.reason, ff.kind, sep="\t")
                else:
                    print(ip, ff.reason, ff.kind, ff.domain, sep="\t")

    pooler = make_simple_pooler(opts.ip_simul, finisher)

    f = stdin if filepath == "" else open(filepath, "r")
    try:
        for i, ip in enumerate(read_ips(f)):
            first = i == 0
            if opts.progress:
                print(f"#{i}: {ip}", file=stderr)
                stderr.flush()
            if not first:
                await sleep(opts.ip_wait)
            await pooler(try_ip(db, ip, checks, opts))
    finally:
        # close the file even if a check raises or is cancelled
        # (previously leaked on any exception).
        if f is not stdin:
            f.close()
    await pooler()
def ui(program, args):
    """Parse command-line *args* and run the checker.

    Always returns None (the process exit code then defaults to 0).
    """
    name = "respodns6"
    parser = ArgumentParser(name,
                            description=name + ": test and log DNS records")
    # TODO: support multiple paths. nargs="+", iterate with pooling?
    parser.add_argument(
        "path", metavar="file-path",
        help="a path to a file containing IPv4 addresses which host DNS servers")
    parser.add_argument(
        "--database",
        help="specify database for logging")
    a = parser.parse_args(args)

    # assemble the set of checks; the disabled lists are opt-in for now.
    checks = []
    checks += first_checks
    #checks += new_checks
    checks += likely_checks
    #checks += unlikely_checks
    #checks += top100

    opts = Options()
    # no database means nothing to log: dry-run and stop each server
    # at its first failure.
    opts.dry = a.database is None
    opts.early_stopping = opts.dry
    if a.database is not None:
        if a.database.startswith("sqlite:"):
            uri = a.database
        else:
            # a bare filename becomes an sqlite URI.
            uri = "sqlite:///" + a.database

    def runwrap(db, debug=False):
        # run the async entry point, optionally with asyncio debug logging.
        if debug:
            import logging
            logging.basicConfig(level=logging.DEBUG)
            run(main(db, a.path, checks, opts), debug=True)
        else:
            run(main(db, a.path, checks, opts))

    if opts.dry:
        runwrap(None)
    else:
        # log to a database.
        db = RespoDB(uri, create=True)
        with db:  # TODO: .open and .close methods for manual invocation.
            with db.execution as execution:  # TODO: clean up this interface.
                opts.execution = execution
                runwrap(db)
if __name__ == "__main__":
    if len(argv) == 0:
        # no argv[0] at all (embedded/odd interpreters): nothing to parse.
        print("You've met with a terrible fate.", file=stderr)
        ret = 2
    else:
        # ui() currently always returns None, so this exits 0.
        ret = ui(argv[0], argv[1:])
    if ret is not None:
        exit(ret)

114
respodns/checks.py Normal file
View File

@ -0,0 +1,114 @@
from collections import namedtuple
from .nonsense import nonsense_consistent
from .top1m import retrieve_top1m_entries
# character-to-character table implementing rot13 for letters and
# rot5 for digits: a-m<->n-z, A-M<->N-Z, 0-4<->5-9.
rot13_mapping = {}
for lo, hi, lo2, hi2 in zip("anAN05", "mzMZ49", "naNA50", "zmZM94"):
    src = range(ord(lo), ord(hi) + 1)
    dst = range(ord(lo2), ord(hi2) + 1)
    for k, v in zip(src, dst):
        rot13_mapping[chr(k)] = chr(v)


def rot13(s):
    """Apply rot13 (letters) and rot5 (digits) to *s*; other characters pass through."""
    return "".join(rot13_mapping.get(c, c) for c in s)
def concat_nonsense(domain):
    """Prefix *domain* with a deterministic gibberish label, e.g. "qwertyuiop.com"."""
    return ".".join((nonsense_consistent(domain), domain))
def head(n, it):
    """Return a list of at most the first *n* items drawn from iterator *it*.

    A non-positive *n* yields an empty list.
    """
    from itertools import islice
    # islice replaces the manual next()/StopIteration loop; max() keeps
    # the original behavior of returning [] for negative n.
    return list(islice(it, max(n, 0)))
# a Check pairs a category name ("kind") with the domain to resolve.
Check = namedtuple("Check", ("kind", "domain"))

# always run first: a single sanity check before hammering a server.
first_checks = [
    Check("common", "baidu.com"), # this avoids issues with chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html
]

# candidate checks not yet folded into the main lists.
new_checks = [
    # via dnsvalidator
    Check("adtrack", "bet365.com"),
    Check("common", "facebook.com"),
    Check("common", "google.com"),
    Check("common", "paypal.com"),
    Check("common", "wikileaks.com"),
    Check("news", "telegram.com"),
]

likely_checks = [
    # these checks are, in practice, the most likely to weed out unwanted DNS servers.
    Check("news", "huanqiu.com"),
    Check("adware", rot13("nqf789.pbz")),
    Check("shock", rot13("tbng.pk")), # some servers block shock sites, which isn't a terrible idea, but it's inaccurate
    Check("porn", "pornhub.com"), # some servers block piracy and porn sites for being taboo, or whatever
    Check("adtrack", "media.fastclick.net"),
    Check("parking", "scmp.org"), # dns.watch fails here: domain parking is evil, but servers must abide
    Check("usercontent","4chan.org"), # some servers block sites driven by their chaotic user-created content
    Check("bad", concat_nonsense("com")), # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")), # blogspot handles these strangely; DNS servers likewise
    # NOTE: disabled for being wildly inconsistent:
    # Check("weird", "archive.is"), # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
    Check("common", "archive.org"),
    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"), # one of the WannaCry sinkholes, kinda important that it resolves
    Check("common", "xn--b1aew.xn--p1ai"), # just to test internationalized domains
    Check("common", "wikileaks.com"),
    Check("uncommon", "cybre.space"), # also doubles as a check for the new TLDs
    Check("piracy", "thehiddenbay.org"), # some servers block piracy and porn sites for being taboo, or whatever
    Check("infosec", "scan.shadowserver.org"), # some servers block this: https://scan.shadowserver.org/dns/
    Check("common", "duckduckgo.com"), # a few servers block this for some reason?
    Check("badsub", concat_nonsense("google.com")), # poisoning may yield an unwanted result here
    Check("common", "en.wikipedia.org"),
    Check("adtrack", "google-analytics.com"),
    Check("adtrack", "ad.doubleclick.net"),
    # baidu goes here...?
    Check("common", "naver.com"),
    Check("common", "google.com"), # surely a fully-functional server would resolve the most popular domain in existence
]

# popular domains that rarely discriminate between servers.
unlikely_checks = [
    Check("piracy", "thepiratebay.org"),
    Check("porn", "xvideos.com"),
    Check("usercontent","imgur.com"),
    Check("usercontent","twitter.com"),
    Check("usercontent","weibo.com"),
    Check("usercontent","github.com"),
    Check("porn", "chaturbate.com"),
    Check("video", "bilibili.com"),
    Check("video", "twitch.tv"),
    Check("common", "qq.com"),
    Check("video", "netflix.com"),
    Check("usercontent","reddit.com"),
    Check("usercontent","facebook.com"),
    Check("video", "youtube.com"),
    Check("usercontent","tumblr.com"),
    Check("usercontent","wordpress.com"),
    Check("common", "tmall.com"),
    Check("usercontent","instagram.com"),
    Check("news", "nytimes.com"),
    Check("usercontent","flickr.com"),
    Check("common", "ebay.com"),
    Check("news", "scmp.com"),
    Check("common", "aliexpress.com"),
    Check("common", "stackoverflow.com"),
]

# domains excluded from the top-1M list because they no longer resolve.
defunct = [
    "panda.tv", # imochen.github.io
]
def _top1m_gen():
    """Yield "top"-kind Checks for the Alexa top-1M list, skipping defunct domains."""
    return (Check("top", entry)
            for i, entry in retrieve_top1m_entries()
            if entry not in defunct)

# NOTE(review): these evaluate at import time and retrieve_top1m_entries()
# may hit the network when its cached CSV is stale — confirm that's intended.
top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())

#__all__ = [
#    "first_checks", "new_checks", "likely_checks", "unlikely_checks", "top100",
#    "defunct",
#]

434
respodns/db.py Normal file
View File

@ -0,0 +1,434 @@
import storm.locals as rain
import re
# raw string: "\d" in a plain literal is an invalid escape sequence
# (DeprecationWarning, an error in future Python versions).
ipv4_pattern = re.compile(r"(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII)


def addr_to_int(ip):
    """Convert a dotted-quad IPv4 string to its 32-bit big-endian integer.

    Raises AssertionError when *ip* is not four in-range octets.
    """
    match = ipv4_pattern.fullmatch(ip)
    # was `assert match is not None, row`: `row` is undefined here, so a
    # failed match raised NameError instead of a useful AssertionError.
    assert match is not None, ip
    segs = list(map(int, match.group(1, 2, 3, 4)))
    assert all(0 <= seg <= 255 for seg in segs), match.group(0)
    numeric = segs[0] << 24 | segs[1] << 16 | segs[2] << 8 | segs[3]
    return numeric
create_table_statements = dict(
# TODO: Duration REAL GENERATED ALWAYS AS etc.?
executions="""
CREATE TABLE IF NOT EXISTS Executions (
ExecutionId INTEGER PRIMARY KEY,
StartDate DATE NOT NULL,
FinishDate DATE,
Completed BOOLEAN DEFAULT 0 NOT NULL)
""",
exceptions="""
CREATE TABLE IF NOT EXISTS Exceptions (
ExceptionId INTEGER PRIMARY KEY,
Name TEXT NOT NULL,
Fail BOOLEAN NOT NULL)
""",
ips="""
CREATE TABLE IF NOT EXISTS Ips (
IpId INTEGER PRIMARY KEY,
AsStr TEXT GENERATED ALWAYS AS (
Cast(AsInt >> 24 & 255 AS TEXT) || '.' ||
Cast(AsInt >> 16 & 255 AS TEXT) || '.' ||
Cast(AsInt >> 8 & 255 AS TEXT) || '.' ||
Cast(AsInt & 255 AS TEXT)
) STORED NOT NULL,
AsInt INTEGER UNIQUE CHECK(AsInt >= 0 AND AsInt < 1 << 32) NOT NULL,
China BOOLEAN DEFAULT 0 NOT NULL,
BlockTarget BOOLEAN DEFAULT 0 NOT NULL,
Server BOOLEAN DEFAULT 0 NOT NULL,
RedirectTarget BOOLEAN DEFAULT 0 NOT NULL,
GfwTarget BOOLEAN DEFAULT 0 NOT NULL)
""",
kinds="""
CREATE TABLE IF NOT EXISTS Kinds (
KindId INTEGER PRIMARY KEY,
Name TEXT UNIQUE NOT NULL,
ExpectExceptionId INTEGER,
FOREIGN KEY(ExpectExceptionId) REFERENCES Exceptions(ExceptionId))
""",
domains="""
CREATE TABLE IF NOT EXISTS Domains (
DomainId INTEGER PRIMARY KEY,
Name TEXT UNIQUE NOT NULL,
KindId INTEGER,
FOREIGN KEY(KindId) REFERENCES Kinds(KindId))
""",
# NOTE: that RecordId is *not* the rowid here
# since records can contain multiple IPs,
# and thereby span multiple rows.
# TODO: indexing stuff, cascade deletion stuff.
records="""
CREATE TABLE IF NOT EXISTS Records (
RecordId INTEGER NOT NULL,
IpId INTEGER,
FOREIGN KEY(IpId) REFERENCES Ips(IpId))
""",
messages="""
CREATE TABLE IF NOT EXISTS Messages (
MessageId INTEGER PRIMARY KEY,
ExecutionId INTEGER,
ServerId INTEGER NOT NULL,
DomainId INTEGER NOT NULL,
RecordId INTEGER,
ExceptionId INTEGER,
FOREIGN KEY(ServerId) REFERENCES Ips(IpId),
FOREIGN KEY(ExecutionId) REFERENCES Executions(ExecutionId),
FOREIGN KEY(DomainId) REFERENCES Domains(DomainId),
FOREIGN KEY(ExceptionId) REFERENCES Exceptions(ExceptionId))
""",
# this fails because RecordId is not UNIQUE:
# FOREIGN KEY(RecordId) REFERENCES Records(RecordId)
)
create_view_statements = [
"""
CREATE VIEW Results AS
SELECT
Messages.ExecutionId,
ServerIps.AsStr as Server,
Kinds.Name as Kind,
Domains.Name as Name,
RecordIps.AsStr as Address,
Exceptions.Name as Exception
FROM Messages
LEFT JOIN Domains ON Messages.DomainId = Domains.DomainId
LEFT JOIN Kinds ON Domains.KindId = Kinds.KindId
LEFT JOIN Ips AS ServerIps ON Messages.ServerId = ServerIps.IpId
LEFT JOIN Records ON Messages.RecordId = Records.RecordId
LEFT JOIN Ips as RecordIps ON Records.IpId = RecordIps.IpId
LEFT JOIN Exceptions ON Messages.ExceptionId = Exceptions.ExceptionId
-- GROUP BY Records.IpId
""",
]
table_triggers = dict(
messages=[
# TODO: more triggers. (before update, and also for Records table)
"""
CREATE TRIGGER IF NOT EXISTS RecordExists
BEFORE INSERT
ON Messages
BEGIN
SELECT CASE
WHEN NEW.RecordId NOTNULL AND NOT EXISTS(SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId)
THEN raise(FAIL, "RecordId does not exist")
END;
END
""",
])
class Execution:
    """Context manager bracketing one logging run in the Executions table.

    Entering records the start time and yields the TExecution row;
    exiting records the finish time and whether the run completed
    without raising.
    """
    def __init__(self, db):
        self.db = db
        self.execution = None

    def __enter__(self):
        # local import avoids a circular import with .util at module load.
        from .util import right_now
        self.execution = self.db.start_execution(right_now())
        return self.execution

    def __exit__(self, exc_type, exc_value, traceback):
        from .util import right_now
        completed = exc_type is None
        self.db.finish_execution(self.execution, right_now(), completed)
class AttrCheck:
    """
    Mixin that forbids assigning attributes the class doesn't already
    define, catching typos like `obj.sever = ...` at assignment time.
    Names starting with "_" are always permitted.
    """
    def __setattr__(self, name, value):
        # NOTE: dir() sees class attributes and descriptors without
        # invoking them; hasattr() doesn't do what we want here.
        allowed = name.startswith("_") or name in dir(self)
        if not allowed:
            raise AttributeError(name)
        super().__setattr__(name, value)
class TException(rain.Storm, AttrCheck):
    # ORM mapping for Exceptions: named resolver outcomes (NXDOMAIN, Timeout, ...).
    __storm_table__ = "Exceptions"
    exception_id = rain.Int("ExceptionId", primary=True)
    name = rain.Unicode("Name")
    fail = rain.Bool("Fail")
class TExecution(rain.Storm, AttrCheck):
    # ORM mapping for Executions: one row per program run.
    __storm_table__ = "Executions"
    execution_id = rain.Int("ExecutionId", primary=True)
    start_date = rain.DateTime("StartDate")
    finish_date = rain.DateTime("FinishDate")
    completed = rain.Bool("Completed")
class TAddress(rain.Storm, AttrCheck):
    # ORM mapping for Ips: every address seen, with classification flags.
    __storm_table__ = "Ips"
    address_id = rain.Int("IpId", primary=True)
    str = rain.Unicode("AsStr")  # generated column; derived from AsInt
    ip = rain.Int("AsInt")
    china = rain.Bool("China")
    block_target = rain.Bool("BlockTarget")
    server = rain.Bool("Server")
    redirect_target = rain.Bool("RedirectTarget")
    gfw_target = rain.Bool("GfwTarget")
class TKind(rain.Storm, AttrCheck):
    # ORM mapping for Kinds: check categories, optionally expecting an exception.
    __storm_table__ = "Kinds"
    kind_id = rain.Int("KindId", primary=True)
    name = rain.Unicode("Name")
    xxid = rain.Int("ExpectExceptionId")
    exception = rain.Reference(xxid, "TException.exception_id")
class TDomain(rain.Storm, AttrCheck):
    # ORM mapping for Domains: each checked domain and its kind.
    __storm_table__ = "Domains"
    domain_id = rain.Int("DomainId", primary=True)
    name = rain.Unicode("Name")
    kind_id = rain.Int("KindId")
    kind = rain.Reference(kind_id, "TKind.kind_id")
class TRecord(rain.Storm, AttrCheck):
    # ORM mapping for Records: one row per (record, address) pair;
    # a record spanning several IPs spans several rows, hence rowid
    # is the primary key rather than RecordId.
    __storm_table__ = "Records"
    row_id = rain.Int("rowid", primary=True)
    record_id = rain.Int("RecordId")
    address_id = rain.Int("IpId")
    address = rain.Reference(address_id, "TAddress.address_id")
class TMessage(rain.Storm, AttrCheck):
    # ORM mapping for Messages: one logged query result.
    __storm_table__ = "Messages"
    message_id = rain.Int("MessageId", primary=True)
    execution_id = rain.Int("ExecutionId")
    server_id = rain.Int("ServerId")
    domain_id = rain.Int("DomainId")
    record_id = rain.Int("RecordId")
    exception_id = rain.Int("ExceptionId")
    execution = rain.Reference(execution_id, "TExecution.execution_id")
    server = rain.Reference(server_id, "TAddress.address_id")
    domain = rain.Reference(domain_id, "TDomain.domain_id")
    # RecordId is not unique in Records, so a storm Reference can't be used:
    #record = rain.Reference(record_id, "TRecord.record_id")
    exception = rain.Reference(exception_id, "TException.exception_id")
def apply_properties(obj, d):
    """Assign every key/value in *d* as an attribute of *obj*.

    Each key must name an existing storm column or reference on the
    class; anything else trips an assertion. Returns *obj* for chaining.
    """
    from storm.properties import PropertyColumn
    settable = (PropertyColumn, rain.Reference)
    for key, value in d.items():
        column = getattr(obj.__class__, key)
        assert column is not None, (type(obj), key)
        assert isinstance(column, settable), (type(obj), key)
        setattr(obj, key, value)
    return obj
class RespoDB:
    """SQLite-backed log of DNS query results, accessed through storm.

    Acts as a context manager: entering opens a Store (connection),
    exiting commits and closes it.
    """
    def __init__(self, uri, setup=False, create=False):
        # setup: always run the CREATE statements.
        # create: run them only when the database file doesn't exist yet.
        self.uri = uri
        db_exists = self._db_exists(self.uri)
        self.db = rain.create_database(self.uri)
        self._conn = None
        if setup or (create and not db_exists):
            with self:
                self.setup_executions()
                self.setup_exceptions()
                self.setup_ips()
                self.setup_kinds()
                self.setup_domains()
                self.setup_records()
                self.setup_messages()
                for q in create_view_statements:
                    self._conn.execute(q, noresult=True)
        assert setup or create or db_exists, "database was never setup"
        self.execution = Execution(self)

    @staticmethod
    def _db_exists(uri):
        # crude "sqlite:///path/to.db" -> "path/to.db" extraction.
        from os.path import exists
        _, _, fp = uri.partition(":")
        if fp.startswith("//"):
            _, _, fp = fp[2:].partition("/")
        return fp and exists(fp)

    def __enter__(self):
        self._conn = rain.Store(self.db)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # NOTE(review): commits even when the body raised — confirm intended.
        self.commit()
        self._conn.close()
        self._conn = None

    def find_one(self, cls_spec, *args, **kwargs):
        """Return the single match for a storm find(), or None."""
        assert self._conn is not None
        return self._conn.find(cls_spec, *args, **kwargs).one()

    def flush(self):
        # push pending object changes to the connection (no commit).
        assert self._conn is not None
        self._conn.flush()

    def commit(self):
        assert self._conn is not None
        self._conn.commit()

    # thin constructors: build a T* object from keyword arguments and
    # register it with the store.
    def new_exception(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TException(), kwargs))

    def new_kind(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TKind(), kwargs))

    def new_domain(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TDomain(), kwargs))

    def new_address(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TAddress(), kwargs))

    def new_record(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TRecord(), kwargs))

    def new_message(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TMessage(), kwargs))

    def setup_executions(self):
        self._conn.execute(create_table_statements["executions"], noresult=True)

    def setup_exceptions(self):
        # careful not to call them "errors" since NXDOMAIN is not an error.
        self._conn.execute(create_table_statements["exceptions"], noresult=True)
        # TODO: upsert?
        self.new_exception(name="NXDOMAIN", fail=False)
        self.new_exception(name="NoAnswer", fail=True)
        self.new_exception(name="NoNameservers", fail=True)
        self.new_exception(name="Timeout", fail=True)

    def setup_ips(self):
        # pre-populate with the known chinese servers and block targets.
        from .ips import china, blocks
        self._conn.execute(create_table_statements["ips"], noresult=True)
        # TODO: upsert?
        self.new_address(ip=addr_to_int("0.0.0.0"), block_target=True)
        self.new_address(ip=addr_to_int("127.0.0.1"), block_target=True)
        for ip in china:
            self.new_address(ip=addr_to_int(ip), china=True)
        for ip in blocks:
            self.new_address(ip=addr_to_int(ip), block_target=True)

    def setup_kinds(self):
        self._conn.execute(create_table_statements["kinds"], noresult=True)
        # TODO: upsert?
        #NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN")
        #self.new_kind(name="bad", exception=NXDOMAIN)
        #self.new_kind(name="badsub", exception=NXDOMAIN)

    def setup_domains(self):
        self._conn.execute(create_table_statements["domains"], noresult=True)

    def setup_records(self):
        self._conn.execute(create_table_statements["records"], noresult=True)

    def setup_messages(self):
        self._conn.execute(create_table_statements["messages"], noresult=True)
        for trig in table_triggers["messages"]:
            self._conn.execute(trig)

    def start_execution(self, dt):
        """Create an Executions row marking the start of a run at *dt*."""
        execution = TExecution()
        execution.start_date = dt
        # NOTE(review): this object is never passed to self._conn.add();
        # verify storm persists it (e.g. via TMessage.execution references)
        # before relying on Executions rows being written.
        self.flush()
        return execution

    def finish_execution(self, execution, dt, completed):
        """Stamp *execution* with its finish time and completion flag."""
        # TODO: fail if ExecutionId is missing?
        execution.finish_date = dt
        execution.completed = completed
        self.flush()

    def next_record_id(self):
        """Return Max(RecordId) + 1, treating an empty Records table as 0."""
        from storm.expr import Add, Max, Coalesce
        expr = Add(Coalesce(Max(TRecord.record_id), 0), 1)
        return self.find_one(expr)

    def find_record_id(self, addresses):
        """Return the RecordId of an existing record containing exactly
        *addresses*, or None when no such record exists."""
        address_ids = list(address.address_id for address in addresses)
        record_ids = list(self._conn.find(TRecord, TRecord.address_id.is_in(address_ids)).values(TRecord.record_id))
        if not record_ids:
            return None
        unique_ids = sorted(set(record_ids))
        for needle in unique_ids:
            # a record matches when every requested address maps onto it.
            if sum(1 for id in record_ids if id == needle) == len(addresses):
                found = True  # NOTE(review): unused — leftover?
                return needle
        return None

    def push_entry(self, entry):
        """Persist one Entry: upsert its kind, domain, addresses and
        record, then insert the Messages row tying them together."""
        kind = self.find_one(TKind, TKind.name == entry.kind)
        if not kind:
            kind = self.new_kind(name=entry.kind)
            if entry.kind.startswith("bad"):
                # "bad" kinds are expected to fail with NXDOMAIN.
                exception = self.find_one(TException, TException.name == "NXDOMAIN")
                assert exception is not None
                kind.exception = exception
        domain = self.find_one(TDomain, TDomain.name == entry.domain)
        if not domain:
            domain = self.new_domain(name=entry.domain)
            domain.kind = kind
        # look up or create a row for every returned address.
        addresses = []
        as_ints = sorted(set(map(addr_to_int, entry.addrs)))
        for numeric in as_ints:
            address = self.find_one(TAddress, TAddress.ip == numeric)
            if not address:
                address = self.new_address(ip=numeric)
            addresses.append(address)
        # flag the addresses according to why the entry failed.
        for address in addresses:
            if entry.reason == "block":
                address.block_target = True
            elif entry.reason == "redirect":
                address.redirect_target = True
            elif entry.reason == "gfw":
                address.gfw_target = True
        if addresses:
            # reuse an identical record when one exists.
            record_id = self.find_record_id(addresses)
            if record_id is None:
                record_id = self.next_record_id()
                for address in addresses:
                    self.new_record(record_id=record_id, address=address)
        else:
            record_id = None
        numeric = addr_to_int(entry.server)
        server = self.find_one(TAddress, TAddress.ip == numeric)
        if not server:
            server = self.new_address(ip=numeric)
            self.flush()
        server.server = True
        if entry.exception:
            exception = self.find_one(TException, TException.name == entry.exception)
            assert exception is not None
        else:
            exception = None
        message = self.new_message(
            execution=entry.execution,
            server=server, domain=domain,
            record_id=record_id, exception=exception)
        self.flush()

67
respodns/ips.py Normal file
View File

@ -0,0 +1,67 @@
# known IPs of any DNS located in China:
# (used by db.setup_ips to pre-flag these addresses.)
china = {
    "1.1.8.8",
    "1.1.8.9",
    "1.2.4.8",
    "1.8.1.8",
    "1.8.8.8",
    "114.254.201.131",
    "218.107.55.108",
    "222.216.2.236",
}
# known IPs (not servers) that are used to deny access to websites:
# a response containing any of these is classified as reason "block".
blocks = {
    "1.2.3.4", # timeout
    "54.242.237.204", # fake
    "93.158.134.250", # fake
    "114.6.128.8", # fake
    "118.97.116.27", # fake
    "119.235.29.59", # fake
    "124.40.255.99", # fake
    "146.112.61.106", # fake
    "156.154.113.17", # fake
    "156.154.175.30", # fake
    "156.154.175.215", # fake
    "156.154.175.216", # fake
    "156.154.175.221", # fake
    "163.28.10.160", # fake
    "175.139.142.25", # fake
    "176.103.130.135", # fake
    "182.93.64.126", # fake
    "192.99.140.48", # fake
    "195.175.254.2", # fake
    "202.40.187.91", # fake
    "202.162.209.133", # fake
    "202.165.36.253", # fake
    "203.119.13.75", # fake
    "203.119.13.76", # fake
    "203.190.55.217", # fake
}
# string prefixes covering the reserved/unroutable IPv4 ranges:
# this-network, RFC 1918, loopback, link-local, special-purpose,
# documentation, benchmarking, CGNAT (100.64/10), and class D/E.
bogon_checks = [
    "0.",
    "10.",
    "127.",
    "169.254.",
    "192.0.0.",
    "192.0.2.",
    "192.168.",
    "198.18.",
    "198.19.",
    "198.51.100.",
    "203.0.113.",
] + [
    f"100.{i}." for i in range(64, 128)
] + [
    f"172.{i}." for i in range(16, 32)
] + [
    f"{i}." for i in range(224, 256)
]


def is_bogon(ip):
    """Return True when dotted-quad *ip* falls in a reserved range (prefix match)."""
    for prefix in bogon_checks:
        if ip.startswith(prefix):
            return True
    return False
def ipkey(ip_string):
    """Sort key mapping a dotted IPv4 string to its big-endian integer
    value; ':' is treated like another '.' separator."""
    parts = ip_string.replace(":", ".").split(".")
    return sum(256 ** (3 - idx) * int(part) for idx, part in enumerate(parts))

8
respodns/nonsense.py Normal file
View File

@ -0,0 +1,8 @@
from random import choice, choices, Random
from string import ascii_lowercase
from zlib import crc32
def nonsense_consistent(domain):
    """Produce a gibberish lowercase label (9-12 chars) for *domain*.

    Seeded by crc32 of the domain, so the same domain always yields
    the same label across runs.
    """
    rng = Random(crc32(domain.encode("utf-8")))
    (length,) = rng.choices((9, 10, 11, 12), (4, 5, 3, 2))
    letters = [rng.choice(ascii_lowercase) for _ in range(length)]
    return "".join(letters)

48
respodns/pooler.py Normal file
View File

@ -0,0 +1,48 @@
from types import CoroutineType
import asyncio
# TODO: write a less confusing interface that allows the code to be written more flatly.
# maybe like: async for done in apply(doit, [tuple_of_args]):
def make_pooler(pool_size, finisher=None):
    """Return an async function that pools awaitables up to *pool_size*.

    Call the returned pooler with a coroutine/task to enqueue it; once
    the pool is full it waits for the first completion and hands
    (done, pending) to *finisher*.  Call it with no argument to drain:
    it waits for everything outstanding.
    """
    aws = set()

    async def pooler(item=None):
        nonlocal aws
        finish = item is None
        if not finish:
            if isinstance(item, CoroutineType):
                assert not isinstance(item, asyncio.Task)
                # bare coroutines must be wrapped so asyncio.wait can track them.
                item = asyncio.create_task(item)
            aws.add(item)
        # TODO: don't wait until all completed, just first completed in loop.
        #       that way we can handle each done task ASAP.
        condition = asyncio.ALL_COMPLETED if finish else asyncio.FIRST_COMPLETED
        if len(aws) == 0:
            return None
        if finish or len(aws) >= pool_size:
            done, pending = await asyncio.wait(aws, return_when=condition)
            #pending = set(task for task in pending if task in aws) # ???
            ret = None if finisher is None else finisher(done, pending)
            #aws = set(task for task in pending if not task.cancelled())
            aws = pending
            if ret is not None:
                return ret
        return None
    return pooler
def make_simple_pooler(pool_size, finisher=None):
    """Return an async function pooling awaitables up to *pool_size*.

    Call the returned pooler with a coroutine/task to enqueue it; it
    blocks until the pool has room again.  Call it with no argument to
    drain all outstanding work.  *finisher*, when given, receives
    (done, pending) after every asyncio.wait round.
    """
    condition = asyncio.FIRST_COMPLETED
    pending = set()

    async def pooler(item=None):
        nonlocal pending
        finish = item is None
        if not finish:
            if isinstance(item, CoroutineType):
                assert not isinstance(item, asyncio.Task)
                # bare coroutines must become tasks for asyncio.wait.
                item = asyncio.create_task(item)
            pending.add(item)
        desired_size = 0 if finish else pool_size - 1
        while len(pending) > desired_size:
            done, pending = await asyncio.wait(pending, return_when=condition)
            # finisher defaults to None (see signature) but was called
            # unconditionally before, raising TypeError — guard it like
            # make_pooler does.
            if finisher is not None:
                finisher(done, pending)
    return pooler

32
respodns/structs.py Normal file
View File

@ -0,0 +1,32 @@
from dataclasses import dataclass
from datetime import datetime
@dataclass
class Options:
    """Tunable settings shared by one run of the checker."""
    #exec_id: int = -1
    execution: object = None  # db Execution row for this run (None when dry)
    ip_simul: int = 10 # how many IPs to connect to at once
    domain_simul: int = 3 # how many domains per IP to request at once
    ip_wait: float = 0.15  # seconds between starting successive servers
    domain_wait: float = 0.25  # seconds between queries to one server
    impatient: bool = False # reduce retries and times for timeouts
    early_stopping: bool = True # stop at the first invalid domain (best with dry)
    dry: bool = True # don't write anything to database
    progress: bool = False  # print "#i: ip" progress lines to stderr
@dataclass
class Entry:
    """One check result: what was asked, of which server, and the verdict.

    (The `datetime` used by the annotation is now imported at module
    level instead of inside the class body, which had also left a stray
    `Entry.datetime` class attribute behind.)
    """
    date: datetime      # when the result was recorded (timezone-aware UTC)
    success: bool       # True exactly when reason == "okay"
    server: str         # IPv4 of the DNS server that was queried
    kind: str           # category of the checked domain (see checks.py)
    domain: str         # the domain name that was resolved
    exception: str      # resolver exception name, or None when addresses came back
    addrs: list # list of strings
    reason: str         # "okay", "timeout", "block", "redirect", "missing", "gfw"
    #exec_id: int
    execution: object   # db Execution row this entry belongs to (None when dry)

52
respodns/top1m.py Normal file
View File

@ -0,0 +1,52 @@
urltop_default = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
csvfn_default = "top-1m.csv"
one_week = 7 * 24 * 60 * 60 # in seconds


def alive(fp, expire):
    """True when file *fp* exists, was modified within the last *expire*
    seconds, and holds more than 2 bytes (i.e. isn't a stub).

    The *expire* parameter was previously ignored in favor of the
    hard-coded one_week; it is now honored.
    """
    from os.path import exists, getmtime, getsize
    from time import time
    return exists(fp) and time() < getmtime(fp) + expire and getsize(fp) > 2
def download_top1m(urltop=None, csvfn=None):
    """Download the zipped top-1M list and return the CSV contents as text.

    *urltop* is the zip's URL and *csvfn* the member to extract; both
    default to the Alexa list.
    """
    from io import BytesIO
    from urllib.request import urlopen
    from zipfile import ZipFile
    urltop = urltop_default if urltop is None else urltop
    csvfn = csvfn_default if csvfn is None else csvfn
    # ZipFile needs a seekable file, so buffer the download in memory.
    with urlopen(urltop) as resp:
        payload = BytesIO(resp.read())
    with ZipFile(payload) as zipf:
        with zipf.open(csvfn) as member:
            raw = member.read()
    return raw.decode("utf-8")
def retrieve_top1m_entries(csv_fp="top-1m.csv"):
    """Return the top-1M list as [(rank, domain), ...].

    Uses the cached CSV at *csv_fp* when it is fresh (see alive());
    otherwise downloads a new copy and caches it.
    """
    from sys import stderr
    if alive(csv_fp, one_week):
        with open(csv_fp, "r") as f:
            uncomp = f.read()
    else:
        print("downloading", csv_fp, file=stderr)
        uncomp = download_top1m()
        with open(csv_fp, "w") as f:
            f.write(uncomp)
    # we could use the csv module, but this is totally overkill
    # for data that *should* be just a subset of ascii.
    lines = uncomp.splitlines()
    # each line is "rank,domain"; partition returns (rank, ",", domain).
    entries = [(lambda a: (int(a[0]), a[2]))(line.partition(","))
               for line in lines]
    return entries

# convenience alias.
top1m = download_top1m

71
respodns/util.py Normal file
View File

@ -0,0 +1,71 @@
def right_now():
    """Return the current moment as a timezone-aware UTC datetime."""
    from datetime import datetime, timezone
    return datetime.now(tz=timezone.utc)
def detect_gfw(r, ip, check):
    """Return True when response address *r* looks like an answer poisoned
    by the Great Firewall of China.

    *ip* (the server queried) is currently unused but kept for
    interface stability.
    """
    prefix = r.startswith
    # facebook-owned properties legitimately resolve into these ranges.
    hosted = check.domain.endswith(("facebook.com", "instagram.com", "whatsapp.com"))
    if prefix("31.13.") and not hosted:
        return True
    if prefix("66.220.") or prefix("69.63."):
        return True
    if prefix("69.171.") and not prefix("69.171.250."):
        return True
    if prefix("74.86.") or prefix("75.126."):
        return True
    if r == "64.13.192.74":
        return True
    # more non-facebook GFW stuff:
    # 31.13.64.33, 31.13.70.1, 31.13.70.20, 31.13.76.16, 31.13.86.1,
    # 173.252.110.21, 192.99.140.48, 199.16.156.40, 199.16.158.190
    return False
async def getaddrs(server, domain, opts):
    """Resolve A records for *domain* by querying *server* directly.

    Returns a sorted list of address strings, or a single-element list
    naming the failure ("NXDOMAIN", "NoAnswer", "NoNameservers",
    "Timeout").
    """
    from dns.asyncresolver import Resolver
    from dns.exception import Timeout
    from dns.resolver import NXDOMAIN, NoAnswer, NoNameservers
    #from dns.resolver import Resolver
    from .ips import ipkey
    res = Resolver(configure=False)
    if opts.impatient:
        # shorter per-query and total timeouts.
        res.timeout = 5
        res.lifetime = 2
    res.nameservers = [server]
    try:
        #ans = res.resolve(domain, "A", search=False)
        ans = await res.resolve(domain, "A", search=False)
    except NXDOMAIN:
        return ["NXDOMAIN"]
    except NoAnswer:
        return ["NoAnswer"]
    except NoNameservers:
        return ["NoNameservers"]
    except Timeout:
        return ["Timeout"]
    #return list(set(rr.address for rr in ans.rrset))
    # dedupe and sort numerically so results compare consistently.
    return sorted(set(rr.address for rr in ans.rrset), key=ipkey)
def read_ips(f):
    """Yield stripped IPv4-looking strings from open text file *f*.

    '#' starts a comment; lines without exactly three dots are skipped.
    """
    # iterate lazily instead of readlines(): no longer buffers the whole
    # input, and yields lines as they arrive on an open pipe/stdin.
    # TODO: make async and more robust. (regex pls)
    for line in f:
        if "#" in line:
            line, _, _ = line.partition("#")
        ip = line.strip()
        if ip.count(".") != 3:
            continue
        yield ip

31
setup.py Normal file
View File

@ -0,0 +1,31 @@
from setuptools import setup

setup(
    name='respodns',
    version='0.1.0',
    packages=[
        'respodns',
    ],
    author='notwa',
    author_email='cloningdonor+pypi@gmail.com',
    url='https://github.com/notwa/respodns',
    keywords='TODO',  # TODO: fill in real keywords before publishing.
    description='DNS logger',
    # the LICENSE file and the classifier below both say ISC;
    # this previously (and contradictorily) declared 'MIT'.
    license='ISC',
    zip_safe=True,
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: ISC License (ISCL)',
        'Natural Language :: English',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: SQL',
        'Topic :: Internet :: Name Service (DNS)',
        'Topic :: System :: Logging',
        'Topic :: Utilities',
    ]
)