init

commit 3afeb5a3fd
15 changed files with 1084 additions and 0 deletions
.gitignore (vendored, new file, 3 lines)
@@ -0,0 +1,3 @@
*.csv
*.db
__pycache__/
LICENSE (new file, 13 lines)
@@ -0,0 +1,13 @@
Copyright (c) 2020, Connor Olding

Permission to use, copy, modify, and/or distribute this software for any
purpose with or without fee is hereby granted, provided that the above
copyright notice and this permission notice appear in all copies.

THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
README.md (new file, 1 line)
@@ -0,0 +1 @@
へい
requirements.txt (new file, 2 lines)
@@ -0,0 +1,2 @@
dnspython >= 2.1.0
storm
respodns/__init__.py (new file, empty)
respodns/__main__.py (new file, 208 lines)
@@ -0,0 +1,208 @@
#!/usr/bin/env python3

from argparse import ArgumentParser
from asyncio import run, sleep
from .checks import (first_checks, new_checks,
                     likely_checks, unlikely_checks, top100)
from .db import RespoDB
from .ips import blocks, is_bogon
from .pooler import make_simple_pooler
from .structs import Options, Entry
from .util import right_now, read_ips, getaddrs, detect_gfw
from sys import argv, stdin, stderr, exit

def process_result(res, ip, check, opts: Options):
    # TODO: get more accurate times by inserting start-end into getaddrs.
    now = right_now()
    assert len(res) > 0
    reason = None

    if "Timeout" in res:
        reason = "timeout"

    elif check.kind.startswith("bad"):
        reason = "okay" if "NXDOMAIN" in res else "redirect"

    elif any(is_bogon(r) for r in res):
        reason = "block"
    elif any(blocked in res for blocked in blocks):
        reason = "block"

    elif not any(len(r) > 0 and r[0].isdigit() for r in res):
        # TODO: check for no alias on common.
        reason = "missing"

    else:
        for r in res:
            if len(r) == 0 or not r[0].isdigit():
                continue
            if detect_gfw(r, ip, check):
                reason = "gfw"
                break
        else:
            reason = "okay"

    assert reason is not None, (res, ip, check)

    addrs = list(filter(lambda r: len(r) > 0 and r[0].isdigit(), res))
    exception = res[0] if len(addrs) == 0 else None

    return Entry(
        date=now,
        success=reason == "okay",
        server=ip,
        kind=check.kind,
        domain=check.domain,
        exception=exception,
        addrs=addrs,
        reason=reason,
        execution=opts.execution,
    )

async def try_ip(db, server_ip, checks, opts: Options):
    entries = []

    success = True
    def finisher(done, pending):
        nonlocal success
        for task in done:
            res, ip, check = task.result()
            entry = process_result(res, ip, check, opts)
            entries.append(entry)
            if not entry.success:
                if opts.early_stopping and success:  # only cancel once
                    for pend in pending:
                        #print("CANCEL", file=stderr)
                        # FIXME: this can still, somehow, cancel the main function.
                        pend.cancel()
                success = False

    pooler = make_simple_pooler(opts.domain_simul, finisher)

    async def getaddrs_wrapper(ip, check):
        # NOTE: could put right_now() stuff here!
        # TODO: add duration field given in milliseconds (integer)
        #       by subtracting start and end datetimes.
        res = await getaddrs(ip, check.domain, opts)
        return res, ip, check

    for i, check in enumerate(checks):
        first = i == 0
        if not first:
            await sleep(opts.domain_wait)
        await pooler(getaddrs_wrapper(server_ip, check))
        if first:
            # limit to one connection for the first check.
            await pooler()
        if not success:
            if opts.early_stopping or first:
                break
    else:
        await pooler()

    if not opts.dry:
        for entry in entries:
            db.push_entry(entry)
        db.commit()

    if not success:
        first_failure = None
        assert len(entries) > 0
        for entry in entries:
            #print(entry, file=stderr)
            if not entry.success:
                first_failure = entry
                break
        else:
            assert 0, ("no failures found:", entries)
        return server_ip, first_failure
    return server_ip, None

async def main(db, filepath, checks, opts: Options):
    def finisher(done, pending):
        for task in done:
            ip, first_failure = task.result()
            if first_failure is None:
                print(ip)
            elif opts.dry:
                ff = first_failure
                if ff.kind in ("shock", "adware"):
                    print(ip, ff.reason, ff.kind, sep="\t")
                else:
                    print(ip, ff.reason, ff.kind, ff.domain, sep="\t")

    pooler = make_simple_pooler(opts.ip_simul, finisher)

    f = stdin if filepath == "" else open(filepath, "r")
    for i, ip in enumerate(read_ips(f)):
        first = i == 0
        if opts.progress:
            print(f"#{i}: {ip}", file=stderr)
            stderr.flush()
        if not first:
            await sleep(opts.ip_wait)
        await pooler(try_ip(db, ip, checks, opts))
    if f != stdin:
        f.close()

    await pooler()

def ui(program, args):
    name = "respodns6"
    parser = ArgumentParser(name,
                            description=name + ": test and log DNS records")

    # TODO: support multiple paths. nargs="+", iterate with pooling?
    parser.add_argument(
        "path", metavar="file-path",
        help="a path to a file containing IPv4 addresses which host DNS servers")

    parser.add_argument(
        "--database",
        help="specify database for logging")

    a = parser.parse_args(args)

    checks = []
    checks += first_checks
    #checks += new_checks
    checks += likely_checks
    #checks += unlikely_checks
    #checks += top100

    opts = Options()
    opts.dry = a.database is None
    opts.early_stopping = opts.dry

    if a.database is not None:
        if a.database.startswith("sqlite:"):
            uri = a.database
        else:
            uri = "sqlite:///" + a.database

    def runwrap(db, debug=False):
        if debug:
            import logging
            logging.basicConfig(level=logging.DEBUG)
            run(main(db, a.path, checks, opts), debug=True)
        else:
            run(main(db, a.path, checks, opts))

    if opts.dry:
        runwrap(None)
    else:
        # log to a database.
        db = RespoDB(uri, create=True)
        with db:  # TODO: .open and .close methods for manual invocation.
            with db.execution as execution:  # TODO: clean up this interface.
                opts.execution = execution
                runwrap(db)

if __name__ == "__main__":
    if len(argv) == 0:
        print("You've met with a terrible fate.", file=stderr)
        ret = 2
    else:
        ret = ui(argv[0], argv[1:])
    if ret is not None:
        exit(ret)
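The entry point can also be exercised without the command line by calling ui() directly; a minimal sketch, not part of the commit, where "ips.txt" is a hypothetical input file with one resolver address per line:

# dry run: results print to stdout, equivalent to `python -m respodns ips.txt`.
from respodns.__main__ import ui
ui("respodns", ["ips.txt"])
# logging run: writes to SQLite and disables early stopping,
# equivalent to `python -m respodns ips.txt --database respo.db`.
ui("respodns", ["ips.txt", "--database", "respo.db"])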
respodns/checks.py (new file, 114 lines)
@@ -0,0 +1,114 @@
from collections import namedtuple
from .nonsense import nonsense_consistent
from .top1m import retrieve_top1m_entries

rot13_mapping = {}
for a, b, c, d in zip("anAN05", "mzMZ49", "naNA50", "zmZM94"):
    rot13_mapping.update(dict((chr(k), chr(v))
                              for k, v in zip(range(ord(a), ord(b) + 1),
                                              range(ord(c), ord(d) + 1))))

def rot13(s):
    return "".join(rot13_mapping.get(c, c) for c in s)

def concat_nonsense(domain):
    return nonsense_consistent(domain) + "." + domain

def head(n, it):
    res = []
    try:
        while len(res) < n:
            res.append(next(it))
    except StopIteration:
        pass
    return res

Check = namedtuple("Check", ("kind", "domain"))

first_checks = [
    Check("common", "baidu.com"),  # this avoids issues with chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html
]

new_checks = [
    # via dnsvalidator
    Check("adtrack", "bet365.com"),
    Check("common", "facebook.com"),
    Check("common", "google.com"),
    Check("common", "paypal.com"),
    Check("common", "wikileaks.com"),
    Check("news", "telegram.com"),
]

likely_checks = [
    # these checks are, in practice, the most likely to weed out unwanted DNS servers.
    Check("news", "huanqiu.com"),
    Check("adware", rot13("nqf789.pbz")),
    Check("shock", rot13("tbng.pk")),  # some servers block shock sites, which isn't a terrible idea, but it's inaccurate
    Check("porn", "pornhub.com"),  # some servers block piracy and porn sites for being taboo, or whatever
    Check("adtrack", "media.fastclick.net"),
    Check("parking", "scmp.org"),  # dns.watch fails here: domain parking is evil, but servers must abide
    Check("usercontent", "4chan.org"),  # some servers block sites driven by their chaotic user-created content
    Check("bad", concat_nonsense("com")),  # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
    Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")),  # blogspot handles these strangely; DNS servers likewise
    # NOTE: disabled for being wildly inconsistent:
    # Check("weird", "archive.is"),  # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
    Check("common", "archive.org"),
    Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"),  # one of the WannaCry sinkholes, kinda important that it resolves
    Check("common", "xn--b1aew.xn--p1ai"),  # just to test internationalized domains
    Check("common", "wikileaks.com"),
    Check("uncommon", "cybre.space"),  # also doubles as a check for the new TLDs
    Check("piracy", "thehiddenbay.org"),  # some servers block piracy and porn sites for being taboo, or whatever
    Check("infosec", "scan.shadowserver.org"),  # some servers block this: https://scan.shadowserver.org/dns/
    Check("common", "duckduckgo.com"),  # a few servers block this for some reason?
    Check("badsub", concat_nonsense("google.com")),  # poisoning may yield an unwanted result here
    Check("common", "en.wikipedia.org"),
    Check("adtrack", "google-analytics.com"),
    Check("adtrack", "ad.doubleclick.net"),
    # baidu goes here...?
    Check("common", "naver.com"),
    Check("common", "google.com"),  # surely a fully-functional server would resolve the most popular domain in existence
]

unlikely_checks = [
    Check("piracy", "thepiratebay.org"),
    Check("porn", "xvideos.com"),
    Check("usercontent", "imgur.com"),
    Check("usercontent", "twitter.com"),
    Check("usercontent", "weibo.com"),
    Check("usercontent", "github.com"),
    Check("porn", "chaturbate.com"),
    Check("video", "bilibili.com"),
    Check("video", "twitch.tv"),
    Check("common", "qq.com"),
    Check("video", "netflix.com"),
    Check("usercontent", "reddit.com"),
    Check("usercontent", "facebook.com"),
    Check("video", "youtube.com"),
    Check("usercontent", "tumblr.com"),
    Check("usercontent", "wordpress.com"),
    Check("common", "tmall.com"),
    Check("usercontent", "instagram.com"),
    Check("news", "nytimes.com"),
    Check("usercontent", "flickr.com"),
    Check("common", "ebay.com"),
    Check("news", "scmp.com"),
    Check("common", "aliexpress.com"),
    Check("common", "stackoverflow.com"),
]

defunct = [
    "panda.tv",  # imochen.github.io
]

def _top1m_gen():
    return (Check("top", entry)
            for i, entry in retrieve_top1m_entries()
            if entry not in defunct)

top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())

#__all__ = [
#    "first_checks", "new_checks", "likely_checks", "unlikely_checks", "top100",
#    "defunct",
#]
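The rot13 helper above extends the classic letter rotation with a digit rotation (0-4 swaps with 5-9), so it remains its own inverse; the unsavory domains are kept obfuscated in the source and decoded at import time. A quick sketch of the behavior, not part of the commit:

assert rot13(rot13("example99.com")) == "example99.com"  # an involution
assert rot13("tbng.pk") == "goat.cx"  # letters rotate, "." passes through unchanged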
respodns/db.py (new file, 434 lines)
@@ -0,0 +1,434 @@
import storm.locals as rain
import re

ipv4_pattern = re.compile(r"(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII)

def addr_to_int(ip):
    match = ipv4_pattern.fullmatch(ip)
    assert match is not None, ip
    segs = list(map(int, match.group(1, 2, 3, 4)))
    assert all(0 <= seg <= 255 for seg in segs), match.group(0)
    numeric = segs[0] << 24 | segs[1] << 16 | segs[2] << 8 | segs[3]
    return numeric

create_table_statements = dict(
    # TODO: Duration REAL GENERATED ALWAYS AS etc.?
    executions="""
    CREATE TABLE IF NOT EXISTS Executions (
        ExecutionId INTEGER PRIMARY KEY,
        StartDate DATE NOT NULL,
        FinishDate DATE,
        Completed BOOLEAN DEFAULT 0 NOT NULL)
    """,

    exceptions="""
    CREATE TABLE IF NOT EXISTS Exceptions (
        ExceptionId INTEGER PRIMARY KEY,
        Name TEXT NOT NULL,
        Fail BOOLEAN NOT NULL)
    """,

    ips="""
    CREATE TABLE IF NOT EXISTS Ips (
        IpId INTEGER PRIMARY KEY,
        AsStr TEXT GENERATED ALWAYS AS (
            Cast(AsInt >> 24 & 255 AS TEXT) || '.' ||
            Cast(AsInt >> 16 & 255 AS TEXT) || '.' ||
            Cast(AsInt >> 8 & 255 AS TEXT) || '.' ||
            Cast(AsInt & 255 AS TEXT)
        ) STORED NOT NULL,
        AsInt INTEGER UNIQUE CHECK(AsInt >= 0 AND AsInt < 1 << 32) NOT NULL,
        China BOOLEAN DEFAULT 0 NOT NULL,
        BlockTarget BOOLEAN DEFAULT 0 NOT NULL,
        Server BOOLEAN DEFAULT 0 NOT NULL,
        RedirectTarget BOOLEAN DEFAULT 0 NOT NULL,
        GfwTarget BOOLEAN DEFAULT 0 NOT NULL)
    """,

    kinds="""
    CREATE TABLE IF NOT EXISTS Kinds (
        KindId INTEGER PRIMARY KEY,
        Name TEXT UNIQUE NOT NULL,
        ExpectExceptionId INTEGER,
        FOREIGN KEY(ExpectExceptionId) REFERENCES Exceptions(ExceptionId))
    """,

    domains="""
    CREATE TABLE IF NOT EXISTS Domains (
        DomainId INTEGER PRIMARY KEY,
        Name TEXT UNIQUE NOT NULL,
        KindId INTEGER,
        FOREIGN KEY(KindId) REFERENCES Kinds(KindId))
    """,

    # NOTE: that RecordId is *not* the rowid here
    #       since records can contain multiple IPs,
    #       and thereby span multiple rows.
    # TODO: indexing stuff, cascade deletion stuff.
    records="""
    CREATE TABLE IF NOT EXISTS Records (
        RecordId INTEGER NOT NULL,
        IpId INTEGER,
        FOREIGN KEY(IpId) REFERENCES Ips(IpId))
    """,

    messages="""
    CREATE TABLE IF NOT EXISTS Messages (
        MessageId INTEGER PRIMARY KEY,
        ExecutionId INTEGER,
        ServerId INTEGER NOT NULL,
        DomainId INTEGER NOT NULL,
        RecordId INTEGER,
        ExceptionId INTEGER,
        FOREIGN KEY(ServerId) REFERENCES Ips(IpId),
        FOREIGN KEY(ExecutionId) REFERENCES Executions(ExecutionId),
        FOREIGN KEY(DomainId) REFERENCES Domains(DomainId),
        FOREIGN KEY(ExceptionId) REFERENCES Exceptions(ExceptionId))
    """,
    # this fails because RecordId is not UNIQUE:
    # FOREIGN KEY(RecordId) REFERENCES Records(RecordId)
)

create_view_statements = [
    """
    CREATE VIEW Results AS
    SELECT
        Messages.ExecutionId,
        ServerIps.AsStr as Server,
        Kinds.Name as Kind,
        Domains.Name as Name,
        RecordIps.AsStr as Address,
        Exceptions.Name as Exception
    FROM Messages
    LEFT JOIN Domains ON Messages.DomainId = Domains.DomainId
    LEFT JOIN Kinds ON Domains.KindId = Kinds.KindId
    LEFT JOIN Ips AS ServerIps ON Messages.ServerId = ServerIps.IpId
    LEFT JOIN Records ON Messages.RecordId = Records.RecordId
    LEFT JOIN Ips as RecordIps ON Records.IpId = RecordIps.IpId
    LEFT JOIN Exceptions ON Messages.ExceptionId = Exceptions.ExceptionId
    -- GROUP BY Records.IpId
    """,
]

table_triggers = dict(
    messages=[
        # TODO: more triggers. (before update, and also for Records table)
        """
        CREATE TRIGGER IF NOT EXISTS RecordExists
        BEFORE INSERT
        ON Messages
        BEGIN
            SELECT CASE
                WHEN NEW.RecordId NOTNULL AND NOT EXISTS(SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId)
                THEN raise(FAIL, "RecordId does not exist")
            END;
        END
        """,
    ])

class Execution:
    def __init__(self, db):
        self.db = db
        self.execution = None

    def __enter__(self):
        from .util import right_now
        self.execution = self.db.start_execution(right_now())
        return self.execution

    def __exit__(self, exc_type, exc_value, traceback):
        from .util import right_now
        completed = exc_type is None
        self.db.finish_execution(self.execution, right_now(), completed)

class AttrCheck:
    """
    Inheriting AttrCheck prevents accidentally setting attributes
    that don't already exist.
    """
    def __setattr__(self, name, value):
        # NOTE: hasattr doesn't do what we want here. dir does.
        if name.startswith("_") or name in dir(self):
            super().__setattr__(name, value)
        else:
            raise AttributeError(name)

class TException(rain.Storm, AttrCheck):
    __storm_table__ = "Exceptions"
    exception_id = rain.Int("ExceptionId", primary=True)
    name = rain.Unicode("Name")
    fail = rain.Bool("Fail")

class TExecution(rain.Storm, AttrCheck):
    __storm_table__ = "Executions"
    execution_id = rain.Int("ExecutionId", primary=True)
    start_date = rain.DateTime("StartDate")
    finish_date = rain.DateTime("FinishDate")
    completed = rain.Bool("Completed")

class TAddress(rain.Storm, AttrCheck):
    __storm_table__ = "Ips"
    address_id = rain.Int("IpId", primary=True)
    str = rain.Unicode("AsStr")
    ip = rain.Int("AsInt")
    china = rain.Bool("China")
    block_target = rain.Bool("BlockTarget")
    server = rain.Bool("Server")
    redirect_target = rain.Bool("RedirectTarget")
    gfw_target = rain.Bool("GfwTarget")

class TKind(rain.Storm, AttrCheck):
    __storm_table__ = "Kinds"
    kind_id = rain.Int("KindId", primary=True)
    name = rain.Unicode("Name")
    xxid = rain.Int("ExpectExceptionId")
    exception = rain.Reference(xxid, "TException.exception_id")

class TDomain(rain.Storm, AttrCheck):
    __storm_table__ = "Domains"
    domain_id = rain.Int("DomainId", primary=True)
    name = rain.Unicode("Name")
    kind_id = rain.Int("KindId")
    kind = rain.Reference(kind_id, "TKind.kind_id")

class TRecord(rain.Storm, AttrCheck):
    __storm_table__ = "Records"
    row_id = rain.Int("rowid", primary=True)
    record_id = rain.Int("RecordId")
    address_id = rain.Int("IpId")
    address = rain.Reference(address_id, "TAddress.address_id")

class TMessage(rain.Storm, AttrCheck):
    __storm_table__ = "Messages"
    message_id = rain.Int("MessageId", primary=True)
    execution_id = rain.Int("ExecutionId")
    server_id = rain.Int("ServerId")
    domain_id = rain.Int("DomainId")
    record_id = rain.Int("RecordId")
    exception_id = rain.Int("ExceptionId")
    execution = rain.Reference(execution_id, "TExecution.execution_id")
    server = rain.Reference(server_id, "TAddress.address_id")
    domain = rain.Reference(domain_id, "TDomain.domain_id")
    #record = rain.Reference(record_id, "TRecord.record_id")
    exception = rain.Reference(exception_id, "TException.exception_id")

def apply_properties(obj, d):
    from storm.properties import PropertyColumn
    for k, v in d.items():
        ref = getattr(obj.__class__, k)
        assert ref is not None, (type(obj), k)
        assert isinstance(ref, PropertyColumn) or isinstance(ref, rain.Reference), \
            (type(obj), k)
        setattr(obj, k, v)
    return obj

class RespoDB:
    def __init__(self, uri, setup=False, create=False):
        self.uri = uri
        db_exists = self._db_exists(self.uri)
        self.db = rain.create_database(self.uri)
        self._conn = None

        if setup or (create and not db_exists):
            with self:
                self.setup_executions()
                self.setup_exceptions()
                self.setup_ips()
                self.setup_kinds()
                self.setup_domains()
                self.setup_records()
                self.setup_messages()

                for q in create_view_statements:
                    self._conn.execute(q, noresult=True)
        assert setup or create or db_exists, "database was never setup"

        self.execution = Execution(self)

    @staticmethod
    def _db_exists(uri):
        from os.path import exists
        _, _, fp = uri.partition(":")
        if fp.startswith("//"):
            _, _, fp = fp[2:].partition("/")
        return fp and exists(fp)

    def __enter__(self):
        self._conn = rain.Store(self.db)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.commit()
        self._conn.close()
        self._conn = None

    def find_one(self, cls_spec, *args, **kwargs):
        assert self._conn is not None
        return self._conn.find(cls_spec, *args, **kwargs).one()

    def flush(self):
        assert self._conn is not None
        self._conn.flush()

    def commit(self):
        assert self._conn is not None
        self._conn.commit()

    def new_exception(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TException(), kwargs))

    def new_kind(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TKind(), kwargs))

    def new_domain(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TDomain(), kwargs))

    def new_address(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TAddress(), kwargs))

    def new_record(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TRecord(), kwargs))

    def new_message(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TMessage(), kwargs))

    def setup_executions(self):
        self._conn.execute(create_table_statements["executions"], noresult=True)

    def setup_exceptions(self):
        # careful not to call them "errors" since NXDOMAIN is not an error.
        self._conn.execute(create_table_statements["exceptions"], noresult=True)

        # TODO: upsert?

        self.new_exception(name="NXDOMAIN", fail=False)
        self.new_exception(name="NoAnswer", fail=True)
        self.new_exception(name="NoNameservers", fail=True)
        self.new_exception(name="Timeout", fail=True)

    def setup_ips(self):
        from .ips import china, blocks

        self._conn.execute(create_table_statements["ips"], noresult=True)

        # TODO: upsert?

        self.new_address(ip=addr_to_int("0.0.0.0"), block_target=True)
        self.new_address(ip=addr_to_int("127.0.0.1"), block_target=True)
        for ip in china:
            self.new_address(ip=addr_to_int(ip), china=True)
        for ip in blocks:
            self.new_address(ip=addr_to_int(ip), block_target=True)

    def setup_kinds(self):
        self._conn.execute(create_table_statements["kinds"], noresult=True)

        # TODO: upsert?

        #NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN")
        #self.new_kind(name="bad", exception=NXDOMAIN)
        #self.new_kind(name="badsub", exception=NXDOMAIN)

    def setup_domains(self):
        self._conn.execute(create_table_statements["domains"], noresult=True)

    def setup_records(self):
        self._conn.execute(create_table_statements["records"], noresult=True)

    def setup_messages(self):
        self._conn.execute(create_table_statements["messages"], noresult=True)
        for trig in table_triggers["messages"]:
            self._conn.execute(trig)

    def start_execution(self, dt):
        execution = TExecution()
        execution.start_date = dt
        self.flush()
        return execution

    def finish_execution(self, execution, dt, completed):
        # TODO: fail if ExecutionId is missing?
        execution.finish_date = dt
        execution.completed = completed
        self.flush()

    def next_record_id(self):
        from storm.expr import Add, Max, Coalesce
        expr = Add(Coalesce(Max(TRecord.record_id), 0), 1)
        return self.find_one(expr)

    def find_record_id(self, addresses):
        address_ids = list(address.address_id for address in addresses)
        record_ids = list(self._conn.find(TRecord, TRecord.address_id.is_in(address_ids)).values(TRecord.record_id))
        if not record_ids:
            return None
        unique_ids = sorted(set(record_ids))
        for needle in unique_ids:
            if sum(1 for id in record_ids if id == needle) == len(addresses):
                return needle
        return None

    def push_entry(self, entry):
        kind = self.find_one(TKind, TKind.name == entry.kind)
        if not kind:
            kind = self.new_kind(name=entry.kind)
            if entry.kind.startswith("bad"):
                exception = self.find_one(TException, TException.name == "NXDOMAIN")
                assert exception is not None
                kind.exception = exception

        domain = self.find_one(TDomain, TDomain.name == entry.domain)
        if not domain:
            domain = self.new_domain(name=entry.domain)
            domain.kind = kind

        addresses = []
        as_ints = sorted(set(map(addr_to_int, entry.addrs)))
        for numeric in as_ints:
            address = self.find_one(TAddress, TAddress.ip == numeric)
            if not address:
                address = self.new_address(ip=numeric)
            addresses.append(address)

        for address in addresses:
            if entry.reason == "block":
                address.block_target = True
            elif entry.reason == "redirect":
                address.redirect_target = True
            elif entry.reason == "gfw":
                address.gfw_target = True

        if addresses:
            record_id = self.find_record_id(addresses)
            if record_id is None:
                record_id = self.next_record_id()
                for address in addresses:
                    self.new_record(record_id=record_id, address=address)
        else:
            record_id = None

        numeric = addr_to_int(entry.server)
        server = self.find_one(TAddress, TAddress.ip == numeric)
        if not server:
            server = self.new_address(ip=numeric)
            self.flush()
        server.server = True

        if entry.exception:
            exception = self.find_one(TException, TException.name == entry.exception)
            assert exception is not None
        else:
            exception = None

        message = self.new_message(
            execution=entry.execution,
            server=server, domain=domain,
            record_id=record_id, exception=exception)
        self.flush()
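For reference, addr_to_int packs the four octets big-endian into a single 32-bit integer, which the generated Ips.AsStr column unpacks again on the SQL side; a small sanity sketch, not part of the commit:

assert addr_to_int("1.2.3.4") == 0x01020304 == 16909060
assert addr_to_int("127.0.0.1") == 0x7f000001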
respodns/ips.py (new file, 67 lines)
@@ -0,0 +1,67 @@
# known IPs of any DNS located in China:
china = {
    "1.1.8.8",
    "1.1.8.9",
    "1.2.4.8",
    "1.8.1.8",
    "1.8.8.8",
    "114.254.201.131",
    "218.107.55.108",
    "222.216.2.236",
}

# known IPs (not servers) that are used to deny access to websites:
blocks = {
    "1.2.3.4",  # timeout
    "54.242.237.204",  # fake
    "93.158.134.250",  # fake
    "114.6.128.8",  # fake
    "118.97.116.27",  # fake
    "119.235.29.59",  # fake
    "124.40.255.99",  # fake
    "146.112.61.106",  # fake
    "156.154.113.17",  # fake
    "156.154.175.30",  # fake
    "156.154.175.215",  # fake
    "156.154.175.216",  # fake
    "156.154.175.221",  # fake
    "163.28.10.160",  # fake
    "175.139.142.25",  # fake
    "176.103.130.135",  # fake
    "182.93.64.126",  # fake
    "192.99.140.48",  # fake
    "195.175.254.2",  # fake
    "202.40.187.91",  # fake
    "202.162.209.133",  # fake
    "202.165.36.253",  # fake
    "203.119.13.75",  # fake
    "203.119.13.76",  # fake
    "203.190.55.217",  # fake
}

bogon_checks = [
    "0.",
    "10.",
    "127.",
    "169.254.",
    "192.0.0.",
    "192.0.2.",
    "192.168.",
    "198.18.",
    "198.19.",
    "198.51.100.",
    "203.0.113.",
] + [
    "100.{}.".format(i) for i in range(64, 128)
] + [
    "172.{}.".format(i) for i in range(16, 32)
] + [
    "{}.".format(i) for i in range(224, 256)
]

def is_bogon(ip):
    return any(ip.startswith(check) for check in bogon_checks)

def ipkey(ip_string):
    segs = [int(s) for s in ip_string.replace(":", ".").split(".")]
    return sum(256**(3 - i) * seg for i, seg in enumerate(segs))
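The bogon test is plain string-prefix matching over RFC 1918 space, CGNAT (100.64.0.0/10), link-local, the TEST-NETs, benchmarking ranges, and class D/E, while ipkey turns dotted quads into numeric sort keys; a brief sketch, not part of the commit:

assert is_bogon("192.168.1.1")  # RFC 1918
assert is_bogon("100.64.0.1")   # CGNAT
assert not is_bogon("8.8.8.8")
# numeric rather than lexicographic ordering:
assert sorted(["10.0.0.2", "2.0.0.10"], key=ipkey) == ["2.0.0.10", "10.0.0.2"]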
respodns/nonsense.py (new file, 8 lines)
@@ -0,0 +1,8 @@
from random import Random
from string import ascii_lowercase
from zlib import crc32

def nonsense_consistent(domain):
    rng = Random(crc32(domain.encode("utf-8")))
    length = rng.choices((9, 10, 11, 12), (4, 5, 3, 2))[0]
    return "".join(rng.choice(ascii_lowercase) for i in range(length))
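Seeding Random with the CRC-32 of the domain makes the generated label a pure function of its input, so every run (and every server) is asked about the same nonexistent subdomain; a sketch, not part of the commit:

assert nonsense_consistent("com") == nonsense_consistent("com")  # deterministic per domain
assert nonsense_consistent("com") != nonsense_consistent("org")  # distinct seeds, almost surely distinct labels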
respodns/pooler.py (new file, 48 lines)
@@ -0,0 +1,48 @@
from types import CoroutineType
import asyncio

# TODO: write a less confusing interface that allows the code to be written more flatly.
#       maybe like: async for done in apply(doit, [tuple_of_args]):

def make_pooler(pool_size, finisher=None):
    aws = set()
    async def pooler(item=None):
        nonlocal aws
        finish = item is None
        if not finish:
            if isinstance(item, CoroutineType):
                assert not isinstance(item, asyncio.Task)
                item = asyncio.create_task(item)
            aws.add(item)
        # TODO: don't wait until all completed, just first completed in loop.
        #       that way we can handle each done task ASAP.
        condition = asyncio.ALL_COMPLETED if finish else asyncio.FIRST_COMPLETED
        if len(aws) == 0:
            return None
        if finish or len(aws) >= pool_size:
            done, pending = await asyncio.wait(aws, return_when=condition)
            #pending = set(task for task in pending if task in aws)  # ???
            ret = None if finisher is None else finisher(done, pending)
            #aws = set(task for task in pending if not task.cancelled())
            aws = pending
            if ret is not None:
                return ret
        return None
    return pooler

def make_simple_pooler(pool_size, finisher=None):
    condition = asyncio.FIRST_COMPLETED
    pending = set()
    async def pooler(item=None):
        nonlocal pending
        finish = item is None
        if not finish:
            if isinstance(item, CoroutineType):
                assert not isinstance(item, asyncio.Task)
                item = asyncio.create_task(item)
            pending.add(item)
        desired_size = 0 if finish else pool_size - 1
        while len(pending) > desired_size:
            done, pending = await asyncio.wait(pending, return_when=condition)
            if finisher is not None:
                finisher(done, pending)
    return pooler
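make_simple_pooler keeps at most pool_size tasks in flight: submitting awaits only once the pool is full, and a bare pooler() call drains whatever remains. A usage sketch under those semantics, not part of the commit (work and on_done are hypothetical):

import asyncio

async def work(n):
    await asyncio.sleep(0.01)
    return n

def on_done(done, pending):
    for task in done:
        print("finished:", task.result())

async def demo():
    pooler = make_simple_pooler(3, on_done)
    for n in range(10):
        await pooler(work(n))  # awaits only while 3 tasks are already pending
    await pooler()             # drain: wait for the stragglers

asyncio.run(demo())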
respodns/structs.py (new file, 32 lines)
@@ -0,0 +1,32 @@
from dataclasses import dataclass
from datetime import datetime

@dataclass
class Options:
    #exec_id: int = -1
    execution: object = None

    ip_simul: int = 10     # how many IPs to connect to at once
    domain_simul: int = 3  # how many domains per IP to request at once

    ip_wait: float = 0.15
    domain_wait: float = 0.25

    impatient: bool = False      # reduce retries and times for timeouts
    early_stopping: bool = True  # stop at the first invalid domain (best with dry)
    dry: bool = True             # don't write anything to database
    progress: bool = False

@dataclass
class Entry:
    date: datetime
    success: bool
    server: str
    kind: str
    domain: str
    exception: str
    addrs: list  # list of strings
    reason: str
    #exec_id: int
    execution: object
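Since Options is a dataclass, the knobs above can also be set at construction rather than by attribute assignment afterwards; a sketch of configuring a logging run, not part of the commit:

opts = Options(dry=False, early_stopping=False, progress=True)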
respodns/top1m.py (new file, 52 lines)
@@ -0,0 +1,52 @@
urltop_default = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
csvfn_default = "top-1m.csv"

one_week = 7 * 24 * 60 * 60  # in seconds

def alive(fp, expire):
    from os.path import exists, getmtime, getsize
    from time import time

    return exists(fp) and time() < getmtime(fp) + expire and getsize(fp) > 2

def download_top1m(urltop=None, csvfn=None):
    from io import BytesIO
    from urllib.request import urlopen
    from zipfile import ZipFile

    if urltop is None:
        urltop = urltop_default
    if csvfn is None:
        csvfn = csvfn_default

    comp = BytesIO()
    with urlopen(urltop) as re:
        comp.write(re.read())

    with ZipFile(comp) as zipf:
        with zipf.open(csvfn) as f:
            uncomp = f.read()
    comp.close()

    return uncomp.decode("utf-8")

def retrieve_top1m_entries(csv_fp="top-1m.csv"):
    from sys import stderr

    if alive(csv_fp, one_week):
        with open(csv_fp, "r") as f:
            uncomp = f.read()
    else:
        print("downloading", csv_fp, file=stderr)
        uncomp = download_top1m()
        with open(csv_fp, "w") as f:
            f.write(uncomp)

    # we could use the csv module, but this is totally overkill
    # for data that *should* be just a subset of ascii.
    lines = uncomp.splitlines()
    entries = [(lambda a: (int(a[0]), a[2]))(line.partition(","))
               for line in lines]
    return entries

top1m = download_top1m
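retrieve_top1m_entries caches the Alexa CSV in the working directory for up to a week and parses each "rank,domain" line into a tuple; a sketch of the output shape, not part of the commit:

entries = retrieve_top1m_entries()  # downloads at most once per week
for rank, domain in entries[:3]:
    print(rank, domain)             # e.g. "1 google.com"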
respodns/util.py (new file, 71 lines)
@@ -0,0 +1,71 @@
def right_now():
    from datetime import datetime, timezone
    return datetime.now(timezone.utc)

def detect_gfw(r, ip, check):
    # attempt to detect interference from the Great Firewall of China.
    #from .ips import china
    #if r in china: return True

    # class D or class E, neither of which are correct for a (public?) DNS.
    #if int(r.partition(".")[0]) >= 224: return True

    rs = lambda prefix: r.startswith(prefix)
    de = lambda suffix: check.domain.endswith(suffix)
    hosted = de("facebook.com") or de("instagram.com") or de("whatsapp.com")
    if rs("31.13.") and not hosted: return True
    if rs("66.220."): return True
    if rs("69.63."): return True
    if rs("69.171.") and not rs("69.171.250."): return True
    if rs("74.86."): return True
    if rs("75.126."): return True
    if r == "64.13.192.74": return True
    # more non-facebook GFW stuff:
    # 31.13.64.33
    # 31.13.70.1
    # 31.13.70.20
    # 31.13.76.16
    # 31.13.86.1
    # 173.252.110.21
    # 192.99.140.48
    # 199.16.156.40
    # 199.16.158.190

    return False

async def getaddrs(server, domain, opts):
    from dns.asyncresolver import Resolver
    from dns.exception import Timeout
    from dns.resolver import NXDOMAIN, NoAnswer, NoNameservers
    #from dns.resolver import Resolver
    from .ips import ipkey

    res = Resolver(configure=False)
    if opts.impatient:
        res.timeout = 5
        res.lifetime = 2
    res.nameservers = [server]
    try:
        #ans = res.resolve(domain, "A", search=False)
        ans = await res.resolve(domain, "A", search=False)
    except NXDOMAIN:
        return ["NXDOMAIN"]
    except NoAnswer:
        return ["NoAnswer"]
    except NoNameservers:
        return ["NoNameservers"]
    except Timeout:
        return ["Timeout"]
    #return list(set(rr.address for rr in ans.rrset))
    return sorted(set(rr.address for rr in ans.rrset), key=ipkey)

def read_ips(f):
    # TODO: make async and more robust. (regex pls)
    # TODO: does readlines() block if the pipe is left open i.e. user input?
    for ip in f.readlines():
        if "#" in ip:
            ip, _, _ = ip.partition("#")
        ip = ip.strip()
        if ip.count(".") != 3:
            continue
        yield ip
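read_ips tolerates comments and junk lines, yielding only bare dotted quads; a quick sketch, not part of the commit:

from io import StringIO
lines = StringIO("8.8.8.8 # google\n\nnot an ip\n9.9.9.9\n")
assert list(read_ips(lines)) == ["8.8.8.8", "9.9.9.9"]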
setup.py (new file, 31 lines)
@@ -0,0 +1,31 @@
from setuptools import setup

setup(
    name='respodns',
    version='0.1.0',
    packages=[
        'respodns',
    ],

    author='notwa',
    author_email='cloningdonor+pypi@gmail.com',
    url='https://github.com/notwa/respodns',
    keywords='TODO',
    description='DNS logger',
    license='ISC',
    zip_safe=True,

    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: ISC License (ISCL)',
        'Natural Language :: English',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: SQL',
        'Topic :: Internet :: Name Service (DNS)',
        'Topic :: System :: Logging',
        'Topic :: Utilities',
    ]
)