init
This commit is contained in:
commit
3afeb5a3fd
15 changed files with 1084 additions and 0 deletions
3
.gitignore
vendored
Normal file
3
.gitignore
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
*.csv
|
||||||
|
*.db
|
||||||
|
__pycache__/
|
13
LICENSE
Normal file
13
LICENSE
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
Copyright (c) 2020, Connor Olding
|
||||||
|
|
||||||
|
Permission to use, copy, modify, and/or distribute this software for any
|
||||||
|
purpose with or without fee is hereby granted, provided that the above
|
||||||
|
copyright notice and this permission notice appear in all copies.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
||||||
|
WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
||||||
|
MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
||||||
|
ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
||||||
|
ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
||||||
|
OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
1
README.md
Normal file
1
README.md
Normal file
|
@ -0,0 +1 @@
|
||||||
|
へい
|
2
requirements.txt
Normal file
2
requirements.txt
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
dnspython >= 2.1.0
|
||||||
|
storm
|
0
respodns/__init__.py
Normal file
0
respodns/__init__.py
Normal file
208
respodns/__main__.py
Normal file
208
respodns/__main__.py
Normal file
|
@ -0,0 +1,208 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
from asyncio import run, sleep
|
||||||
|
from .checks import (first_checks, new_checks,
|
||||||
|
likely_checks, unlikely_checks, top100)
|
||||||
|
from .db import RespoDB
|
||||||
|
from .ips import blocks, is_bogon
|
||||||
|
from .pooler import make_simple_pooler
|
||||||
|
from .structs import Options, Entry
|
||||||
|
from .util import right_now, read_ips, getaddrs, detect_gfw
|
||||||
|
from sys import argv, stdin, stderr, exit
|
||||||
|
|
||||||
|
def process_result(res, ip, check, opts: Options):
    """Classify one DNS lookup result and build an Entry record.

    res is the raw result list from getaddrs: address strings and/or
    exception names (e.g. "Timeout", "NXDOMAIN").  ip is the DNS server
    that was queried; check is the (kind, domain) pair that was looked up.
    Always returns an Entry (asserts that a reason was chosen).
    """
    # TODO: get more accurate times by inserting start-end into getaddrs.
    now = right_now()
    assert len(res) > 0
    reason = None

    if "Timeout" in res:
        reason = "timeout"

    elif check.kind.startswith("bad"):
        # "bad*" domains are nonsense and *should* fail to resolve;
        # anything other than NXDOMAIN means the server hijacks misses.
        reason = "okay" if "NXDOMAIN" in res else "redirect"

    elif any(is_bogon(r) for r in res):
        reason = "block"
    elif any(blocked in res for blocked in blocks):
        # known denial/sinkhole addresses (see .ips.blocks).
        reason = "block"

    elif not any(len(r) > 0 and r[0].isdigit() for r in res):
        # nothing address-like came back at all, only exception names.
        # TODO: check for no alias on common.
        reason = "missing"

    else:
        for r in res:
            if len(r) == 0 or not r[0].isdigit():
                continue  # skip exception names mixed in with addresses
            if detect_gfw(r, ip, check):
                reason = "gfw"
                break
        else:
            reason = "okay"

    assert reason is not None, (res, ip, check)

    # items whose first character is a digit are treated as addresses;
    # when there are none, the first item is kept as the exception name.
    addrs = list(filter(lambda r: len(r) > 0 and r[0].isdigit(), res))
    exception = res[0] if len(addrs) == 0 else None

    return Entry(
        date=now,
        success=reason == "okay",
        server=ip,
        kind=check.kind,
        domain=check.domain,
        exception=exception,
        addrs=addrs,
        reason=reason,
        execution=opts.execution,
    )
|
||||||
|
|
||||||
|
async def try_ip(db, server_ip, checks, opts: Options):
    """Run every check against one DNS server.

    Returns (server_ip, None) on success, or (server_ip, first_failure)
    where first_failure is the first failing Entry.  Unless opts.dry,
    all collected entries are pushed to db and committed.
    """
    entries = []

    success = True
    def finisher(done, pending):
        # collect finished lookups; on the first failure, optionally cancel
        # whatever is still pending (early stopping for dry runs).
        nonlocal success
        for task in done:
            res, ip, check = task.result()
            entry = process_result(res, ip, check, opts)
            entries.append(entry)
            if not entry.success:
                if opts.early_stopping and success:  # only cancel once
                    for pend in pending:
                        #print("CANCEL", file=stderr)
                        # FIXME: this can still, somehow, cancel the main function.
                        pend.cancel()
                success = False

    pooler = make_simple_pooler(opts.domain_simul, finisher)

    async def getaddrs_wrapper(ip, check):
        # NOTE: could put right_now() stuff here!
        # TODO: add duration field given in milliseconds (integer)
        #       by subtracting start and end datetimes.
        res = await getaddrs(ip, check.domain, opts)
        return res, ip, check

    for i, check in enumerate(checks):
        first = i == 0
        if not first:
            await sleep(opts.domain_wait)
        await pooler(getaddrs_wrapper(server_ip, check))
        if first:
            # limit to one connection for the first check.
            await pooler()
        if not success:
            if opts.early_stopping or first:
                break
    else:
        # no break: drain whatever is still pending.
        await pooler()

    if not opts.dry:
        for entry in entries:
            db.push_entry(entry)
        db.commit()

    if not success:
        first_failure = None
        assert len(entries) > 0
        for entry in entries:
            #print(entry, file=stderr)
            if not entry.success:
                first_failure = entry
                break
        else:
            assert 0, ("no failures found:", entries)
        return server_ip, first_failure
    return server_ip, None
|
||||||
|
|
||||||
|
async def main(db, filepath, checks, opts: Options):
    """Read server IPs from filepath ("" means stdin), test each server
    against checks concurrently, and print the outcome per server.
    """
    def finisher(done, pending):
        # report each finished server; details are printed only in dry runs.
        for task in done:
            ip, first_failure = task.result()
            if first_failure is None:
                print(ip)
            elif opts.dry:
                ff = first_failure
                if ff.kind in ("shock", "adware"):
                    # omit the domain for these kinds.
                    print(ip, ff.reason, ff.kind, sep="\t")
                else:
                    print(ip, ff.reason, ff.kind, ff.domain, sep="\t")

    pooler = make_simple_pooler(opts.ip_simul, finisher)

    f = stdin if filepath == "" else open(filepath, "r")
    try:
        for i, ip in enumerate(read_ips(f)):
            first = i == 0
            if opts.progress:
                print(f"#{i}: {ip}", file=stderr)
                stderr.flush()
            if not first:
                await sleep(opts.ip_wait)
            await pooler(try_ip(db, ip, checks, opts))
    finally:
        # BUG fixed: previously the file was only closed on the happy path,
        # leaking the handle if a check raised.  Never close stdin.
        if f != stdin:
            f.close()

    await pooler()
|
||||||
|
|
||||||
|
def ui(program, args):
    """Parse command-line arguments and run the scan.

    Returns None implicitly; the caller treats a None result as success.
    """
    name = "respodns6"
    parser = ArgumentParser(name,
                            description=name + ": test and log DNS records")

    # TODO: support multiple paths. nargs="+", iterate with pooling?
    parser.add_argument(
        "path", metavar="file-path",
        help="a path to a file containing IPv4 addresses which host DNS servers")

    parser.add_argument(
        "--database",
        help="specify database for logging")

    a = parser.parse_args(args)

    checks = []
    checks += first_checks
    #checks += new_checks
    checks += likely_checks
    #checks += unlikely_checks
    #checks += top100

    opts = Options()
    # without a database this is a "dry" run: print results instead of
    # logging, and stop a server's checks at the first failure.
    opts.dry = a.database is None
    opts.early_stopping = opts.dry

    if a.database is not None:
        if a.database.startswith("sqlite:"):
            uri = a.database
        else:
            # bare filenames are treated as sqlite database paths.
            uri = "sqlite:///" + a.database

    def runwrap(db, debug=False):
        # run the async entry point; debug also enables asyncio debug mode.
        if debug:
            import logging
            logging.basicConfig(level=logging.DEBUG)
            run(main(db, a.path, checks, opts), debug=True)
        else:
            run(main(db, a.path, checks, opts))

    if opts.dry:
        runwrap(None)
    else:
        # log to a database.
        db = RespoDB(uri, create=True)
        with db:  # TODO: .open and .close methods for manual invocation.
            with db.execution as execution:  # TODO: clean up this interface.
                opts.execution = execution
                runwrap(db)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # argv should always hold at least the program name; an empty argv
    # means the interpreter was invoked in a very unusual way.
    if not argv:
        print("You've met with a terrible fate.", file=stderr)
        ret = 2
    else:
        program, *rest = argv
        ret = ui(program, rest)
    if ret is not None:
        exit(ret)
|
114
respodns/checks.py
Normal file
114
respodns/checks.py
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
from collections import namedtuple
|
||||||
|
from .nonsense import nonsense_consistent
|
||||||
|
from .top1m import retrieve_top1m_entries
|
||||||
|
|
||||||
|
# translation table for a ROT13 variant that also rotates digits by five:
# a..m <-> n..z, A..M <-> N..Z, 0..4 <-> 5..9.  everything else is identity.
rot13_mapping = {}
for lo, hi, to_lo, to_hi in zip("anAN05", "mzMZ49", "naNA50", "zmZM94"):
    src = range(ord(lo), ord(hi) + 1)
    dst = range(ord(to_lo), ord(to_hi) + 1)
    for k, v in zip(src, dst):
        rot13_mapping[chr(k)] = chr(v)


def rot13(s):
    """Apply the ROT13(+digit rotation) mapping to s; unmapped characters
    (punctuation, etc.) pass through unchanged."""
    out = []
    for c in s:
        out.append(rot13_mapping.get(c, c))
    return "".join(out)
|
||||||
|
|
||||||
|
def concat_nonsense(domain):
    """Return domain prefixed with its deterministic nonsense subdomain label."""
    label = nonsense_consistent(domain)
    return ".".join((label, domain))
|
||||||
|
|
||||||
|
def head(n, it):
    """Return a list of at most the first n items of the iterable it.

    Stops early when it is exhausted; n <= 0 yields an empty list.
    Works on any iterable, not just iterators (a generalization of the
    original hand-rolled next() loop, which required an iterator).
    """
    from itertools import islice
    # islice rejects negative stops; clamp to preserve the old behavior
    # of returning [] for n <= 0.
    return list(islice(it, max(n, 0)))
|
||||||
|
|
||||||
|
Check = namedtuple("Check", ("kind", "domain"))
|
||||||
|
|
||||||
|
first_checks = [
|
||||||
|
Check("common", "baidu.com"), # this avoids issues with chinese censorship: https://www.bortzmeyer.org/sichuan-pepper.html
|
||||||
|
]
|
||||||
|
|
||||||
|
new_checks = [
|
||||||
|
# via dnsvalidator
|
||||||
|
Check("adtrack", "bet365.com"),
|
||||||
|
Check("common", "facebook.com"),
|
||||||
|
Check("common", "google.com"),
|
||||||
|
Check("common", "paypal.com"),
|
||||||
|
Check("common", "wikileaks.com"),
|
||||||
|
Check("news", "telegram.com"),
|
||||||
|
]
|
||||||
|
|
||||||
|
likely_checks = [
|
||||||
|
# these checks are, in practice, the most likely to weed out unwanted DNS servers.
|
||||||
|
Check("news", "huanqiu.com"),
|
||||||
|
Check("adware", rot13("nqf789.pbz")),
|
||||||
|
Check("shock", rot13("tbng.pk")), # some servers block shock sites, which isn't a terrible idea, but it's inaccurate
|
||||||
|
Check("porn", "pornhub.com"), # some servers block piracy and porn sites for being taboo, or whatever
|
||||||
|
Check("adtrack", "media.fastclick.net"),
|
||||||
|
Check("parking", "scmp.org"), # dns.watch fails here: domain parking is evil, but servers must abide
|
||||||
|
Check("usercontent","4chan.org"), # some servers block sites driven by their chaotic user-created content
|
||||||
|
Check("bad", concat_nonsense("com")), # some servers like to redirect nonexistent domains: https://web.archive.org/web/20140302064622/http://james.bertelson.me/blog/2014/01/level-3-are-now-hijacking-failed-dns-requests-for-ad-revenue-on-4-2-2-x/
|
||||||
|
Check("weirdsub", concat_nonsense("javarevisited.blogspot.com")), # blogspot handles these strangely; DNS servers likewise
|
||||||
|
# NOTE: disabled for being wildly inconsistent:
|
||||||
|
# Check("weird", "archive.is"), # Cloudflare fails here: https://jarv.is/notes/cloudflare-dns-archive-is-blocked/
|
||||||
|
Check("common", "archive.org"),
|
||||||
|
Check("infosec", "iuqerfsodp9ifjaposdfjhgosurijfaewrwergwea.com"), # one of the WannaCry sinkholes, kinda important that it resolves
|
||||||
|
Check("common", "xn--b1aew.xn--p1ai"), # just to test internationalized domains
|
||||||
|
Check("common", "wikileaks.com"),
|
||||||
|
Check("uncommon", "cybre.space"), # also doubles as a check for the new TLDs
|
||||||
|
Check("piracy", "thehiddenbay.org"), # some servers block piracy and porn sites for being taboo, or whatever
|
||||||
|
Check("infosec", "scan.shadowserver.org"), # some servers block this: https://scan.shadowserver.org/dns/
|
||||||
|
Check("common", "duckduckgo.com"), # a few servers block this for some reason?
|
||||||
|
Check("badsub", concat_nonsense("google.com")), # poisoning may yield an unwanted result here
|
||||||
|
Check("common", "en.wikipedia.org"),
|
||||||
|
Check("adtrack", "google-analytics.com"),
|
||||||
|
Check("adtrack", "ad.doubleclick.net"),
|
||||||
|
# baidu goes here...?
|
||||||
|
Check("common", "naver.com"),
|
||||||
|
Check("common", "google.com"), # surely a fully-functional server would resolve the most popular domain in existence
|
||||||
|
]
|
||||||
|
|
||||||
|
unlikely_checks = [
|
||||||
|
Check("piracy", "thepiratebay.org"),
|
||||||
|
Check("porn", "xvideos.com"),
|
||||||
|
Check("usercontent","imgur.com"),
|
||||||
|
Check("usercontent","twitter.com"),
|
||||||
|
Check("usercontent","weibo.com"),
|
||||||
|
Check("usercontent","github.com"),
|
||||||
|
Check("porn", "chaturbate.com"),
|
||||||
|
Check("video", "bilibili.com"),
|
||||||
|
Check("video", "twitch.tv"),
|
||||||
|
Check("common", "qq.com"),
|
||||||
|
Check("video", "netflix.com"),
|
||||||
|
Check("usercontent","reddit.com"),
|
||||||
|
Check("usercontent","facebook.com"),
|
||||||
|
Check("video", "youtube.com"),
|
||||||
|
Check("usercontent","tumblr.com"),
|
||||||
|
Check("usercontent","wordpress.com"),
|
||||||
|
Check("common", "tmall.com"),
|
||||||
|
Check("usercontent","instagram.com"),
|
||||||
|
Check("news", "nytimes.com"),
|
||||||
|
Check("usercontent","flickr.com"),
|
||||||
|
Check("common", "ebay.com"),
|
||||||
|
Check("news", "scmp.com"),
|
||||||
|
Check("common", "aliexpress.com"),
|
||||||
|
Check("common", "stackoverflow.com"),
|
||||||
|
]
|
||||||
|
|
||||||
|
defunct = [
|
||||||
|
"panda.tv", # imochen.github.io
|
||||||
|
]
|
||||||
|
|
||||||
|
def _top1m_gen():
    # stream top-1M entries as "top" checks, skipping known-dead domains.
    # NOTE(review): retrieve_top1m_entries appears to yield (rank, domain)
    # pairs -- the rank is unpacked but unused; confirm against .top1m.
    return (Check("top", entry)
            for i, entry in retrieve_top1m_entries()
            if entry not in defunct)


# each call re-runs the generator, so these are two independent prefixes.
# NOTE(review): these run at import time -- presumably retrieve_top1m_entries
# is cheap or cached; confirm before importing this module in hot paths.
top100 = head(100, _top1m_gen())
top1000 = head(1000, _top1m_gen())
|
||||||
|
|
||||||
|
#__all__ = [
|
||||||
|
# "first_checks", "new_checks", "likely_checks", "unlikely_checks", "top100",
|
||||||
|
# "defunct",
|
||||||
|
#]
|
434
respodns/db.py
Normal file
434
respodns/db.py
Normal file
|
@ -0,0 +1,434 @@
|
||||||
|
import storm.locals as rain
|
||||||
|
import re
|
||||||
|
|
||||||
|
# dotted-quad IPv4 pattern; raw string avoids invalid-escape warnings on
# modern Python, re.ASCII keeps \d from matching non-ASCII digits.
ipv4_pattern = re.compile(r"(\d+)\.(\d+)\.(\d+)\.(\d+)", re.ASCII)


def addr_to_int(ip):
    """Convert a dotted-quad IPv4 string to its 32-bit integer value.

    Raises ValueError for anything that is not a valid IPv4 address.
    (Previously used asserts -- one of which referenced an undefined
    name `row`, turning any match failure into a NameError -- and would
    silently skip validation under python -O.)
    """
    match = ipv4_pattern.fullmatch(ip)
    if match is None:
        raise ValueError("not an IPv4 address: %r" % (ip,))
    segs = list(map(int, match.group(1, 2, 3, 4)))
    if not all(0 <= seg <= 255 for seg in segs):
        raise ValueError("IPv4 octet out of range: %s" % match.group(0))
    return segs[0] << 24 | segs[1] << 16 | segs[2] << 8 | segs[3]
|
||||||
|
|
||||||
|
create_table_statements = dict(
|
||||||
|
# TODO: Duration REAL GENERATED ALWAYS AS etc.?
|
||||||
|
executions="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Executions (
|
||||||
|
ExecutionId INTEGER PRIMARY KEY,
|
||||||
|
StartDate DATE NOT NULL,
|
||||||
|
FinishDate DATE,
|
||||||
|
Completed BOOLEAN DEFAULT 0 NOT NULL)
|
||||||
|
""",
|
||||||
|
|
||||||
|
exceptions="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Exceptions (
|
||||||
|
ExceptionId INTEGER PRIMARY KEY,
|
||||||
|
Name TEXT NOT NULL,
|
||||||
|
Fail BOOLEAN NOT NULL)
|
||||||
|
""",
|
||||||
|
|
||||||
|
ips="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Ips (
|
||||||
|
IpId INTEGER PRIMARY KEY,
|
||||||
|
AsStr TEXT GENERATED ALWAYS AS (
|
||||||
|
Cast(AsInt >> 24 & 255 AS TEXT) || '.' ||
|
||||||
|
Cast(AsInt >> 16 & 255 AS TEXT) || '.' ||
|
||||||
|
Cast(AsInt >> 8 & 255 AS TEXT) || '.' ||
|
||||||
|
Cast(AsInt & 255 AS TEXT)
|
||||||
|
) STORED NOT NULL,
|
||||||
|
AsInt INTEGER UNIQUE CHECK(AsInt >= 0 AND AsInt < 1 << 32) NOT NULL,
|
||||||
|
China BOOLEAN DEFAULT 0 NOT NULL,
|
||||||
|
BlockTarget BOOLEAN DEFAULT 0 NOT NULL,
|
||||||
|
Server BOOLEAN DEFAULT 0 NOT NULL,
|
||||||
|
RedirectTarget BOOLEAN DEFAULT 0 NOT NULL,
|
||||||
|
GfwTarget BOOLEAN DEFAULT 0 NOT NULL)
|
||||||
|
""",
|
||||||
|
|
||||||
|
kinds="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Kinds (
|
||||||
|
KindId INTEGER PRIMARY KEY,
|
||||||
|
Name TEXT UNIQUE NOT NULL,
|
||||||
|
ExpectExceptionId INTEGER,
|
||||||
|
FOREIGN KEY(ExpectExceptionId) REFERENCES Exceptions(ExceptionId))
|
||||||
|
""",
|
||||||
|
|
||||||
|
domains="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Domains (
|
||||||
|
DomainId INTEGER PRIMARY KEY,
|
||||||
|
Name TEXT UNIQUE NOT NULL,
|
||||||
|
KindId INTEGER,
|
||||||
|
FOREIGN KEY(KindId) REFERENCES Kinds(KindId))
|
||||||
|
""",
|
||||||
|
|
||||||
|
# NOTE: that RecordId is *not* the rowid here
|
||||||
|
# since records can contain multiple IPs,
|
||||||
|
# and thereby span multiple rows.
|
||||||
|
# TODO: indexing stuff, cascade deletion stuff.
|
||||||
|
records="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Records (
|
||||||
|
RecordId INTEGER NOT NULL,
|
||||||
|
IpId INTEGER,
|
||||||
|
FOREIGN KEY(IpId) REFERENCES Ips(IpId))
|
||||||
|
""",
|
||||||
|
|
||||||
|
messages="""
|
||||||
|
CREATE TABLE IF NOT EXISTS Messages (
|
||||||
|
MessageId INTEGER PRIMARY KEY,
|
||||||
|
ExecutionId INTEGER,
|
||||||
|
ServerId INTEGER NOT NULL,
|
||||||
|
DomainId INTEGER NOT NULL,
|
||||||
|
RecordId INTEGER,
|
||||||
|
ExceptionId INTEGER,
|
||||||
|
FOREIGN KEY(ServerId) REFERENCES Ips(IpId),
|
||||||
|
FOREIGN KEY(ExecutionId) REFERENCES Executions(ExecutionId),
|
||||||
|
FOREIGN KEY(DomainId) REFERENCES Domains(DomainId),
|
||||||
|
FOREIGN KEY(ExceptionId) REFERENCES Exceptions(ExceptionId))
|
||||||
|
""",
|
||||||
|
# this fails because RecordId is not UNIQUE:
|
||||||
|
# FOREIGN KEY(RecordId) REFERENCES Records(RecordId)
|
||||||
|
)
|
||||||
|
|
||||||
|
create_view_statements = [
|
||||||
|
"""
|
||||||
|
CREATE VIEW Results AS
|
||||||
|
SELECT
|
||||||
|
Messages.ExecutionId,
|
||||||
|
ServerIps.AsStr as Server,
|
||||||
|
Kinds.Name as Kind,
|
||||||
|
Domains.Name as Name,
|
||||||
|
RecordIps.AsStr as Address,
|
||||||
|
Exceptions.Name as Exception
|
||||||
|
FROM Messages
|
||||||
|
LEFT JOIN Domains ON Messages.DomainId = Domains.DomainId
|
||||||
|
LEFT JOIN Kinds ON Domains.KindId = Kinds.KindId
|
||||||
|
LEFT JOIN Ips AS ServerIps ON Messages.ServerId = ServerIps.IpId
|
||||||
|
LEFT JOIN Records ON Messages.RecordId = Records.RecordId
|
||||||
|
LEFT JOIN Ips as RecordIps ON Records.IpId = RecordIps.IpId
|
||||||
|
LEFT JOIN Exceptions ON Messages.ExceptionId = Exceptions.ExceptionId
|
||||||
|
-- GROUP BY Records.IpId
|
||||||
|
""",
|
||||||
|
]
|
||||||
|
|
||||||
|
table_triggers = dict(
|
||||||
|
messages=[
|
||||||
|
# TODO: more triggers. (before update, and also for Records table)
|
||||||
|
"""
|
||||||
|
CREATE TRIGGER IF NOT EXISTS RecordExists
|
||||||
|
BEFORE INSERT
|
||||||
|
ON Messages
|
||||||
|
BEGIN
|
||||||
|
SELECT CASE
|
||||||
|
WHEN NEW.RecordId NOTNULL AND NOT EXISTS(SELECT 1 FROM Records WHERE Records.RecordID = NEW.RecordId)
|
||||||
|
THEN raise(FAIL, "RecordId does not exist")
|
||||||
|
END;
|
||||||
|
END
|
||||||
|
""",
|
||||||
|
])
|
||||||
|
|
||||||
|
class Execution:
    """Context manager for one Executions row: records the start time on
    entry and the finish time plus completed flag on exit."""
    def __init__(self, db):
        self.db = db
        self.execution = None  # the TExecution row while the context is active

    def __enter__(self):
        from .util import right_now
        self.execution = self.db.start_execution(right_now())
        return self.execution

    def __exit__(self, exc_type, exc_value, traceback):
        from .util import right_now
        # an exception propagating through the body marks the run incomplete.
        completed = exc_type is None
        self.db.finish_execution(self.execution, right_now(), completed)
|
||||||
|
|
||||||
|
class AttrCheck:
    """
    Inheriting AttrCheck prevents accidentally setting attributes
    that don't already exist.
    """
    def __setattr__(self, name, value):
        # NOTE: hasattr doesn't do what we want here. dir does.
        known = name.startswith("_") or name in dir(self)
        if not known:
            raise AttributeError(name)
        super().__setattr__(name, value)
|
||||||
|
|
||||||
|
# --- Storm ORM row classes -------------------------------------------------
# Thin table mappings whose column names mirror the CREATE TABLE statements
# above.  AttrCheck guards each against typo'd attribute assignments.

class TException(rain.Storm, AttrCheck):
    # Exceptions table: DNS failure names (e.g. "Timeout", "NXDOMAIN").
    __storm_table__ = "Exceptions"
    exception_id = rain.Int("ExceptionId", primary=True)
    name = rain.Unicode("Name")
    fail = rain.Bool("Fail")


class TExecution(rain.Storm, AttrCheck):
    # Executions table: one row per program run.
    __storm_table__ = "Executions"
    execution_id = rain.Int("ExecutionId", primary=True)
    start_date = rain.DateTime("StartDate")
    finish_date = rain.DateTime("FinishDate")
    completed = rain.Bool("Completed")


class TAddress(rain.Storm, AttrCheck):
    # Ips table: AsStr is a generated column derived from AsInt by sqlite.
    __storm_table__ = "Ips"
    address_id = rain.Int("IpId", primary=True)
    str = rain.Unicode("AsStr")
    ip = rain.Int("AsInt")
    china = rain.Bool("China")
    block_target = rain.Bool("BlockTarget")
    server = rain.Bool("Server")
    redirect_target = rain.Bool("RedirectTarget")
    gfw_target = rain.Bool("GfwTarget")


class TKind(rain.Storm, AttrCheck):
    # Kinds table: check categories (e.g. "adtrack"), with an optional
    # exception a domain of this kind is expected to produce.
    __storm_table__ = "Kinds"
    kind_id = rain.Int("KindId", primary=True)
    name = rain.Unicode("Name")
    xxid = rain.Int("ExpectExceptionId")
    exception = rain.Reference(xxid, "TException.exception_id")


class TDomain(rain.Storm, AttrCheck):
    # Domains table: each domain belongs to one kind.
    __storm_table__ = "Domains"
    domain_id = rain.Int("DomainId", primary=True)
    name = rain.Unicode("Name")
    kind_id = rain.Int("KindId")
    kind = rain.Reference(kind_id, "TKind.kind_id")


class TRecord(rain.Storm, AttrCheck):
    # Records table: RecordId is NOT the rowid -- one logical record
    # (a set of addresses) spans multiple rows, one per address.
    __storm_table__ = "Records"
    row_id = rain.Int("rowid", primary=True)
    record_id = rain.Int("RecordId")
    address_id = rain.Int("IpId")
    address = rain.Reference(address_id, "TAddress.address_id")


class TMessage(rain.Storm, AttrCheck):
    # Messages table: one row per logged lookup result.
    __storm_table__ = "Messages"
    message_id = rain.Int("MessageId", primary=True)
    execution_id = rain.Int("ExecutionId")
    server_id = rain.Int("ServerId")
    domain_id = rain.Int("DomainId")
    record_id = rain.Int("RecordId")
    exception_id = rain.Int("ExceptionId")
    execution = rain.Reference(execution_id, "TExecution.execution_id")
    server = rain.Reference(server_id, "TAddress.address_id")
    domain = rain.Reference(domain_id, "TDomain.domain_id")
    #record = rain.Reference(record_id, "TRecord.record_id")
    exception = rain.Reference(exception_id, "TException.exception_id")
|
||||||
|
|
||||||
|
def apply_properties(obj, d):
    """Assign each key/value pair in d onto obj, asserting that every key
    names a mapped Storm column or reference on obj's class.

    Returns obj so it can be chained into Store.add(...).
    """
    from storm.properties import PropertyColumn
    for name, value in d.items():
        ref = getattr(obj.__class__, name)
        assert ref is not None, (type(obj), name)
        # single isinstance with a tuple instead of two chained checks.
        assert isinstance(ref, (PropertyColumn, rain.Reference)), \
            (type(obj), name)
        setattr(obj, name, value)
    return obj
|
||||||
|
|
||||||
|
class RespoDB:
    """Storm-backed results database (sqlite) with schema-setup helpers.

    Use as a context manager to open/close the underlying Store; the
    .execution attribute is itself a context manager bracketing one run.
    """
    def __init__(self, uri, setup=False, create=False):
        self.uri = uri
        db_exists = self._db_exists(self.uri)
        self.db = rain.create_database(self.uri)
        self._conn = None  # a rain.Store, only set while inside `with self:`

        # build the schema when explicitly asked (setup=True) or when
        # creating a database file that does not exist yet.
        if setup or (create and not db_exists):
            with self:
                self.setup_executions()
                self.setup_exceptions()
                self.setup_ips()
                self.setup_kinds()
                self.setup_domains()
                self.setup_records()
                self.setup_messages()

                for q in create_view_statements:
                    self._conn.execute(q, noresult=True)
        assert setup or create or db_exists, "database was never setup"

        self.execution = Execution(self)

    @staticmethod
    def _db_exists(uri):
        # crude check: extract the filesystem path from a "scheme:path" or
        # "scheme://host/path" style URI and test whether it exists.
        from os.path import exists
        _, _, fp = uri.partition(":")
        if fp.startswith("//"):
            _, _, fp = fp[2:].partition("/")
        return fp and exists(fp)

    def __enter__(self):
        self._conn = rain.Store(self.db)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # commit whatever is outstanding, then drop the connection.
        self.commit()
        self._conn.close()
        self._conn = None

    def find_one(self, cls_spec, *args, **kwargs):
        # convenience wrapper: find() but return a single object (or None).
        assert self._conn is not None
        return self._conn.find(cls_spec, *args, **kwargs).one()

    def flush(self):
        assert self._conn is not None
        self._conn.flush()

    def commit(self):
        assert self._conn is not None
        self._conn.commit()

    # the new_* helpers construct a row object, validate/assign the given
    # columns via apply_properties, and register it with the Store.
    def new_exception(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TException(), kwargs))

    def new_kind(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TKind(), kwargs))

    def new_domain(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TDomain(), kwargs))

    def new_address(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TAddress(), kwargs))

    def new_record(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TRecord(), kwargs))

    def new_message(self, **kwargs):
        assert self._conn is not None
        return self._conn.add(apply_properties(TMessage(), kwargs))

    def setup_executions(self):
        self._conn.execute(create_table_statements["executions"], noresult=True)

    def setup_exceptions(self):
        # careful not to call them "errors" since NXDOMAIN is not an error.
        self._conn.execute(create_table_statements["exceptions"], noresult=True)

        # TODO: upsert?

        self.new_exception(name="NXDOMAIN", fail=False)
        self.new_exception(name="NoAnswer", fail=True)
        self.new_exception(name="NoNameservers", fail=True)
        self.new_exception(name="Timeout", fail=True)

    def setup_ips(self):
        from .ips import china, blocks

        self._conn.execute(create_table_statements["ips"], noresult=True)

        # TODO: upsert?

        # seed well-known block targets and known-Chinese DNS addresses.
        self.new_address(ip=addr_to_int("0.0.0.0"), block_target=True)
        self.new_address(ip=addr_to_int("127.0.0.1"), block_target=True)
        for ip in china:
            self.new_address(ip=addr_to_int(ip), china=True)
        for ip in blocks:
            self.new_address(ip=addr_to_int(ip), block_target=True)

    def setup_kinds(self):
        self._conn.execute(create_table_statements["kinds"], noresult=True)

        # TODO: upsert?

        #NXDOMAIN = self.find_one(TException, TException.name == "NXDOMAIN")
        #self.new_kind(name="bad", exception=NXDOMAIN)
        #self.new_kind(name="badsub", exception=NXDOMAIN)

    def setup_domains(self):
        self._conn.execute(create_table_statements["domains"], noresult=True)

    def setup_records(self):
        self._conn.execute(create_table_statements["records"], noresult=True)

    def setup_messages(self):
        self._conn.execute(create_table_statements["messages"], noresult=True)
        for trig in table_triggers["messages"]:
            self._conn.execute(trig)

    def start_execution(self, dt):
        # NOTE(review): the new TExecution is never passed to self._conn.add(),
        # so flush() may not persist it -- confirm against Storm's Store docs.
        execution = TExecution()
        execution.start_date = dt
        self.flush()
        return execution

    def finish_execution(self, execution, dt, completed):
        # TODO: fail if ExecutionId is missing?
        execution.finish_date = dt
        execution.completed = completed
        self.flush()

    def next_record_id(self):
        # RecordId is shared across rows (not the rowid), so allocate the
        # next one ourselves as max + 1 (0 when the table is empty).
        from storm.expr import Add, Max, Coalesce
        expr = Add(Coalesce(Max(TRecord.record_id), 0), 1)
        return self.find_one(expr)

    def find_record_id(self, addresses):
        # look for an existing record whose rows cover this exact address set.
        address_ids = list(address.address_id for address in addresses)
        record_ids = list(self._conn.find(TRecord, TRecord.address_id.is_in(address_ids)).values(TRecord.record_id))
        if not record_ids:
            return None
        unique_ids = sorted(set(record_ids))
        for needle in unique_ids:
            # a record matches when every one of our addresses hit it.
            if sum(1 for id in record_ids if id == needle) == len(addresses):
                found = True  # NOTE(review): assigned but never used.
                return needle
        return None

    def push_entry(self, entry):
        """Persist one Entry: upsert its kind, domain, addresses and server,
        attach (or create) the address-set record, and insert the message."""
        kind = self.find_one(TKind, TKind.name == entry.kind)
        if not kind:
            kind = self.new_kind(name=entry.kind)
            if entry.kind.startswith("bad"):
                # "bad*" kinds are expected to fail with NXDOMAIN.
                exception = self.find_one(TException, TException.name == "NXDOMAIN")
                assert exception is not None
                kind.exception = exception

        domain = self.find_one(TDomain, TDomain.name == entry.domain)
        if not domain:
            domain = self.new_domain(name=entry.domain)
            domain.kind = kind

        addresses = []
        as_ints = sorted(set(map(addr_to_int, entry.addrs)))
        for numeric in as_ints:
            address = self.find_one(TAddress, TAddress.ip == numeric)
            if not address:
                address = self.new_address(ip=numeric)
            addresses.append(address)

        # tag the returned addresses according to why the lookup failed.
        for address in addresses:
            if entry.reason == "block":
                address.block_target = True
            elif entry.reason == "redirect":
                address.redirect_target = True
            elif entry.reason == "gfw":
                address.gfw_target = True

        if addresses:
            record_id = self.find_record_id(addresses)
            if record_id is None:
                record_id = self.next_record_id()
                for address in addresses:
                    self.new_record(record_id=record_id, address=address)
        else:
            record_id = None

        numeric = addr_to_int(entry.server)
        server = self.find_one(TAddress, TAddress.ip == numeric)
        if not server:
            server = self.new_address(ip=numeric)
            self.flush()
        server.server = True

        if entry.exception:
            exception = self.find_one(TException, TException.name == entry.exception)
            assert exception is not None
        else:
            exception = None

        message = self.new_message(
            execution=entry.execution,
            server=server, domain=domain,
            record_id=record_id, exception=exception)
        self.flush()
|
67
respodns/ips.py
Normal file
67
respodns/ips.py
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
# known IPs of any DNS located in China:
# NOTE(review): not referenced by the functions in this module; detect_gfw
# in util.py has a commented-out import of it — confirm intended use.
china = {
    "1.1.8.8",
    "1.1.8.9",
    "1.2.4.8",
    "1.8.1.8",
    "1.8.8.8",
    "114.254.201.131",
    "218.107.55.108",
    "222.216.2.236",
}
|
||||||
|
|
||||||
|
# known IPs (not servers) that are used to deny access to websites:
# "timeout" / "fake" describe how the block manifests to the client.
blocks = {
    "1.2.3.4",         # timeout
    "54.242.237.204",  # fake
    "93.158.134.250",  # fake
    "114.6.128.8",     # fake
    "118.97.116.27",   # fake
    "119.235.29.59",   # fake
    "124.40.255.99",   # fake
    "146.112.61.106",  # fake
    "156.154.113.17",  # fake
    "156.154.175.30",  # fake
    "156.154.175.215", # fake
    "156.154.175.216", # fake
    "156.154.175.221", # fake
    "163.28.10.160",   # fake
    "175.139.142.25",  # fake
    "176.103.130.135", # fake
    "182.93.64.126",   # fake
    "192.99.140.48",   # fake
    "195.175.254.2",   # fake
    "202.40.187.91",   # fake
    "202.162.209.133", # fake
    "202.165.36.253",  # fake
    "203.119.13.75",   # fake
    "203.119.13.76",   # fake
    "203.190.55.217",  # fake
}
|
||||||
|
|
||||||
|
# Dotted-prefix strings covering reserved / non-routable ("bogon") IPv4 space.
bogon_checks = [
    "0.",
    "10.",
    "127.",
    "169.254.",
    "192.0.0.",
    "192.0.2.",
    "192.168.",
    "198.18.",
    "198.19.",
    "198.51.100.",
    "203.0.113.",
]
bogon_checks += [f"100.{octet}." for octet in range(64, 128)]  # 100.64.0.0/10
bogon_checks += [f"172.{octet}." for octet in range(16, 32)]   # 172.16.0.0/12
bogon_checks += [f"{octet}." for octet in range(224, 256)]     # class D and E


def is_bogon(ip):
    """Return True when `ip` (a dotted-quad string) starts with a bogon prefix."""
    for prefix in bogon_checks:
        if ip.startswith(prefix):
            return True
    return False
|
||||||
|
|
||||||
|
def ipkey(ip_string):
    """Map a dotted IP string to an integer suitable as a sort key.

    Colons are treated like dots before splitting, so colon-separated
    strings are accepted as well (segments beyond the fourth contribute
    fractional weights, which still sort consistently).
    """
    parts = ip_string.replace(":", ".").split(".")
    total = 0
    for position, part in enumerate(parts):
        total += int(part) * 256 ** (3 - position)
    return total
|
8
respodns/nonsense.py
Normal file
8
respodns/nonsense.py
Normal file
|
@ -0,0 +1,8 @@
|
||||||
|
from random import choice, choices, Random
|
||||||
|
from string import ascii_lowercase
|
||||||
|
from zlib import crc32
|
||||||
|
|
||||||
|
def nonsense_consistent(domain):
    """Produce a pseudo-random lowercase label derived deterministically from `domain`.

    The same domain always yields the same nonsense string, because the RNG
    is seeded with the CRC-32 of the domain name.
    """
    seed = crc32(domain.encode("utf-8"))
    rng = Random(seed)
    # weighted pick of the label length: 9–12 chars, favoring 9 and 10.
    (length,) = rng.choices((9, 10, 11, 12), (4, 5, 3, 2))
    letters = [rng.choice(ascii_lowercase) for _ in range(length)]
    return "".join(letters)
|
48
respodns/pooler.py
Normal file
48
respodns/pooler.py
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
from types import CoroutineType
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
# TODO: write a less confusing interface that allows the code to be written more flatly.
|
||||||
|
# maybe like: async for done in apply(doit, [tuple_of_args]):
|
||||||
|
|
||||||
|
def make_pooler(pool_size, finisher=None):
    """Create an async pooling closure that runs up to `pool_size` tasks at once.

    The returned `pooler(item)` coroutine accepts a coroutine or Task and
    schedules it; once at least `pool_size` items are in flight it awaits
    completions.  Calling `pooler()` with no argument drains all remaining
    tasks.  `finisher`, if given, is called as finisher(done, pending) after
    each wait; a non-None return value is propagated to the caller.
    """
    aws = set()  # the awaitables (Tasks) currently in flight

    async def pooler(item=None):
        nonlocal aws
        # no item means "finish up": wait for everything still pending.
        finish = item is None
        if not finish:
            if isinstance(item, CoroutineType):
                assert not isinstance(item, asyncio.Task)
                # wrap bare coroutines so asyncio.wait gets Tasks.
                item = asyncio.create_task(item)
            aws.add(item)
        # TODO: don't wait until all completed, just first completed in loop.
        # that way we can handle each done task ASAP.
        condition = asyncio.ALL_COMPLETED if finish else asyncio.FIRST_COMPLETED
        if len(aws) == 0:
            return None
        # only block once the pool is full (or we were asked to drain).
        if finish or len(aws) >= pool_size:
            done, pending = await asyncio.wait(aws, return_when=condition)
            #pending = set(task for task in pending if task in aws) # ???
            ret = None if finisher is None else finisher(done, pending)
            #aws = set(task for task in pending if not task.cancelled())
            aws = pending
            if ret is not None:
                return ret
        return None

    return pooler
|
||||||
|
|
||||||
|
def make_simple_pooler(pool_size, finisher=None):
    """Create an async pooling closure keeping at most `pool_size` tasks in flight.

    The returned `pooler(item)` coroutine accepts a coroutine or Task and
    schedules it; when the pool is full it awaits completions until there is
    room for one more.  Calling `pooler()` with no argument drains the pool
    completely.  `finisher`, if given, is called as finisher(done, pending)
    after every asyncio.wait round.
    """
    condition = asyncio.FIRST_COMPLETED
    pending = set()  # Tasks currently in flight

    async def pooler(item=None):
        nonlocal pending
        # no item means "finish up": drain every remaining task.
        finish = item is None
        if not finish:
            if isinstance(item, CoroutineType):
                assert not isinstance(item, asyncio.Task)
                # wrap bare coroutines so asyncio.wait gets Tasks.
                item = asyncio.create_task(item)
            pending.add(item)
        # when adding, leave room for the next item; when draining, empty out.
        desired_size = 0 if finish else pool_size - 1
        while len(pending) > desired_size:
            done, pending = await asyncio.wait(pending, return_when=condition)
            # bug fix: finisher defaults to None, and the original called it
            # unconditionally, raising TypeError once the pool first filled.
            if finisher is not None:
                finisher(done, pending)

    return pooler
|
32
respodns/structs.py
Normal file
32
respodns/structs.py
Normal file
|
@ -0,0 +1,32 @@
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
@dataclass
class Options:
    """Tunable settings for one resolver-survey run."""

    #exec_id: int = -1
    # opaque execution handle threaded through to the db layer
    # (None when nothing is being recorded)
    execution: object = None

    ip_simul: int = 10     # how many IPs to connect to at once
    domain_simul: int = 3  # how many domains per IP to request at once

    # pacing delays in seconds — presumably between successive IPs/domains;
    # confirm against the loops in __main__.
    ip_wait: float = 0.15
    domain_wait: float = 0.25

    impatient: bool = False      # reduce retries and times for timeouts
    early_stopping: bool = True  # stop at the first invalid domain (best with dry)
    dry: bool = True             # don't write anything to database
    progress: bool = False       # enable progress reporting — confirm in __main__
||||||
|
|
||||||
|
from datetime import datetime


@dataclass
class Entry:
    """One observed DNS lookup result, ready to be recorded in the database.

    Note: the `datetime` import used to live inside the class body, which
    leaked it as an `Entry.datetime` class attribute; it is now module-level.
    """

    date: datetime    # when the lookup was performed
    success: bool     # whether the lookup met expectations
    server: str       # IP address string of the DNS server queried
    kind: str         # category of the check this entry came from
    domain: str       # domain name that was queried
    exception: str    # name of the expected exception (e.g. "NXDOMAIN"), or None
    addrs: list       # list of address strings returned by the lookup
    reason: str       # why addresses are flagged: "block", "redirect", or "gfw"
    #exec_id: int
    execution: object # execution handle passed through to the db layer
|
52
respodns/top1m.py
Normal file
52
respodns/top1m.py
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
# where to fetch the Alexa top-1M ranking, and the CSV member name inside the zip.
urltop_default = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
csvfn_default = "top-1m.csv"

# cache lifetime for the downloaded CSV.
one_week = 7 * 24 * 60 * 60  # in seconds
||||||
|
|
||||||
|
def alive(fp, expire):
    """Return True if file `fp` exists, was modified within the last `expire`
    seconds, and is larger than 2 bytes (i.e. not an empty placeholder).

    Bug fix: the original ignored the `expire` parameter and always used the
    module-level `one_week` constant; it now honors the argument.  The only
    in-repo caller passes `one_week`, so behavior there is unchanged.
    """
    from os.path import exists, getmtime, getsize
    from time import time

    return exists(fp) and time() < getmtime(fp) + expire and getsize(fp) > 2
|
||||||
|
|
||||||
|
def download_top1m(urltop=None, csvfn=None):
    """Download the zipped top-1M CSV from `urltop` and return the CSV text.

    `urltop` and `csvfn` fall back to the module-level defaults when None.
    """
    from io import BytesIO
    from urllib.request import urlopen
    from zipfile import ZipFile

    urltop = urltop_default if urltop is None else urltop
    csvfn = csvfn_default if csvfn is None else csvfn

    # buffer the whole zip in memory; ZipFile needs a seekable object.
    buffer = BytesIO()
    with urlopen(urltop) as response:
        buffer.write(response.read())

    with ZipFile(buffer) as archive:
        with archive.open(csvfn) as member:
            raw = member.read()
        buffer.close()

    return raw.decode("utf-8")
|
||||||
|
|
||||||
|
def retrieve_top1m_entries(csv_fp="top-1m.csv"):
    """Return [(rank, domain), ...] from the top-1M CSV, downloading if stale.

    A cached copy at `csv_fp` is reused while `alive` considers it fresh;
    otherwise the list is re-downloaded and written back to `csv_fp`.
    """
    from sys import stderr

    if alive(csv_fp, one_week):
        with open(csv_fp, "r") as f:
            text = f.read()
    else:
        print("downloading", csv_fp, file=stderr)
        text = download_top1m()
        with open(csv_fp, "w") as f:
            f.write(text)

    # we could use the csv module, but this is totally overkill
    # for data that *should* be just a subset of ascii.
    entries = []
    for line in text.splitlines():
        rank, _, domain = line.partition(",")
        entries.append((int(rank), domain))
    return entries


# short alias; calling top1m() performs a fresh download.
top1m = download_top1m
|
71
respodns/util.py
Normal file
71
respodns/util.py
Normal file
|
@ -0,0 +1,71 @@
|
||||||
|
def right_now():
    """Return the current moment as a timezone-aware UTC datetime."""
    from datetime import datetime, timezone
    return datetime.now(tz=timezone.utc)
|
||||||
|
|
||||||
|
def detect_gfw(r, ip, check):
    """Heuristically detect interference from the Great Firewall of China.

    `r` is a returned address string, `check` carries the queried domain in
    its `.domain` attribute (`ip` is currently unused).  Returns True when
    `r` falls in ranges known to be injected for non-Facebook-owned domains.
    """
    #from .ips import china
    #if r in china: return True

    # class D or class E, neither of which are correct for a (public?) DNS.
    #if int(r.partition(".")[0]) >= 224: return True

    # Facebook-owned properties legitimately resolve into these ranges.
    facebook_owned = ("facebook.com", "instagram.com", "whatsapp.com")
    hosted = check.domain.endswith(facebook_owned)

    if r.startswith("31.13.") and not hosted:
        return True
    if r.startswith(("66.220.", "69.63.", "74.86.", "75.126.")):
        return True
    if r.startswith("69.171.") and not r.startswith("69.171.250."):
        return True
    if r == "64.13.192.74":
        return True

    # more non-facebook GFW stuff:
    # 31.13.64.33
    # 31.13.70.1
    # 31.13.70.20
    # 31.13.76.16
    # 31.13.86.1
    # 173.252.110.21
    # 192.99.140.48
    # 199.16.156.40
    # 199.16.158.190

    return False
|
||||||
|
|
||||||
|
async def getaddrs(server, domain, opts):
    """Resolve the A records of `domain` against the single DNS server `server`.

    Returns a list of address strings sorted by numeric value, or a
    single-element list naming the resolver failure ("NXDOMAIN", "NoAnswer",
    "NoNameservers", or "Timeout").
    """
    from dns.asyncresolver import Resolver
    from dns.exception import Timeout
    from dns.resolver import NXDOMAIN, NoAnswer, NoNameservers
    #from dns.resolver import Resolver
    from .ips import ipkey

    resolver = Resolver(configure=False)
    if opts.impatient:
        # trade reliability for speed when the caller is in a hurry.
        resolver.timeout = 5
        resolver.lifetime = 2
    resolver.nameservers = [server]

    try:
        #ans = res.resolve(domain, "A", search=False)
        answer = await resolver.resolve(domain, "A", search=False)
    except NXDOMAIN:
        return ["NXDOMAIN"]
    except NoAnswer:
        return ["NoAnswer"]
    except NoNameservers:
        return ["NoNameservers"]
    except Timeout:
        return ["Timeout"]

    unique = {rr.address for rr in answer.rrset}
    return sorted(unique, key=ipkey)
|
||||||
|
|
||||||
|
def read_ips(f):
    """Yield IPv4-looking address strings from the open text file `f`.

    Trailing `#` comments are stripped, and lines are filtered by the crude
    heuristic of containing exactly three dots.

    Improvement: iterate the file lazily instead of slurping it with
    readlines(), which addresses the original TODO about blocking on an
    open pipe and avoids materializing large inputs.
    """
    # TODO: make more robust. (regex pls)
    for line in f:
        # partition is a no-op when there is no "#", so no guard is needed.
        ip, _, _ = line.partition("#")
        ip = ip.strip()
        if ip.count(".") != 3:
            continue
        yield ip
|
31
setup.py
Normal file
31
setup.py
Normal file
|
@ -0,0 +1,31 @@
|
||||||
|
from setuptools import setup

# Package metadata for respodns, a DNS response logger.
setup(
    name='respodns',
    version='0.1.0',
    packages=[
        'respodns',
    ],

    author='notwa',
    author_email='cloningdonor+pypi@gmail.com',
    url='https://github.com/notwa/respodns',
    keywords='TODO',
    description='DNS logger',
    # bug fix: this said 'MIT', but the repository's LICENSE file and the
    # classifier below both specify the ISC license.
    license='ISC',
    zip_safe=True,

    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: ISC License (ISCL)',
        'Natural Language :: English',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: SQL',
        'Topic :: Internet :: Name Service (DNS)',
        'Topic :: System :: Logging',
        'Topic :: Utilities',
    ]
)
|
Loading…
Add table
Reference in a new issue