From c95b6ba0e1c8dbd78c08fd099421553124c3bd41 Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Wed, 8 Nov 2023 05:53:08 -0800
Subject: [PATCH] use an alternative to the now-defunct Alexa top-1m

---
 respodns/top1m.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/respodns/top1m.py b/respodns/top1m.py
index c68c8e2..ef37a17 100644
--- a/respodns/top1m.py
+++ b/respodns/top1m.py
@@ -1,4 +1,5 @@
-urltop_default = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
+# https://github.com/PeterDaveHello/top-1m-domains
+urltop_default = "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
 csvfn_default = "top-1m.csv"  # path within the zip file
 
 one_week = 7 * 24 * 60 * 60  # in seconds
@@ -51,5 +52,5 @@ def retrieve_top1m_entries(csv_fp="top-1m.csv"):
     # for data that *should* be just a subset of ascii.
     lines = uncomp.splitlines()
     entries = [(lambda a: (int(a[0]), a[2]))(line.partition(","))
-               for line in lines]
+               for line in lines if line]
     return entries