Use an alternative to the now-defunct Alexa top-1m list
This commit is contained in:
parent
e9a36ed397
commit
c95b6ba0e1
|
@ -1,4 +1,5 @@
|
||||||
urltop_default = "http://s3.amazonaws.com/alexa-static/top-1m.csv.zip"
|
# https://github.com/PeterDaveHello/top-1m-domains
|
||||||
|
urltop_default = "https://s3-us-west-1.amazonaws.com/umbrella-static/top-1m.csv.zip"
|
||||||
csvfn_default = "top-1m.csv" # path within the zip file
|
csvfn_default = "top-1m.csv" # path within the zip file
|
||||||
|
|
||||||
one_week = 7 * 24 * 60 * 60 # in seconds
|
one_week = 7 * 24 * 60 * 60 # in seconds
|
||||||
|
@ -51,5 +52,5 @@ def retrieve_top1m_entries(csv_fp="top-1m.csv"):
|
||||||
# for data that *should* be just a subset of ascii.
|
# for data that *should* be just a subset of ascii.
|
||||||
lines = uncomp.splitlines()
|
lines = uncomp.splitlines()
|
||||||
entries = [(lambda a: (int(a[0]), a[2]))(line.partition(","))
|
entries = [(lambda a: (int(a[0]), a[2]))(line.partition(","))
|
||||||
for line in lines]
|
for line in lines if line]
|
||||||
return entries
|
return entries
|
||||||
|
|
Loading…
Reference in New Issue
Block a user