serious optimization
The moral of today's commit: don't fork grep for each line of a 5000-line file. Running grep once over the whole file reduces the run time (in my case) from 20 minutes to 15 seconds.
This commit is contained in:
parent
3316151b40
commit
dc1988adda
18
meow.sh
18
meow.sh
|
@ -2,8 +2,8 @@
|
||||||
SEP=$'\t'
|
SEP=$'\t'
|
||||||
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
|
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
|
||||||
|
|
||||||
URL_SEARCH='http://www.nyaa.se/'
|
URL_SEARCH="${URL_SEARCH:-http://www.nyaa.se/}"
|
||||||
URL_DOWNLOAD='http://www.nyaa.se/?page=download&tid='
|
URL_DOWNLOAD="${URL_DOWNLOAD:-http://www.nyaa.se/?page=download&tid=}"
|
||||||
|
|
||||||
# all timestamps are given in seconds since the epoch
|
# all timestamps are given in seconds since the epoch
|
||||||
declare -A searchquery
|
declare -A searchquery
|
||||||
|
@ -69,6 +69,18 @@ searchfilter() { # database regex [timestamp]
|
||||||
done < "$1"
|
done < "$1"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# searchfilter_fast {database} {regex}
#
# Print every line of the file {database} whose content from the third
# tab-separated field onward matches the Perl-compatible {regex}.
# Matching lines are printed whole, in file order, on stdout.
#
# grep is run once over the whole file (instead of once per line); the line
# numbers it reports are remembered and used to select lines in a second pass.
searchfilter_fast() { # {database} {regex}
	local -A matched=()
	local n=0

	# Pass 1: collect the line numbers of all matches. "-e" keeps a regex that
	# begins with "-" from being parsed as a grep option.
	while read -r; do
		matched["$REPLY"]=1
	done < <(cut -f3- "$1" | grep -nP -e "$2" | cut -d: -f1)

	# Nothing matched: no need to read the database a second time.
	((${#matched[@]})) || return 0

	# Pass 2: emit the full original line for every remembered line number.
	# The "|| [[ -n $REPLY ]]" clause keeps a final line that lacks a trailing
	# newline; grep counted it in pass 1, so it must be counted here too.
	while read -r || [[ -n $REPLY ]]; do
		n=$((n + 1))
		if [[ -n ${matched[$n]:-} ]]; then
			# printf, not echo: a line consisting of "-n" or "-e" must be
			# printed verbatim rather than taken as an echo option.
			printf '%s\n' "$REPLY"
		fi
	done < "$1"
}
|
||||||
|
|
||||||
runfilter() { # {action} [database]
|
runfilter() { # {action} [database]
|
||||||
declare -A already
|
declare -A already
|
||||||
local action="${1:-echo}"
|
local action="${1:-echo}"
|
||||||
|
@ -93,7 +105,7 @@ runfilter() { # {action} [database]
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
done < <(for regex in "${searchregex[@]}"; do
|
done < <(for regex in "${searchregex[@]}"; do
|
||||||
searchfilter "$db" "${regex:1}"
|
searchfilter_fast "$db" "${regex:1}"
|
||||||
done)
|
done)
|
||||||
|
|
||||||
rm "$mark"
|
rm "$mark"
|
||||||
|
|
Loading…
Reference in New Issue
Block a user