serious optimization

moral of today's commit is:
don't fork grep for each line of a 5000-line file.
this reduces run time (in my case) from 20 minutes to 15 seconds.
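
For illustration, a minimal sketch of the difference (hypothetical $pattern and $file variables, not from this repo): the slow shape forks one grep per input line, the fast shape runs grep once over the whole file.

    # slow: one grep fork per line (~5000 forks for a 5000-line file)
    while read -r line; do
        printf '%s\n' "$line" | grep -qP "$pattern" && printf '%s\n' "$line"
    done < "$file"

    # fast: a single grep pass over the entire file
    grep -P "$pattern" "$file"
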
Connor Olding 2014-10-07 20:06:07 -07:00
parent 3316151b40
commit dc1988adda

meow.sh

@@ -2,8 +2,8 @@
 SEP=$'\t'
 curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
 
-URL_SEARCH='http://www.nyaa.se/'
-URL_DOWNLOAD='http://www.nyaa.se/?page=download&tid='
+URL_SEARCH="${URL_SEARCH:-http://www.nyaa.se/}"
+URL_DOWNLOAD="${URL_DOWNLOAD:-http://www.nyaa.se/?page=download&tid=}"
 
 # all timestamps are given in seconds since the epoch
 declare -A searchquery
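
As an aside, the ${VAR:-default} rewrite keeps the old endpoints as defaults while letting callers override them from the environment; a hypothetical invocation (example host, not from the commit):

    URL_SEARCH='http://example.net/' ./meow.sh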
@@ -69,6 +69,18 @@ searchfilter() { # database regex [timestamp]
     done < "$1"
 }
 
+searchfilter_fast() { # {database} {regex}
+    declare -A matched
+    while read -r; do
+        matched["$REPLY"]=1
+    done < <(cut -f3- "$1" | grep -nP "$2" | grep -Eo '^[^:]+')
+    n=0
+    while read -r; do
+        ((n++))
+        [ "${matched[$n]:-0}" -eq 1 ] && echo "$REPLY"
+    done < "$1"
+}
+
 runfilter() { # {action} [database]
     declare -A already
     local action="${1:-echo}"
@@ -93,7 +105,7 @@ runfilter() { # {action} [database]
             break
         }
     done < <(for regex in "${searchregex[@]}"; do
-        searchfilter "$db" "${regex:1}"
+        searchfilter_fast "$db" "${regex:1}"
     done)
 
     rm "$mark"
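
For reference, the line-number plumbing in searchfilter_fast: grep -n prefixes each match with its line number ("N:line"), and grep -Eo '^[^:]+' keeps only the part before the first colon. A hypothetical run:

    $ printf 'foo\nbar\nfoobar\n' | grep -nP 'oo' | grep -Eo '^[^:]+'
    1
    3

One grep invocation marks every matching line number in the matched array; the second loop then streams the database once and echoes only the marked lines, so grep is forked a constant number of times instead of once per line.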