split searching from filtering, log searches

2014-03-15 11:56:56 -07:00 · 2014-03-15 11:56:56 -07:00 · 4fd2dfcee6
parent 027a8db2e1
commit 4fd2dfcee6
3 changed files with 54 additions and 37 deletions
--- a/meow.sh
+++ b/meow.sh
@ -1,11 +1,11 @@
 #!/usr/bin/env bash
-SEP=$'\1'
+SEP=$'\t'
 curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
 # all timestamps are given in seconds since the epoch
-declare -A groupinsane # unsanitized group names
+declare -A searchquery
-declare -A groupregex
+declare -A searchregex
-declare -A grouptime # last seen release
+declare -A searchtime # last seen release
 die() {
    echo -E "$@" >&2
@ -17,8 +17,8 @@ retrieve() {
      "http://www.nyaa.se/"
 }
-nullcheck() { # {group name}
+nullcheck() { # {query}
-    [[ -n "$1" ]] || die "Null group name";
+    [[ -n "$1" ]] || die "Null search query";
 }
 sanitize() {
@ -29,67 +29,82 @@ splittags() { # {tag}
    awk -v tag="$1" -f "$SRCDIR/splittags.awk"
 }
-scrape() { # {group name} {timestamp}
+scrape() {
-    TZ=UTC0 awk -v g="$1" -v ts="${2:-0}" -v sep="$SEP" -f "$SRCDIR/scrape.awk"
+    TZ=UTC0 awk -v sep="$SEP" -f "$SRCDIR/scrape.awk"
 }
-watch() { # {group name} [regex...]
+watch() { # {search query} [regex...]
    nullcheck "$1"
    local gs="$(sanitize<<<"$1")" regex=
-    groupinsane[$gs]="$1"
+    searchquery[$gs]="$1"
    shift
    for regex; do
-        groupregex[$gs]+="|($regex)"
+        searchregex[$gs]+="|($regex)"
    done
 }
-touchgroup() { # {group name} {timestamp}
+touchquery() { # {search query} {timestamp}
    nullcheck "$1"
    local gs="$(sanitize<<<"$1")"
-    grouptime[$gs]="$2"
+    searchtime[$gs]="$2"
 }
-groupreleases() { # groupname [timestamp]
+search() {
    nullcheck "$1"
-    retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape "$1" "${2:-}"
+    retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape
-    [ ${PIPESTATUS[0]} = 0 ] || die "Failed to retrieve releases for $1"
+    [ ${PIPESTATUS[0]} = 0 ] || die "Failed to search for $1"
 }
-groupfilter() { # groupname regex [timestamp]
+searchfilter() { # key regex [timestamp]
-    groupreleases "$1" "${3:-}" | while IFS=$SEP read -r title etc; do
+    while IFS=$SEP read -r title etc; do
        grep -P "$2" <<< "$title" >/dev/null && echo -E "$title$SEP$etc"
-    done
+    done < db.txt
    [ ${PIPESTATUS[0]} = 0 ] || exit 1
 }
 cleanup() {
    local gs= v=
-    for gs in "${!grouptime[@]}"; do
+    for gs in "${!searchtime[@]}"; do
-        v="${grouptime[$gs]}"
+        v="${searchtime[$gs]}"
-        echo -E "touchgroup $gs $v" >> times.sh
+        echo -E "touchquery $gs $v" >> times.sh
        [ -e "$gs.xml" ] && rm "$gs.xml"
    done
    exit ${1:-1}
 }
-rungroup() {
+runfilter() {
-    local insane= regex= timestamp= res= _= recent=
+    local query= regex= timestamp= res= _= recent=
-    insane="${groupinsane[$1]}"
+    query="${searchquery[$1]}"
-    regex="${groupregex[$1]:1}"
+    regex="${searchregex[$1]:1}" # exclude first | character
-    timestamp="${grouptime[$1]}"
+    timestamp="${searchtime[$1]}"
-    res="$(groupfilter "$insane" "$regex" "$timestamp")"
+    res="$(searchfilter "$query" "$regex" "$timestamp")"
    [ $? = 0 ] || return $?
    IFS=$SEP read -r _ _ recent <<< "$res"
    [ -n "$recent" ] && {
-        grouptime[$1]="$recent"
+        searchtime[$1]="$recent"
        echo -E "$res"
    }
    return 0
 }
 # runsearch [database]
 #
 # Runs every query registered in searchquery[] through search(), prefixes
 # each result row with its timestamp and the query's key, and merges the rows
 # into the database file (default: db.txt). The merged file is sorted
 # numerically on the leading timestamp and exact duplicate lines are dropped.
 #
 # Globals:   SEP (read), searchquery (read)
 # Arguments: $1 - database path, optional
 # Row format written: time SEP key SEP title SEP torrent
 # Exits via die() if the temporary file cannot be created or the merge fails;
 # in the latter case the existing database is left untouched.
 runsearch() { # [database]
    local db="${1:-db.txt}"
    local tmp= q= status=
    # Assign separately from `local` so a mktemp failure is not masked.
    tmp="$(mktemp)" || die "Failed to create temporary file"
    touch "$db"
    for q in "${!searchquery[@]}"; do
        search "${searchquery[$q]}" \
        | while IFS=$SEP read -r title torrent time; do
            echo -E "$time$SEP$q$SEP$title$SEP$torrent"
        done
    done | sort -n -- "$db" - | uniq > "$tmp"
    # Snapshot immediately: any further command resets PIPESTATUS.
    status=("${PIPESTATUS[@]}")
    # Only sort and uniq are checked: if either failed, $tmp may be blank or
    # truncated and must not replace the database.
    if [ "${status[1]}" != 0 ] || [ "${status[2]}" != 0 ]; then
        rm -f -- "$tmp"
        die "Failed to update $db"
    fi
    # TODO: don't accidentally overwrite $db with something blank/incomplete
    #       maybe check if filesize has decreased and die if so
    mv -- "$tmp" "$db"
 }
 runall() {
    trap cleanup INT
    local ret=0 gs=
-    for gs in "${!groupregex[@]}"; do rungroup "$gs" || ret=1; done
+    for gs in "${!searchregex[@]}"; do runfilter "$gs" || ret=1; done
    cleanup $ret
 }
--- a/11
+++ b/11
@ -36,6 +36,11 @@ runactions() {
 . "$SRCDIR/meow.sh"
 . config.sh
-[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
+
-runall | runactions
+#[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
-exit ${PIPESTATUS[0]}
+#runall | runactions
 #exit ${PIPESTATUS[0]}
 runsearch
 exit 0
--- a/scrape.awk
+++ b/scrape.awk
@ -22,16 +22,13 @@ BEGIN{
    # http://stackoverflow.com/a/2123002
    m=split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec",d,"|")
    for(o=1;o<=m;o++) months[d[o]]=sprintf("%02d",o)
    glen=length(g)
 }
 function run(str) {
    title=unescape(opentag(str, "title"))
-    if (substr(title,1,glen+2) != "["g"]") return
+    pub=unescape(opentag(str, "pubDate"))
    pub=unescape(opentag($0, "pubDate"))
 #   "date -d \""pub "\" +%s" | getline pubunix
    pubunix=hotdate(pub)
    if (pubunix <= ts) return
    torrent=unescape(opentag(str, "link"))
    print title sep torrent sep pubunix
 }