meow.sh/meow.sh

#!/usr/bin/env bash
SEP=$'\t'
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)

URL_SEARCH="${URL_SEARCH:-https://www.nyaa.se/}"
URL_DOWNLOAD="${URL_DOWNLOAD:-https://www.nyaa.se/?page=download&tid=}"

# all timestamps are given in seconds since the epoch
declare -A searchquery
declare -A searchregex

die() {
    echo -E "$@" >&2
    exit 1
}

retrieve() {
    ${curl[@]} -G --data-urlencode "term=[$1]" -d page=rss "$URL_SEARCH"
}

nullcheck() { # {query}
    [[ -n "$1" ]] || die "Null search query";
}

sanitize() {
    sed -e 's/[^0-9a-zA-Z_]/_/g'
}

splittags() { # {tag}
    awk -v tag="$1" -f "$SRCDIR/splittags.awk"
}

scrape() {
    TZ=UTC0 awk -v sep="$SEP" -f "$SRCDIR/scrape.awk"
}

watch() { # {group name} [regex...]
    declare -a regexes
    local query="$1"
    shift
    for regex; do
        regexes+=("^\[$query\].*$regex")
    done
    watchany "$query" "${regexes[@]}"
}

watchany() { # {search query} [regex...]
    nullcheck "$1"
    local gs="$(sanitize<<<"$1")" regex=
    searchquery[$gs]="$1"
    shift
    for regex; do
        searchregex[$gs]+="|($regex)"
    done
}

search() {
    nullcheck "$1"
    retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape
    [ ${PIPESTATUS[0]} = 0 ] || die "Failed to search for $1"
}

searchfilter() { # database regex [timestamp]
    while read -r; do
        IFS=$SEP read -r time tid title <<< "$REPLY"
        [ "$time" -gt "${3:-0}" ] \
        && grep -qP "$2" <<< "$title" \
        && echo -E "$REPLY"
    done < "$1"
}

searchfilter_fast() { # {database} {regex}
    declare -A matched
    while read -r; do
         matched["$REPLY"]=1
    done < <(cut -f3- "$1" | grep -nP "$2" | grep -Eo '^[^:]+')
    n=0
    while read -r; do
        ((n++))
        [ "${matched[$n]:-0}" -eq 1 ] && echo "$REPLY"
    done < "$1"
}

runfilter() { # {action} [database]
    declare -A already
    local action="${1:-echo}"
    local mark="$action.txt"
    local db="${2:-db.txt}"
    local ret=0

    touch "$mark"
    while IFS=$SEP read -r tid time; do
        already["$tid"]="$time"
    done < "$mark"

    now="$(date +%s)"
    while IFS=$SEP read -r time tid title; do
        [ -n "${already[$tid]}" ] || {
            $action $time $tid "$title" && already[$tid]="$now"
        } || {
            echo "[meow.sh] failed to run $action" >&2
            echo "[meow.sh] torrent title: $title" >&2
            echo "[meow.sh] torrent id: $tid" >&2
            ret=1
            break
        }
    done < <(for regex in "${searchregex[@]}"; do
        searchfilter_fast "$db" "${regex:1}"
    done)

    rm "$mark"
    for tid in "${!already[@]}"; do
        echo "$tid$SEP${already[$tid]}" >> "$mark"
    done

    return "$ret"
}

runsearch() { # [database]
    local db="${1:-db.txt}"
    local tmp=`mktemp`
    touch "$db"

    for q in "${!searchquery[@]}"; do
        search "${searchquery[$q]}" \
        | while IFS=$SEP read -r title torrent time; do
            local tid="${torrent##*=}"
            echo -E "$time$SEP$tid$SEP$title"
        done
    done | sort -n -- "$db" - | uniq > $tmp

    fs_old="$(du -b "$db" | cut -f1)"
    fs_new="$(du -b $tmp | cut -f1)"
    [ "$fs_new" -ge "$fs_old" ] || die "new database is smaller than current!"
    mv $tmp "$db"
}