#!/usr/bin/env bash
#
# Helpers for watching, searching and filtering torrent listings that are
# fetched from an RSS search endpoint.

# Field separator used in every record this script reads or writes.
SEP=$'\t'

# Base curl command line: silent but still reporting errors, 32 s overall
# timeout, 8 s connect timeout, up to 3 retries spaced 1 s apart.
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)

# Endpoints; both can be overridden from the environment.
URL_SEARCH="${URL_SEARCH:-https://www.nyaa.se/}"
URL_DOWNLOAD="${URL_DOWNLOAD:-https://www.nyaa.se/?page=download&tid=}"

# all timestamps are given in seconds since the epoch

# Both maps are keyed by the sanitized search query (filled in by watchany).
declare -A searchquery # key -> original search query
declare -A searchregex # key -> "|(regex)|(regex)..." alternation of filters

# Print the arguments, joined by single spaces, on stderr and terminate the
# current (sub)shell with status 1.
die() {
  # printf instead of echo: a message whose first word looks like an echo
  # option (e.g. "-n") must be printed verbatim, not swallowed.
  local IFS=' '
  printf '%s\n' "$*" >&2
  exit 1
}

# Fetch the RSS search result for a term and write it to stdout.
# Arguments: $1 - search term; it is sent wrapped in square brackets
# Globals:   curl (base command array), URL_SEARCH (endpoint)
# Returns:   exit status of the curl command
retrieve() {
  # The array expansion must be quoted so that every element stays exactly
  # one argument (no word splitting, no globbing).
  "${curl[@]}" -G --data-urlencode "term=[$1]" -d page=rss "$URL_SEARCH"
}

# Abort via die when the given search query is empty or missing.
# Arguments: $1 - search query
nullcheck() { # {query}
  [[ -n "${1-}" ]] || die "Null search query"
}

# Filter: copy stdin to stdout, replacing every character that is not an
# ASCII letter, digit or underscore with an underscore.
sanitize() {
  sed -e 's/[^0-9a-zA-Z_]/_/g'
}

# Filter: run $SRCDIR/splittags.awk over stdin with the awk variable `tag`
# set to $1. What the awk program emits is defined in that file.
# Arguments: $1 - tag name handed to the awk program
# Globals:   SRCDIR (directory containing splittags.awk)
splittags() { # {tag}
  awk -v tag="$1" -f "$SRCDIR/splittags.awk"
}

# Filter: run $SRCDIR/scrape.awk over stdin with the awk variable `sep` set
# to $SEP. TZ is forced to UTC0 for the awk process; presumably the awk
# program converts dates and must not depend on the local time zone.
# Globals: SEP (field separator), SRCDIR (directory containing scrape.awk)
scrape() {
  TZ=UTC0 awk -v sep="$SEP" -f "$SRCDIR/scrape.awk"
}

# Register a watch for a group: every given regex is anchored so that it
# only matches titles starting with "[<group name>]", then handed to
# watchany together with the group name as the search query.
# Arguments: $1    - group name (inserted into the pattern verbatim, so
#                    regex metacharacters in it are not escaped)
#            $2... - regexes applied to the rest of the title
watch() { # {group name} [regex...]
  local query="$1" regex
  local -a regexes=()
  shift
  for regex; do
    regexes+=("^\[$query\].*$regex")
  done
  watchany "$query" "${regexes[@]}"
}

# Register a search query and, optionally, regexes used to filter its
# results. Repeated calls for the same query accumulate regexes.
# Arguments: $1    - search query (must be non-empty)
#            $2... - regexes; each is appended as "|(<regex>)"
# Globals:   searchquery, searchregex (written; keyed by sanitized query)
watchany() { # {search query} [regex...]
  nullcheck "$1"
  local gs regex
  # Declared separately from the assignment so that a failing sanitize is
  # noticed instead of being hidden behind `local`'s own exit status.
  gs="$(sanitize <<< "$1")" || die "Failed to sanitize search query: $1"
  searchquery[$gs]="$1"
  shift
  for regex; do
    searchregex[$gs]+="|($regex)"
  done
}

# Run one search and write the scraped records to stdout.
# The response from retrieve has every CR, LF and SEP character removed
# before it is passed through `splittags item` and scrape.
# Arguments: $1 - search query (must be non-empty)
# Exits via die when retrieve fails; only that first pipeline stage is
# checked.
search() {
  nullcheck "$1"
  retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape
  # Capture immediately: any further command would overwrite PIPESTATUS.
  local -a rc=("${PIPESTATUS[@]}")
  [ "${rc[0]}" = 0 ] || die "Failed to search for $1"
}

# Print the database records whose title matches a regex and whose
# timestamp is newer than a threshold. Spawns one grep per record.
# Arguments: $1 - database file with "time<SEP>tid<SEP>title" records
#            $2 - Perl-compatible regex matched against the title
#            $3 - optional timestamp; only records with time > $3 are
#                 printed (default 0)
# Globals:   SEP (field separator)
# Returns:   0 after the file has been processed; non-zero if the
#            database cannot be opened
searchfilter() { # database regex [timestamp]
  local since="${3:-0}" time tid title
  # `|| [[ -n ... ]]` keeps a final record that lacks a trailing newline.
  while read -r || [[ -n "$REPLY" ]]; do
    IFS=$SEP read -r time tid title <<< "$REPLY"
    # Skip blank or malformed records instead of feeding them to `[`.
    [[ "$time" =~ ^-?[0-9]+$ ]] || continue
    # -e stops a regex that starts with "-" from being parsed as options.
    if [ "$time" -gt "$since" ] && grep -qP -e "$2" <<< "$title"; then
      printf '%s\n' "$REPLY"
    fi
  done < "$1"
}

# Print the database records whose title matches a regex, using a single
# grep over the whole file instead of one grep per record.
# Arguments: $1 - database file; the title is everything from the third
#                 TAB-separated field onwards
#            $2 - Perl-compatible regex matched against the title
# Returns:   0 after the file has been processed; non-zero if the
#            database cannot be opened
searchfilter_fast() { # {database} {regex}
  local -A matched=()
  local -i n=0

  # Pass 1: collect the line numbers of all matching titles. -e stops a
  # regex that starts with "-" from being parsed as options.
  while read -r; do
    matched["$REPLY"]=1
  done < <(cut -f3- -- "$1" | grep -nP -e "$2" | cut -d: -f1)

  # Pass 2: re-read the file and print the full records at those line
  # numbers. `|| [[ -n ... ]]` keeps a final record without a newline.
  while read -r || [[ -n "$REPLY" ]]; do
    ((++n))
    if [[ -n "${matched[$n]:-}" ]]; then
      printf '%s\n' "$REPLY"
    fi
  done < "$1"
}

# Run an action once for every database record that matches any registered
# regex and has not been handled by that action before.
# Handled torrent ids are remembered in "<action>.txt" as
# "tid<SEP>timestamp" lines; the file is rewritten on every run.
# Arguments: $1 - action, invoked as: <action> <time> <tid> <title>
#                 (default: echo)
#            $2 - database file (default: db.txt)
# Globals:   SEP, searchregex (read)
# Returns:   0 on success; 1 as soon as the action fails for a record
#            (processing stops there, earlier successes stay recorded)
runfilter() { # {action} [database]
  local -A already=()
  local action="${1:-echo}"
  local mark="$action.txt"
  local db="${2:-db.txt}"
  local ret=0
  local now tid time title regex

  # Load the ids this action has already handled.
  touch "$mark"
  while IFS=$SEP read -r tid time; do
    [[ -n "$tid" ]] || continue # blank line: not a usable array key
    already["$tid"]="$time"
  done < "$mark"

  now="$(date +%s)"
  # The records arrive on fd 3 so the action keeps the caller's stdin and
  # cannot accidentally consume the remaining records.
  while IFS=$SEP read -r -u 3 time tid title; do
    [[ -n "$tid" ]] || continue
    [[ -z "${already[$tid]:-}" ]] || continue
    # $action is deliberately unquoted, as in the original call.
    if $action "$time" "$tid" "$title"; then
      already[$tid]="$now"
    else
      echo "[meow.sh] failed to run $action" >&2
      echo "[meow.sh] torrent title: $title" >&2
      echo "[meow.sh] torrent id: $tid" >&2
      ret=1
      break
    fi
  done 3< <(for regex in "${searchregex[@]}"; do
    # Each stored value starts with "|"; drop it to get a valid regex.
    searchfilter_fast "$db" "${regex:1}"
  done)

  # Rewrite the marker file in a single redirection.
  for tid in "${!already[@]}"; do
    printf '%s\n' "$tid$SEP${already[$tid]}"
  done > "$mark"

  return "$ret"
}

# Run every registered search and merge the results into the database.
# search output is read as "title<SEP>torrent<SEP>time"; the torrent id is
# the part of the torrent field after its last "=". Records are stored as
# "time<SEP>tid<SEP>title", sorted numerically, with exact duplicates
# removed.
# Arguments: $1 - database file (default: db.txt); created if missing
# Globals:   SEP, searchquery (read)
# Note: search runs as a pipeline stage, so a die inside it only ends that
# stage; the remaining queries still run and are merged.
# Exits via die when the merged database would be smaller than the current
# one; the database is left untouched in that case.
runsearch() { # [database]
  local db="${1:-db.txt}"
  local tmp q fs_old fs_new
  tmp="$(mktemp)" || die "Failed to create temporary file"
  touch "$db"

  for q in "${!searchquery[@]}"; do
    search "${searchquery[$q]}" \
      | while IFS=$SEP read -r title torrent time; do
        printf '%s\n' "$time$SEP${torrent##*=}$SEP$title"
      done
  done | sort -n -- "$db" - | uniq > "$tmp"

  # Byte counts via wc -c (portable, unlike GNU-only `du -b`).
  fs_old="$(wc -c < "$db")"
  fs_new="$(wc -c < "$tmp")"
  # Kept in `test || fail` form so that a failed size lookup also aborts.
  [ "$fs_new" -ge "$fs_old" ] || {
    rm -f -- "$tmp" # do not leave the temporary file behind
    die "new database is smaller than current!"
  }
  mv -- "$tmp" "$db"
}
