split searching from filtering, log searches
This commit is contained in:
parent
027a8db2e1
commit
4fd2dfcee6
75
meow.sh
75
meow.sh
|
@ -1,11 +1,11 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# SEP separates the columns of every scraped record: it is handed to
# scrape.awk as `sep` and used as IFS when records are read back.
# The old revision used \1, the new revision uses a tab.
SEP=$'\1'
|
SEP=$'\t'
|
||||||
# Base curl command kept as an array: silent but still reporting errors (-sS),
# 32 s overall limit, 8 s connect limit, up to 3 retries spaced 1 s apart.
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
|
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
|
||||||
|
|
||||||
# all timestamps are given in seconds since the epoch
|
# all timestamps are given in seconds since the epoch
|
||||||
# Associative arrays below are keyed by the sanitized name/query
# (see watch and touchgroup/touchquery): raw text, combined regex, timestamp.
declare -A groupinsane # unsanitized group names
|
declare -A searchquery
|
||||||
declare -A groupregex
|
declare -A searchregex
|
||||||
declare -A grouptime # last seen release
|
declare -A searchtime # last seen release
|
||||||
|
|
||||||
die() {
|
die() {
|
||||||
echo -E "$@" >&2
|
echo -E "$@" >&2
|
||||||
|
@ -17,8 +17,8 @@ retrieve() {
|
||||||
"http://www.nyaa.se/"
|
"http://www.nyaa.se/"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Guard used by the public entry points: calls die with an error message
# when the first argument is the empty string, otherwise does nothing.
nullcheck() { # {group name}
|
nullcheck() { # {query}
|
||||||
[[ -n "$1" ]] || die "Null group name";
|
[[ -n "$1" ]] || die "Null search query";
|
||||||
}
|
}
|
||||||
|
|
||||||
sanitize() {
|
sanitize() {
|
||||||
|
@ -29,67 +29,82 @@ splittags() { # {tag}
|
||||||
awk -v tag="$1" -f "$SRCDIR/splittags.awk"
|
awk -v tag="$1" -f "$SRCDIR/splittags.awk"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Pipe stdin through scrape.awk with TZ forced to UTC0.
# The old revision also passed a group name (g) and a timestamp (ts, default 0);
# the new revision passes only the field separator.
scrape() { # {group name} {timestamp}
|
scrape() {
|
||||||
TZ=UTC0 awk -v g="$1" -v ts="${2:-0}" -v sep="$SEP" -f "$SRCDIR/scrape.awk"
|
TZ=UTC0 awk -v sep="$SEP" -f "$SRCDIR/scrape.awk"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Register something to watch.
#   $1      raw group name (old revision) / search query (new revision); must be non-empty
#   $2...   regexes; each is appended to the entry's pattern as "|(regex)"
# The raw text is stored under its sanitized form (output of sanitize), which
# is also the key the regexes accumulate under. The accumulated pattern
# therefore starts with a "|" that the consumer has to strip.
watch() { # {group name} [regex...]
|
watch() { # {search query} [regex...]
|
||||||
nullcheck "$1"
|
nullcheck "$1"
|
||||||
local gs="$(sanitize<<<"$1")" regex=
|
local gs="$(sanitize<<<"$1")" regex=
|
||||||
groupinsane[$gs]="$1"
|
searchquery[$gs]="$1"
|
||||||
# drop the name/query so that only the regexes remain in "$@"
shift
|
shift
|
||||||
for regex; do
|
for regex; do
|
||||||
groupregex[$gs]+="|($regex)"
|
searchregex[$gs]+="|($regex)"
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
# Record a timestamp for an entry.
#   $1  raw group name / search query; must be non-empty
#   $2  timestamp to store under the sanitized form of $1
# cleanup writes lines invoking this function into times.sh.
touchgroup() { # {group name} {timestamp}
|
touchquery() { # {search query} {timestamp}
|
||||||
nullcheck "$1"
|
nullcheck "$1"
|
||||||
local gs="$(sanitize<<<"$1")"
|
local gs="$(sanitize<<<"$1")"
|
||||||
grouptime[$gs]="$2"
|
searchtime[$gs]="$2"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Fetch and scrape the feed for $1 (group name in the old revision, search
# query in the new one), writing the scraped records to stdout.
# Carriage returns, newlines and SEP characters are deleted from the retrieved
# data before it is split into "item" tags, so SEP cannot occur inside a field.
# Calls die when retrieve (the first pipeline stage) reports failure; the
# status of the later stages is not inspected.
groupreleases() { # groupname [timestamp]
|
search() {
|
||||||
nullcheck "$1"
|
nullcheck "$1"
|
||||||
retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape "$1" "${2:-}"
|
retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape
|
||||||
# PIPESTATUS must be read immediately after the pipeline it describes
[ ${PIPESTATUS[0]} = 0 ] || die "Failed to retrieve releases for $1"
|
[ ${PIPESTATUS[0]} = 0 ] || die "Failed to search for $1"
|
||||||
}
|
}
|
||||||
|
|
||||||
# searchfilter key regex [timestamp] [database]
#
# Print, unchanged, every database record whose first SEP-delimited field
# matches the Perl-compatible pattern `regex`.
#   $1 key        accepted for interface compatibility; not consulted
#   $2 regex      pattern handed to grep -P
#   $3 timestamp  accepted for interface compatibility; not consulted
#   $4 database   file to read (default: db.txt)
# Returns 0 once the database has been read to the end, whether or not any
# record matched, and 1 (with a message on stderr) when it cannot be read.
#
# The loop's own exit status is deliberately not used as the result: it only
# reflects whether the *last* record matched, so treating it as an error would
# make the function fail on perfectly good input.
#
# NOTE(review): runsearch writes records as time/key/title/torrent, so the
# first field examined here is presumably the timestamp rather than the title,
# and runfilter expects the timestamp in the third field. Confirm the intended
# column order before re-enabling runall.
searchfilter() { # key regex [timestamp] [database]
  local db="${4:-db.txt}" title= etc=
  if [[ ! -r "$db" ]]; then
    echo -E "Cannot read database $db" >&2
    return 1
  fi
  while IFS=$SEP read -r title etc; do
    # `--` keeps a pattern that starts with "-" from being parsed as an option
    if grep -qP -- "$2" <<< "$title"; then
      printf '%s\n' "$title$SEP$etc"
    fi
  done < "$db"
  return 0
}
|
||||||
|
|
||||||
# Persist state and terminate the script.
# For every key with a recorded timestamp, append a line to times.sh that
# re-registers that timestamp (touchgroup in the old revision, touchquery in
# the new one), and delete "<key>.xml" if such a file exists.
#   $1  exit status to terminate with; defaults to 1, which is what the INT
#       trap installed by runall gets because it passes no argument
# This function never returns: it always ends in exit.
cleanup() {
|
cleanup() {
|
||||||
local gs= v=
|
local gs= v=
|
||||||
for gs in "${!grouptime[@]}"; do
|
for gs in "${!searchtime[@]}"; do
|
||||||
v="${grouptime[$gs]}"
|
v="${searchtime[$gs]}"
|
||||||
echo -E "touchgroup $gs $v" >> times.sh
|
echo -E "touchquery $gs $v" >> times.sh
|
||||||
[ -e "$gs.xml" ] && rm "$gs.xml"
|
[ -e "$gs.xml" ] && rm "$gs.xml"
|
||||||
done
|
done
|
||||||
exit ${1:-1}
|
exit ${1:-1}
|
||||||
}
|
}
|
||||||
|
|
||||||
rungroup() {
|
runfilter() {
|
||||||
local insane= regex= timestamp= res= _= recent=
|
local query= regex= timestamp= res= _= recent=
|
||||||
insane="${groupinsane[$1]}"
|
query="${searchquery[$1]}"
|
||||||
regex="${groupregex[$1]:1}"
|
regex="${searchregex[$1]:1}" # exclude first | character
|
||||||
timestamp="${grouptime[$1]}"
|
timestamp="${searchtime[$1]}"
|
||||||
res="$(groupfilter "$insane" "$regex" "$timestamp")"
|
res="$(searchfilter "$query" "$regex" "$timestamp")"
|
||||||
[ $? = 0 ] || return $?
|
[ $? = 0 ] || return $?
|
||||||
IFS=$SEP read -r _ _ recent <<< "$res"
|
IFS=$SEP read -r _ _ recent <<< "$res"
|
||||||
[ -n "$recent" ] && {
|
[ -n "$recent" ] && {
|
||||||
grouptime[$1]="$recent"
|
searchtime[$1]="$recent"
|
||||||
echo -E "$res"
|
echo -E "$res"
|
||||||
}
|
}
|
||||||
return 0
|
return 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# runsearch [database]
#
# Run every registered search and merge the results into the database.
#   $1  database file (default: db.txt); created if it does not exist
# Each scraped record (title, torrent, time) is rewritten as
#   time SEP key SEP title SEP torrent
# where key is the searchquery array key, then merged with the existing
# database, sorted numerically by time, and de-duplicated.
# Returns 0 when the database was replaced, non-zero otherwise.
#
# The merged result is built in a temporary file next to the database, so the
# final mv stays on one filesystem, and it only replaces the database when
# sort and uniq both succeeded. A failed merge leaves the old database intact
# and removes the temporary file.
runsearch() { # [database]
  local db="${1:-db.txt}" tmp= q= title= torrent= when=
  local -a stages=()
  touch -- "$db"
  tmp=$(mktemp -- "$db.XXXXXX") || return 1
  for q in "${!searchquery[@]}"; do
    search "${searchquery[$q]}" \
      | while IFS=$SEP read -r title torrent when; do
          printf '%s\n' "$when$SEP$q$SEP$title$SEP$torrent"
        done
  # plain `uniq` rather than `sort -u`: with -n, -u would also drop distinct
  # records that merely share a timestamp
  done | sort -n -- "$db" - | uniq > "$tmp"
  stages=("${PIPESTATUS[@]}")
  if (( stages[1] != 0 || stages[2] != 0 )); then
    rm -f -- "$tmp"
    echo -E "Failed to update $db" >&2
    return 1
  fi
  mv -- "$tmp" "$db"
}
|
||||||
|
|
||||||
# Process every watched entry, then hand over to cleanup.
# Installs cleanup as the INT handler, runs rungroup (old revision) or
# runfilter (new revision) once per key that has a regex registered, and
# finally calls cleanup with 0 if every run succeeded or 1 if any failed.
# Because cleanup ends in exit, this function does not return to its caller.
runall() {
|
runall() {
|
||||||
trap cleanup INT
|
trap cleanup INT
|
||||||
local ret=0 gs=
|
local ret=0 gs=
|
||||||
for gs in "${!groupregex[@]}"; do rungroup "$gs" || ret=1; done
|
for gs in "${!searchregex[@]}"; do runfilter "$gs" || ret=1; done
|
||||||
cleanup $ret
|
cleanup $ret
|
||||||
}
|
}
|
||||||
|
|
11
run
11
run
|
@ -36,6 +36,11 @@ runactions() {
|
||||||
|
|
||||||
# Entry sequence of the `run` script: load the library, then the user's
# configuration from the current directory.
. "$SRCDIR/meow.sh"
|
. "$SRCDIR/meow.sh"
|
||||||
. config.sh
|
. config.sh
|
||||||
# Old revision: load saved timestamps from times.sh (keeping the previous file
# as times.sh.old), pipe runall into runactions and exit with runall's status.
[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
|
|
||||||
runall | runactions
|
# New revision: that flow is commented out and only the search step runs.
#[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
|
||||||
exit ${PIPESTATUS[0]}
|
#runall | runactions
|
||||||
|
#exit ${PIPESTATUS[0]}
|
||||||
|
|
||||||
|
runsearch
|
||||||
|
|
||||||
|
# NOTE(review): the script exits 0 regardless of runsearch's status.
exit 0
|
||||||
|
|
|
@ -22,16 +22,13 @@ BEGIN{
|
||||||
# http://stackoverflow.com/a/2123002
|
# http://stackoverflow.com/a/2123002
|
||||||
m=split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec",d,"|")
|
m=split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec",d,"|")
|
||||||
for(o=1;o<=m;o++) months[d[o]]=sprintf("%02d",o)
|
for(o=1;o<=m;o++) months[d[o]]=sprintf("%02d",o)
|
||||||
glen=length(g)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Emit one output record for a single item.
#   str  text of the item; its title, pubDate and link tags are extracted
#        with opentag and passed through unescape
# Prints "title sep torrent sep pubunix", where pubunix is hotdate(pubDate)
# (presumably seconds since the epoch -- confirm against hotdate).
# The old revision skipped items whose title did not start with "[" g "]" or
# whose pubunix was not newer than ts, and read pubDate from $0 instead of
# str; the new revision emits every item.
# title, pub, pubunix and torrent are not declared as parameters, so they are
# global awk variables.
function run(str) {
|
function run(str) {
|
||||||
title=unescape(opentag(str, "title"))
|
title=unescape(opentag(str, "title"))
|
||||||
if (substr(title,1,glen+2) != "["g"]") return
|
pub=unescape(opentag(str, "pubDate"))
|
||||||
pub=unescape(opentag($0, "pubDate"))
|
|
||||||
# "date -d \""pub "\" +%s" | getline pubunix
|
# "date -d \""pub "\" +%s" | getline pubunix
|
||||||
pubunix=hotdate(pub)
|
pubunix=hotdate(pub)
|
||||||
if (pubunix <= ts) return
|
|
||||||
torrent=unescape(opentag(str, "link"))
|
torrent=unescape(opentag(str, "link"))
|
||||||
print title sep torrent sep pubunix
|
print title sep torrent sep pubunix
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user