split searching from filtering, log searches

This commit is contained in:
Connor Olding 2014-03-15 11:56:56 -07:00
parent 027a8db2e1
commit 4fd2dfcee6
3 changed files with 54 additions and 37 deletions

75
meow.sh
View File

@ -1,11 +1,11 @@
#!/usr/bin/env bash #!/usr/bin/env bash
SEP=$'\1' SEP=$'\t'
curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1) curl=(curl -sS -m 32 --connect-timeout 8 --retry 3 --retry-delay 1)
# all timestamps are given in seconds since the epoch # all timestamps are given in seconds since the epoch
declare -A groupinsane # unsanitized group names declare -A searchquery
declare -A groupregex declare -A searchregex
declare -A grouptime # last seen release declare -A searchtime # last seen release
die() { die() {
echo -E "$@" >&2 echo -E "$@" >&2
@ -17,8 +17,8 @@ retrieve() {
"http://www.nyaa.se/" "http://www.nyaa.se/"
} }
nullcheck() { # {group name} nullcheck() { # {query}
[[ -n "$1" ]] || die "Null group name"; [[ -n "$1" ]] || die "Null search query";
} }
sanitize() { sanitize() {
@ -29,67 +29,82 @@ splittags() { # {tag}
awk -v tag="$1" -f "$SRCDIR/splittags.awk" awk -v tag="$1" -f "$SRCDIR/splittags.awk"
} }
scrape() { # {group name} {timestamp} scrape() {
TZ=UTC0 awk -v g="$1" -v ts="${2:-0}" -v sep="$SEP" -f "$SRCDIR/scrape.awk" TZ=UTC0 awk -v sep="$SEP" -f "$SRCDIR/scrape.awk"
} }
watch() { # {group name} [regex...] watch() { # {search query} [regex...]
nullcheck "$1" nullcheck "$1"
local gs="$(sanitize<<<"$1")" regex= local gs="$(sanitize<<<"$1")" regex=
groupinsane[$gs]="$1" searchquery[$gs]="$1"
shift shift
for regex; do for regex; do
groupregex[$gs]+="|($regex)" searchregex[$gs]+="|($regex)"
done done
} }
touchgroup() { # {group name} {timestamp} touchquery() { # {search query} {timestamp}
nullcheck "$1" nullcheck "$1"
local gs="$(sanitize<<<"$1")" local gs="$(sanitize<<<"$1")"
grouptime[$gs]="$2" searchtime[$gs]="$2"
} }
groupreleases() { # groupname [timestamp] search() {
nullcheck "$1" nullcheck "$1"
retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape "$1" "${2:-}" retrieve "$1" | tr -d '\r\n'"$SEP" | splittags item | scrape
[ ${PIPESTATUS[0]} = 0 ] || die "Failed to retrieve releases for $1" [ ${PIPESTATUS[0]} = 0 ] || die "Failed to search for $1"
} }
groupfilter() { # groupname regex [timestamp] searchfilter() { # key regex [timestamp]
groupreleases "$1" "${3:-}" | while IFS=$SEP read -r title etc; do while IFS=$SEP read -r title etc; do
grep -P "$2" <<< "$title" >/dev/null && echo -E "$title$SEP$etc" grep -P "$2" <<< "$title" >/dev/null && echo -E "$title$SEP$etc"
done done < db.txt
[ ${PIPESTATUS[0]} = 0 ] || exit 1 [ ${PIPESTATUS[0]} = 0 ] || exit 1
} }
cleanup() { cleanup() {
local gs= v= local gs= v=
for gs in "${!grouptime[@]}"; do for gs in "${!searchtime[@]}"; do
v="${grouptime[$gs]}" v="${searchtime[$gs]}"
echo -E "touchgroup $gs $v" >> times.sh echo -E "touchquery $gs $v" >> times.sh
[ -e "$gs.xml" ] && rm "$gs.xml" [ -e "$gs.xml" ] && rm "$gs.xml"
done done
exit ${1:-1} exit ${1:-1}
} }
rungroup() { runfilter() {
local insane= regex= timestamp= res= _= recent= local query= regex= timestamp= res= _= recent=
insane="${groupinsane[$1]}" query="${searchquery[$1]}"
regex="${groupregex[$1]:1}" regex="${searchregex[$1]:1}" # exclude first | character
timestamp="${grouptime[$1]}" timestamp="${searchtime[$1]}"
res="$(groupfilter "$insane" "$regex" "$timestamp")" res="$(searchfilter "$query" "$regex" "$timestamp")"
[ $? = 0 ] || return $? [ $? = 0 ] || return $?
IFS=$SEP read -r _ _ recent <<< "$res" IFS=$SEP read -r _ _ recent <<< "$res"
[ -n "$recent" ] && { [ -n "$recent" ] && {
grouptime[$1]="$recent" searchtime[$1]="$recent"
echo -E "$res" echo -E "$res"
} }
return 0 return 0
} }
# runsearch [database]
#
# Run every registered search query and merge the results into the database
# file (default: db.txt in the current directory).
#
# Each line produced by `search` (title, torrent and time separated by $SEP)
# is rewritten as "time<SEP>key<SEP>title<SEP>torrent", where key is the
# index of the query in the searchquery array. The new lines are merged with
# the existing database contents, sorted numerically on the leading time
# field and de-duplicated.
#
# Globals:   SEP (read), searchquery (read)
# Arguments: $1 - path of the database file (optional)
# Returns:   0 on success; non-zero if the temp file or database could not be
#            created, or if the merge failed. On failure the existing
#            database is left untouched and the temp file is removed.
runsearch() { # [database]
  local db="${1:-db.txt}"
  local tmp= q= title= torrent= time=
  local -a rc=()

  # Declared separately from the assignment so a mktemp failure is not
  # masked by the exit status of `local`.
  tmp="$(mktemp)" || return 1

  # sort reads the database as an input file, so it has to exist.
  touch -- "$db" || { rm -f -- "$tmp"; return 1; }

  for q in "${!searchquery[@]}"; do
    search "${searchquery[$q]}" \
      | while IFS=$SEP read -r title torrent time; do
          printf '%s\n' "$time$SEP$q$SEP$title$SEP$torrent"
        done
  done | sort -n -- "$db" - | uniq > "$tmp"
  rc=("${PIPESTATUS[@]}")

  # Never replace the database with blank or incomplete output: if the merge
  # (sort) or the write (uniq) failed, discard the temp file instead.
  if [ "${rc[1]:-1}" != 0 ] || [ "${rc[2]:-1}" != 0 ]; then
    rm -f -- "$tmp"
    return 1
  fi

  mv -- "$tmp" "$db"
}
runall() { runall() {
trap cleanup INT trap cleanup INT
local ret=0 gs= local ret=0 gs=
for gs in "${!groupregex[@]}"; do rungroup "$gs" || ret=1; done for gs in "${!searchregex[@]}"; do runfilter "$gs" || ret=1; done
cleanup $ret cleanup $ret
} }

11
run
View File

@ -36,6 +36,11 @@ runactions() {
. "$SRCDIR/meow.sh" . "$SRCDIR/meow.sh"
. config.sh . config.sh
[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
runall | runactions #[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
exit ${PIPESTATUS[0]} #runall | runactions
#exit ${PIPESTATUS[0]}
runsearch
exit 0

View File

@ -22,16 +22,13 @@ BEGIN{
# http://stackoverflow.com/a/2123002 # http://stackoverflow.com/a/2123002
m=split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec",d,"|") m=split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec",d,"|")
for(o=1;o<=m;o++) months[d[o]]=sprintf("%02d",o) for(o=1;o<=m;o++) months[d[o]]=sprintf("%02d",o)
glen=length(g)
} }
function run(str) { function run(str) {
title=unescape(opentag(str, "title")) title=unescape(opentag(str, "title"))
if (substr(title,1,glen+2) != "["g"]") return pub=unescape(opentag(str, "pubDate"))
pub=unescape(opentag($0, "pubDate"))
# "date -d \""pub "\" +%s" | getline pubunix # "date -d \""pub "\" +%s" | getline pubunix
pubunix=hotdate(pub) pubunix=hotdate(pub)
if (pubunix <= ts) return
torrent=unescape(opentag(str, "link")) torrent=unescape(opentag(str, "link"))
print title sep torrent sep pubunix print title sep torrent sep pubunix
} }