From c291e986b74422ce0a5fbb01e7f59413f58b286e Mon Sep 17 00:00:00 2001
From: Connor Olding
Date: Sat, 25 May 2013 19:15:43 -0700
Subject: [PATCH] i do too much before my first commit

---
Review notes (this section is not part of the commit message):
* scrape.awk, splittags.awk: the closing-tag pattern was the empty string, so
  no closing tag could be located; it is "</"tag">" again, which is what the
  len+3 skip in splittags.awk expects.
* splittags.awk: stop when either tag is missing (was: only when both were).
* nyaa.sh: the feed is cached under the sanitized group name, which is the name
  cleanup removes; cleanup walks the watch list so no feed is left behind.
* Hunk line counts and the diffstat are updated; the index lines still carry
  the blob ids of the first revision.

 config.sh     |   5 +++
 nyaa.sh       | 109 ++++++++++++++++++++++++++++++++++++++++++++++++++
 run           |  14 +++++++
 scrape.awk    |  48 ++++++++++++++++++++++
 splittags.awk |  17 ++++++++
 5 files changed, 193 insertions(+)
 create mode 100644 config.sh
 create mode 100644 nyaa.sh
 create mode 100755 run
 create mode 100644 scrape.awk
 create mode 100644 splittags.awk

diff --git a/config.sh b/config.sh
new file mode 100644
index 0000000..5062b5f
--- /dev/null
+++ b/config.sh
@@ -0,0 +1,5 @@
+# watch {group name} [PCRE...]: report releases of the group whose title matches any pattern
+watch Commie 'Yuyushiki - \d\d' 'Aiura - \d\d'
+watch rori 'Hentai Ouji to Warawanai Neko - '
+watch FFF 'DATE A LIVE - \d\d(?!v0)'
+#watch 'Nimei-Raws' '.'
diff --git a/nyaa.sh b/nyaa.sh
new file mode 100644
index 0000000..64162b1
--- /dev/null
+++ b/nyaa.sh
@@ -0,0 +1,109 @@
+#!/usr/bin/env bash
+# Helpers for polling nyaa RSS feeds; meant to be sourced (see ./run).
+# Records passed between stages are fields joined by $SEP.
+SEP=$'\1'
+
+# Print the arguments to stderr and exit with status 1.
+die() {
+  echo -E "$@" 1>&2
+  exit 1
+}
+
+# Abort unless $1 (a group name) is non-empty.
+nullcheck() {
+  [[ -n "$1" ]] || die "Null group name"
+}
+
+# Filter: replace every character outside [0-9a-zA-Z_] with '_'.
+sanitize() {
+  sed -e 's/[^0-9a-zA-Z_]/_/g'
+}
+
+# Filter: print the body of each <$1>...</$1> element on its own line.
+splittags() {
+  awk -v tag="$1" -f splittags.awk
+}
+
+# Filter: turn <item> bodies into "title SEP link SEP unix-time" records for
+# releases of group $1 published after timestamp $2 (default 0).
+scrape() {
+  TZ=UTC0 awk -v g="$1" -v timestamp="${2:-0}" -v sep="$SEP" -f scrape.awk
+}
+
+declare -A groupinsane # unsanitized group names
+declare -A groupshows # regexes
+watch() { # {group name} [regex...]
+  nullcheck "$1"
+  local gs
+  gs="$(sanitize <<< "$1")"
+  groupinsane[$gs]="$1"
+  shift
+  while (( $# )); do
+    # alternatives accumulate as "|(a)|(b)"; runall strips the leading '|'
+    groupshows[$gs]+="|($1)"
+    shift
+  done
+}
+
+declare -A grouptimes # last times timestamp
+touchgroup() { # {group name} {unix time}
+  nullcheck "$1"
+  local gs
+  gs="$(sanitize <<< "$1")"
+  grouptimes[$gs]="$2"
+}
+
+groupreleases() { # groupname [timestamp]
+  nullcheck "$1"
+  # Cache the feed under the sanitized name: that is the name cleanup() removes,
+  # and it keeps characters such as '/' out of the file name.
+  local xml
+  xml="$(sanitize <<< "$1").xml"
+  # TODO: escapeurl $1
+  local URL="http://www.nyaa.eu/?page=search&term=%5B$1%5D&page=rss"
+  curl -LsS "$URL" > "$xml" || die "Failed to retrieve releases for $1"
+  tr -d '\r\n'"$SEP" < "$xml" | splittags item | scrape "$1" "${2:-}"
+}
+
+groupfilter() { # groupname regex [timestamp]
+  local title rest
+  groupreleases "$1" "${3:-}" | while IFS=$SEP read -r title rest; do
+    # rest holds "link SEP time": read puts the surplus fields in the last name
+    grep -qP -- "$2" <<< "$title" && echo "$title$SEP$rest"
+  done
+}
+
+# Append every known timestamp to times.sh as touchgroup calls, remove the
+# cached feeds and exit 0. Also installed as the INT handler by runall.
+cleanup() {
+  local gs
+  for gs in "${!grouptimes[@]}"; do
+    echo "touchgroup $gs ${grouptimes[$gs]}" >> times.sh
+  done
+  # A feed may exist for a watched group that has no timestamp yet (interrupted
+  # during its first fetch), so walk the watch list rather than grouptimes.
+  for gs in "${!groupshows[@]}"; do
+    rm -f "$gs.xml"
+  done
+  exit 0
+}
+
+# TODO: optionally buffer lists so interrupting and restarting wont give the same output
+
+# Poll every watched group, print the matching releases, and remember when each
+# group was polled so the next run only reports newer releases.
+runall() {
+  trap cleanup INT
+
+  local gs insane regex timestamp now
+  for gs in "${!groupshows[@]}"; do
+    insane="${groupinsane[$gs]}"
+    regex="${groupshows[$gs]:1}"
+    timestamp="${grouptimes[$gs]:-}"
+    now="$(date -u '+%s')"
+    groupfilter "$insane" "$regex" "$timestamp"
+    touchgroup "$gs" "$now"
+  done
+
+  cleanup
+}
diff --git a/run b/run
new file mode 100755
index 0000000..5f54c0c
--- /dev/null
+++ b/run
@@ -0,0 +1,14 @@
+#!/usr/bin/env bash
+# Entry point: load the library, the watch list and the saved timestamps, then poll.
+
+# Filter: render "title SEP link SEP unix-time" records as three readable lines.
+prettify() {
+  while IFS=$SEP read -r title torrent time; do
+    echo "$title"$'\n'"dl:"$'\t'"$torrent"$'\n'"at:"$'\t'"$(date -d @"$time")"
+  done
+}
+
+. nyaa.sh
+. config.sh
+[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
+runall | prettify
diff --git a/scrape.awk b/scrape.awk
new file mode 100644
index 0000000..bc4c0f1
--- /dev/null
+++ b/scrape.awk
@@ -0,0 +1,48 @@
+# Usage: awk -v g=GROUP -v timestamp=UNIXTIME -v sep=SEP -f scrape.awk
+# Input: one RSS <item> body per line.
+# Output: title sep link sep pubunix for items titled "[g]..." newer than timestamp.
+
+# Return the text between <tag> and </tag> in str, or "" if either is missing.
+function opentag(str, tag,    topen, tclose, len, begin, end) {
+  topen="<"tag">"
+  tclose="</"tag">"
+  len=length(tag)
+  begin=index(str, topen)
+  end=index(str, tclose)
+  return (begin && end) ? substr(str, begin+len+2, end-begin-len-2) : ""
+}
+
+# Replace decimal character references (&#NN;) with the character itself.
+# The 3-argument match() is a gawk extension.
+function unescape(str,    b) {
+  while (match(str, /&#([0-9]+);/, b))
+    str=substr(str, 1, RSTART-1) sprintf("%c", b[1]+0) substr(str, RSTART+RLENGTH)
+  return str
+}
+
+# Convert a date such as "Sat, 25 May 2013 19:15:43 +0000" to unix time.
+# The zone field is ignored; nyaa.sh runs this script with TZ=UTC0.
+function hotdate(str,    d) {
+  split(str, d, "[ :]")
+  return mktime(d[4]" "months[d[3]]" "d[2]" "d[5]" "d[6]" "d[7])
+}
+
+BEGIN{
+  # http://stackoverflow.com/a/2123002
+  m=split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec",d,"|")
+  for(o=1;o<=m;o++) months[d[o]]=sprintf("%02d",o)
+  glen=length(g)
+}
+
+function run(str,    title, pub, pubunix, torrent) {
+  title=unescape(opentag(str, "title"))
+  if (substr(title,1,glen+2) != "["g"]") return
+  pub=unescape(opentag(str, "pubDate"))
+# "date -d \""pub "\" +%s" | getline pubunix
+  pubunix=hotdate(pub)
+  if (pubunix <= timestamp) return
+  torrent=unescape(opentag(str, "link"))
+  print title sep torrent sep pubunix
+}
+
+{run($0)}
diff --git a/splittags.awk b/splittags.awk
new file mode 100644
index 0000000..689f54a
--- /dev/null
+++ b/splittags.awk
@@ -0,0 +1,17 @@
+# Usage: awk -v tag=NAME -f splittags.awk
+# Print the body of every <tag>...</tag> element of each input line, one per line.
+{
+  str=$0
+  topen="<"tag">"
+  tclose="</"tag">"
+  len=length(tag)
+  for (;;) {
+    begin=index(str, topen)
+    end=index(str,tclose)
+    # Stop unless a complete element remains; an unmatched tag would
+    # otherwise yield bogus output.
+    if (!(begin && end)) break
+    print substr(str, begin+len+2, end-begin-len-2)
+    str=substr(str, end+len+3)
+  }
+}