From c291e986b74422ce0a5fbb01e7f59413f58b286e Mon Sep 17 00:00:00 2001
From: Connor Olding <cloningdonor@gmail.com>
Date: Sat, 25 May 2013 19:15:43 -0700
Subject: [PATCH] i do too much before my first commit

---
 config.sh     |  4 +++
 nyaa.sh       | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++
 run           | 11 +++++++
 scrape.awk    | 39 ++++++++++++++++++++++++
 splittags.awk | 13 ++++++++
 5 files changed, 151 insertions(+)
 create mode 100644 config.sh
 create mode 100644 nyaa.sh
 create mode 100755 run
 create mode 100644 scrape.awk
 create mode 100644 splittags.awk

diff --git a/config.sh b/config.sh
new file mode 100644
index 0000000..5062b5f
--- /dev/null
+++ b/config.sh
@@ -0,0 +1,4 @@
+watch Commie 'Yuyushiki - \d\d' 'Aiura - \d\d' # watch {group name} [regex...]; titles are matched with grep -P
+watch rori 'Hentai Ouji to Warawanai Neko - '
+watch FFF 'DATE A LIVE - \d\d(?!v0)' # (?!v0) is a negative lookahead: the number must not be followed by "v0"
+#watch 'Nimei-Raws' '.' # disabled; '.' matches any non-empty title
diff --git a/nyaa.sh b/nyaa.sh
new file mode 100644
index 0000000..64162b1
--- /dev/null
+++ b/nyaa.sh
@@ -0,0 +1,84 @@
+#!/usr/bin/env bash
+SEP=$'\1' # field separator (byte 0x01) between title, torrent link and timestamp
+
+die() { # {message...}: write the arguments to stderr, then exit with status 1
+  echo -E "$@" >&2
+  exit 1
+}
+
+nullcheck() {
+  [[ -n "$1" ]] || die "Null group name";
+}
+
+sanitize() { # stdin -> stdout: every character outside 0-9, a-z, A-Z and "_" becomes "_"
+  sed 's/[^0-9a-zA-Z_]/_/g'
+}
+
+splittags() { # {tag}: filter stdin through splittags.awk with the tag name passed as awk variable "tag"
+  awk -f splittags.awk -v tag="$1"
+}
+
+scrape() { # {group name} [timestamp]: filter stdin through scrape.awk; timestamp defaults to 0, dates are handled in UTC
+  TZ=UTC0 awk -f scrape.awk -v sep="$SEP" -v timestamp="${2:-0}" -v g="$1"
+}
+
+declare -A groupinsane # unsanitized group names
+declare -A groupshows # regexes
+watch() { # {group name} [regex...]
+  nullcheck "$1"
+  local gs="$(sanitize<<<"$1")"
+  groupinsane[$gs]="$1"
+  shift
+  while (( "$#" )); do
+    groupshows[$gs]+="|($1)"
+    shift
+  done
+}
+
+declare -A grouptimes # sanitized group name -> unix time of the last check
+touchgroup() { # {group name} {unix time}
+  local key
+  nullcheck "$1" && key="$(sanitize <<< "$1")"
+  grouptimes[$key]="$2"
+}
+
+groupreleases() { # groupname [timestamp]: download the group's RSS feed and print its releases via scrape
+  nullcheck "$1"
+  local url="http://www.nyaa.eu/?page=search&term=%5B$1%5D&page=rss" # TODO: escapeurl $1
+  local xml="$(sanitize <<< "$1").xml" # sanitized name: filename-safe, and the one cleanup removes
+  curl -LsS "$url" > "$xml" || die "Failed to retrieve releases for $1"
+  tr -d '\r\n'"$SEP" < "$xml" | splittags item | scrape "$1" "${2:-}" # one line, free of SEP, before splitting
+}
+
+groupfilter() { # groupname regex [timestamp]: print only the releases whose title matches the regex (grep -P)
+  groupreleases "$1" "${3:-}" | while IFS=$SEP read -r name rest; do # rest: link and remaining fields, passed through
+    grep -P "$2" <<< "$name" >/dev/null && echo "$name$SEP$rest"
+  done
+}
+
+cleanup() { # save every recorded time to times.sh, remove cached feeds, then exit 0
+  for gs in "${!grouptimes[@]}"; do
+    # each line is a touchgroup call, so times.sh can be sourced on the next run
+    echo "touchgroup $gs ${grouptimes[$gs]}" >> times.sh
+    if [ -e "$gs.xml" ]; then rm "$gs.xml"; fi
+  done
+  exit 0
+}
+
+# TODO: optionally buffer lists so interrupting and restarting won't give the same output
+
+runall() { # check each watched group once, print its matching new releases, then call cleanup
+  trap cleanup INT # on interrupt, still save the times gathered so far
+
+  local name pattern since started
+  for gs in "${!groupshows[@]}"; do
+    name="${groupinsane[$gs]}"
+    pattern="${groupshows[$gs]#?}" # drop the leading "|" that watch prepends
+    since="${grouptimes[$gs]}"
+    started="$(date -u '+%s')" # taken before the fetch starts
+    groupfilter "$name" "$pattern" "$since"
+    touchgroup "$gs" "$started"
+  done
+
+  cleanup
+}
diff --git a/run b/run
new file mode 100755
index 0000000..5f54c0c
--- /dev/null
+++ b/run
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+prettify() { # "title SEP link SEP unixtime" lines on stdin -> title / "dl:" / "at:" records on stdout
+  while IFS=$SEP read -r name link stamp; do
+    printf '%s\ndl:\t%s\nat:\t%s\n' "$name" "$link" "$(date -d @"$stamp")"
+  done
+}
+
+. nyaa.sh
+. config.sh
+[ -e times.sh ] && { . times.sh; mv times.sh times.sh.old; }
+runall | prettify
diff --git a/scrape.awk b/scrape.awk
new file mode 100644
index 0000000..bc4c0f1
--- /dev/null
+++ b/scrape.awk
@@ -0,0 +1,39 @@
+function opentag(str, tag,    first, last, skip) { # text between the first <tag> and the first </tag> in str, "" if either is missing
+  # the extra parameters are awk locals; callers pass only str and tag
+  skip=length(tag)+2 # length of "<tag>"
+  first=index(str, "<"tag">")
+  last=index(str, "</"tag">")
+  if (!(first && last)) return ""
+  return substr(str, first+skip, last-first-skip)
+}
+
+function unescape(str,    ref) { # replace every decimal reference "&#NNN;" in str with the character of that code
+  while (match(str, /&#([0-9]+);/, ref)) # three-argument match() is a gawk extension; ref is a local array
+    str=substr(str, 1, RSTART-1) sprintf("%c", ref[1]+0) substr(str, RSTART+RLENGTH) # +0: %c must get a number, not a digit string
+  return str
+}
+
+function hotdate(str,    f) { # date text -> unix time via mktime(); presumably the RSS pubDate layout "Sat, 25 May 2013 19:15:43 +0000"
+  split(str, f, "[ :]") # f[2]=day f[3]=month name f[4]=year f[5..7]=h m s; f is a local array
+  return mktime(f[4] " " months[f[3]] " " f[2] " " f[5] " " f[6] " " f[7])
+}
+
+BEGIN {
+  # http://stackoverflow.com/a/2123002
+  glen = length(g) # g is the group name given with -v; glen sizes the "[g]" title prefix check
+  total = split("Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec", abbr, "|")
+  for (idx = total; idx >= 1; idx--) months[abbr[idx]] = sprintf("%02d", idx) # "Jan" -> "01" ... "Dec" -> "12"
+}
+
+function run(str,    title, pub, pubunix, torrent) { # print "title sep link sep unixtime" for one item body, if it qualifies
+  title=unescape(opentag(str, "title"))
+  if (substr(title,1,glen+2) != "["g"]") return # the title must start with "[group]"
+  pub=unescape(opentag(str, "pubDate")) # read from the parameter, like the other fields
+# "date -d \""pub "\" +%s" | getline pubunix
+  pubunix=hotdate(pub)
+  if (pubunix <= timestamp) return # not newer than the given timestamp
+  torrent=unescape(opentag(str, "link"))
+  print title sep torrent sep pubunix
+}
+
+{run($0)}
diff --git a/splittags.awk b/splittags.awk
new file mode 100644
index 0000000..689f54a
--- /dev/null
+++ b/splittags.awk
@@ -0,0 +1,13 @@
+{ # print the body of every <tag>...</tag> pair found in this record, one per line (tag is set with -v)
+  str=$0
+  topen="<"tag">"
+  tclose="</"tag">"
+  len=length(tag)
+  for (;;) {
+    begin=index(str, topen)
+    end=index(str,tclose)
+    if (!(begin && end)) break # stop unless both an opening and a closing tag remain
+    print substr(str, begin+len+2, end-begin-len-2)
+    str=substr(str, end+len+3) # continue right after "</tag>"
+  }
+}