#!/bin/bash
#
# Download every post that matches a tag search.
#
# Usage: SCRIPT REALTAGS [FAKETAGS]
#   REALTAGS  tags sent to the server as the search query
#   FAKETAGS  optional space-separated tags filtered locally; prefix a tag
#             with "-" to require that it is absent

set -o nounset

# danny wants $20 to search for more than 2 tags
# so we'll search for realtags and grep for faketags
# note that special tags like "score:>10" must be the first or second

# ${1?...} aborts with a readable usage line when no argument is given
# (instead of nounset's bare "unbound variable"), while still accepting an
# explicitly empty first argument.
realtags=${1?usage: $0 REALTAGS [FAKETAGS]}
faketags=${2:-}
# Server address and query path. The page number is appended to $webpage
# when each result page is requested.
ip="67.202.114.134" # danbooru.donmai.us
webpage="post/index?tags=${realtags}&limit=100&page="

# Total number of result pages; 0 means "not discovered yet".
pages=0

# Scratch file that holds the most recently fetched result page.
# Abort early if it cannot be created: every later step redirects into it.
tempfile=$(mktemp) || {
  printf '%s\n' "error: mktemp failed" >&2
  exit 1
}
# Name of the file currently being downloaded; empty when no download is
# in flight.
current=

#######################################
# Signal handler: discard unfinished work and stop.
# Globals:
#   current  (read) - file being downloaded, removed if non-empty
#   tempfile (read) - scratch page file, removed if set
# Returns:
#   never returns; exits the shell with status 1
#######################################
term() {
  # don't leave it incomplete so it may be redownloaded
  if [[ -n ${current:-} ]]; then
    rm -f -- "$current"
  fi
  # The scratch file would otherwise be leaked on interruption.
  if [[ -n ${tempfile:-} ]]; then
    rm -f -- "$tempfile"
  fi
  exit 1
}

trap 'term' TERM INT
# Command used for every HTTP GET; it must write the response body to
# stdout. Kept as an array so the options survive without word splitting.
get=(wget -q -erobots=off -O-) # mimic curl
#get=(curl)

#######################################
# Decide whether a post passes the client-side tag filter.
# Arguments:
#   $1 - the post's tag string as extracted from the result page
#   $2 - space-separated filters; a leading "-" marks a tag that must be
#        absent, any other filter must be present
# Returns:
#   0 if the post passes every filter (or there are none), 1 otherwise
# NOTE(review): filters are matched as plain substrings of $1, so "hair"
# also matches "long_hair". Confirm whether whole-tag matching is wanted.
#######################################
matches_faketags() {
  local tags=$1 filter
  local -a filters=()
  IFS=' ' read -r -a filters <<< "$2"
  # The ${a[@]+...} form keeps an empty list safe under nounset on old bash.
  for filter in ${filters[@]+"${filters[@]}"}; do
    if [[ $filter == -* ]]; then
      [[ $tags != *"${filter:1}"* ]] || return 1
    else
      [[ $tags == *"$filter"* ]] || return 1
    fi
  done
  return 0
}

page=1
while true; do
  if ! "${get[@]}" "http://$ip/${webpage}${page}" > "$tempfile"; then
    printf 'warning: could not fetch result page %s\n' "$page" >&2
  fi

  # One post per matching line. The list is read on fd 3 so that the
  # commands inside the loop cannot consume it through stdin.
  while IFS= read -r -u 3 post; do
    tags=$(grep -oP '(?<=tags":")([^"]+)' <<< "$post")
    [[ -n $tags ]] || continue
    matches_faketags "$tags" "$faketags" || continue

    url=$(grep -oP '(?<=file_url":")([^"]+)' <<< "$post")
    [[ -n $url ]] || continue

    # The file name is the fifth "/"-separated field of the URL.
    name=$(cut -d/ -f5 <<< "$url")
    [[ -n $name ]] || continue

    # Publish the in-flight file name for the signal handler only while
    # the download is running, so a finished file is never deleted.
    current=$name
    printf '%s\n' "$name"
    if ! "${get[@]}" "$url" > "$name"; then
      printf 'warning: download of %s failed\n' "$url" >&2
      rm -f -- "$name" # don't keep an empty or partial file
    fi
    current=
  done 3< <(grep 'Post\.register({' "$tempfile")

  if (( ${pages:-0} == 0 )); then
    # first iteration, discover pagecount
    pages=$(grep -oPm1 \
      '(?<=>)\d+(?=</a> <a href="/post/index[^"]+" >>>)' \
      "$tempfile" | head -n 1)
    # No pager link found: treat the result as a single page.
    pages=${pages:-1}
  fi

  page=$((page + 1))
  if (( page > pages )); then break; fi
done

rm -f -- "$tempfile"