Merge remote-tracking branch 'danbooru_scrape/master'

This commit is contained in:
Connor Olding 2018-10-11 16:45:30 +02:00
commit 52ee141c09
2 changed files with 84 additions and 0 deletions

44
boorufind Executable file
View File

@ -0,0 +1,44 @@
#!/bin/zsh
# boorufind: page through a Danbooru tag search.
# usage: boorufind TAGS

# Tag query, taken verbatim from the first argument.
q="$1"

# Endpoint queried for each page of results.
url="http://danbooru.donmai.us/posts.xml"

# Shorthand for the curl option that URL-encodes a form field.
de="--data-urlencode"

# Base curl command line shared by every request: quiet but still reporting
# errors, 32 s overall limit, 8 s connect limit, three retries 1 s apart.
curl=(
  curl -sS
  -m 32
  --connect-timeout 8
  --retry 3
  --retry-delay 1
)
# Print a message on stderr and terminate the script.
# Arguments: the words of the message; they are joined with single spaces.
# Outputs:   the message followed by a newline, on stderr.
# Exits with status 1.
die() {
  # printf with a fixed format prints the text verbatim: no backslash
  # processing, and a message such as "-n" is not mistaken for an option
  # (which is what happened with echo).
  printf '%s\n' "$*" >&2
  exit 1
}
# Report the post currently held in the globals md5/ext/size, then clear them.
# Globals: md5, ext, size (read, then reset to empty)
# Outputs: on stdout, the path of the first file below the current directory
#          named <md5>.<ext>, or "#<md5>.<ext>" when no such file is found;
#          on stderr, "missing value" when any of the three fields is empty.
# Returns: 0, also for an incomplete record.
push() {
  local fn found

  if [[ -z $md5 || -z $ext || -z $size ]]; then
    printf '%s\n' 'missing value' >&2
    # Discard the partial record; otherwise its fields would survive and
    # could be combined with those of the next post.
    md5= ext= size=
    return 0
  fi

  fn=$md5.$ext
  # -quit stops at the first match; find's own diagnostics are not wanted.
  found=$(find . -name "$fn" -print -quit 2>/dev/null)

  # printf '%s' prints the path as-is, without interpreting backslashes.
  if [[ -n $found ]]; then
    printf '%s\n' "$found"
  else
    printf '%s\n' "#$fn"
  fi

  md5= ext= size=
}
# Walk the result pages one at a time until a page yields no post fields.
# This relies on zsh running the last stage of a pipeline in the current
# shell: $once and the md5/ext/size fields assigned inside the inner loop
# must still be visible after the pipeline, and to push.
page=1
while true; do
  # Set to 1 as soon as any /posts/post/... line is seen on this page.
  once=0
  "${curl[@]}" -G -d limit=100 -d "page=$page" "$de" "tags=$q" "$url" \
    | xml2 2>/dev/null \
    | while IFS== read -r tree val; do
        # A line whose path is exactly /posts/post (presumably what xml2
        # prints between two consecutive posts) ends the current record.
        [[ $tree == /posts/post ]] && push && continue
        # Only direct or nested children of a post are of interest.
        [[ $tree == /posts/post/* ]] || continue
        tree=${tree#/posts/post/}
        once=1
        case $tree in
          (md5) md5=$val ;;
          (file-ext) ext=$val ;;
          (file-size) size=$val ;;
        esac
      done
  # zsh reports per-stage exit codes in the 1-indexed array $pipestatus.
  # The bash-style $PIPESTATUS is not set in zsh, so the original check
  # could not see curl's status. This must stay directly after the pipeline.
  [[ ${pipestatus[1]} -eq 0 ]] || die "curl failed on page $page"
  (( once )) || exit 0 # no posts left, well done
  # The last post of a page has no terminating separator line.
  push
  (( page++ ))
done

40
boorugrab Executable file
View File

@ -0,0 +1,40 @@
#!/bin/zsh
# boorugrab: fill a directory with the files named on stdin.
# usage: boorugrab DIR < list

# Destination directory, taken verbatim from the first argument.
dir="$1"

# Prefix to which a file name is appended to form its download URL.
url="http://danbooru.donmai.us/data/"

# Base curl command line for downloads: quiet but still reporting errors,
# 900 s overall limit, 8 s connect limit, three retries 1 s apart.
curl=(
  curl -sS
  -m 900
  --connect-timeout 8
  --retry 3
  --retry-delay 1
)
# Interrupt handler: discard the file named by $md5e, then stop.
# Globals: dir, md5e (read)
# Outputs: "<md5e> - canceled", written over the previous terminal line,
#          when a file was removed.
# Exits with status 1.
cleanup() {
  # Act only when an item is actually named. With an empty $md5e the path
  # would be "$dir/" itself, which exists, and the handler would try to
  # remove the directory and print a nameless "canceled" line.
  if [[ -n $md5e && -e $dir/$md5e ]]; then
    rm -f -- "$dir/$md5e"
    # ESC[F moves to the start of the previous line, ESC[K erases it.
    printf '\033[F\033[K%s - canceled\n' "$md5e"
  fi
  exit 1
}
# Run cleanup when the script is interrupted (Ctrl-C).
trap cleanup INT
# Stop immediately if the destination cannot be created. This also covers a
# missing directory argument, which previously let the script carry on with
# paths of the form "/<name>". mkdir prints its own diagnostic.
mkdir -p -- "$dir" || exit 1
# Replace the preceding progress line with an item's final outcome.
# Arguments: $1 - item name, $2 - outcome text
# ESC[F moves to the start of the previous line, ESC[K erases it.
finish() {
  printf '\033[F\033[K%s - %s\n' "$1" "$2"
}

# Process one item per stdin line:
#   "#<name>"  - download <name> from $url into $dir
#   any other  - treat the line as a path and copy that file into $dir,
#                unless $dir already holds it
# $md5e names the file being written and is what the interrupt handler
# removes, so it is set only while a transfer is in progress.
while IFS= read -r; do
  # A blank line names nothing.
  [[ -n $REPLY ]] || continue

  if [[ $REPLY == '#'* ]]; then
    md5e=${REPLY#\#}
    printf '%s - downloading...\n' "$md5e"
    # --fail makes curl return an error on an HTTP error status instead of
    # saving the server's error page under the file's name.
    if "${curl[@]}" --fail "$url$md5e" > "$dir/$md5e"; then
      finish "$md5e" 'downloaded!'
    else
      # Report the failure only; the original went on to print
      # "downloaded!" over it.
      rm -f -- "$dir/$md5e"
      finish "$md5e" 'failed'
    fi
  else
    name=${REPLY##*/}
    if [[ $REPLY -ef $dir/$name ]]; then
      printf '%s\n' "$name - skipping (ef)"
      continue
    fi
    if [[ -s $dir/$name ]]; then
      printf '%s\n' "$name - skipping (s)"
      continue
    fi
    # Skipped files are never assigned to md5e, so an interrupt cannot
    # delete a file that was already complete.
    md5e=$name
    printf '%s - copying...\n' "$md5e"
    if cp -- "$REPLY" "$dir"; then
      finish "$md5e" 'copied!'
    else
      # The destination was empty or absent before the copy (checked
      # above), so removing a partial result loses nothing.
      rm -f -- "$dir/$md5e"
      finish "$md5e" 'failed'
    fi
  fi

  # The transfer is over: an interrupt while waiting for the next line must
  # not remove the file that was just completed.
  md5e=
done