#!/bin/bash
# downloads files from fileplanet by a span of numeric IDs
#
# USAGE:
# $ bash download_pages_and_files_from_fileplanet.sh 1 123
#   would try to download all files and their download pages
#   with the IDs 1 to 123
#
# Files will be downloaded to ./FIRSTID-LASTID/www.fileplanet.com/NUMERICID/download/
#
# Random thoughts:
# We can go with http://www.fileplanet.com/NUMERICID/download/
# For the numeric ID we can use both the 012345 and 12345 formats.
# We will be using the one without the leading zeros, since that is how
# Fileplanet links internally.

if [ $# -ne 2 ]; then
    echo "USAGE: $0 FIRSTID LASTID"
    exit 1
fi

echo "You will be downloading $1 to $2, you rock!"
echo "Let's go!"

mkdir logs
mkdir "$1-$2"
cd "$1-$2/" || exit 1

for i in $(seq "$1" "$2")
do
    echo "Trying to download $i"
    downloadpageurl="www.fileplanet.com/${i}/download/"

    # fileplanet returns a "302 Found" for non-existing IDs, redirecting to
    # "Location: /error/error.shtml?aspxerrorpath=/autodownload.aspx".
    # We don't want those pages, hence "--max-redirect=0".
    wget -nv -a "pages_$1_$2.log" --force-directories --max-redirect=0 "http://${downloadpageurl}"

    # Extract the session download link to the actual file we want.
    # The URL is enclosed in single quotes: the second grep grabs everything
    # from "http" up to the last single quote on the line, and rev|cut|rev
    # strips that trailing quote. A (hypothetical) matching line could look like:
    #   ... id="default-file-download-link" ... window.location='http://download.fileplanet.com/somefile.zip' ...
    linktowget=$(grep default-file-download-link "${downloadpageurl}index.html" 2>/dev/null | grep -Eo "http.*'" | rev | cut -c 2- | rev)

    if [ -z "${linktowget}" ]; then
        echo "No download link found."
    else
        echo "Download link found, downloading ${linktowget}"
        # download the file to the same directory as its download page HTML
        wget -nv -a "files_$1_$2.log" --directory-prefix="${downloadpageurl}" --referer="http://${downloadpageurl}" "${linktowget}" || echo "ERROR! If you see more than one, please Ctrl-C, check the files log and tell Schbirid!"
    fi
    echo "-----"
done

echo "Downloading finished! Yay!"

echo -n "Counting files: "
numberoffiles=$(ls -1 www.fileplanet.com/ | wc -l)
echo "$numberoffiles"

echo -n "Getting the size: "
sizeofchunk=$(du -hs www.fileplanet.com/ | sed 's/\twww.*//')
echo "$sizeofchunk"

cd ..

# just a handy local backup
cp "$1-$2"/*.log logs/

echo "TARring!"
tar -cf "$1-$2.tar" "$1-$2/" && echo "TARring was a success. Now removing the directory." && rm -r "$1-$2/"

grep "ERROR" logs/*"$1_$2.log" || echo "Done. YAAAY!"

echo "Here is copy'n'pastable meta for the wiki:"
echo "|-"
echo "| $1-$2"
echo "| Done, locally"
echo "| $numberoffiles"
echo "| $sizeofchunk"
echo "| insert nick name here"
echo "|-"

echo "And here is the s3cmd line:"
echo "s3cmd --add-header x-archive-auto-make-bucket:1 --add-header \"x-archive-meta-description:Files from Fileplanet (www.fileplanet.com), all files from the ID range $1 to $2.\" put logs/*_$1_$2.log $1-$2.tar s3://FileplanetFiles_$1-$2/"
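
# ---------------------------------------------------------------------------
# Example run, for reference (the ID range below is hypothetical; the paths
# simply follow the layout the script builds above):
#
# $ bash download_pages_and_files_from_fileplanet.sh 50000 50099
#
# While the loop runs, download pages land in
#   ./50000-50099/www.fileplanet.com/<ID>/download/index.html
# with the downloaded files next to their pages. Afterwards the wget logs are
# copied to ./logs/, the chunk directory is packed into ./50000-50099.tar and
# removed, and the script prints the wiki table row plus the s3cmd upload line.
# ---------------------------------------------------------------------------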