#!/usr/bin/env bash
#
# Recursive curl crawler.
#
# Walks an HTTP directory index ("Index of /..." style listing) starting at
# $Url, records every file URL it finds in $DownListFile, and then downloads
# the entries whose URL matches $DownFileType into /tmp/<host>/<path>.
#
# Usage: <script> [ROOT_URL]
#   ROOT_URL  directory index to start from (default: the CNNIC Apache mirror)
#
# Requires: bash, curl, awk, grep, mkdir, mv, rm.

set -uo pipefail

PATH=/bin:/sbin:/usr/bin:/usr/sbin:/usr/local/bin:/usr/local/sbin
export PATH

Url="${1:-http://mirrors.cnnic.cn/apache/}"
# Relative links are appended to the page URL, so it has to end in "/".
[[ "$Url" == */ ]] || Url="${Url}/"
# Fixed paths are kept on purpose: they are the script's visible outputs.
DownListFile="/tmp/downlist.txt"     # every file URL discovered by the crawl
DownListTmpFile="/tmp/tmplist.txt"   # queue of URLs still to be classified
DownFileType='zip$|gz$'              # ERE selecting which files get downloaded
DownList=""                          # newline-separated URLs picked for download
UrlBack="$Url"                       # directory page currently being listed
Urls=""                              # links extracted from $UrlBack

# Print a diagnostic on stderr.
warn() {
  printf '%s\n' "$*" >&2
}

# Print a diagnostic on stderr and abort the script.
die() {
  warn "$*"
  exit 1
}

#######################################
# Fetch $UrlBack and extract the relative links it contains.
# Globals:   UrlBack (read), Urls (written: one link per line, may be empty)
# Returns:   0 on success, 1 when the page cannot be fetched
#######################################
CURL_URLS() {
  local page
  Urls=""
  if ! page=$(curl -fsS -- "$UrlBack"); then
    warn "warning: cannot fetch $UrlBack"
    return 1
  fi
  # Every field after the first starts right behind an `a href="`; the link is
  # the text up to the closing quote. Only links that stay below the current
  # directory are kept, otherwise the crawl would climb out of the root or
  # loop: sort links (?C=N;O=D), anchors, absolute paths, "../" and links
  # carrying a scheme (http:, https:, mailto:, ...) are dropped.
  Urls=$(
    printf '%s\n' "$page" | awk -F 'a href="' '
      {
        for (i = 2; i <= NF; i++) {
          q = index($i, "\"")
          if (q == 0) continue
          link = substr($i, 1, q - 1)
          while (substr(link, 1, 2) == "./") link = substr(link, 3)
          if (link == "") continue
          first = substr(link, 1, 1)
          if (first == "?" || first == "#" || first == "/") continue
          if (index(link, "../") > 0) continue
          if (link ~ /[ \t]/) continue
          if (link ~ /^[A-Za-z][A-Za-z0-9+.-]*:/) continue
          print link
        }
      }'
  )
  return 0
}

#######################################
# List $UrlBack and queue each of its links as an absolute URL.
# Globals:   UrlBack (read), Urls (written), DownListTmpFile (appended)
# Returns:   0 on success, 1 when the page cannot be fetched
#######################################
URL_LIST() {
  local link
  CURL_URLS || return 1
  [[ -n "$Urls" ]] || return 0
  while IFS= read -r link; do
    printf '%s\n' "${UrlBack}${link}"
  done <<< "$Urls" >> "$DownListTmpFile"
}

#######################################
# Drain the queue in $DownListTmpFile level by level: URLs ending in "/" are
# directories and get listed in turn, everything else is recorded as a file.
# Globals:   DownListTmpFile (read/written), DownListFile (appended),
#            UrlBack (written)
# Outputs:   "no more page for search" on stdout once the queue is empty
# Returns:   0
#######################################
RECURSIVE_SEARCH_URL() {
  local pending entry
  while [[ -s "$DownListTmpFile" ]]; do
    # Snapshot this level and empty the queue; URL_LIST refills it with the
    # next level while the snapshot is being processed.
    pending=$(< "$DownListTmpFile")
    : > "$DownListTmpFile"
    # fd 3 keeps the loop input away from the stdin of the commands inside.
    while IFS= read -r entry <&3; do
      [[ -n "$entry" ]] || continue
      if [[ -n "${entry##*/}" ]]; then
        printf '%s\n' "$entry" >> "$DownListFile"
      else
        UrlBack="$entry"
        URL_LIST || true   # an unreadable directory is reported and skipped
      fi
    done 3<<< "$pending"
  done
  echo "no more page for search"
}

#######################################
# Download every recorded URL matching $DownFileType to /tmp/<host>/<path>,
# skipping files that already exist.
# Globals:   DownFileType, DownListFile (read), DownList (written)
# Returns:   0 when all downloads succeeded (or none were needed), 1 otherwise
#######################################
DOWNLOAD_FILE() {
  local url file_path dir
  local failed=0
  if ! DownList=$(grep -E -- "$DownFileType" "$DownListFile"); then
    printf 'no file matching %s to download\n' "$DownFileType"
    return 0
  fi
  while IFS= read -r url <&3; do
    [[ -n "$url" ]] || continue
    file_path="/tmp/${url#*//}"
    [[ -f "$file_path" ]] && continue
    dir="${file_path%/*}"
    if ! mkdir -p -- "$dir"; then
      warn "warning: cannot create $dir"
      failed=1
      continue
    fi
    # Download next to the target and rename on success, so an interrupted
    # transfer is retried on the next run instead of being taken as complete.
    if curl -f -o "${file_path}.part" -- "$url"; then
      mv -f -- "${file_path}.part" "$file_path" || failed=1
    else
      warn "warning: download failed: $url"
      rm -f -- "${file_path}.part"
      failed=1
    fi
  done 3<<< "$DownList"
  return "$failed"
}

# Crawl the tree below $Url, then download the matching files.
main() {
  if [[ -t 1 ]]; then
    clear
  fi
  : > "$DownListFile" || die "cannot write $DownListFile"
  : > "$DownListTmpFile" || die "cannot write $DownListTmpFile"
  UrlBack="$Url"
  URL_LIST || die "cannot read directory index: $Url"
  RECURSIVE_SEARCH_URL
  DOWNLOAD_FILE
}

main
# Exit explicitly with main's status so nothing after this line is interpreted.
exit $?
# 本文章来自源码世界 http://www.ymsky.net/views/1343.shtml
# CURL版递归爬虫下载软件脚本,布布扣,bubuko.com
# 原文:http://blog.csdn.net/long7181226/article/details/22307979