第一版,比较粗糙,仅限于能用
正在写入的文件不能直接用 tar 压缩(tar 会报 "file changed as we read it",如下面的输出所示),因此脚本先把日志复制一份,再压缩这份副本。
--------压缩日志----------------------
94 access.log
95 tar: access.log: file changed as we read it
96 #### 压缩日志失败 ####
#!/bin/bash
#
# Analyse the nginx access log, mail a report and rotate the log.
#
# Steps (every report line is appended to $MESSAGE):
#   1. copy the live access log to a snapshot in the backup directory,
#   2. report top IPs / URLs / minutes, HTTP status classes and user-agent
#      statistics computed from that snapshot,
#   3. tar+gzip the snapshot, truncate the live log, delete the snapshot,
#   4. mail the report.
#
# The interpreter is bash, not sh: the script uses arrays, [[ ]] and (( )).
# `set -e` is deliberately not enabled, because grep exits 1 on "no match",
# which is a normal outcome for the statistics below.

DATE=$(date '+%Y%m%d-%H%M')
readonly DATE
readonly BACKUP_DIR=/usr/log_bak
readonly ARCHIVE="${BACKUP_DIR}/nginx_${DATE}.tar.gz"
readonly MESSAGE="${BACKUP_DIR}/Nginx_Analysis${DATE}"
readonly FILENAME=/data/nginx/logs/access.log
readonly BACKNAME="${BACKUP_DIR}/nginx_bak"
readonly MAIL_TO=666@qq.com

#######################################
# Append one line to the report file.
# Globals:   MESSAGE (read)
# Arguments: words to write, joined by single spaces
#######################################
report() {
  printf '%s\n' "$*" >> "$MESSAGE"
}

#######################################
# Print part/whole as a percentage with two decimals, e.g. "12.50%".
# Prints "0.00%" when whole is 0 or empty instead of dividing by zero.
# Arguments: $1 - numerator, $2 - denominator
# Outputs:   the percentage on stdout, without a trailing newline
#######################################
percent() {
  # Values are passed with -v rather than spliced into the awk program text.
  awk -v part="${1:-0}" -v whole="${2:-0}" 'BEGIN {
    if (whole + 0 == 0) { printf "0.00%%"; exit }
    printf "%.2f%%", (part / whole) * 100
  }'
}

#######################################
# Mail the finished report.
# Globals: MESSAGE, MAIL_TO (read)
#######################################
Mail() {
  mail -s "***Nginx Report***" "$MAIL_TO" < "$MESSAGE"
}

#######################################
# Count snapshot lines matching each user-agent / request pattern and
# report count and share of all lines.
# HTTP status codes are not handled here; Check_http_status reports them.
# Globals: BACKNAME, total_lines (read)
#######################################
Bowser() {
  # Each entry is "label|pattern". Patterns are basic regular expressions
  # (plain grep), so ".*" is a wildcard and "\." is a literal dot.
  local -a agents=(
    # Search-engine crawlers
    'Google Browser|Googlebot.*google.com/bot.html'
    'Baidu Browser|Baiduspider.*baidu.com/search/spider.html'
    'Bing Browser|bingbot.*bing.com/bingbot.htm'
    # Not enabled: Soso    'Sosospider.*soso.com/webspider.htm'
    # Not enabled: Youdao  'YoudaoBot.*youdao.com/help/webmaster/spider/'
    # Not enabled: Yahoo   'Yahoo! Slurp China'

    # Desktop browsers and operating systems
    'MSIE|MSIE'
    'Firefox|Gecko/.*Firefox'
    'Webkit|AppleWebKit.*like Gecko'
    'Opera|Opera.*Presto'
    'Windows 7 访问|Windows NT 6.1'
    'Mac OS X 访问|Macintosh; Intel Mac OS X'
    'Linux with X11|X11.*Linux'
    'Android|Android;'
    # Other Windows versions: 2000 'Windows NT 5.0', XP 'Windows NT 5.1',
    # Vista 'Windows NT 6.0'. Not enabled: 'SymbianOS'.

    # Mobile devices
    'iPad 访问|iPad.*like Mac OS X'
    'Nokia|Nokia'
    'Nokia5800 XpressMusic|Nokia5800'
    # Not enabled: iPhone 'iPhone.*like Mac OS X'

    # Requested file types (dot escaped so only real extensions match)
    '访问 mp3 file|GET /.*\.mp3 HTTP'
    '访问 jpg file|GET /.*\.jpg HTTP'
  )
  local entry label pattern count
  local failed=0

  report "----浏览器来源----"
  report "--浏览器-----总计------占比--"
  for entry in "${agents[@]}"; do
    label=${entry%%|*}
    pattern=${entry#*|}
    # grep exits 1 for "no match" (fine) and >1 for a real error.
    count=$(grep -c -- "$pattern" "$BACKNAME") || (( $? == 1 )) || failed=1
    report "${label} ${count:-0} $(percent "${count:-0}" "$total_lines")"
  done

  if (( failed == 0 )); then
    report "分析浏览器标示成功"
  else
    report "分析浏览器标示失败"
  fi
  report "--------------------"
}

#######################################
# Report how many requests fall into each HTTP status class (1xx..5xx).
# Globals: BACKNAME (read)
#######################################
Check_http_status() {
  local -a classes=('100+' '200+' '300+' '400+' '500+')
  local -a codes=()
  local idx

  # Extract every `HTTP/x.y" NNN` fragment and classify NNN.
  # Output: five per-class counters followed by their sum.
  read -r -a codes < <(
    grep -ioE 'HTTP/[0-9]\.[0-9]"[[:blank:]][0-9]{3}' "$BACKNAME" \
      | awk '
          {
            code = $2 + 0
            if      (code >= 100 && code < 200) c1++
            else if (code >= 200 && code < 300) c2++
            else if (code >= 300 && code < 400) c3++
            else if (code >= 400 && code < 500) c4++
            else if (code >= 500)               c5++
          }
          END { print c1 + 0, c2 + 0, c3 + 0, c4 + 0, c5 + 0, c1 + c2 + c3 + c4 + c5 }
        '
  )

  report "--HTTP状态码---COUNT---PERCENT------"
  for idx in 0 1 2 3 4; do
    report "status[${classes[idx]}]:--${codes[idx]:-0}--$(percent "${codes[idx]:-0}" "${codes[5]:-0}")"
  done
  report "----所有的状态码: ${codes[5]:-0}----"
}

#######################################
# Report the number of distinct client IPs and the 20 most frequent
# client IPs, request URLs and minutes of the day (HH:MM).
# Globals: BACKNAME (read)
#######################################
IpUrlTime() {
  local unique_ips top_ips top_urls top_minutes

  unique_ips=$(awk '{print $1}' "$BACKNAME" | sort -u | wc -l)
  report "来源IP共--${unique_ips}--个"

  top_ips=$(awk '{print $1}' "$BACKNAME" | sort | uniq -c | sort -nr | head -n 20)
  report "----访问前20个IP统计----"
  report "$top_ips"

  # Field 7 is the request path in the default combined log format.
  top_urls=$(awk '{print $7}' "$BACKNAME" | sort | uniq -c | sort -nr | head -n 20)
  report "----访问前20个URL统计----"
  report "$top_urls"

  # Field 4 looks like "[10/Oct/2000:13:55:36"; characters 14-18 are HH:MM.
  top_minutes=$(awk '{print $4}' "$BACKNAME" | cut -c 14-18 \
    | sort | uniq -c | sort -nr | head -n 20)
  report "----访问前20个时间点统计----"
  report "$top_minutes"
}

#######################################
# Entry point: snapshot, analyse, archive, truncate, mail.
# Exits non-zero when a fatal step fails.
#######################################
main() {
  local server_ip log_size

  # NOTE(review): matches the legacy net-tools "inet addr:" output only;
  # newer ifconfig formats yield an empty address here.
  server_ip=$(ifconfig | grep 'inet addr:' | grep -v '127.0.0.1' \
    | awk -F '[ :]+' '{print $4}')
  report "--------Server ${server_ip}---------------"
  report "--------$(df -h)---------------"

  if cd "$BACKUP_DIR"; then
    report "进入目录${BACKUP_DIR}"
  else
    report "####进入目录失败,退出####"
    printf 'cannot enter %s\n' "$BACKUP_DIR" >&2
    exit 1
  fi

  report "---------------------"
  report "备份日志:" "$(date +"%y-%m-%d %H:%M:%S")"
  report "---------------------"

  # Work on a copy: tar refuses a file that changes while it is being read.
  if cp -- "$FILENAME" "$BACKNAME"; then
    report "日志复制成功"
  else
    report "####日志复制失败,退出####"
    exit 1
  fi

  report "-------------------------------"
  report "分析时间:" "$(date +"%y-%m-%d %H:%M:%S")"
  report "-------------------------------"

  # Global on purpose: Bowser uses it as the denominator for its shares.
  total_lines=$(wc -l < "$BACKNAME")
  log_size=$(ls -sh -- "$BACKNAME" | awk '{print $1}')
  report "nginx日志,${log_size},一共${total_lines}行 "

  IpUrlTime
  Check_http_status
  Bowser

  report "--------压缩日志----------------------"
  # Relative member name keeps the archive free of absolute paths; the
  # working directory is BACKUP_DIR at this point.
  if tar czvf "$ARCHIVE" "${BACKNAME##*/}" >> "$MESSAGE" 2>&1; then
    report "[$ARCHIVE] 日志压缩成功!"
    # NOTE(review): lines nginx writes between the cp above and this
    # truncation are lost; confirm that is acceptable.
    if true > "$FILENAME"; then
      report "清空日志清空日志成功"
      # Only the compressed archive needs to be kept.
      rm -f -- "$BACKNAME"
    else
      report "###清空日志失败 Failed #####"
    fi
  else
    report "#### 压缩日志失败 ####"
    exit 1
  fi

  report "---------------------"
  report "结束时间:" "$(date +"%y-%m-%d %H:%M:%S")"
  report "---------------------"
  Mail
}

main "$@"
原文:http://www.cnblogs.com/taiguyiba/p/6741525.html