Get_Web_banner(批量获取网站banner)

时间：2019-04-19 12:52:28 阅读：325 评论：0 收藏：0 [点我收藏+]

这算是实现的第一个flag吧：批量获取网站的banner，并写入csv中。获取的info包括：状态码、url、title、Location（仅30x响应）、content-type、server。

# -*- coding: utf-8 -*-
import requests
import re
import csv
import chardet
import threading

# De-duplicated target list; populated from urls.txt by get_banner_export_csv().
targets = []
# Path of the CSV file that results are appended to.
csv_file = 'jd.csv'

# Compiled once; tolerates attributes on the tag and upper-case <TITLE>.
_TITLE_RE = re.compile(r'<title[^>]*>(.*?)</title>', re.S | re.I)


def _normalize_url(raw):
    """Return *raw* stripped, with ``http://`` prepended when it has no scheme."""
    url = str(raw).strip()
    if url and not url.lower().startswith(('http://', 'https://')):
        url = 'http://' + url
    return url


def _extract_title(html):
    """Return the text of the first <title> element, or '' when there is none."""
    match = _TITLE_RE.search(html)
    return match.group(1).strip() if match else ''


def get_banner_export_csv():
    """Fetch the banner of every URL listed in ``urls.txt`` and append it to the CSV.

    Reads ``urls.txt`` from the current directory (one URL per line, scheme
    optional), de-duplicates the entries into the module-level ``targets``
    list, requests each one without following redirects, and appends a row
    ``[status, url, title, location, content-type, server]`` to ``csv_file``.
    A failure on one URL is printed and does not stop the remaining URLs.
    """
    seen = set(targets)
    with open('urls.txt', 'r') as url_file:
        for line in url_file:
            candidate = _normalize_url(line)
            # Skip blank lines and entries that only differ by whitespace/scheme.
            if candidate and candidate not in seen:
                seen.add(candidate)
                targets.append(candidate)

    for entry in targets:
        # Idempotent, so entries placed in ``targets`` by other code are handled too.
        url = _normalize_url(entry)
        if not url:
            continue
        try:
            # One request per URL; redirects are reported, not followed.
            # verify=False skips TLS certificate validation, as in the original.
            resp = requests.get(url, timeout=(5, 20), verify=False, allow_redirects=False)
            if 'charset' not in resp.headers.get('Content-Type', ' '):
                # No charset declared by the server: guess it from the body bytes.
                resp.encoding = chardet.detect(resp.content).get('encoding')

            title = _extract_title(resp.text)
            stat_code = str(resp.status_code)
            is_redirect = stat_code.startswith('30')
            # Not every 30x response carries a Location header (e.g. 304).
            location = resp.headers.get('Location', '') if is_redirect else ''
            server = str(resp.headers.get('Server', ''))
            content_type = str(resp.headers.get('Content-Type', ''))

            shown = [stat_code, url, title]
            if is_redirect:
                shown.append(location)
            shown.extend([content_type, server])
            print(' '.join(shown))

            # Append immediately so completed rows survive an interrupted run.
            with open(csv_file, 'a+', encoding='utf-8', newline='') as f:
                csv.writer(f).writerow(
                    [stat_code, url, title, location, content_type, server]
                )
        except Exception as e:
            # Deliberate best-effort: report the failing URL and move on.
            print(url + ' ' + str(e))



def main():
    """Append the CSV header row, then run the banner scan on a worker thread.

    The header is written before the worker is started so that no data row
    can be appended to the file ahead of it.
    """
    with open(csv_file, 'a+', encoding='utf-8', newline='') as g:
        writer = csv.writer(g)
        # 'loaction' (sic) is kept as-is: the file is opened in append mode, so
        # existing output already carries this column name.
        writer.writerow(['stat_code', 'url', 'title', 'loaction', 'type', 'server'])

    thread = threading.Thread(target=get_banner_export_csv)
    thread.start()



# Script entry point: called unconditionally at module level (no __main__ guard).
main()

使用方法：

1.在当前目录下的urls.txt中放入需要获取banner的url，每行一个(可有http可无http)。

2.csv文件名/路径需要自己修改

Get_Web_banner(批量获取网站banner)

原文：https://www.cnblogs.com/P1g3/p/10735233.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)