首页 > 编程语言 > 详细

python 爬取媒体文件(无防火墙)

时间:2019-05-11 00:22:47      阅读:141      评论:0      收藏:0      [点我收藏+]
# coding: utf-8
import requests
import pandas as pd 
import os,time

# Directory under which one sub-directory per customer id is created.
root_path = "./根目录/"
# Excel workbook that lists the customer ids and voice ids to fetch.
input_file = "码表.xlsx"
# Download URL template; %s is replaced by the voice id.
# No spaces around "=": they would become part of the query parameter name.
url = "http://api.map.baidu.com/geocoder/v2/?id=%s&local=1"
# Voice ids whose download failed are appended to this file, one per line.
fail_file = root_path + "fail.csv"

class Auto_down:
    """Download one media file per (customer id, voice id) row of a workbook.

    Relies on the module-level settings ``root_path``, ``input_file``,
    ``url`` and ``fail_file``.
    """

    def __init__(self):
        print("--start--")

    def read_excel(self):
        """Read the id columns from ``input_file`` and process every row."""
        # Converters make pandas read the named column as str, so leading
        # zeros are not dropped.
        sheet = pd.read_excel(
            input_file,
            converters={u"列名": str},
            # `sheet_name` is the current keyword; the old `sheetname`
            # spelling is rejected by recent pandas releases.
            sheet_name="子表名",
        )
        for custid, voiceid in zip(sheet["cust_id"], sheet["void_id"]):
            self.create_file(custid, voiceid)

    def download_voice(self, custid_filename, voiceid):
        """Fetch one voice file and save it as ``<custid_filename>/<voiceid>.mp3``.

        A non-200 response, a request error or a file-system error is
        recorded in ``fail_file`` instead of being raised, so one bad id
        does not stop the rest of the batch.
        """
        print(voiceid)
        try:
            # Without a timeout a stalled server would block the run forever.
            r = requests.get(url % voiceid, timeout=30)
            if r.status_code != 200:
                self._record_failure(voiceid)
                return
            voice_filename = "%s/%s.mp3" % (custid_filename, voiceid)
            with open(voice_filename, "wb") as fd:
                fd.write(r.content)
        except (requests.RequestException, OSError):
            print("request url is fail!!")
            self._record_failure(voiceid)

    def _record_failure(self, voiceid):
        """Append *voiceid* to ``fail_file`` on its own line."""
        with open(fail_file, "a+") as ff:
            # %-formatting also works when the id is not a str.
            ff.write("%s\n" % voiceid)

    def create_file(self, custid, voiceid):
        """Ensure the customer's directory exists, then download the voice file."""
        custid_filename = root_path + str(custid)
        # makedirs also creates root_path itself when it is missing.
        os.makedirs(custid_filename, exist_ok=True)
        # Download even when the directory was just created, so that the
        # first voice id of each customer is not skipped.
        self.download_voice(custid_filename, voiceid)

if __name__ == "__main__":
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    tStart = time.perf_counter()

    AD = Auto_down()
    AD.read_excel()

    tEnd = time.perf_counter()

    # Report the total run time in seconds.
    print("%s s" % (tEnd - tStart))

 

# coding: utf-8
import requests

# Directory the downloaded file and the failure log are written to.
root_path = "./下载/"

# Address to download; empty in this snippet (presumably a placeholder).
url = ""
fail_file = root_path + "fail.csv"
voiceid = 11111
# Up to three attempts; stop as soon as one of them succeeds.
for i in range(3):
    try:
        # Without a timeout a stalled server would block the script forever.
        r = requests.get(url, timeout=30)
        return_code = r.status_code
        if return_code == 200:
            voice_filename = root_path + "dada.fdf"
            with open(voice_filename, "wb") as fd:
                fd.write(r.content)
            break
        with open(fail_file, "a+") as ff:
            # voiceid is an int, so format it instead of concatenating.
            ff.write("%s\n" % voiceid)
    except (requests.RequestException, OSError):
        print("fail")
        with open(fail_file, "a+") as ff:
            ff.write("%s\n" % voiceid)

 

 

r = requests.get(url)
r.status_code 获取响应状态码
r.text 获取响应内容
r.headers 获取响应头
r.encoding 获取响应编码
r.content 获取二进制响应内容
r.json() 获取JSON响应内容

python 爬取媒体文件(无防火墙)

原文:https://www.cnblogs.com/smuxiaolei/p/10847369.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!