首页 > 编程语言 > 详细

Python爬取b站视频

时间:2020-12-02 12:50:15      阅读:23      评论:0      收藏:0      [点我收藏+]
import requests
import re
import random


class BLBL(object):
    def __init__(self, url, cookie, referer):
        # 需要爬取的网页前缀 例如:https://www.bilibili.com/video/av49035382       ?from=search&seid=1058195128616882249
        self.base_url = url
        # cookie内容
        self.cookie = cookie
        # referer内容
        self.referer = referer
        # 请求头信息
        self.accept = text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3
        self.accept_Encoding = gzip, deflate, br
        self.accept_Language = zh-CN,zh;q=0.9,en;q=0.8
        self.user_agent = "User-Agent:Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) "

    def html(self):
        # 访问起始网页需添加的请求头,不加的话,得不到完整的源代码(反爬)
        base_headers = {
            Accept: self.accept,
            Accept-Encoding: self.accept_Encoding,
            Accept-Language: self.accept_Language,
            Cache-Control: max-age=0,
            Connection: keep-alive,
            Cookie: self.cookie,
            Host: www.bilibili.com,
            Referer: self.referer,
            Upgrade-Insecure-Requests: 1,
            User-Agent: self.user_agent
        }
        # 请求网页
        base_response = requests.get(self.base_url, headers=base_headers)
        print(base_headers)
        # 获取网页html代码
        html = base_response.text
        # print(html.headers)
        return html

    def xin_xi(self, html):
        print(html)
        try:
            # 获取视频名称
            video_name = re.search(<title>(.+)</title>, html, re.S).group(1) + .flv
        except:
            # 如果获取失败,就随机一个名字
            video_name = str(random.randint(100000,1000000))+.flv
        print(video_name)
        # 获取视频链接
        download_url = re.search(r("url":"|"baseUrl":"|"backupUrl":\[")(.+?)("|"]), html, re.S).group(2)
        print(download_url,111)
        # 获取主机信息
        host = re.search(r//(.+\.com), download_url, re.S).group(1)
        print(host)
        return video_name, download_url, host

    def video(self, html):
        # 获取视频名称,视频网址,主机
        video_name, download_url, host = self.xin_xi(html)
        # 请求视频下载地址时需要添加的请求头
        download_headers = {
            User-Agent: self.user_agent,
            Referer: self.referer,
            Origin: https://www.bilibili.com,
            Host: host,
            Accept: self.accept,
            Accept-Encoding: self.accept_Encoding,
            Accept-Language: self.accept_Language
        }
        # 获取视频资源,并写入文件
        with open(video_name, wb) as f:
            f.write(requests.get(download_url, headers=download_headers, stream=True, verify=False).content)

    def run(self):
        html = self.html()
        self.video(html)
        print(爬取成功)


if __name__ == __main__:
    url=input("请输入网址:")
    cookie = "" //这里要自己登录b站后得到打开控制台得到自己的cookie
    blbl = BLBL(url, cookie, referer)
    blbl.run()

 

Python爬取b站视频

原文:https://www.cnblogs.com/shiguanggege/p/14072636.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!