# -*- coding: utf-8 -*-
import html
import re
import time
import urllib.request

import selenium
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait


class WeiBoVdeioDown(object):
    """Download the videos listed in a Weibo user's video album.

    A Firefox session opens the album page, follows every entry that links
    to the Weibo video player, extracts the real media URL from the player
    page and finally downloads each file with ``urllib``.
    """

    # Album entries link to an intermediate player page, not to the media.
    _PLAYER_PREFIX = "http://video.weibo.com"
    # Non-greedy on purpose: stop at the quote that closes the src attribute.
    _VIDEO_SRC_RE = re.compile(r'<video src="([^"]*)"')
    # Characters that cannot appear in a file name (Windows rules).
    _BAD_NAME_CHARS_RE = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self, url, fileName):
        """Store the album URL and the local path prefix.

        :param url: address of the user's video album page.
        :param fileName: prefix put in front of every saved file; the
            video caption and ``.mp4`` are appended to it verbatim.
        """
        self.url = url
        self.fileName = fileName

    @classmethod
    def _safe_file_name(cls, name):
        """Return *name* with characters that are illegal in file names replaced."""
        cleaned = cls._BAD_NAME_CHARS_RE.sub("_", name).strip()
        return cleaned or "video"

    def __get_fileName_and_down_url(self):
        """Collect the caption and final media URL of every album video.

        :return: dict mapping the video caption to its media URL; the URL
            is an empty string when it could not be resolved.
        """
        self.driver = webdriver.Firefox()
        try:
            self.driver.get(self.url)
            WebDriverWait(self.driver, 200, 0.5).until(
                EC.presence_of_element_located((By.CLASS_NAME, 'photo_module')))
            # NOTE: only the entries present after the initial load are
            # collected; scrolling to trigger lazy loading is not implemented.
            self.li = self.driver.find_elements(By.CLASS_NAME, 'photo_module')

            video_inf = {}
            # Handle of the album window, needed to switch back to it later.
            self.fu_handle = self.driver.current_window_handle
            for item in self.li:
                file_name = item.find_element(By.XPATH, './a/div').text
                video_url = item.find_element(By.XPATH, './a').get_attribute('href')
                print("视频名称:", file_name, "视频连接地址:", video_url)
                # The album link is not the media file itself; the player
                # page has to be visited to find the final address.
                if video_url and self._PLAYER_PREFIX in video_url:
                    video_inf[file_name] = self.__get_final_video_url(video_url)
            return video_inf
        finally:
            # The browser is not needed for the downloads; always release it.
            self.driver.quit()

    def __get_final_video_url(self, url):
        """Open the player page in a new window and extract the media URL.

        :param url: address of the Weibo video player page.
        :return: absolute media URL, or ``""`` when none could be found.
        """
        known_handles = set(self.driver.window_handles)
        # Pass the URL as an argument instead of formatting it into the
        # script, so quotes inside the URL cannot break the JavaScript.
        self.driver.execute_script("window.open(arguments[0])", url)
        new_handles = [h for h in self.driver.window_handles
                       if h not in known_handles]
        if not new_handles:
            print("无地址")
            return ""
        child_handle = new_handles[-1]

        final_url = ""
        try:
            self.driver.switch_to.window(child_handle)
            WebDriverWait(self.driver, 200, 0.5).until(
                EC.presence_of_element_located((By.ID, 'playerRoom')))
            # Give the player a moment to insert the <video> element.
            time.sleep(3)
            matches = self._VIDEO_SRC_RE.findall(self.driver.page_source)
            if matches:
                # page_source is serialized HTML, so '&' arrives as '&amp;'.
                final_url = html.unescape(matches[0])
                print("新的地址:ee", "html:" + final_url)
            else:
                print("无地址")
            time.sleep(3)
        except WebDriverException as exc:
            print("failed to resolve video url {}: {}".format(url, exc))
            final_url = ""
        finally:
            # Always close the child window and return to the album window,
            # otherwise every following lookup runs in the wrong window.
            try:
                self.driver.switch_to.window(child_handle)
                self.driver.close()
            except WebDriverException:
                pass  # the child window is already gone
            self.driver.switch_to.window(self.fu_handle)

        if not final_url:
            return ""
        # The player uses protocol-relative addresses ("//host/path").
        if final_url.startswith("//"):
            return "http:" + final_url
        return final_url

    def download(self, url=None):
        """Download every resolvable video of the album.

        :param url: unused; kept so existing callers that pass the album
            URL keep working. The URL given to the constructor is used.
        :return: None
        """
        msg = self.__get_fileName_and_down_url()
        for file_name, video_url in msg.items():
            print("fileName:", file_name, "url:", video_url)
            if not video_url:
                continue
            target = '{}{}.mp4'.format(self.fileName,
                                       self._safe_file_name(file_name))
            print("正在下载视频{}".format(file_name))
            print("路径:", target)
            try:
                urllib.request.urlretrieve(video_url, target)
            except (OSError, ValueError) as exc:
                # URLError/HTTPError are OSError subclasses; keep going so
                # one broken link does not abort the remaining downloads.
                print("download failed for {}: {}".format(file_name, exc))


if __name__ == '__main__':
    # Video album of a personal page (change it, or add code to crawl it).
    url = "https://weibo.com/p/1005052420864952/photos?type=video#place"
    # Album URL and local save prefix; files are named after the captions.
    # A resolved media address looks like:
    # //f.us.sinaimg.cn/001S2GEdlx07rpeAv93O01041200q03A0E010.mp4?label=mp4_hd&template=844x480.25.0&Expires=1550404650&ssig=kTARYMkU1Y&KID=unistore,video
    aa = WeiBoVdeioDown(url, 'E:\\玉面小嫣然')
    aa.download(url)
存在的问题: 1、运行过程中可能会出现异常,这里程序对异常的处理不是很合适,后期可以通过对函数返回的url地址进行判断来改进。
2、页面内容需要向下拖动滑动条才会自动加载,查阅网上的资料,可以通过以下方式实现:
# js_code = "document.getElementsByClassName('WB_frame')[0].scrollHeight=8000"
# self.driver.execute_script(js_code)
其中js_code是要执行的代码,scrollHeight=8000的目的是滑动到底部。这里测试没有成功,其他页面有些可以成功,似乎是元素找错了,需要找到这个滚动条真正所属的容器才可以(另外scrollHeight是只读属性,滚动通常需要设置容器的scrollTop,或者调用window.scrollTo),有兴趣的可以自己去试试。
第一次实现,不喜勿喷。
# Videos, music and images can also be downloaded with requests, as below.
import re

import requests

# (source URL, destination path) pairs. The URLs carry an Expires
# parameter, so presumably they must be replaced with fresh ones - confirm.
downloads = [
    ('https://m10.music.126.net/20190218085252/d95768f7ab127a67ba24c1f6cba652e3/ymusic/015f/0f52/0f0b/65259539178803971cd18a5de46cfb76.mp3',
     'E:\\a.mp3'),
    ('http://f.us.sinaimg.cn/001R971olx07re2bg08g01041203HWqe0E020.mp4?label=mp4_720p&template=1268x720.20.0&Expires=1550453293&ssig=CZ7K3Algxi&KID=unistore,video',
     'E:\\a.mp4'),
]

for url, path in downloads:
    # stream=True fetches the body lazily, so large files are written chunk
    # by chunk instead of being held in memory; the with-block closes the
    # connection even when the download fails half-way.
    with requests.get(url, stream=True, timeout=30) as r:
        # Fail loudly instead of saving an HTTP error page as a media file.
        r.raise_for_status()
        with open(path, 'wb') as f:
            for chunk in r.iter_content(chunk_size=64 * 1024):
                f.write(chunk)
对于小文件,还可以使用如下方式下载:
# `r` is a response obtained with requests.get(url, stream=True) whose body
# has not been read yet. The whole body is read in a single call, which is
# why this form only suits small files.
with open('E:\\a.mp4', 'wb') as f:
    f.write(r.raw.read())
原文:https://www.cnblogs.com/GZBSYS/p/10393806.html