# coding:utf-8
"""Log in to Douban through Selenium WebDriver and reuse the cookies in requests.

The script opens the Douban login page in Chrome, fills in the credentials
given on the command line, copies the browser cookies into a
``requests.Session`` and saves a login-protected page to ``html.txt``.

Usage: python <script> USER_NAME PASSWORD
"""
import io
import json
import sys
import time

import requests
from selenium import webdriver
from selenium.webdriver.common.by import By

if sys.version_info[0] == 2:
    # Python 2 only: make implicit str/unicode conversions use UTF-8, as the
    # original script did. Python 3 has no ``reload`` builtin and no
    # ``sys.setdefaultencoding``, so this branch must never run there.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Douban login page URL.
LOGIN_URL = 'https://www.douban.com/accounts/login'
# URL of a page that can only be viewed after logging in.
PAGE_URL = 'https://www.douban.com/accounts/'
# File that receives the HTML of PAGE_URL.
OUTPUT_FILE = 'html.txt'
# Seconds left for the user to type a captcha by hand (repeated logins
# require one).
CAPTCHA_WAIT_SECONDS = 6
# Seconds to wait after submitting the login form.
SUBMIT_WAIT_SECONDS = 2
# Upper bound, in seconds, for the HTTP request made through requests.
REQUEST_TIMEOUT_SECONDS = 30


def _login_and_get_cookies(user_name, password):
    """Log in through Chrome and return the browser's cookies.

    Args:
        user_name: Value typed into the ``form_email`` field.
        password: Value typed into the ``form_password`` field.

    Returns:
        The list of cookie dicts reported by ``driver.get_cookies()``.
    """
    opt = webdriver.ChromeOptions()
    # To run without a visible browser window, enable headless mode on
    # ``opt`` here (the original left this switched off).

    driver = webdriver.Chrome(options=opt)
    try:
        driver.get(LOGIN_URL)
        print('opened login page...')

        # Send the user name and password to the browser.
        driver.find_element(By.NAME, 'form_email').send_keys(user_name)
        driver.find_element(By.NAME, 'form_password').send_keys(password)
        # Leave time for the captcha to be entered manually before submitting.
        time.sleep(CAPTCHA_WAIT_SECONDS)
        driver.find_element(By.CLASS_NAME, 'btn-submit').submit()
        print('submited...')
        time.sleep(SUBMIT_WAIT_SECONDS)

        return driver.get_cookies()
    finally:
        # quit() ends the whole browser session (close() only closes the
        # current window); doing it in ``finally`` avoids leaking the browser
        # when any step above raises.
        driver.quit()


def main():
    """Run the login flow and save a login-protected page to ``html.txt``.

    Reads the user name and password from ``sys.argv[1]`` and
    ``sys.argv[2]``; exits with a usage message when either is missing.
    """
    if len(sys.argv) < 3:
        sys.exit('usage: {0} USER_NAME PASSWORD'.format(sys.argv[0]))
    user_name = sys.argv[1]
    password = sys.argv[2]

    cookies = _login_and_get_cookies(user_name, password)

    with requests.Session() as s:
        # Copy every browser cookie (one dict per cookie) into the session.
        for cookie in cookies:
            s.cookies.set(cookie['name'], cookie['value'])

        resp = s.get(PAGE_URL, timeout=REQUEST_TIMEOUT_SECONDS)
        resp.encoding = 'utf-8'
        print('status_code = {0}'.format(resp.status_code))

        # Store the page content; the explicit encoding keeps the write
        # independent of the interpreter's default encoding.
        with io.open(OUTPUT_FILE, 'w', encoding='utf-8') as fout:
            fout.write(resp.text)

    print('end')


if __name__ == '__main__':
    main()
# Original article: https://www.cnblogs.com/cmbobo/p/12298204.html