# Selenium: automate the browser to search JD.com for a product and fetch the page source
from selenium import webdriver
from time import sleep
# Script: open JD.com in Chrome, search for "ipad", scroll to the bottom of the
# results page and print the page source.
# Instantiate a browser object, passing in the path to the driver executable.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    # Have the browser request the target URL.
    bro.get('https://www.jd.com/')
    sleep(2)
    # Locate the search box and type the query into it.
    search_input = bro.find_element_by_id('key')
    search_input.send_keys('ipad')
    # Locate the search button and click it.
    search_btn = bro.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
    search_btn.click()
    sleep(2)
    # Execute JavaScript to scroll to the bottom of the page.
    bro.execute_script('window.scrollTo(0,document.body.scrollHeight)')
    sleep(2)
    # Fetch the source of the whole page as the browser currently holds it.
    page_text = bro.page_source
    print(page_text)
    sleep(2)
finally:
    # Always shut the browser down, even if one of the steps above raised,
    # so no browser/driver process is left behind.
    bro.quit()
# Selenium: automate the browser to visit several pages of the NMPA (drug administration) site and extract information from them
from selenium import webdriver
from time import sleep
from lxml import etree
# Script: open the NMPA licence listing in Chrome, collect the page source of
# the first page plus the next two pages, then parse every collected page with
# lxml and print "<name>:<number>" for each list entry.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('http://scxk.nmpa.gov.cn:81/xk/')
    sleep(2)
    # Source of the first page.
    page_text = bro.page_source
    page_text_list = [page_text]
    # Loop twice to collect the source of the following two pages.
    for i in range(1, 3):
        # Locate the "next page" button and click it. click() returns None,
        # so its result is deliberately not bound to a name.
        bro.find_element_by_xpath('//*[@id="pageIto_next"]').click()
        sleep(1)
        page_text_list.append(bro.page_source)
    sleep(2)
finally:
    # Always shut the browser down, even if navigation or paging raised.
    bro.quit()
# Parse the collected pages and extract the required fields.
for page_text in page_text_list:
    tree = etree.HTML(page_text)
    li_list = tree.xpath('//*[@id="gzlist"]/li')
    for li in li_list:
        # Both values are read from the `title` attribute of a child element.
        c_name = li.xpath('./dl/@title')[0]
        c_num = li.xpath('./ol/@title')[0]
        print(c_name+':'+c_num)
# Selenium: basic usage of action chains (ActionChains)
from selenium import webdriver
from time import sleep
from selenium.webdriver import ActionChains
# Script: open the runoob jQuery UI droppable demo and drag the "draggable"
# element in five small steps using an ActionChains object.
bro = webdriver.Chrome(executable_path='chromedriver.exe')
try:
    bro.get('https://www.runoob.com/try/try.php?filename=jqueryui-api-droppable')
    # Elements inside an iframe cannot be located directly from the outer
    # page, so switch into the frame first.
    bro.switch_to.frame('iframeResult')
    div_tag = bro.find_element_by_id('draggable')
    # Dragging = click-and-hold + move.
    # Create an action chain bound to this browser.
    action = ActionChains(bro)
    action.click_and_hold(div_tag)
    for i in range(5):
        # Offsets are (horizontal, vertical). Actions are only queued until
        # perform() is called, so perform() is required for the move to run.
        # (action.move_to_element() could be used to move onto an element.)
        action.move_by_offset(17, 5).perform()
        sleep(0.5)
    # Release the mouse button once the drag is done. release() on its own
    # only queues the action; perform() is needed for it to take effect.
    action.release().perform()
    sleep(3)
finally:
    # Always shut the browser down, even if a step above raised.
    bro.quit()
# Original article: https://www.cnblogs.com/straightup/p/13693372.html