python--selenium爬取笔趣阁小说网站

时间：2019-10-07 23:05:45 阅读：267 评论：0 收藏：0 [点我收藏+]

# -*- coding: utf-8 -*-
from selenium import webdriver
import requests,re,pprint,time

# Download every chapter of the novel listed on the index page `url` and
# append each chapter's heading and body text to a single UTF-8 text file.
#
# NOTE(review): this uses the Selenium 3 style API (positional driver path,
# find_element_by_* helpers) exactly as the original did; confirm the
# installed Selenium version still provides them.
url = 'https://www.sbiquge.com/5_5374/'
# Raw string so the backslashes in the Windows path are taken literally.
browser = webdriver.Chrome(r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe')

response = requests.get(url)
html = response.text
# Numeric chapter ids, in the order their links appear in the index HTML.
result = re.findall(r'\w\shref\s="/5_5374/(\d+)', html, re.S)
# Link text of each chapter anchor; collected but not used by the loop below.
esult = re.findall(r'\w\shref\s="/5_5374/\d+\.html">(.*?)</a>', html, re.S)
print(result)

# Output file; chapters are appended, so re-running adds to existing content.
novel_name = '诛仙.txt'

try:
    # Visit each chapter exactly once (at most 10000, the original cap).
    # Iterating the list instead of a manual counter means a failed page
    # load can no longer spin forever on the same chapter, and running past
    # the end of the list can no longer raise inside the error handler.
    for chapter_id in result[:10000]:
        try:
            browser.get(url + "{}.html".format(chapter_id))
            # Short pause to give the page a moment to render.
            time.sleep(0.1)
            cont = browser.find_element_by_xpath('// *[ @ id = "book"] / div[2] / h1')
            con = browser.find_element_by_class_name('showtxt')
            print(cont.text)
            print(con.text)
            with open(novel_name, 'a', encoding='utf-8') as f:
                # The original also concatenated the integer loop counter
                # here, which raised TypeError on every write; it is dropped.
                f.write('\n\n' + cont.text + '\n\n\t' + con.text)
        except Exception as e:
            # Best effort: report the chapter that actually failed, move on.
            print(e)
            print(chapter_id + "这章加载太慢了====================================================================================!")
finally:
    # Always release the browser window, even if the loop aborts.
    browser.close()

原文：https://www.cnblogs.com/fqqwz/p/11632495.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)