"""Scrape monthly air-quality tables from aqistudy.cn into one CSV file per city.

The list of city names is read from the history-data index page with
requests + lxml.  Each city's month page is then opened in Chrome through
Selenium, given a fixed wait, and every ``<td align="center">`` cell is read
and written out in groups of ten, prefixed with the city name.
"""
import urllib.request
import urllib.parse
import csv
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By

INDEX_URL = 'https://www.aqistudy.cn/historydata/index.php'
MONTH_URL_PREFIX = 'https://www.aqistudy.cn/historydata/monthdata.php?city='

# XPath of the city-name link texts on the index page.
CITY_XPATH = '/html/body/div[3]/div/div[1]/div[2]/div[2]/ul/div[2]/li/a/text()'
# XPath of the table cells on a city's month page.
CELL_XPATH = '//td[@align="center"]'

# Output file for one city; ``{}`` is replaced by the city name.
CSV_PATH_TEMPLATE = 'C:\\Users\\lenovo\\Desktop\\{}.csv'

CELLS_PER_ROW = 10            # table cells consumed per CSV row
PAGE_LOAD_WAIT_SECONDS = 3    # fixed pause after navigation before reading cells
REQUEST_TIMEOUT_SECONDS = 30  # avoid hanging forever on the index request

# NOTE(review): every data row written below holds the city name plus
# CELLS_PER_ROW table cells (11 values), but this header has only 10 labels,
# so the header is misaligned with the data.  Presumably one column label is
# missing -- confirm against the live table before changing the header text.
CSV_HEADER = ['名字', '月份', '范围', '质量等级', 'PM2.5', 'PM10', 'SO2', 'CO', 'NO2', 'O3']

response = requests.get(INDEX_URL, timeout=REQUEST_TIMEOUT_SECONDS)
index_page = etree.HTML(response.text)
# etree.HTML may yield None for an unparsable/empty body; treat that as "no cities".
city_names = index_page.xpath(CITY_XPATH) if index_page is not None else []

for city_name in city_names:
    # City names are non-ASCII, so they must be percent-encoded in the query.
    month_url = MONTH_URL_PREFIX + urllib.parse.quote(city_name)

    browser = webdriver.Chrome()
    try:
        browser.get(month_url)
        time.sleep(PAGE_LOAD_WAIT_SECONDS)
        # Read the text while the browser is still alive; the elements become
        # unusable once the session is closed.
        cell_texts = [
            cell.text for cell in browser.find_elements(By.XPATH, CELL_XPATH)
        ]
    finally:
        # Always release the Chrome process, even if navigation or lookup fails.
        browser.quit()

    # Open the city's file once (append mode, as before) instead of once per row.
    with open(CSV_PATH_TEMPLATE.format(city_name), 'a+',
              newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(CSV_HEADER)

        # Only emit complete groups; a trailing partial group would otherwise
        # raise IndexError (as the original indexing did).
        complete_cells = len(cell_texts) - len(cell_texts) % CELLS_PER_ROW
        for start in range(0, complete_cells, CELLS_PER_ROW):
            writer.writerow(
                [city_name, *cell_texts[start:start + CELLS_PER_ROW]]
            )
# Original article: https://www.cnblogs.com/persistence-ok/p/11029227.html