Code:
# -*- coding: utf-8 -*-
"""
Created on Mon May 27 21:10:59 2019

@author: Benny
"""

import csv
import os
import requests
from bs4 import BeautifulSoup

allUniv = []  # one list per university row, filled by fillUnivList()

def getHTMLText(url):
    """Fetch the page and return its text, or an empty string on failure."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = 'utf-8'
        return r.text
    except requests.RequestException:
        return ""

def fillUnivList(soup):
    """Collect the text of every <td> cell in every table row into allUniv."""
    data = soup.find_all('tr')
    for tr in data:
        ltd = tr.find_all('td')
        if len(ltd) == 0:      # header rows have no <td> cells, skip them
            continue
        singleUniv = []
        for td in ltd:
            singleUniv.append(td.string)
        allUniv.append(singleUniv)

def writercsv(save_road, num, title):
    """Write the first num rows to save_road; write the header row only when creating a new file."""
    if os.path.isfile(save_road):
        with open(save_road, 'a', newline='') as f:
            csv_write = csv.writer(f, dialect='excel')
            for i in range(num):
                csv_write.writerow(allUniv[i])
    else:
        with open(save_road, 'w', newline='') as f:
            csv_write = csv.writer(f, dialect='excel')
            csv_write.writerow(title)
            for i in range(num):
                csv_write.writerow(allUniv[i])

title = ["排名", "学校名称", "省市", "总分", "生源质量", "培养结果", "科研规模",
         "科研质量", "顶尖成果", "顶尖人才", "科技服务", "产学研究合作", "成果转化"]
save_road = r"C:\Users\Benny\Desktop\Python\Python练习\sqlit_test02.csv"  # output CSV path, adjust for your machine

def main():
    url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
    html = getHTMLText(url)
    soup = BeautifulSoup(html, "html.parser")
    fillUnivList(soup)
    writercsv(save_road, 10, title)   # save only the top 10 rows

main()
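As an aside (not part of the original post), the same ranking table can also be pulled in a few lines with pandas.read_html. This is only a minimal sketch, assuming pandas plus an HTML parser such as lxml or html5lib is installed:

import requests
import pandas as pd

url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
r = requests.get(url, timeout=30)
r.encoding = 'utf-8'
tables = pd.read_html(r.text)   # one DataFrame per <table> element on the page
print(tables[0].head(10))       # assuming the ranking table is the first <table> on the page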
A screenshot of the resulting file is shown below. (Only the top ten rows are saved here; more can be saved by changing num, as in the call sketched below.)
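For example, a hypothetical call (not in the original script) that saves every scraped row instead of only the first ten, used in place of the last line of main():

writercsv(save_road, len(allUniv), title)   # save every scraped row instead of the top 10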
Original post: https://www.cnblogs.com/shuxincheng/p/10933546.html