首页 > 编程语言 > 详细

Python 中国大学排名定向爬虫

时间:2019-10-26 17:16:02      阅读:106      评论:0      收藏:0      [点我收藏+]

代码来自于中国大学MOOC北京理工大学Python教学团队：https://www.icourse163.org/learn/BIT-1001870001#/learn/content?type=detail&id=1211970249&cid=1215042961

1.函数版

#中国大学定向爬虫
import requests
from bs4 import BeautifulSoup
import bs4
     
def getHTMLText(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or "" if the request fails (connection
        error, timeout after 30 seconds, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the
        # same failure path as network errors.
        r.raise_for_status()
        # Decode with the encoding detected from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing downloaded".
        # Only request failures are swallowed; programming errors and
        # KeyboardInterrupt still propagate.
        return ""
     
def fillUnivList(ulist, html):
    """Parse the ranking table in *html* and append its rows to *ulist*.

    Each appended row is a three-item list taken from the 1st, 2nd and
    4th ``<td>`` cells of a table row (printed elsewhere in this file
    under the headings rank, school name and total score).

    Args:
        ulist: List that receives the parsed rows; modified in place.
        html: Page source. Empty input, or a page without a ``<tbody>``,
            leaves *ulist* unchanged.
    """
    if not html:
        # A failed download yields "" -- there is nothing to parse.
        return
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find("tbody")
    if tbody is None:
        return
    for tr in tbody.children:
        # .children also yields whitespace text nodes between rows;
        # only real tags can be table rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr("td")  # calling a tag is shorthand for find_all
        if len(tds) < 4:
            # Not a full data row; indexing tds[3] would fail.
            continue
        ulist.append([tds[0].string, tds[1].string, tds[3].string])
     
def printUnivList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed as rank, school name and total score.
        num: Maximum number of rows to print. If *ulist* is shorter,
            only the available rows are printed.
    """
    # Field 3 supplies the fill character for the name column:
    # chr(12288) is the full-width (ideographic) space, so padding has
    # the same width as the Chinese characters and columns line up.
    tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
    pad = chr(12288)
    print(tplt.format("排名", "学校名称", "总分", pad))
    # Slicing instead of range(num) avoids IndexError on short lists.
    for u in ulist[:num]:
        # A cell may be None (no single string in the <td>); None does
        # not accept a format spec, so render it as an empty cell.
        rank, name, score = ("" if v is None else v for v in (u[0], u[1], u[2]))
        print(tplt.format(rank, name, score, pad))
         
def main():
    """Fetch the 2018 ranking page and print its first 20 universities."""
    uinfo = []
    # Alternative URL left by the original author:
    # https://www.zuihaodaxue.cn/zuihaodaxuepaiming2018.html
    url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html"
    html = getHTMLText(url)
    if not html:
        # getHTMLText returns "" on failure; report it instead of
        # going on to parse and print an empty table.
        print("爬取失败")
        return
    fillUnivList(uinfo, html)
    printUnivList(uinfo, 20)  # 20 univs


# Run only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

 

2.修改无函数版用于学习

#中国大学定向爬虫
import requests
from bs4 import BeautifulSoup
import bs4

# Function-free version of the crawler: fetch, parse and print in order.
ulist = []
url = "http://www.zuihaodaxue.com/zuihaodaxuepaiming2018.html"
try:
    r = requests.get(url, timeout=30)
    r.raise_for_status()
    # Decode with the encoding detected from the body content.
    r.encoding = r.apparent_encoding
except requests.RequestException:
    print("爬取失败")
    # Stop here: `r` may not exist, and there is no page to parse.
    raise SystemExit(1)
html = r.text
soup = BeautifulSoup(html, "html.parser")
tbody = soup.find("tbody")
if tbody is not None:
    for tr in tbody.children:
        # .children also yields text nodes; only tags can be rows.
        if isinstance(tr, bs4.element.Tag):
            tds = tr("td")  # calling a tag is shorthand for find_all
            if len(tds) >= 4:
                ulist.append([tds[0].string, tds[1].string, tds[3].string])

tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
# chr(12288) is the full-width space; using it as the fill character
# keeps the Chinese text aligned.
print(tplt.format("排名", "学校名称", "总分", chr(12288)))
num = 20
# Print the first 20 rows; slicing tolerates a shorter list.
for u in ulist[:num]:
    print(tplt.format(u[0], u[1], u[2], chr(12288)))
print("爬取完毕")

 

Python 中国大学排名定向爬虫

原文:https://www.cnblogs.com/xdd1997/p/11743826.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!