import csv
import json
import os
import time
from threading import Timer

import requests

# Keys copied from each answer record into hwlist.csv, in column order.
_CSV_FIELDS = ('StudentNo', 'RealName', 'DateAdded', 'Title', 'Url')


def _write_homework_list(datas, csv_path):
    """Write one CSV row per answer record to *csv_path* (no header row)."""
    # Build every row first so a missing key fails before the file is
    # truncated, matching the original build-then-write order.
    rows = [[data[field] for field in _CSV_FIELDS] for data in datas]
    # newline='' is what the csv module expects; the writer then emits
    # '\r\n' itself, which is what text mode produced on Windows before.
    # Encoding is left at the platform default, as in the original.
    # NOTE(review): consider an explicit encoding such as 'utf-8-sig'.
    with open(csv_path, 'w', newline='') as f:
        # csv.writer quotes fields containing commas, quotes or newlines,
        # which plain string concatenation silently corrupted.
        csv.writer(f).writerows(rows)


def _download_submission(data, hw_folder):
    """Save the page at data['Url'] as <hw_folder>/<StudentNo>/<StudentNo>.html."""
    student_no = data['StudentNo']
    # NOTE(review): StudentNo comes from the remote response and is used as a
    # path component unchecked; validate it if the endpoint is not trusted.
    student_dir = os.path.join(hw_folder, student_no)
    # The directory is created before the request, so a failed download
    # still leaves an (empty) folder for that student, as before.
    os.makedirs(student_dir, exist_ok=True)
    try:
        web = requests.get(data['Url'], timeout=30)
        web.raise_for_status()
    except requests.RequestException:
        print('连接失败')
        return
    # The raw response bytes are stored, so no text decoding is needed.
    with open(os.path.join(student_dir, student_no + '.html'), 'wb') as f:
        f.write(web.content)


def getHTML(url, base_dir=r'F:\羊车门爬虫作业'):
    """Fetch a homework answer list and archive every submission locally.

    The response body at *url* is parsed as JSON and its 'data' entry is
    iterated as a sequence of records with the keys 'StudentNo',
    'RealName', 'DateAdded', 'Title' and 'Url'.  The records are written to
    ``<base_dir>/hwlist.csv`` and each record's 'Url' is downloaded to
    ``<base_dir>/hwFolder/<StudentNo>/<StudentNo>.html``.

    Args:
        url: Address of the JSON answer list.
        base_dir: Directory that receives hwlist.csv and hwFolder.  It is
            created if missing; the default is the original hard-coded path.

    Returns:
        None.  If the list cannot be fetched or parsed, '连接失败' is
        printed and nothing is written.  A failed download of a single
        submission prints the same message and the loop continues.

    Side effects:
        On success the process working directory is left at
        ``<base_dir>/hwFolder``, as in the original implementation.
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        datas = json.loads(r.text)['data']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # ValueError covers malformed JSON; KeyError/TypeError cover a
        # payload without a usable 'data' entry.
        print('连接失败')
        return

    os.makedirs(base_dir, exist_ok=True)
    _write_homework_list(datas, os.path.join(base_dir, 'hwlist.csv'))

    hw_folder = os.path.join(base_dir, 'hwFolder')
    # exist_ok lets the script be re-run without crashing on existing folders.
    os.makedirs(hw_folder, exist_ok=True)
    # Kept for backward compatibility only; all writes below use absolute
    # paths, so an error mid-loop can no longer strand the process inside a
    # student sub-directory.
    os.chdir(hw_folder)
    for data in datas:
        _download_submission(data, hw_folder)


# Answer-list endpoint (homeworkId=2420).  The trailing "_" parameter is
# presumably a cache-busting timestamp -- TODO confirm.
url = 'https://edu.cnblogs.com/Homework/GetAnswers?homeworkId=2420&_=1543216235911'
# Original article: https://www.cnblogs.com/sninius/p/10027877.html