首页 > 编程语言 > 详细

爬取拉钩网 python3.7

时间:2019-10-07 14:57:30      阅读:186      评论:0      收藏:0      [点我收藏+]
import requests
import time
import json
def main():
url_start = "https://www.lagou.com/jobs/list_运维?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput="
url_parse = "https://www.lagou.com/jobs/positionAjax.json?city=成都&needAddtionalResult=false"
headers = {
‘Accept‘: ‘application/json, text/javascript, */*; q=0.01‘,
‘Referer‘: ‘https://www.lagou.com/jobs/list_%E8%BF%90%E7%BB%B4?city=%E6%88%90%E9%83%BD&cl=false&fromSearch=true&labelWords=&suginput=‘,
‘User-Agent‘: ‘Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36‘
}
for x in range(1, 5):
data = {
‘first‘: ‘true‘,
‘pn‘: str(x),
‘kd‘: ‘运维‘
}
s = requests.Session()
s.get(url_start, headers=headers, timeout=3) # 请求首页获取cookies
cookie = s.cookies # 为此次获取的cookies
response = s.post(url_parse, data=data, headers=headers, cookies=cookie, timeout=3) # 获取此次文本
time.sleep(5)
response.encoding = response.apparent_encoding
text = json.loads(response.text)
info = text["content"]["positionResult"]["result"]
for i in info:
print(i["companyFullName"])
companyFullName = i["companyFullName"]
print(i["positionName"])
positionName = i["positionName"]
print(i["salary"])
salary = i["salary"]
print(i["companySize"])
companySize = i["companySize"]
print(i["skillLables"])
skillLables = i["skillLables"]
print(i["createTime"])
createTime = i["createTime"]
print(i["district"])
district = i["district"]
print(i["stationname"])
stationname = i["stationname"]

if __name__ == ‘__main__‘:
main()

爬取拉钩网 python3.7

原文:https://www.cnblogs.com/weixin272958297/p/11630241.html

(0)
(0)
   
举报
评论 一句话评论(0
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!