"""Fetch web content with urllib.

1. Crawling a website's content:
   1) the URL and the request headers
   2) the request method: GET or POST
   3) the request functions:
      urlopen(url, data=data, timeout=1)
      Request(url=url, data=data, headers=headers, method="POST")
"""
from urllib.request import urlopen, Request
from urllib.parse import urlencode
from urllib.error import URLError
import socket

# Target site to request.
url = "http://httpbin.org/post"

# Request headers.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/80.0.3987.122 Safari/537.36"
    )
}

# Form data sent as the POST body.  (Previously named ``dict``, which
# shadowed the builtin for the rest of the module.)
form_data = {"name": "mm"}


def print_status(response):
    """Print the status, headers, server type and raw body of a response.

    Args:
        response: An open response object exposing ``status``,
            ``getheaders()``, ``getheader(name)`` and ``read()``.

    The body is consumed with ``read()`` and printed undecoded.  The
    response is not closed here; that stays the caller's responsibility.
    """
    status_code = response.status
    all_headers = response.getheaders()
    server = response.getheader("Server")
    html = response.read()
    print("请求状态:", status_code)
    print("网站信息:", all_headers)
    print("网站服务器类型:", server)
    print("网站的内容:", html)


def response_urlopen(url):
    """Open *url* via ``urlopen`` with a 1-second timeout.

    Returns:
        The open response object; the caller must close it.
    """
    return urlopen(url, timeout=1)


def response_Request(url, dict, headers, *, timeout=10):
    """POST form data to *url* via a ``Request`` and print the response.

    Args:
        url: Address to send the request to.
        dict: Mapping of form fields; it is URL-encoded and sent as the
            UTF-8 request body.  (The name shadows the builtin inside this
            function but is kept so existing keyword callers still work.)
        headers: Mapping of request headers passed to ``Request``.
        timeout: Seconds to wait on the connection before giving up.  The
            original call passed no timeout and could block indefinitely.

    Raises:
        urllib.error.URLError: If the request fails (``HTTPError`` for
            HTTP error statuses is a subclass).
    """
    data = urlencode(dict).encode("utf-8")
    req = Request(url, headers=headers, data=data, method="POST")
    # Later add_header() calls overwrite an existing header of the same
    # name, so this replaces any User-Agent supplied through ``headers``.
    req.add_header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)")
    # The context manager closes the connection even if reading or
    # decoding the body raises.
    with urlopen(req, timeout=timeout) as response:
        status_code = response.status
        all_headers = response.getheaders()
        server = response.getheader("Server")
        html = response.read().decode("utf-8")
    print("============请求状态=============")
    print("请求状态:", status_code)
    print("============网站信息=============")
    print("网站信息:", all_headers)
    print("网站服务器类型:", server)
    print("============网页内容=============")
    print("网站的内容:", html)


def main():
    """Send the demo POST request, exiting with a message on failure."""
    try:
        response_Request(url, form_data, headers)
    except (URLError, socket.timeout) as err:
        raise SystemExit(f"请求失败: {err}") from err


# Guarded so that importing this module does not trigger a network request.
if __name__ == "__main__":
    main()
# 2. (section heading only; no content follows it in this excerpt)
# Original article: https://www.cnblogs.com/Crown-V/p/12642900.html