import os
import requests
from bs4 import BeautifulSoup
import lxml
def Gethtml(url, timeout=10):
    """Fetch *url* over HTTP and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.
            ``requests`` waits indefinitely when no timeout is passed,
            so a finite default is supplied here.

    Returns:
        The undecoded response body as ``bytes``.

    Raises:
        requests.exceptions.HTTPError: If the server answers with a
            4xx/5xx status code.
        requests.exceptions.RequestException: On connection failures
            or when the timeout expires.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on error statuses instead of returning an error page
    # as if it were the requested document.
    response.raise_for_status()
    # ``.content`` is the raw byte payload and is not affected by
    # ``response.encoding``, so no encoding override is applied here.
    return response.content
def parseHtml(html):
    """Print the ``href`` value of every anchor tag found in *html*.

    Args:
        html: HTML markup (``str`` or ``bytes``) to parse with the
            ``lxml`` parser.

    Anchors that carry no ``href`` attribute are skipped, so the output
    contains only real link targets and never the literal ``None``.
    """
    soup = BeautifulSoup(html, features="lxml")
    # ``find_all`` is the current method name (``findAll`` is a deprecated
    # alias); ``href=True`` restricts the match to anchors that actually
    # have an ``href`` attribute.
    for anchor in soup.find_all("a", href=True):
        print(anchor.get("href"))
# Script entry: download the Baidu home page and print every link on it.
# The host is "www" (three w's); the previous "wwww" spelling was a typo.
url = "http://www.baidu.com"
parseHtml(Gethtml(url))
# Source: https://www.cnblogs.com/yanwuming/p/11626790.html