import re
from collections import OrderedDict

import numpy as np
import pandas as pd
import requests  # alternative: import urllib.request
from bs4 import BeautifulSoup
# ---------------------------------------------------------------------------
# Configuration placeholders -- fill these in before running the script.
# ---------------------------------------------------------------------------
URL = "http://"  # page to scrape
DIV_ATTRS = {}  # e.g. {"id": "content"} to narrow down which <div>s are searched
PATTERN = re.compile(r"")  # regex describing the markup to strip from each <li>
REPLACEMENT = ""  # text substituted for every PATTERN match
FEATURE_KEY = "text"  # column name used in the resulting spreadsheet
FILENAME = "output.xlsx"  # destination Excel workbook


def clean_text(text, pattern=PATTERN, replacement=REPLACEMENT):
    """Return *text* with every match of *pattern* replaced by *replacement*.

    Args:
        text: String to clean (plain text or HTML markup).
        pattern: Compiled regular expression to look for.
        replacement: Replacement passed to ``pattern.sub``.

    Returns:
        The cleaned string; *text* unchanged when the pattern does not occur.
    """
    # ``sub`` already leaves the input untouched when nothing matches, so no
    # separate ``search`` is needed; the important part is keeping the result.
    return pattern.sub(replacement, text)


def fetch_html(url, timeout=10):
    """Download *url* and return it parsed as a ``BeautifulSoup`` document.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On connection problems, timeouts, or an
            HTTP error status (via ``raise_for_status``).
    """
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    return BeautifulSoup(response.text, "html.parser")


def extract_items(soup, div_attrs=None, pattern=PATTERN, replacement=REPLACEMENT):
    """Collect the cleaned text of the first ``<li>`` under each matching ``<div>``.

    For every ``<div>`` matching *div_attrs*, the first ``<ul>`` and its first
    ``<li>`` are located. The ``<li>`` markup is cleaned with *pattern*,
    re-parsed, and its whitespace-stripped strings are joined with spaces.
    ``<div>`` elements without such an item are skipped. Because ``find``
    searches all descendants, nested matching ``<div>`` elements can yield
    the same item more than once; narrow *div_attrs* to avoid that.

    Args:
        soup: Parsed document (or tag) to search.
        div_attrs: Attribute filter for the ``<div>`` lookup, or ``None`` for
            every ``<div>``.
        pattern: Compiled regular expression applied to the ``<li>`` markup.
        replacement: Replacement for each match of *pattern*.

    Returns:
        A list with one text string per item found.
    """
    texts = []
    # ``find_all`` returns a list-like ResultSet, so each hit is searched
    # individually rather than calling ``find`` on the collection.
    for div in soup.find_all("div", attrs=div_attrs or {}):
        ul = div.find("ul")
        li = ul.find("li") if ul is not None else None
        if li is None:
            continue

        # Clean the raw markup, then re-parse it to get a tag back.
        markup = clean_text(str(li), pattern, replacement)
        li = BeautifulSoup(markup, "html.parser").find("li")
        if li is None:
            continue  # the substitution removed the <li> itself

        # ``stripped_strings`` is a generator property, not a method.
        texts.append(" ".join(li.stripped_strings))
    return texts


def build_frame(feature):
    """Turn a ``{column: [values, ...]}`` mapping into a ``DataFrame``.

    Args:
        feature: Mapping from column name to a list of cell values.

    Returns:
        A ``DataFrame`` whose columns follow the key order of *feature*.
    """
    return pd.DataFrame(data=feature, columns=list(feature))


def main():
    """Scrape ``URL`` and write the extracted items to ``FILENAME``.

    Raises:
        SystemExit: When the page cannot be downloaded.
    """
    try:
        soup = fetch_html(URL)
    except requests.RequestException as exc:
        raise SystemExit(f"Could not fetch {URL!r}: {exc}")

    feature = {}
    for text in extract_items(soup, DIV_ATTRS):
        # ``setdefault`` creates the column list on first use.
        feature.setdefault(FEATURE_KEY, []).append(text)

    build_frame(feature).to_excel(FILENAME)


if __name__ == "__main__":
    main()
# Note: 1. Catch exceptions (try/except) around the lookup steps, since any of them can fail.
# Source: https://www.cnblogs.com/Dai-py/p/10635708.html