抓取“猫眼”TOP100榜
# Scrape the Maoyan TOP100 movie board and append it to a local text file.
import requests
from bs4 import BeautifulSoup

# Headers sent with every request: a desktop Chrome User-Agent string.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/70.0.3538.102 Safari/537.36'
    )
}

# Board URL prefix; the page offset (0, 10, ..., 90) is appended to it.
BOARD_URL = 'https://maoyan.com/board/4?offset='
OUTPUT_FILE = '猫眼TOP100榜.txt'
PAGE_SIZE = 10
PAGE_COUNT = 10
REQUEST_TIMEOUT = 10  # seconds; without a timeout requests can block forever


def open_url(url):
    """Fetch *url* with the module-level headers and return the body as text.

    The response is decoded as UTF-8.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a string.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an error status instead of parsing an error page.
    r.raise_for_status()
    r.encoding = 'utf-8'
    return r.text


def get_info(html):
    """Return all ``<div class="movie-item-info">`` elements found in *html*.

    Args:
        html: HTML source of one board page.

    Returns:
        The result of ``BeautifulSoup.find_all`` for those elements, in
        document order.
    """
    soup = BeautifulSoup(html, 'lxml')
    return soup.find_all('div', class_="movie-item-info")


def main():
    """Download every board page and append the numbered entries to a file.

    Each entry is written as ``<rank>.<text>`` on its own line, where
    ``<text>`` is the element text with spaces removed and newlines turned
    into spaces. The file is opened in append mode, so running the script
    again adds a second copy of the list to an existing file.
    """
    num = 1
    # Open the file once, with an explicit encoding: the entries contain
    # Chinese text and the platform default encoding may not handle it.
    with open(OUTPUT_FILE, 'a', encoding='utf-8') as file:
        for offset in range(0, PAGE_COUNT * PAGE_SIZE, PAGE_SIZE):
            html = open_url(BOARD_URL + str(offset))
            for info in get_info(html):
                text = info.text.replace(' ', '').replace('\n', ' ')
                file.write(str(num) + '.' + text + '\n')
                num += 1
    print('猫眼TOP100榜下载完成!')


if __name__ == '__main__':
    main()
原文:https://www.cnblogs.com/jsxxd/p/13740903.html