谏言:穷则独善其身,达则兼济天下
爬取B站弹幕数据进行词云显示
"""Scrape Bilibili danmaku (bullet comments) and render them as a word cloud.

Pipeline:
1. Download the danmaku XML of one video from the Bilibili API.
2. Append every comment to a CSV file, one comment per row.
3. Segment everything stored in that CSV with jieba and draw a word cloud
   shaped by a mask image.

``requests``, ``jieba``, ``wordcloud`` and ``imageio`` are third-party
packages. They are imported inside the functions that use them so the pure
parsing/CSV helpers stay importable without those packages being installed.
"""
import csv
import html
import re

HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/81.0.4044.122 Safari/537.36"
    ),
}

# Danmaku endpoint; ``oid`` selects the video.
# Other oids that were tried with this script: 57236482, 171484836.
URL = "https://api.bilibili.com/x/v1/dm/list.so?oid=186803402"

CSV_PATH = "B站1.csv"
MASK_PATH = r"图片2.png"
OUTPUT_PATH = "r5.png"

# Each comment is the text of a <d ...> element. ``\b`` keeps the pattern
# from matching other tags whose name merely starts with "d".
DANMU_PATTERN = re.compile(r"<d\b[^>]*>(.*?)</d>")


def extract_danmu(xml_text):
    """Return the text of every ``<d>`` element found in *xml_text*.

    XML character entities (``&amp;``, ``&lt;`` ...) are decoded so that the
    entity names do not end up as words in the cloud.

    Args:
        xml_text: The danmaku XML document as a string.

    Returns:
        A list of comment strings in document order (empty if none match).
    """
    return [html.unescape(raw) for raw in DANMU_PATTERN.findall(xml_text)]


def fetch_danmu_xml(url=URL, timeout=10):
    """Download the danmaku XML document and return it as text.

    Args:
        url: Address of the danmaku list to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            non-2xx HTTP status.
    """
    import requests

    response = requests.get(url, headers=HEADERS, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were danmaku.
    response.raise_for_status()
    return response.content.decode("utf-8")


def save_danmu(comments, path=CSV_PATH):
    """Append *comments* to the CSV file at *path*, one comment per row.

    The file is opened once in append mode, so rows written by earlier runs
    are kept. ``utf-8-sig`` is used as the file encoding.
    """
    with open(path, "a", newline="", encoding="utf-8-sig") as f:
        csv.writer(f).writerows([comment] for comment in comments)


def load_danmu(path=CSV_PATH):
    """Read back every comment stored in the CSV file at *path*.

    The file is parsed with ``csv.reader`` (not read as raw text) so quoting
    added by ``csv.writer`` is removed, and decoded with ``utf-8-sig`` so the
    leading BOM does not leak into the text.

    Returns:
        A list with the first cell of every non-empty row.
    """
    with open(path, newline="", encoding="utf-8-sig") as f:
        return [row[0] for row in csv.reader(f) if row]


def render_wordcloud(comments, mask_path=MASK_PATH, output_path=OUTPUT_PATH):
    """Segment *comments* with jieba and save a word-cloud image.

    Args:
        comments: Iterable of comment strings.
        mask_path: Image file passed to WordCloud as the ``mask``.
        output_path: Where the rendered PNG is written.
    """
    import imageio
    import jieba
    import wordcloud

    # WordCloud receives whitespace-separated text, so the jieba tokens are
    # joined with spaces.
    words = jieba.lcut("\n".join(comments))
    text = " ".join(words)

    mask = imageio.imread(mask_path)
    # NOTE: per the wordcloud documentation, width/height are ignored when a
    # mask is supplied (the mask's shape is used instead).
    # font_path must point to a font that contains CJK glyphs; "msyh.ttc" has
    # to be resolvable on the machine running the script.
    # To outline the mask, add contour_width=5, contour_color="red".
    cloud = wordcloud.WordCloud(
        width=1000,
        height=700,
        background_color="white",
        font_path="msyh.ttc",
        scale=15,
        stopwords={" "},
        mask=mask,
    )
    cloud.generate(text)
    cloud.to_file(output_path)


def main():
    """Fetch the danmaku, store them in the CSV, then draw the word cloud."""
    save_danmu(extract_danmu(fetch_danmu_xml()))

    # The cloud is built from the whole CSV, i.e. including earlier runs.
    comments = load_danmu()
    if not comments:
        raise SystemExit("No danmaku found; nothing to draw.")
    render_wordcloud(comments)


if __name__ == "__main__":
    main()
原文:https://www.cnblogs.com/python-study-notebook/p/12833634.html