本节目录
常用函数一:词云图
# -*- coding: utf-8 -*-
"""
Datetime: 2020/06/28
Author: Zhang Yafei
Description: Word cloud plots drawn with the ``wordcloud`` package and matplotlib.
"""
import os
import random

import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
from pylab import mpl
from wordcloud import WordCloud
import pandas as pd

# Module-level figure: the plotting functions below draw on the current
# matplotlib figure, so this sets its size and resolution up front.
plt.figure(figsize=(13, 6), dpi=500)


def grey_color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a random light-grey colour as an HSL string.

    All arguments are ignored; the parameter list looks like a colour
    callback for WordCloud (NOTE(review): not wired up anywhere in this
    script - confirm intended use). Lightness is drawn uniformly from
    60..100 (inclusive) with the global ``random`` module.
    """
    return "hsl(0,0%%,%d%%)" % random.randint(60, 100)


def _script_dir():
    """Return the directory containing this script (resources live next to it)."""
    return os.path.dirname(__file__)


def _load_mask(mask_image):
    """Load ``mask_image`` (relative to the script directory) as a numpy array."""
    mask_path = os.path.join(_script_dir(), mask_image)
    # Use a context manager so the image file is closed as soon as the pixel
    # data has been copied into the array.
    with Image.open(mask_path) as image:
        return np.array(image)


def _font_path():
    """Return the path of the SimSun font file expected next to the script."""
    return os.path.join(_script_dir(), 'simsun.ttc')


def plot_wordcloud(cols, rows, num, title, frequence_dict, mask_image='mask1.jpg'):
    """Draw one word cloud into a cell of a subplot grid on the current figure.

    Args:
        cols: Number of columns in the subplot grid.
        rows: Number of rows in the subplot grid.
        num: 1-based index of the cell to draw into.
        title: Title shown above the subplot.
        frequence_dict: Mapping of word -> frequency.
        mask_image: Mask image file name, resolved relative to this script.
    """
    plt.subplot(rows, cols, num)

    wordcloud = WordCloud(
        font_path=_font_path(),
        mask=_load_mask(mask_image),
        background_color='white',
    ).generate_from_frequencies(frequencies=frequence_dict)

    # Set matplotlib's sans-serif font list to SimHei before drawing the title.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    plt.title(title)
    plt.imshow(wordcloud)
    plt.tight_layout()
    # Hide the x/y axes.
    plt.axis("off")


def plot_wordcloud1(frequence_dict, title=None, mask_image='mask.png', bcolor='white'):
    """Draw a single word cloud, save it under ``res/`` and show it.

    Args:
        frequence_dict: Mapping of word -> frequency.
        title: Optional title; when given it is also used as the output
            file name (``res/<title>.png``), otherwise ``res/词云图.png``.
        mask_image: Mask image file name, resolved relative to this script.
        bcolor: Background colour passed to WordCloud.
    """
    wordcloud = WordCloud(
        scale=6,
        font_path=_font_path(),
        mask=_load_mask(mask_image),
        max_words=60,
        background_color=bcolor,
    ).generate_from_frequencies(frequencies=frequence_dict)

    # Set matplotlib's sans-serif font list to SimHei before drawing the title.
    mpl.rcParams['font.sans-serif'] = ['SimHei']
    if title:
        plt.title(title)
    plt.imshow(wordcloud)
    # Hide the x/y axes.
    plt.axis("off")

    # savefig does not create missing directories, so make sure the output
    # directory exists before writing.
    os.makedirs('res', exist_ok=True)
    if title:
        plt.savefig(os.path.join('res', f'{title}.png'))
    else:
        plt.savefig(os.path.join('res', '词云图.png'))
    plt.show()


def read_data():
    """Read ``word_cloud_data.xlsx`` and return ``{index value: freq}``.

    The first column of the sheet is used as the index and the ``freq``
    column supplies the values.
    """
    df = pd.read_excel('word_cloud_data.xlsx', index_col=0)
    return df['freq'].to_dict()


if __name__ == '__main__':
    data_dict = read_data()
    plot_wordcloud1(frequence_dict=data_dict)
# -*- coding: utf-8 -*-
"""
Datetime: 2020/07/26
Author: Zhang Yafei
Description: Word cloud rendered to HTML with pyecharts.
"""
# NOTE(review): this import path and the constructor/add() call style below
# look like the pyecharts 0.x API - confirm the installed version.
from pyecharts import WordCloud, Page
import pandas as pd


def read_data():
    """Read ``word_cloud_data.xlsx`` and return ``(words, freq)``.

    The first column of the sheet is used as the index; ``words`` is that
    index and ``freq`` is the ``freq`` column.
    """
    df = pd.read_excel('word_cloud_data.xlsx', index_col=0)
    return df.index, df['freq']


def plot_wordcloud():
    """Build the word cloud from ``read_data()`` and write it to an HTML file."""
    words, freq = read_data()
    wordcloud = WordCloud("新冠疫情研究词云图", width=800, height=600)

    # Parameters of add(), per the original author's notes:
    #   name             legend/series name                          str
    #   attr             the words shown in the cloud                list
    #   value            weight of each word (its frequency)         list
    #   shape            outline of the cloud; one of 'circle', 'cardioid',
    #                    'diamond', 'triangle-forward', 'triangle',
    #                    'pentagon', 'star'
    #   word_gap         gap between words, default 20               int
    #   word_size_range  font size range, default [12, 60]           list
    #   rotate_step      rotation step in degrees, default 45
    #
    # attr/value are described as lists, so hand over plain Python lists
    # rather than the pandas Index/Series returned by read_data().
    wordcloud.add(
        "",
        list(words),
        freq.tolist(),
        shape='circle',
        word_size_range=[20, 100],
    )
    wordcloud.render('新冠研究词云图.html')


if __name__ == '__main__':
    plot_wordcloud()
原文:https://www.cnblogs.com/zhangyafei/p/13382788.html