# Import the third-party libraries used by the analysis.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import pickle
import seaborn as sns
import re
from wordcloud import *

# Equivalent of the IPython magic `%matplotlib inline`. `get_ipython` only
# exists inside IPython/Jupyter, so the guard lets this also run as a script.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
except NameError:
    pass

# Use the SimHei font for sans-serif text and disable the Unicode minus glyph.
mpl.rcParams['font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False

# Load the data set. The raw string keeps the backslashes of the Windows path
# literal instead of relying on `\D` / `\d` being unrecognised escapes.
data = pd.read_csv(r'D:\Dataanalysis\data.csv')

# Show the first 5 rows.
data.head()
# Drop rows that repeat a user_id, then count what is left: the number of users.
print('用户总数:', len(data.drop_duplicates(['user_id'])))
# Output recorded in the original notebook: 用户总数:828934
# Check whether any cell of the frame is null (True if at least one is).
print(data.isnull().any().any())
# Output recorded in the original notebook: False
from pandas import to_datetime
from datetime import datetime

# Take an explicit copy: the column assignments below must modify this frame,
# not a slice of `data` (which triggers pandas' SettingWithCopyWarning).
reg_user = data[['user_id', 'register_time']].copy()

# Parse the registration time, then reduce it to a 'YYYY-MM-DD' day label.
reg_user['register_time'] = to_datetime(reg_user['register_time'], format='%Y/%m/%d')
reg_user['register_time'] = reg_user['register_time'].dt.strftime('%Y-%m-%d')

# Number of registrations per day (Series indexed by the day label).
reg_user = reg_user.groupby(['register_time']).user_id.count()

# Plot the daily registration counts.
fig = plt.figure(figsize=(10, 8))
plt.plot(reg_user)
plt.xticks(rotation=90)
plt.title('用户注册分布图')
plt.show()
# Active users: average online time of at least 30 minutes.
act_user = data[data['avg_online_minutes'] >= 30]

# Paying users: any positive payment amount.
pay_user = data[data['pay_price'] > 0]

# Pay rate = number of paying users / number of active users.
# NOTE(review): `pay_user` is taken from all of `data`, not from `act_user`,
# so paying users who are not "active" are counted too; confirm this is intended.
pay_user_rate = pay_user['user_id'].count() / act_user['user_id'].count()
print('付费率: %.2f' % (pay_user_rate))
# Output recorded in the original notebook: 付费率: 0.31
# Total amount paid by all players.
pay_sum = data['pay_price'].sum()

# ARPU: total payment divided by the number of active users.
ARPU = pay_sum / act_user['user_id'].count()
print('ARPU: %.2f' % (ARPU))
# Output recorded in the original notebook: ARPU: 8.86
5.2.3 ARPPU(平均每付费用户收入)
# ARPPU: total payment divided by the number of paying users.
ARPPU = pay_sum / pay_user['user_id'].count()
print('ARPPU: %.2f' % (ARPPU))
# Output recorded in the original notebook: ARPPU: 28.49
手游市场的平均ARPU在5元左右,该手游的ARPU为8.86元,高于平均水平;付费率达到了31%,ARPPU为28.49元,可以看出该手游的玩家很愿意在游戏中充值。后续需要进一步评估付费用户的价值,有针对性地制定运营策略。
# Per-player columns needed for the stronghold-level payment summary.
user_grade = data[['user_id', 'bd_stronghold_level', 'pay_count', 'pay_price']]

# Pivot by stronghold level: player count, total payment count and total
# payment amount for each level.
table = pd.pivot_table(
    user_grade,
    index=['bd_stronghold_level'],
    values=['user_id', 'pay_count', 'pay_price'],
    aggfunc={'user_id': 'count', 'pay_count': 'sum', 'pay_price': 'sum'},
)
user_pay = pd.DataFrame(table.to_records())

# Number of paying players per level (Series indexed by level value).
user_count = (
    user_grade[user_grade['pay_price'] > 0]
    .groupby('bd_stronghold_level')
    .user_id.count()
)

# Look the count up by level value. Plain assignment would align the
# level-indexed Series against user_pay's 0..n-1 row index, which only matches
# when the levels happen to be exactly 0..n-1. Levels without payers get 0.
user_pay['user_count'] = (
    user_pay['bd_stronghold_level'].map(user_count).fillna(0).astype(int)
)

# Per-level conversion rate, and per-payer average payment count and amount.
user_pay['pay_rate'] = user_pay.user_count / user_pay.user_id
user_pay['user_avgcount'] = user_pay.pay_count / user_pay.user_count
user_pay['user_avgmoney'] = user_pay.pay_price / user_pay.user_count

# Rename by column name (not by position) so each label is guaranteed to land
# on the right column whatever order the pivot produced.
user_pay = user_pay.rename(columns={
    'bd_stronghold_level': '要塞等级',
    'pay_count': '付费次数',
    'pay_price': '付费总额',
    'user_id': '玩家人数',
    'user_count': '付费人数',
    'pay_rate': '付费转化率',
    'user_avgcount': '人均付费次数',
    'user_avgmoney': '人均付费总额',
})

# Fix the column order of the final table.
user_pay = user_pay[['要塞等级', '付费次数', '付费总额', '玩家人数', '付费人数', '付费转化率', '人均付费次数', '人均付费总额']]
user_pay
# Line chart: payment conversion rate per stronghold level.
x = user_pay['要塞等级']
y = user_pay['付费转化率']
fig = plt.figure(figsize=(10, 8))
plt.plot(x, y, ls='-', lw=2)
# Label every tick with its own level value; labelling with 0..n-1 is only
# correct when the levels happen to be exactly 0..n-1.
plt.xticks(x, x)
plt.grid(True)
plt.title('要塞等级和付费转化率的关系')
plt.show()
# Line chart: average payment amount per paying player, by stronghold level.
x = user_pay['要塞等级']
y = user_pay['人均付费总额']
fig = plt.figure(figsize=(10, 8))
plt.plot(x, y, ls='-', lw=2)
# Label every tick with its own level value; labelling with 0..n-1 is only
# correct when the levels happen to be exactly 0..n-1.
plt.xticks(x, x)
plt.grid(True)
plt.title('要塞等级与人均付费金额的关系')
plt.show()
# Line chart: average payment count per paying player, by stronghold level.
x = user_pay['要塞等级']
y = user_pay['人均付费次数']
fig = plt.figure(figsize=(10, 8))
plt.plot(x, y, ls='-', lw=2)
# Label every tick with its own level value; labelling with 0..n-1 is only
# correct when the levels happen to be exactly 0..n-1.
plt.xticks(x, x)
plt.grid(True)
plt.title('要塞等级与人均付费次数的关系')
plt.show()
# Bar chart: number of players at each stronghold level.
x = user_pay['要塞等级']
y = user_pay['玩家人数']
fig = plt.figure(figsize=(10, 8))
plt.bar(x, y)
# Label every tick with its own level value; labelling with 0..n-1 is only
# correct when the levels happen to be exactly 0..n-1.
plt.xticks(x, x)
plt.title('各个要塞等级的玩家人数')
plt.show()
# Split the players at stronghold level 10 or above into two spending groups.
at_least_level_10 = data['bd_stronghold_level'] >= 10

# High spenders: paid 500 or more.
sup_user = data[(data['pay_price'] >= 500) & at_least_level_10]

# Low spenders: paid less than 500.
# NOTE(review): the original filter here was `bd_stronghold_level < 10`, which
# contradicts the stated intent of classifying level-10+ players and compares
# the two groups across different level ranges. Both groups now use the same
# level condition; confirm against the intended analysis.
sub_user = data[(data['pay_price'] < 500) & at_least_level_10]
# Resource-consumption columns, keyed by the label shown on the chart.
resource_columns = {
    '木头': 'wood_reduce_value',
    '石头': 'stone_reduce_value',
    '象牙': 'ivory_reduce_value',
    '肉': 'meat_reduce_value',
    '魔法': 'magic_reduce_value',
}

# Mean consumption of each resource for the two spending groups, rounded to 2 dp.
resource = pd.DataFrame(
    {
        '高氪玩家': [sup_user[col].mean() for col in resource_columns.values()],
        '低氪玩家': [sub_user[col].mean() for col in resource_columns.values()],
    },
    index=list(resource_columns),
).round(2)

# Grouped bar chart on a log-scaled y axis.
resource.plot(kind='bar', figsize=(10, 8), legend=True, logy=True)

# Write each value above its bar: high spenders to the left of the group
# centre, low spenders to the right.
for pos, (high, low) in enumerate(zip(resource['高氪玩家'], resource['低氪玩家'])):
    plt.text(pos, high, high, ha='right', va='bottom')
    plt.text(pos, low, low, ha='left', va='bottom')

plt.title('玩家资源使用量', fontsize=20)
plt.xlabel('资源种类', fontsize=18)
plt.ylabel('资源使用量', fontsize=18)
plt.tick_params(labelsize=14)
plt.show()
# Acceleration-item consumption columns, keyed by the label shown on the chart.
# Column names (including the 'reaserch' spelling) are the data set's own.
acceleration_columns = {
    '通用': 'general_acceleration_reduce_value',
    '建筑': 'building_acceleration_reduce_value',
    '科研': 'reaserch_acceleration_reduce_value',
    '训练': 'training_acceleration_reduce_value',
    '治疗': 'treatment_acceleration_reduce_value',
}

# Mean consumption of each item type for the two spending groups, rounded to 2 dp.
acceleration = pd.DataFrame(
    {
        '高氪玩家': [sup_user[col].mean() for col in acceleration_columns.values()],
        '低氪玩家': [sub_user[col].mean() for col in acceleration_columns.values()],
    },
    index=list(acceleration_columns),
).round(2)

# Grouped bar chart on a log-scaled y axis.
acceleration.plot(kind='bar', figsize=(10, 8), logy=True, legend=True)

# Write each value above its bar: high spenders to the left of the group
# centre, low spenders to the right.
for pos, (high, low) in enumerate(zip(acceleration['高氪玩家'], acceleration['低氪玩家'])):
    plt.text(pos, high, high, ha='right', va='bottom')
    plt.text(pos, low, low, ha='left', va='bottom')

plt.title('玩家加速券使用情况', fontsize=20)
plt.xlabel('加速券类型', fontsize=18)
plt.ylabel('加速券使用量', fontsize=18)
plt.tick_params(labelsize=14)
# The original referenced `plt.show` without calling it, so the figure was
# never explicitly rendered.
plt.show()
# Mean of avg_online_minutes for the two spending groups, rounded to 2 dp.
online_time = pd.DataFrame(
    {
        '高氪玩家': [sup_user['avg_online_minutes'].mean()],
        '低氪玩家': [sub_user['avg_online_minutes'].mean()],
    },
    index=['平均在线时长'],
).round(2)

# Grouped bar chart.
online_time.plot(kind='bar', legend=True, figsize=(10, 8))

# Write each value above its bar: high spenders to the left of the group
# centre, low spenders to the right.
for pos, (high, low) in enumerate(zip(online_time['高氪玩家'], online_time['低氪玩家'])):
    plt.text(pos, high, high, ha='right', va='bottom', fontsize=12)
    plt.text(pos, low, low, ha='left', va='bottom', fontsize=12)

plt.xticks(rotation=0)
plt.tick_params(labelsize=14)
plt.title('玩家平均在线时长', fontsize=20)
plt.show()
# PVP columns, keyed by the label shown on the chart.
pvp_columns = {
    'PVP次数': 'pvp_battle_count',
    '主动发起PVP次数': 'pvp_lanch_count',
    'PVP胜利次数': 'pvp_win_count',
}

# Mean of each PVP metric for the two spending groups, rounded to 2 dp.
# The intermediate dict is deliberately NOT named `data`: the original rebound
# the global `data` (the loaded data set) to this dict, breaking any later use
# of the data set and any re-run of earlier cells.
pvp_means = {
    '高氪玩家': [sup_user[col].mean() for col in pvp_columns.values()],
    '低氪玩家': [sub_user[col].mean() for col in pvp_columns.values()],
}
PVP = pd.DataFrame(pvp_means, index=list(pvp_columns)).round(2)

# Grouped bar chart.
PVP.plot(kind='bar', legend=True, figsize=(10, 8))
plt.title('玩家pvp对局情况', fontsize=20)
plt.tick_params(rotation=0, labelsize=14)
plt.show()
# PVE columns, keyed by the label shown on the chart.
pve_columns = {
    'PVE次数': 'pve_battle_count',
    '主动发起PVE次数': 'pve_lanch_count',
    'PVE胜利次数': 'pve_win_count',
}

# Mean of each PVE metric for the two spending groups, rounded to 2 dp.
pve_means = {
    '高氪玩家': [sup_user[col].mean() for col in pve_columns.values()],
    '低氪玩家': [sub_user[col].mean() for col in pve_columns.values()],
}
PVE = pd.DataFrame(pve_means, index=list(pve_columns)).round(2)

# Grouped bar chart.
PVE.plot(kind='bar', legend=True, figsize=(10, 8))
plt.title('玩家pve对局情况', fontsize=20)
plt.tick_params(rotation=0, labelsize=14)
plt.show()
原文:https://www.cnblogs.com/noahcocy/p/14481943.html