首页 > 编程语言 > 详细

python——回归案例

时间:2020-02-09 17:41:46      阅读:70      评论:0      收藏:0      [点我收藏+]
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Select the sans-serif font used for plot text. The data's column names are
# Chinese and are used as plot labels below; 'stxiHei' is presumably the
# STXihei CJK font -- confirm it is installed on the target machine.
plt.rcParams['font.sans-serif'] = ['stxiHei']




# Load the car-sales dataset. The path is relative to the working directory
# and uses a Windows-style separator, hence the raw string.
data = pd.read_csv(r'Statistics\汽车销售数据.csv', encoding='utf-8')

# Per the original author's note, only one row is missing '公路里程数',
# so that row is dropped instead of being imputed.
data = data[data['公路里程数'].notna()]

# Keep the target column ('传统汽车销量') followed by the candidate features.
data = data[[
    '传统汽车销量',
    '国内生产总值当季值(亿元)x1',
    '汽油价格(元/吨)x2',
    '人民币贷款基准利率%x3',
    '汽车总产量(万辆)x4',
    '公路里程数',
    '汽车整车股票指数',
    '消费者信心指数',
]]
data.head()  # notebook-style preview; has no effect when run as a script



# Pairwise correlations between every pair of columns.
cormatrix = data.corr()

# Multiply by a 0/1 matrix that is 1 only strictly above the diagonal, so the
# diagonal and the lower triangle become zero and each pair appears once.
n_rows, n_cols = cormatrix.values.shape
strict_upper = np.triu(np.ones((n_rows, n_cols)), k=1)
cormatrix *= strict_upper
cormatrix



# Compute the correlation coefficients.
corr_all = data.corr()

# Build a boolean mask that is True on and above the diagonal so the heatmap
# only draws the lower triangle. The builtin ``bool`` is used because the
# ``np.bool`` alias was removed in NumPy 1.24.
mask = np.zeros_like(corr_all, dtype=bool)
mask[np.triu_indices_from(mask)] = True

# Create the figure and draw the heatmap.
fig, ax = plt.subplots(figsize=(9, 7))
sns.heatmap(corr_all, mask=mask, square=True, linewidths=.5, ax=ax,
            cmap='BuPu')
plt.title('Correlation of Feactures')
plt.show()


# Feature selection: candidate predictors and the target column.
X = data[[
    '国内生产总值当季值(亿元)x1',
    '汽油价格(元/吨)x2',
    '人民币贷款基准利率%x3',
    '公路里程数',
    '汽车整车股票指数',
    '消费者信心指数',
]]
y = data['传统汽车销量']

# Draw one scatter plot per feature against the target. The figure is opened
# inside the loop because plt.show() runs on every iteration, so each feature
# gets its own figure.
n_features = len(X.columns)
for i, column in enumerate(X.columns):
    plt.figure()
    plt.scatter(X[column],
                y,
                # Spread the features across the tab10 colormap.
                color=np.array(plt.cm.tab10(i / n_features)),
                label=column)
    plt.legend()
    plt.show()


# Re-select the model inputs; compared with the scatter-plot feature list,
# '人民币贷款基准利率%x3' is no longer included.
X = data[[
    '国内生产总值当季值(亿元)x1',
    '汽油价格(元/吨)x2',
    '公路里程数',
    '汽车整车股票指数',
    '消费者信心指数',
]]
y = data['传统汽车销量']
X.head()  # notebook-style preview; has no effect when run as a script


# Split into training and test sets; random_state=666 fixes the shuffle so
# later runs are reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# Baseline model: ordinary linear regression on all selected features.
lin_reg0 = LinearRegression()
lin_reg0.fit(X_train, y_train)
y_predict = lin_reg0.predict(X_test)

# Report the fit quality on the held-out test set.
print('r2_score:' + str(r2_score(y_test, y_predict)))
print('MSE:' + str(mean_squared_error(y_test, y_predict)))

def plot_risiduals(model, X_train, X_test, y_train, y_test):
    """Draw a residual plot for *model* on the training and test sets.

    Residuals are computed as ``model.predict(X) - y`` and plotted against
    the true target values, with the training points in red and the test
    points in black. A dashed horizontal line marks zero residual.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_train: Training features passed to ``model.predict``.
        X_test: Test features passed to ``model.predict``.
        y_train: True training targets.
        y_test: True test targets.

    Returns:
        None. The plot is drawn on a newly created figure; ``plt.show()`` is
        not called here.
    """
    fig, ax = plt.subplots(figsize=(7, 4))

    risiduals_train = model.predict(X_train) - y_train
    ax.scatter(y_train, risiduals_train, label='Train', color='r')

    risiduals_test = model.predict(X_test) - y_test
    ax.scatter(y_test, risiduals_test, label='Test', color='k')

    # Reference line at zero residual.
    ax.axhline(y=0.0, c="b", ls="--", lw=2)
    ax.set_title('Risiduals')
    ax.set_xlabel('True')
    ax.set_ylabel('Risiduals')
    # The scatter calls assign labels; draw the legend so they are visible.
    ax.legend()
# Residual plot for the baseline model.
plot_risiduals(lin_reg0, X_train, X_test, y_train, y_test)

# Reduce the feature set: keep three of the columns and refit.
selected_features = [
    '国内生产总值当季值(亿元)x1',
    '汽车整车股票指数',
    '消费者信心指数',
]
X_train = X_train[selected_features]
X_test = X_test[selected_features]
lin_reg1 = LinearRegression()
lin_reg1.fit(X_train, y_train)
y_predict = lin_reg1.predict(X_test)
print('r2_score:' + str(r2_score(y_test, y_predict)))
print('MSE:' + str(mean_squared_error(y_test, y_predict)))
# Output recorded in the original article:
# r2_score:0.9167941097031658
# MSE:3878.5666590026112

# Residual plot for the reduced-feature model.
plot_risiduals(lin_reg1, X_train, X_test, y_train, y_test)


# Prediction: tabulate true values, predictions and their difference for the
# test set. NOTE: here the residual is true - predicted, the opposite sign of
# the predicted - true convention used in plot_risiduals.
True_Predict = {
    'True': y_test,
    'Predict': y_predict,
    'Risiduals': y_test - y_predict,
}
pd.DataFrame(True_Predict)  # notebook-style display; no effect in a script

转自:https://mp.weixin.qq.com/s/o3TIX_7t2nsc6z-J5Lo-XQ

python——回归案例

原文:https://www.cnblogs.com/zym-yc/p/12287550.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!