
Credit Scoring Prediction Model (5) -- Logistic Regression Algorithm


Preface

Below, a Logistic regression model is fitted to the data to obtain the prediction results.

Code

import math
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score # data splitting and cross-validation
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# import statsmodels.api as sm
# confusion-matrix and other evaluation metrics
from sklearn import metrics
from sklearn.metrics import roc_curve, auc, roc_auc_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

#df_german=pd.read_excel("german_woe.xlsx")
df_german=pd.read_excel("dataset\german.xls")
#df_german=pd.read_excel("df_after_vif.xlsx")
y=df_german.ix[:,-1]
x=df_german.ix[:,:-1]
#x=df_german.ix[:,"Credit Amount":"Purpose"]
l1 = []
for i in range(1000):
    print('****'*50)
    print('test round', i+1)
    # X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.4, random_state=0)
    # stratified split: 60% of the data for training, 40% held out
    x_train,x_test,y_train,y_test=train_test_split(x,y,stratify=y,train_size=0.6,random_state=i+1)
    # split the held-out 40% again: 25% of it (10% of all data) as test2, the rest (30% of all data) as the check set
    x_test2,x_check,y_test2,y_check=train_test_split(x_test,y_test,train_size=0.25,random_state=i+1)
    
    classifier = LogisticRegression(max_iter=1000)  # larger max_iter so the default solver converges on the unscaled features
    classifier.fit(x_train, y_train)
# predictions = classifier.predict(X_test)

#evaluation
    print("accuracy on the training subset:{:.3f}".format(classifier.score(x_train,y_train)))
    print("accuracy on the check subset:{:.3f}".format(classifier.score(x_check,y_check)))
    l1.append(classifier.score(x_check,y_check))

# index (i+1, i.e. the random_state) that produced the best check-set accuracy
print('max index',l1.index(max(l1))+1)
 
 
# # scorecard scoring formula
# '''
# P0 = 50
# PDO = 10
# theta0 = 1.0/20
# B = PDO/np.log(2)
# A = P0 + B*np.log(theta0)
# '''
# def Score(probability):
#     # natural logarithm (base e)
#     score = A-B*np.log(probability/(1-probability))
#     return score
# # compute scores in batch
# def List_score(pos_probablity_list):
#     list_score=[]
#     for probability in pos_probablity_list:
#         score=Score(probability)
#         list_score.append(score)
#     return list_score

# P0 = 50
# PDO = 10
# theta0 = 1.0/20
# B = PDO/np.log(2)
# A = P0 + B*np.log(theta0)
# print("A:",A)
# print("B:",B)
# list_coef = list(classifier.coef_[0])
# intercept= classifier.intercept_

# # predicted probabilities for all samples in x, for both classes; label 0 = good customer, 1 = bad customer
# probablity_list=classifier.predict_proba(x)
# # probability of being a bad customer for every sample
# pos_probablity_list=[i[1] for i in probablity_list]
# # score for every customer
# list_score=List_score(pos_probablity_list)
# list_predict=classifier.predict(x)
# df_result=pd.DataFrame({"label":y,"predict":list_predict,"pos_probablity":pos_probablity_list,"score":list_score})
 
# # df_result.to_excel("score_proba.xlsx")
# # print the result
# # print(df_result)

# # list of variable names
# list_vNames=df_german.columns
# # drop the last name (the target column)
# list_vNames=list_vNames[0:-1]
# df_coef=pd.DataFrame({"variable_names":list_vNames,"coef":list_coef})

# # df_coef.to_excel("coef.xlsx")
# # print the variable coefficients
# # print(df_coef)


# y_true=y_test
# y_pred=classifier.predict(x_test)
# accuracyScore = accuracy_score(y_true, y_pred)
# print('model accuracy is:',accuracyScore)

# # precision = TP/(TP+FP), i.e. true positives / (true positives + false positives)
# precision=precision_score(y_true, y_pred)
# print('model precision is:',precision)

# # recall (sensitivity) = TP/(TP+FN)
# sensitivity=recall_score(y_true, y_pred)
# print('model sensitivity is:',sensitivity)

# # F1 = 2 * (precision * recall) / (precision + recall)
# # The F1 score combines precision and recall into a single value (their harmonic mean); its best value is 1 and its worst value is 0.
# f1Score=f1_score(y_true, y_pred)
# print("f1_score:",f1Score)
 
# def AUC(y_true, y_scores):
#     auc_value=0
#     # second way to compute AUC: get fpr and tpr from roc_curve, then call auc(fpr, tpr)
#     fpr, tpr, thresholds = metrics.roc_curve(y_true, y_scores, pos_label=1)
#     auc_value= auc(fpr,tpr) ### compute the AUC value
#     #print("fpr:",fpr)
#     #print("tpr:",tpr)
#     #print("thresholds:",thresholds)
#     if auc_value<0.5:
#         auc_value=1-auc_value
#     return auc_value

# def Draw_roc(auc_value):
#     fpr, tpr, thresholds = metrics.roc_curve(y, list_score, pos_label=0)
#     # plot the diagonal reference line
#     plt.plot([0, 1], [0, 1], '--', color=(0.6, 0.6, 0.6), label='Diagonal line')
#     plt.plot(fpr,tpr,label='ROC curve (area = %0.2f)' % auc_value)
#     plt.title('ROC curve')
#     plt.legend(loc="lower right")

# # rate the AUC performance
# def AUC_performance(AUC):
#     if AUC >=0.7:
#         print("good classifier")
#     if 0.7>AUC>0.6:
#         print("not very good classifier")
#     if 0.6>=AUC>0.5:
#         print("useless classifier")
#     if 0.5>=AUC:
#         print("bad classifier,with sorting problems")

# # AUC evaluation (here computed on all samples, using the scorecard scores)
# auc_value=AUC(y, list_score)
# print("AUC:",auc_value)
# # rate the AUC performance
# AUC_performance(auc_value)
# # plot the ROC curve
# Draw_roc(auc_value)

# # print(predictions)
# # plt.show()
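
The commented-out block above maps the model's predicted probability of a bad customer to a scorecard score. A minimal stand-alone sketch of that scaling follows, using the same constants as the commented code (odds of 1:20 map to P0 = 50 points, and every PDO = 10 points the odds of being bad are halved); the helper names score, p_base and p_half below are only for illustration.

import numpy as np

P0 = 50            # base score
PDO = 10           # "points to double the odds": +10 points means the odds of bad are halved
theta0 = 1.0/20    # base odds of bad vs. good
B = PDO/np.log(2)
A = P0 + B*np.log(theta0)

def score(probability_bad):
    # convert a predicted probability of "bad" into a scorecard score
    odds = probability_bad/(1-probability_bad)
    return A - B*np.log(odds)

# sanity checks: odds exactly at theta0 give P0, and halving the odds adds PDO points
p_base = theta0/(1+theta0)        # probability corresponding to odds = theta0
p_half = (theta0/2)/(1+theta0/2)  # probability corresponding to odds = theta0/2
print(round(score(p_base), 2))    # 50.0
print(round(score(p_half), 2))    # 60.0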

Analysis

Omitted for now.


Original: https://www.cnblogs.com/LieDra/p/12018588.html
