======================================================================
另外一篇基于《机器学习实战》的Logistic回归分析的博客请参考:点击阅读,其主要是采用Python代码实现回归模型
还有一篇纯实战案例的博客,请参考:Logistic回归模型案例实战:《机器学习实战》Logistic回归算法(2)
目录:
1、概念
2、简单线性回归(Simple Linear Regression)
3、多元线性回归(Multiple Regression)
4、非线性回归(Logistic Regression)
# coding: utf-8
"""Pearson correlation and R^2 (coefficient of determination) helpers.

Created on 2015-11-08
@author: Administrator
"""
import math

import numpy as np


def computeCorrelation(X, Y):
    """Return the Pearson correlation coefficient of two sequences.

    Args:
        X: Sequence of numbers.
        Y: Sequence of numbers with the same length as ``X``.

    Returns:
        The sum of the products of the deviations from the means, divided by
        the square root of the product of the two sums of squared deviations.

    Raises:
        ValueError: If ``X`` and ``Y`` do not have the same length.
    """
    if len(X) != len(Y):
        raise ValueError("X and Y must have the same length")

    xBar = np.mean(X)
    yBar = np.mean(Y)
    SSR = 0
    varX = 0
    varY = 0
    for xi, yi in zip(X, Y):
        # Numerator: accumulated product of the deviations from the means.
        diffXXBar = xi - xBar
        diffYYBar = yi - yBar
        SSR += diffXXBar * diffYYBar
        # Denominator: accumulated squared deviations of each variable.
        varX += diffXXBar ** 2
        varY += diffYYBar ** 2

    SST = math.sqrt(varX * varY)
    return SSR / SST


def polyfit(x, y, degree):
    """Fit a polynomial with ``np.polyfit`` and compute its R^2.

    Args:
        x: Independent variable values.
        y: Dependent variable values.
        degree: Highest power of the fitted polynomial.

    Returns:
        A dict with two entries: ``'polynomial'`` (the fitted coefficients as
        a list, in the order returned by ``np.polyfit``) and
        ``'determination'`` (regression sum of squares divided by the total
        sum of squares). The dict is also printed.
    """
    # Work on a float array so the arithmetic below never depends on the
    # container type or on integer division.
    y = np.asarray(y, dtype=float)

    results = {}
    # Polynomial coefficients of the least-squares fit.
    coeffs = np.polyfit(x, y, degree)
    results['polynomial'] = coeffs.tolist()

    # p is the fitted polynomial as a callable.
    p = np.poly1d(coeffs)
    yhat = p(x)  # predicted values at the given x
    # np.mean instead of np.sum(y)/len(y): the latter floors the mean for
    # integer input under Python 2.
    ybar = np.mean(y)

    ssreg = np.sum((yhat - ybar) ** 2)
    sstot = np.sum((y - ybar) ** 2)
    results['determination'] = ssreg / sstot
    print(" results : %s" % (results,))
    return results


testX = [1, 3, 8, 7, 9]
testY = [10, 12, 24, 21, 34]

if __name__ == "__main__":
    # Pearson correlation and R^2 of the simple linear regression.
    r = computeCorrelation(testX, testY)
    print("r :  %s" % (r,))
    print("r^2 :  %s" % (str(r ** 2),))
    print(polyfit(testX, testY, 1)["determination"])
# coding: utf8
"""Simple linear regression fitted with ordinary least squares.

Created on 2016-04-24
@author: Gamer Think
"""
import numpy as np

# Number of advertisement plays per week
x = [1, 3, 2, 1, 3]
# Weekly car sales
y = [14, 24, 18, 17, 27]


def fitSLR(x, y):
    """Fit ``y = b0 + b1 * x`` by least squares.

    Prints the denominator and numerator of the slope before returning.

    Args:
        x: Sequence of independent-variable values.
        y: Sequence of dependent-variable values, same length as ``x``.

    Returns:
        A tuple ``(b0, b1)``: intercept and slope.

    Raises:
        ValueError: If the lengths differ, or if the sum of squared
            deviations of ``x`` is zero (the slope is then undefined).
    """
    if len(x) != len(y):
        raise ValueError("x and y must have the same length")

    # The means are loop-invariant; compute them once.
    x_mean = np.mean(x)
    y_mean = np.mean(y)

    numerator = 0
    denominator = 0
    for xi, yi in zip(x, y):
        # Both deviations must be parenthesised before multiplying.
        numerator += (xi - x_mean) * (yi - y_mean)
        denominator += (xi - x_mean) ** 2

    print("denominator: %s" % (denominator,))
    print("numerator: %s" % (numerator,))

    if denominator == 0:
        raise ValueError("x has zero variance; the slope is undefined")

    b1 = numerator / float(denominator)
    b0 = y_mean - b1 * x_mean
    return b0, b1


def predict(b0, b1, x):
    """Return the fitted line's value ``b0 + b1 * x``."""
    return b0 + b1 * x


if __name__ == "__main__":
    b0, b1 = fitSLR(x, y)
    x_test = 6
    print("y_test: %s" % (predict(b0, b1, x_test),))
# coding: utf-8
"""Multiple linear regression with scikit-learn on data read from a CSV file.

Created on 2016-04-24
@author: Gamer Think
"""
from sklearn import linear_model
import numpy as np
from numpy import genfromtxt  # loads delimited text into a numpy array

if __name__ == "__main__":
    datapath = "data.csv"
    # Read the CSV file into a numpy array.
    deliverData = genfromtxt(datapath, delimiter=",")

    print("data: %s" % (deliverData,))

    # Every column but the last is a feature; the last column is the target.
    X = deliverData[:, :-1]
    Y = deliverData[:, -1]

    print("X: %s" % (X,))
    print("Y: %s" % (Y,))

    regr = linear_model.LinearRegression()
    regr.fit(X, Y)

    print("coefficients: %s" % (regr.coef_,))  # one weight per column of X
    print("intercept: %s" % (regr.intercept_,))  # constant term of the model

    # predict() expects a 2-D array of shape (n_samples, n_features), so the
    # single sample is wrapped in an outer list.
    x_pre = [[102, 6]]
    y_pre = regr.predict(x_pre)
    print("Y-Predict: %s" % (y_pre,))
# coding: utf-8
"""Multiple linear regression with scikit-learn on ``dataDumpy.csv``.

NOTE(review): the file name and the five-feature sample below suggest the
extra columns are dummy-encoded categories -- confirm against the data file.

Created on 2016-04-24
@author: Gamer Think
"""
from numpy import genfromtxt
import numpy as np
from sklearn import datasets, linear_model

if __name__ == "__main__":
    dataPath = "dataDumpy.csv"
    deleveryData = genfromtxt(dataPath, delimiter=',')

    print("data:\n%s" % (deleveryData,))

    # Every column but the last is a feature; the last column is the target.
    X = deleveryData[:, :-1]
    Y = deleveryData[:, -1]

    print("X:  %s" % (X,))
    print("Y:  %s" % (Y,))

    regr = linear_model.LinearRegression()
    regr.fit(X, Y)

    print("Coefficients: %s" % (regr.coef_,))  # one weight per column of X
    print("Intercept: %s" % (regr.intercept_,))  # constant term of the model

    # predict() expects a 2-D array of shape (n_samples, n_features), so the
    # single sample is wrapped in an outer list.
    xPred = [[102, 6, 0, 0, 1]]
    yPred = regr.predict(xPred)
    print("predict y :  %s" % (yPred,))
本文所说的非线性回归指逻辑回归(Logistic Regression)。下面的示例实现的是通用的梯度下降算法,其代价函数为平方误差,与逻辑回归中的代价函数并不相同。
# coding: utf-8
"""Batch gradient descent on synthetic, roughly linear data.

Created on 2016-04-24
@author: Gamer Think
"""
import numpy as np
import random


def gradientDescent(X, Y, theta, alpha, m, numIteration, verbose=True):
    """Run batch gradient descent on a squared-error cost.

    Args:
        X: Array whose dot product with ``theta`` gives the estimates of ``Y``.
        Y: Array of target values.
        theta: Initial parameter vector.
        alpha: Step size applied to the gradient on every iteration.
        m: Number of training examples (used to average cost and gradient).
        numIteration: Number of update steps to perform.
        verbose: When true (the default), print the cost of every iteration.

    Returns:
        The parameter vector after ``numIteration`` updates.
    """
    x_trains = X.transpose()
    for i in range(0, numIteration):
        # Estimated targets for the current parameters.
        hypothesis = np.dot(X, theta)
        # Difference between estimates and true values.
        loss = hypothesis - Y
        if verbose:
            # Generic squared-error cost; this is not the cost function used
            # by logistic regression.
            cost = np.sum(loss ** 2) / (2 * m)
            print("Iteration %d | Cost:%f" % (i, cost))
        gradient = np.dot(x_trains, loss) / m
        theta = theta - alpha * gradient
    return theta


def genData(numPoints, bias, variance):
    """Generate sample points and their targets.

    Row ``i`` of ``X`` is ``[1, i]``. The target is
    ``Y[i] = (i + bias) + random.uniform(0, 1) * variance``.

    Args:
        numPoints: Number of samples to generate.
        bias: Constant offset added to every target.
        variance: Scale factor applied to the uniform random noise.

    Returns:
        A tuple ``(X, Y)`` with shapes ``(numPoints, 2)`` and ``(numPoints,)``.
    """
    X = np.zeros(shape=(numPoints, 2))
    Y = np.zeros(shape=numPoints)
    for i in range(0, numPoints):
        X[i][0] = 1  # constant column, multiplies the intercept parameter
        X[i][1] = i
        # Target value: linear trend plus scaled uniform noise.
        Y[i] = (i + bias) + random.uniform(0, 1) * variance
    return X, Y


if __name__ == "__main__":
    X, Y = genData(100, 25, 10)
    m, n = np.shape(X)

    numIterations = 100000
    alpha = 0.0005
    theta = np.ones(n)
    theta = gradientDescent(X, Y, theta, alpha, m, numIterations)
    print("theta:  %s" % (theta,))
原文:http://blog.csdn.net/gamer_gyt/article/details/51232733