# KNN classification on the iris dataset: split, fit, evaluate, predict.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
from sklearn import datasets
from sklearn.datasets import load_iris  # fix: load_iris() was called but never imported
from sklearn.model_selection import train_test_split

# The iris bundle contains:
#   data   -- shape (150, 4): sepal length/width, petal length/width
#   target -- integer labels 0, 1, 2
#   target_names -- 'setosa', 'versicolor', 'virginica'
iris = load_iris()

# Separate features from labels (fix: curly quotes replaced with ASCII quotes).
x = iris['data']
y = iris['target']
print(x.shape, y.shape)  # (150, 4) (150,)

# 80/20 train/test split (3:1 is also common).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)
# (120, 4) (30, 4) (120,) (30,)

# k = 5 neighbors, Euclidean distance (metric="minkowski" with p=2).
clf = KNeighborsClassifier(n_neighbors=5, p=2, metric="minkowski")
# fit() for KNN essentially just memorizes the training table.
clf.fit(x_train, y_train)

# Evaluate generalization: predict the test labels, then compare against
# the ground truth to compute the accuracy by hand.
y_predict = clf.predict(x_test)  # shape (30,)
acc = sum(y_predict == y_test) / y_test.shape[0]  # notebook cell displayed this value

# score() computes the same accuracy without an explicit predict step.
print(" {:.3f}".format(clf.score(x_test, y_test)))

# Predict the species of a brand-new sample.
x_new = np.array([[4, 2.5, 1.6, 0.3]])
prediction = clf.predict(x_new)
print("鸢尾花种类: {}".format(iris['target_names'][prediction]))
# Compare training vs. test accuracy for several values of k, then plot both curves.
from sklearn import neighbors
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from matplotlib import pyplot as plt

# Use a font that can render the Chinese axis labels
# (fix: curly quotes replaced with ASCII quotes).
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS']
# NOTE(review): wildcard import kept from the original; it is redundant with
# the explicit pyplot import above and should eventually be removed.
from pylab import *

# Iris data, split 80/20 into train and test sets.
iris = load_iris()
x = iris['data']
y = iris['target']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Accuracy on the training / test set for each candidate k.
training_accuracy = []
test_accuracy = []

# Candidate neighbor counts.
n = [1, 3, 5, 7, 9, 11]
for n_neighbors in n:
    # Build and fit a model for this k.
    clf = neighbors.KNeighborsClassifier(n_neighbors=n_neighbors)
    clf.fit(x_train, y_train)
    # Record training accuracy.
    training_accuracy.append(clf.score(x_train, y_train))
    # Record test (generalization) accuracy.
    test_accuracy.append(clf.score(x_test, y_test))

# Plot both accuracy curves against k.
plt.plot(n, training_accuracy, label=u"训练精度值")
plt.plot(n, test_accuracy, label=u"测试精度值")
plt.ylabel(u"预测精度值")
plt.xlabel(u"邻居个数")
plt.legend()
plt.show()
# Visualize the KNN decision boundary (requires contour-plot basics).
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap  # fix: ListedColormap was used but never imported
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier

# Load iris and keep only the first two features so the boundary is 2-D.
iris = load_iris()
X = iris.data
y = iris.target
X = X[:, :2]  # first two columns only

# stratify keeps the class ratios in both splits; random_state fixes the split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42)

# Train the classifier with k = 5.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)


def plot_decision_boundary(clf, axes):
    """Shade the regions predicted by clf over axes = [xmin, xmax, ymin, ymax]."""
    xp = np.linspace(axes[0], axes[1], 300)  # 300 evenly spaced x coordinates
    yp = np.linspace(axes[2], axes[3], 300)  # 300 evenly spaced y coordinates
    x1, y1 = np.meshgrid(xp, yp)             # 300 x 300 grid of points
    # np.c_ stacks the flattened coordinates column-wise into (N, 2) samples.
    xy = np.c_[x1.ravel(), y1.ravel()]
    # Predict every grid point, then reshape back onto the grid
    # (fix: curly quotes in the color strings replaced with ASCII quotes).
    y_pred = clf.predict(xy).reshape(x1.shape)
    custom_cmap = ListedColormap(['#fafab0', '#9898ff', '#a0faa0'])
    plt.contourf(x1, y1, y_pred, alpha=0.3, cmap=custom_cmap)


plot_decision_boundary(knn, axes=[4, 8, 1.5, 5])

# Scatter the three species on top of the shaded regions.
p1 = plt.scatter(X[y == 0, 0], X[y == 0, 1], color='blue')
p2 = plt.scatter(X[y == 1, 0], X[y == 1, 1], color='green')
p3 = plt.scatter(X[y == 2, 0], X[y == 2, 1], color='red')

# Legend maps each point series to its species name.
plt.legend([p1, p2, p3], iris['target_names'], loc='upper right')
plt.show()
# Reference: https://www.cnblogs.com/onenoteone/p/12441726.html
# KNN regression: fit a noisy sine curve with several values of k.
# fix: this cell relied on earlier cells' imports; make it self-contained.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import neighbors

# Generate sample data: 40 sorted random points in [0, 5].
np.random.seed(0)
X = np.sort(5 * np.random.rand(40, 1), axis=0)
T = np.linspace(0, 5, 500)[:, np.newaxis]  # dense grid for the prediction curve
y = np.sin(X).ravel()

# Add noise to every 5th target value (40 / 5 = 8 noisy points).
y[::5] += 1 * (0.5 - np.random.rand(8))

# Train one regressor per k and plot data vs. prediction in stacked subplots
# (fix: curly quotes in the string literals replaced with ASCII quotes).
for i, n_neighbors in enumerate([1, 3, 5]):
    knn = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors)
    y_ = knn.fit(X, y).predict(T)

    plt.subplot(3, 1, i + 1)
    plt.scatter(X, y, color='darkorange', label='data')
    plt.plot(T, y_, color='navy', label='prediction')
    plt.axis('tight')
    plt.legend()
    # Title reports k and the in-sample R^2 score.
    plt.title("KNeighborsRegressor (k = %i,R^2 = %.3f)"
              % (n_neighbors, knn.score(X, y)))

plt.tight_layout()
plt.show()
# Source (原文): https://www.cnblogs.com/17s4029/p/13703139.html