# -*- coding: utf-8 -*-
"""
Step01_kNN.py
kNN : classify an unknown point by Euclidean distance
"""

import numpy as np               # arrays and vectorized math
import matplotlib.pyplot as plt

# 1. scatter the known points and the unknown point on the x, y plane
plt.scatter(1.2, 1.1)   # group A
plt.scatter(1.0, 1.0)
plt.scatter(1.8, 0.8)   # group B
plt.scatter(2, 0.9)

plt.scatter(1.6, 0.85, color='r')   # point to classify (group unknown)
plt.show()

# 2. define the data as variables
p1 = [1.2, 1.1]   # group A
p2 = [1.0, 1.0]
p3 = [1.8, 0.8]   # group B
p4 = [2, 0.9]
category = ['A', 'A', 'B', 'B']   # labels of the known group (y variable)
p5 = [1.6, 0.85]                  # point to classify

# function that builds the data set
def data_set():
    # convert to numpy arrays for vectorized operations
    know_group = np.array([p1, p2, p3, p4])   # known group
    not_know_group = np.array(p5)             # unclassified point
    class_category = np.array(category)       # labels
    return know_group, not_know_group, class_category

know_group, not_know_group, class_category = data_set()
print('known group')
"""
[[1.2 1.1]
 [1.  1. ]
 [1.8 0.8]
 [2.  0.9]]
"""
print(know_group)
print('unclassified point')
print(not_know_group)   # [1.6  0.85]

print('labels')
print(class_category)   # ['A' 'A' 'B' 'B']

# Euclidean distance: difference (-) -> square (**) -> sum (sum) -> root (sqrt)
diff = know_group - not_know_group   # 2-d array - 1-d array (broadcasting)
print('diff =\n', diff)
"""
diff =
 [[-0.4   0.25]
 [-0.6   0.15]
 [ 0.2  -0.05]
 [ 0.4   0.05]]
"""

sq_diff = diff ** 2
sq_sum = sq_diff.sum(axis=1)   # sum along each row
print(sq_sum)     # [0.2225 0.3825 0.0425 0.1625]
distance = np.sqrt(sq_sum)
print(distance)   # [0.47169906 0.61846584 0.20615528 0.40311289]
# distance ranks = [3 4 1 2]; with k=3 the nearest labels are B, B, A -> B(2) > A(1)
print(class_category)   # ['A' 'A' 'B' 'B']

def classify(know, not_know, cate, k):
    # 1. Euclidean distance
    diff = know - not_know
    sq_diff = diff ** 2
    sq_sum = sq_diff.sum(axis=1)
    distance = np.sqrt(sq_sum)

    # 2. sort the distances in ascending order -> indices
    sortDist = distance.argsort()   # sort -> index
    # print(sortDist)  # [2 3 0 1]

    # 3. count the labels of the k nearest neighbours
    class_result = {}    # empty dict
    for i in range(k):   # 0 ~ k-1
        key = cate[sortDist[i]]   # i=0 -> 'B'
        class_result[key] = class_result.get(key, 0) + 1
    return class_result


# function call
class_result = classify(know_group, not_know_group, class_category, 3)
print(class_result)   # {'B': 2, 'A': 1}

# majority vote
def class_vote(class_result):
    return max(class_result, key=class_result.get)

vote_result = class_vote(class_result)
print("classification result =", vote_result)   # classification result = B
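For comparison, the same k=3 decision can be reproduced with scikit-learn's KNeighborsClassifier. This is a minimal cross-check sketch, not part of the original script; it assumes scikit-learn is installed and reuses the arrays returned by data_set().

# Cross-check of the hand-rolled kNN with scikit-learn (assumes sklearn is available).
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=3)           # same k=3 as above
knn.fit(know_group, class_category)                 # known points + labels
print(knn.predict(not_know_group.reshape(1, -1)))   # expected: ['B']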
# -*- coding: utf-8 -*-
"""
kNN classifier wrapped in a class
"""

import numpy as np
from Step01_kNN import data_set


know_group, not_know_group, class_category = data_set()

# class = Func1 + Func2 + Func3
class kNNclassify:

    # 1. count the labels of the k nearest neighbours
    def classify(self, know, not_know, cate, k):
        # Euclidean distance
        diff = know - not_know
        sq_diff = diff ** 2
        sq_sum = sq_diff.sum(axis=1)
        distance = np.sqrt(sq_sum)

        # 2. sort the distances in ascending order -> indices
        sortDist = distance.argsort()   # sort -> index
        # print(sortDist)  # [2 3 0 1]

        # 3. count the k nearest labels (k=3)
        self.class_result = {}   # empty dict
        for i in range(k):       # 0 ~ k-1
            key = cate[sortDist[i]]   # i=0 -> 'B'
            self.class_result[key] = self.class_result.get(key, 0) + 1

    # majority vote
    def class_vote(self):
        return max(self.class_result, key=self.class_result.get)

# create a class object (instance)
obj = kNNclassify()

# object member: self.class_result
obj.classify(know_group, not_know_group, class_category, 3)

vote_result = obj.class_vote()
print('kNN classification result =', vote_result)   # kNN classification result = B
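Because classify() stores the neighbour counts on the instance, the same class can be reused for other points or other values of k. A small illustrative sketch; the extra point p6 is hypothetical and not part of the original data.

# p6 is a hypothetical extra point, used only to show re-use of the class.
p6 = np.array([1.1, 1.05])

obj2 = kNNclassify()
obj2.classify(know_group, p6, class_category, 3)   # k=3 again
print(obj2.class_result)   # expected: {'A': 2, 'B': 1}
print(obj2.class_vote())   # expected: A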
# -*- coding: utf-8 -*-
"""
Iris classification - Naive Bayes (NB)
"""
import pandas as pd
from sklearn import model_selection         # train/test split
from sklearn.naive_bayes import GaussianNB

# 1. load the data
iris = pd.read_csv("../data/iris.csv")
print(iris.head())
"""
   Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
"""

# 2. choose the x, y columns
cols = list(iris.columns)
x_cols = cols[:4]    # X: columns 1~4 (features)
y_cols = cols[-1]    # y: column 5 (label)

# 3. train/test split
iris_df = iris
print(iris_df.shape)   # (150, 5)
train_iris, test_iris = model_selection.train_test_split(
    iris_df, test_size=0.3, random_state=123)
print(train_iris.shape)   # (105, 5)
print(test_iris.shape)    # (45, 5)

# 4. fit the model on the train set
obj = GaussianNB()   # object
model = obj.fit(train_iris[x_cols], train_iris[y_cols])

# 5. evaluate the model
pred = model.predict(test_iris[x_cols])   # predicted y
Y = test_iris[y_cols]                     # true y

# confusion matrix
matrix = pd.crosstab(pred, Y)
print(matrix)
"""
Species     setosa  versicolor  virginica
row_0
setosa          18           0          0
versicolor       0          10          2
virginica        0           0         15
"""

# .ix has been removed from pandas; use .iloc for positional indexing
acc = (matrix.iloc[0, 0] + matrix.iloc[1, 1] + matrix.iloc[2, 2]) / len(Y)
print('accuracy =', acc)   # accuracy = 0.9555555555555556
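The accuracy can also be computed with sklearn.metrics instead of summing the crosstab diagonal by hand. A minimal sketch reusing pred and Y from above; note that confusion_matrix puts the true labels on the rows, the transpose of the crosstab layout used here.

# Alternative evaluation with sklearn.metrics (same pred and Y as above).
from sklearn.metrics import accuracy_score, confusion_matrix

print(confusion_matrix(Y, pred))               # rows = true labels, columns = predictions
print('accuracy =', accuracy_score(Y, pred))   # same value as the diagonal sum above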
# -*- coding: utf-8 -*-
"""
SVM Model
"""
import pandas as pd
from sklearn import model_selection   # train/test split
from sklearn import svm               # model

# 1. load the data
iris = pd.read_csv("../data/iris.csv")
print(iris.head())
"""
   Sepal.Length  Sepal.Width  Petal.Length  Petal.Width Species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa
"""

# 2. choose the x, y columns
cols = list(iris.columns)
x_cols = cols[:4]    # X: columns 1~4 (features)
y_cols = cols[-1]    # y: column 5 (label)

# 3. train/test split
iris_df = iris
print(iris_df.shape)   # (150, 5)
train_iris, test_iris = model_selection.train_test_split(
    iris_df, test_size=0.3, random_state=123)
print(train_iris.shape)   # (105, 5)
print(test_iris.shape)    # (45, 5)

# 4. model - SVM
obj = svm.SVC()
model = obj.fit(train_iris[x_cols], train_iris[y_cols])

# 5. evaluate the model
pred = model.predict(test_iris[x_cols])
Y = test_iris[y_cols]

# confusion matrix
matrix = pd.crosstab(pred, Y)
print(matrix)
"""
Species     setosa  versicolor  virginica
row_0
setosa          18           0          0
versicolor       0          10          1
virginica        0           0         16
"""

# .ix has been removed from pandas; use .iloc for positional indexing
acc = (matrix.iloc[0, 0] + matrix.iloc[1, 1] + matrix.iloc[2, 2]) / len(Y)
print('accuracy =', acc)   # accuracy = 0.9777777777777777
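svm.SVC() is used here with its default RBF kernel, whose behaviour depends on C and gamma. A hedged grid-search sketch shows one way those could be tuned on the same split; the parameter values are arbitrary examples, not from the original post, and a scikit-learn version that accepts gamma='scale' is assumed.

# Optional hyperparameter search; the grid values below are illustrative only.
from sklearn.model_selection import GridSearchCV

param_grid = {'C': [0.1, 1, 10], 'gamma': ['scale', 0.1, 1]}
grid = GridSearchCV(svm.SVC(), param_grid, cv=5)
grid.fit(train_iris[x_cols], train_iris[y_cols])
print(grid.best_params_)
print('test accuracy =', grid.score(test_iris[x_cols], test_iris[y_cols]))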
# -*- coding: utf-8 -*-
"""
NB vs SVM
 - data set  : sparse-matrix style term matrix
 - file name : ../data/spam_tran_test.npy
"""
from sklearn.naive_bayes import GaussianNB
from sklearn import svm
import numpy as np
import pandas as pd

# 1. file loading
# allow_pickle=True is required to load object arrays in NumPy >= 1.16.3
X_train, X_test, y_train, y_test = np.load("../data/spam_tran_test.npy",
                                           allow_pickle=True)
print(X_train.shape)   # (3901, 4000)
print(X_test.shape)    # (1673, 4000)
print(type(y_train))   # <class 'list'>
print(len(y_train))    # 3901
print(len(y_test))     # 1673

# list -> numpy array, for vectorized operations
y_train = np.array(y_train)
y_test = np.array(y_test)
print(type(y_train))   # <class 'numpy.ndarray'>

# 2. NB model
obj = GaussianNB()
nb_model = obj.fit(X_train, y_train)

pred = nb_model.predict(X_test)
Y = y_test

matrix = pd.crosstab(pred, Y)
print("nb matrix\n", matrix)
"""
col_0  0(ham)  1(spam)
row_0
0        1264       28
1         167      214
"""
# 1) accuracy (.ix has been removed from pandas; use .iloc)
acc = (matrix.iloc[0, 0] + matrix.iloc[1, 1]) / len(Y)
print("NB acc=", acc)   # NB acc= 0.8834429169157203

# 2) precision: of the messages predicted spam, how many really are spam
precision = matrix.iloc[1, 1] / (matrix.iloc[1, 0] + matrix.iloc[1, 1])
print("precision=", precision)   # precision= 0.5616797900262467

# 3) recall: of the messages that really are spam, how many were predicted spam
recall = matrix.iloc[1, 1] / (matrix.iloc[0, 1] + matrix.iloc[1, 1])
print("recall=", recall)   # recall= 0.8842975206611571

# 4) f1 score: harmonic mean of precision and recall
f1_score = 2 * (precision * recall) / (precision + recall)
print('f1_score=', f1_score)   # f1_score= 0.6869983948635634


# 3. SVM model
svm_obj = svm.SVC(kernel='linear')   # specify the kernel
svm_model = svm_obj.fit(X_train, y_train)

svm_pred = svm_model.predict(X_test)
svm_Y = y_test

svm_matrix = pd.crosstab(svm_pred, svm_Y)
print("svm matrix\n", svm_matrix)
"""
svm matrix
col_0     0    1
row_0
0      1428   36
1         3  206
"""

svm_acc = (svm_matrix.iloc[0, 0] + svm_matrix.iloc[1, 1]) / len(svm_Y)
print("svm acc=", svm_acc)   # svm acc= 0.976688583383144
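The precision, recall, and F1 arithmetic above can be cross-checked with sklearn.metrics. A minimal sketch reusing pred/Y (NB) and svm_pred/svm_Y (SVM) from above; the metrics module is used as a namespace so the f1_score variable defined earlier is not shadowed.

# Cross-check of the hand-computed metrics with sklearn.metrics.
from sklearn import metrics

print(metrics.classification_report(Y, pred))          # per-class precision/recall/F1 for NB
print('svm f1 =', metrics.f1_score(svm_Y, svm_pred))   # F1 for the positive class (spam = 1)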
# -*- coding: utf-8 -*-
"""
Created on Sat Feb 23 15:52:23 2019

@author: 502-03
"""

from sklearn.naive_bayes import GaussianNB
from sklearn import svm
import numpy as np
import pandas as pd

# 1. file loading
# allow_pickle=True is required to load object arrays in NumPy >= 1.16.3
X_train, X_test, y_train, y_test = np.load("../data/sms_spam_data.npy",
                                           allow_pickle=True)
print(X_train.shape)   # (4446, 6000)
print(X_test.shape)    # (1112, 6000)
print(type(y_train))   # <class 'pandas.core.series.Series'>
print(len(y_train))    # 4446
print(len(y_test))     # 1112

# NB model
obj = GaussianNB()
nb_model = obj.fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)
nb_Y = y_test

nb_tab = pd.crosstab(nb_pred, nb_Y)
print("nb_tab=\n", nb_tab)
"""
nb_tab=
type   ham  spam
row_0
ham    812    10
spam   156   134
"""
# .ix has been removed from pandas; use .iloc for positional indexing
nb_acc = (nb_tab.iloc[0, 0] + nb_tab.iloc[1, 1]) / len(nb_Y)
print("nb_acc=", nb_acc)   # nb_acc= 0.8507194244604317


# SVM model
obj = svm.SVC(kernel='linear')
svc_model = obj.fit(X_train, y_train)
svc_pred = svc_model.predict(X_test)
svc_Y = y_test

svc_tab = pd.crosstab(svc_pred, svc_Y)
print("svc_tab=\n", svc_tab)
"""
svc_tab=
type   ham  spam
row_0
ham    964    20
spam     4   124
"""
svc_acc = (svc_tab.iloc[0, 0] + svc_tab.iloc[1, 1]) / len(svc_Y)
print("svc_acc=", svc_acc)   # svc_acc= 0.9784172661870504

# precision: of the messages predicted spam, how many really are spam
precision = svc_tab.iloc[1, 1] / (svc_tab.iloc[1, 0] + svc_tab.iloc[1, 1])
print("precision", precision)   # precision 0.96875

# recall: of the messages that really are spam, how many were predicted spam
recall = svc_tab.iloc[1, 1] / (svc_tab.iloc[0, 1] + svc_tab.iloc[1, 1])
print("recall", recall)   # recall 0.8611111111111112

f1_score = 2 * (precision * recall) / (precision + recall)
print("f1_score", f1_score)   # f1_score 0.911764705882353
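Here the labels are the strings 'ham' and 'spam', so sklearn.metrics needs to be told which one counts as the positive class. A small sketch reusing svc_pred and svc_Y from above; again the metrics namespace is used to avoid shadowing the f1_score variable.

# Same precision/recall/F1 via sklearn.metrics, with 'spam' as the positive class.
from sklearn import metrics

print('precision =', metrics.precision_score(svc_Y, svc_pred, pos_label='spam'))
print('recall    =', metrics.recall_score(svc_Y, svc_pred, pos_label='spam'))
print('f1        =', metrics.f1_score(svc_Y, svc_pred, pos_label='spam'))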
Source: https://www.cnblogs.com/kingboy100/p/10424437.html