首页 > 其他 > 详细

PCA降维

时间:2019-02-25 22:00:23      阅读:267      评论:0      收藏:0      [点我收藏+]

PCA主成分分析,无监督学习降维方法:

import matplotlib.pyplot as plt
import matplotlib.image as maping
import matplotlib
import numpy as np
import seaborn as sns
import pandas as pds
import plotly.graph_objs as go
import plotly.tools as tls
%matplotlib inline

from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

 使用sklearn方式展现PCA

# Load the Kaggle digit-recognizer training set (one label column
# plus 784 pixel columns, per the head() output below).
train_csv = "train.csv"
train = pds.read_csv(train_csv)
train.head()
 labelpixel0pixel1pixel2pixel3pixel4pixel5pixel6pixel7pixel8pixel9pixel10pixel11pixel12pixel13pixel14pixel15pixel16pixel17pixel18pixel19pixel20pixel21pixel22pixel23pixel24pixel25pixel26pixel27pixel28pixel29pixel30pixel31pixel32pixel33pixel34pixel35pixel36pixel37pixel38pixel39pixel40pixel41pixel42pixel43pixel44pixel45pixel46pixel47pixel48pixel49pixel50pixel51pixel52pixel53pixel54pixel55pixel56pixel57pixel58...pixel724pixel725pixel726pixel727pixel728pixel729pixel730pixel731pixel732pixel733pixel734pixel735pixel736pixel737pixel738pixel739pixel740pixel741pixel742pixel743pixel744pixel745pixel746pixel747pixel748pixel749pixel750pixel751pixel752pixel753pixel754pixel755pixel756pixel757pixel758pixel759pixel760pixel761pixel762pixel763pixel764pixel765pixel766pixel767pixel768pixel769pixel770pixel771pixel772pixel773pixel774pixel775pixel776pixel777pixel778pixel779pixel780pixel781pixel782pixel783
0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Confirm dimensions: 42000 rows x 785 columns (label + 784 pixels).
print(train.shape)
(42000, 785)
# Separate the target labels from the pixel features:
# `target` keeps the digit class, `train` keeps only the 784 pixel columns.
target = train["label"]
train = train.drop(columns=["label"])
train.head()
pixel0pixel1pixel2pixel3pixel4pixel5pixel6pixel7pixel8pixel9pixel10pixel11pixel12pixel13pixel14pixel15pixel16pixel17pixel18pixel19pixel20pixel21pixel22pixel23pixel24pixel25pixel26pixel27pixel28pixel29pixel30pixel31pixel32pixel33pixel34pixel35pixel36pixel37pixel38pixel39pixel40pixel41pixel42pixel43pixel44pixel45pixel46pixel47pixel48pixel49pixel50pixel51pixel52pixel53pixel54pixel55pixel56pixel57pixel58pixel59...pixel724pixel725pixel726pixel727pixel728pixel729pixel730pixel731pixel732pixel733pixel734pixel735pixel736pixel737pixel738pixel739pixel740pixel741pixel742pixel743pixel744pixel745pixel746pixel747pixel748pixel749pixel750pixel751pixel752pixel753pixel754pixel755pixel756pixel757pixel758pixel759pixel760pixel761pixel762pixel763pixel764pixel765pixel766pixel767pixel768pixel769pixel770pixel771pixel772pixel773pixel774pixel775pixel776pixel777pixel778pixel779pixel780pixel781pixel782pixel783
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
3 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
4 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
# Standardize the data (zero mean, unit variance per pixel column).
from sklearn.preprocessing import StandardScaler
# Cast to float64 up front: the raw pixels are int64, and letting
# StandardScaler convert them implicitly triggers the
# DataConversionWarning seen in the original run.
X = train.values.astype(np.float64)
transform = StandardScaler()
X_std = transform.fit_transform(X)

c:\users\lenovo\appdata\local\programs\python\python36\lib\site-packages\sklearn\utils\validation.py:595: DataConversionWarning:

Data with input dtype int64 was converted to float64 by StandardScaler.

c:\users\lenovo\appdata\local\programs\python\python36\lib\site-packages\sklearn\utils\validation.py:595: DataConversionWarning:

Data with input dtype int64 was converted to float64 by StandardScaler.
# Eigenvectors and eigenvalues of the covariance matrix of the
# standardized data.
cov_mat = np.cov(X_std.T)
# Use eigh rather than eig: the covariance matrix is symmetric, and eigh
# guarantees real eigenvalues (eig can return values with tiny imaginary
# parts, which would make the sort below fail).
eig_vals, eig_vecs = np.linalg.eigh(cov_mat)
# Build (|eigenvalue|, eigenvector) pairs.
# The original line was missing the tuple's closing parenthesis.
eig_pairs = [(np.abs(eig_vals[i]), eig_vecs[:, i]) for i in range(len(eig_vals))]
# Sort the pairs by eigenvalue magnitude, largest first.
eig_pairs.sort(key=lambda x: x[0], reverse=True)
# Cumulative explained variance, as a percentage of the total.
tot = sum(eig_vals)
var_exp = [(i / tot) * 100 for i in sorted(eig_vals, reverse=True)]
cum_var_exp = np.cumsum(var_exp)
# Visualize cumulative vs. individual explained variance with Plotly.
# The original passed bare names (spline, goldenrod, linear, black, cell,
# l, b) where string values / dict keys are required, raising NameError.
trace1 = go.Scatter(x=list(range(784)), y=cum_var_exp, mode="lines+markers", name="‘累计解释方差‘", line=dict(shape="spline", color="goldenrod"))
trace2 = go.Scatter(x=list(range(784)), y=var_exp, mode="lines+markers", name="‘单个解释方差‘", line=dict(shape="linear", color="black"))
fig = tls.make_subplots(insets=[{"cell": (1, 1), "l": 0.7, "b": 0.5}], print_grid=True)
fig.append_trace(trace1, 1, 1)
fig.append_trace(trace2, 1, 1)
fig.layout.title = "解释方差"
fig.layout.xaxis = dict(range=[0, 80], title="特征列")
fig.layout.yaxis = dict(range=[0, 60], title="解释变量")
This is the format of your plot grid:
[ (1,1) x1,y1 ]

With insets:
[ x2,y2 ] over [ (1,1) x1,y1 ]
# Fit a 30-component PCA and display each principal component
# ("eigen-digit") as a 28x28 image.
n_components = 30
pca = PCA(n_components=n_components).fit(train.values)
# components_ has shape (n_components, 784); each row is reshaped to
# 28x28 at display time. (The original reshaped here and then
# immediately overwrote the result — dead code, removed.)
eigenvalues = pca.components_
n_row = 4
n_col = 7

# Show the first n_row * n_col = 28 components.
plt.figure(figsize=(12, 13))
for i in list(range(n_row * n_col)):
    plt.subplot(n_row, n_col, i + 1)
    # Colormap name must be a string; the bare name `jet` raised NameError.
    plt.imshow(eigenvalues[i].reshape(28, 28), cmap="jet")
    title_text = "Eigenvalue" + str(i + 1)
    plt.title(title_text, size=6.5)
    plt.xticks(())
    plt.yticks(())
plt.show()

技术分享图片

# Render the first 70 raw digits as 28x28 images for comparison.
plt.figure(figsize=(14, 12))
for digit_num in range(0, 70):
    plt.subplot(7, 10, digit_num + 1)
    # DataFrame/Series.as_matrix() was removed in pandas 1.0;
    # .values is the drop-in replacement.
    grid_data = train.iloc[digit_num].values.reshape(28, 28)
    plt.imshow(grid_data, interpolation="none", cmap="afmhot")
    plt.xticks([])
    plt.yticks([])
plt.tight_layout()

技术分享图片

 

# PCA via scikit-learn on a 6000-sample subset.
del X
# Cast to float64 to avoid StandardScaler's DataConversionWarning
# (same fix as the earlier standardization cell).
X = train[:6000].values.astype(np.float64)
del train
X_std = StandardScaler().fit_transform(X)
pca = PCA(n_components=5)
pca.fit(X_std)
# Project the standardized subset onto the first 5 principal components.
X_5d = pca.transform(X_std)
# Scatter plot of the first two principal components, colored by digit label.
import plotly.offline as py
# `py` already IS plotly.offline, so call init_notebook_mode on it
# directly — py.offline.init_notebook_mode raises AttributeError.
py.init_notebook_mode(connected=True)
Target = target[:6000]
trace0 = go.Scatter(x=X_5d[:, 0], y=X_5d[:, 1], mode="markers", text=Target, showlegend=False, marker=dict(size=8, color=Target, colorscale="Jet", showscale=False, line=dict(width=2, color="rgb(255,255,255)"), opacity=0.8))
data = [trace0]
layout = go.Layout(title="PCA", hovermode="closest", xaxis=dict(title="First Principal Component", ticklen=5, zeroline=False, gridwidth=2,), yaxis=dict(title="Second Principal Component", ticklen=5, gridwidth=2,), showlegend=True)
fig = dict(data=data, layout=layout)
py.iplot(fig, filename="style-scatter")

技术分享图片

# Cluster the 5-D PCA projection into 9 groups and plot the first two
# dimensions colored by cluster assignment.
from sklearn.cluster import KMeans
kmeans = KMeans(n_clusters=9)
X_clustered = kmeans.fit_predict(X_5d)
# The marker outline color must be the string "rgb(255,255,255)";
# the original called an undefined name `rgb(...)` → NameError.
trace_Kmeans = go.Scatter(x=X_5d[:, 0], y=X_5d[:, 1], mode="markers", showlegend=False, marker=dict(size=8, color=X_clustered, colorscale="Portland", showscale=False, line=dict(width=2, color="rgb(255,255,255)")))
layout = go.Layout(title="K-Means", hovermode="closest", xaxis=dict(title="First Principal Component", ticklen=5, zeroline=False, gridwidth=2,), yaxis=dict(title="Second Principal Component", ticklen=5, gridwidth=2,), showlegend=True)
data = [trace_Kmeans]
fig1 = dict(data=data, layout=layout)
py.iplot(fig1, filename="svm")

技术分享图片

 

PCA降维

原文:https://www.cnblogs.com/knight-vien/p/10433683.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!