首页 > 其他 > 详细

我的代码-models

时间:2018-12-19 10:51:58      阅读:142      评论:0      收藏:0      [点我收藏+]


# coding: utf-8

# In[1]:


import pandas as pd
import numpy as np
from sklearn import tree
from sklearn.svm import SVC
from sklearn.grid_search import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import binarize
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import Normalizer
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score,recall_score,average_precision_score,auc
from imblearn.over_sampling import SMOTE


# In[37]:


# Pre-processing pass: load the raw sampling data, coerce the measurement
# columns to numeric, fill missing values with the column median, normalize
# two column groups and write the result to a new CSV.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")

# Columns at positions 7..24 are converted to numeric; anything that cannot be
# parsed becomes NaN (errors='coerce') so it can be imputed below.
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")

# Median imputation, one column at a time. Series.median() skips NaN, which
# matches taking the median over the non-missing entries only.
for i in range(7, 25):
    col = data.iloc[:, i]
    data.iloc[:, i] = col.fillna(col.median())

# NOTE(review): sklearn's Normalizer rescales each *row* (sample) to unit norm
# across the selected columns; it does not scale each column independently.
# Confirm this is the intended normalization.
nz = Normalizer()
# Assign the transformed array positionally instead of wrapping it in a new
# DataFrame, so the write-back does not depend on index/column alignment.
data.iloc[:, 17:19] = nz.fit_transform(data.iloc[:, 17:19])
data.iloc[:, 7:10] = nz.fit_transform(data.iloc[:, 7:10])

data.to_csv(r"D:\Users\sgg91044\Desktop\impution\AEM214_imputed_normalized.csv")


# In[2]:


# Load the raw sampling data and preview the first rows.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\Copy of sampling.csv")
data.head()


# In[3]:


# Coerce the columns at positions 5..22 to numeric; values that cannot be
# parsed become NaN (errors='coerce').
# NOTE(review): the In[37] cell converts positions 7..24 of this same file;
# confirm which column range is correct.
data.iloc[:, 5:23] = data.iloc[:, 5:23].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")


# In[4]:


# Separate the label column from the feature columns.
Y = data.Target
X = data.drop(columns='Target')


# In[5]:


# Drop columns that are not used as model inputs.
# NOTE(review): later cells drop 'Recipie_Name' (different spelling) from other
# CSV files; confirm the actual header in this file.
X = X.drop(columns=['slotid', 'Recipe_Name', 'defect_count'])


# In[6]:


X


# In[7]:


# Hold out 20% of the rows for testing; the seed is fixed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)


# In[8]:


# Oversample the training split only, so the test split keeps its original
# class distribution.
# `ratio=` and `fit_sample()` were deprecated in imbalanced-learn 0.4 and later
# removed; `sampling_strategy=` and `fit_resample()` are the replacements.
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)


# In[9]:


# Class counts before and after resampling.
print(y_train.value_counts(), np.bincount(y_train_smote))


# In[10]:


from sklearn.ensemble import RandomForestClassifier

# Build the classifier: 100 trees, fixed seed, out-of-bag scoring enabled,
# progress output on, and n_jobs=-1 to use all available cores.
random_forest = RandomForestClassifier(
    n_estimators=100,
    oob_score=True,
    random_state=50,
    n_jobs=-1,
    verbose=1,
)


# In[11]:


# Fit the forest on the SMOTE-resampled training split.
random_forest.fit(x_train_smote, y_train_smote)


# In[ ]:


# Make predictions on the test data.
# predict() returns hard class labels. The metrics below
# (classification_report, f1_score, accuracy_score, recall_score,
# confusion_matrix) compare label vectors and raise on the
# (n_samples, n_classes) probability matrix that predict_proba() returns.
y_pred = random_forest.predict(X_test)


# In[13]:


# Per-class precision / recall / F1 on the held-out split.
print(classification_report(y_pred=y_pred, y_true=y_test))


# In[14]:


f1_score(y_pred=y_pred, y_true=y_test)


# In[15]:


# Accuracy and recall, reported as percentages rounded to two decimals.
print("Accuracy of Random_forest:", round(accuracy_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")
print("Sensitivity of Random_forest:", round(recall_score(y_pred=y_pred, y_true=y_test) * 100, 2), "%")


# In[16]:


print(confusion_matrix(y_pred=y_pred, y_true=y_test))


# In[21]:


# Support-vector classifier with a degree-2 polynomial kernel.
# (The string quotes had been mangled into typographic quotes, which is a
# SyntaxError in Python.)
svc = SVC(kernel='poly', degree=2, gamma=1, coef0=0)


# In[ ]:


# Fit on the SMOTE-resampled training split.
svc.fit(x_train_smote, y_train_smote)


# In[ ]:


from sklearn.neural_network import MLPClassifier

# Multi-layer perceptron with ReLU activations, the Adam solver and
# alpha=0.0001. The classifier is only constructed here, not fitted.
mlp = MLPClassifier(activation='relu', solver='adam', alpha=0.0001)


# In[17]:


# `sklearn.grid_search` was removed from scikit-learn; GridSearchCV lives in
# `sklearn.model_selection`, which this file already uses for train_test_split.
from sklearn.model_selection import GridSearchCV

# One sub-grid per kernel, so each kernel is only combined with the
# hyper-parameters listed alongside it.
tuned_parameters = [
    {'kernel': ['rbf'], 'gamma': [1e-3, 1e-4], 'C': [1, 10, 100, 1000]},
    {'kernel': ['linear'], 'C': [1, 10, 100, 1000]},
    {'kernel': ['poly'], 'degree': [2, 3, 5]},
]

# 3-fold cross-validated search that selects the configuration with the best
# recall on the resampled training data.
clf = GridSearchCV(SVC(), param_grid=tuned_parameters, cv=3, scoring='recall', verbose=True)
clf.fit(x_train_smote, y_train_smote)


# In[18]:


from sklearn.ensemble import RandomForestClassifier

# Experiment on sampling1.csv: load, clean, split, oversample, train.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling1.csv")

# Coerce the columns at positions 7..25 to numeric; unparsable values become NaN.
data.iloc[:, 7:26] = data.iloc[:, 7:26].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")
data.eqpid = data.eqpid.astype("category")

# Label column vs. feature columns; the listed columns are removed from the
# feature set.
Y = data.Target
X = data.drop(columns='Target')
X = X.drop(columns=['eqpid', 'lotid', 'Chamber', 'slotid', 'Step', 'Recipie_Name', 'defect_count'])

# Hold out 20% of the rows for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

# Oversample the training split only. `ratio=` / `fit_sample()` were deprecated
# in imbalanced-learn 0.4 and later removed; `sampling_strategy=` /
# `fit_resample()` are the replacements.
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(y_train.value_counts(), np.bincount(y_train_smote))

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators=100, random_state=50, verbose=1, oob_score=True, n_jobs=-1)
# Train on the resampled training data
random_forest.fit(x_train_smote, y_train_smote)


# In[19]:


# Predict class labels for the held-out rows and print the per-class report.
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))


# In[20]:


# Confusion matrix of true vs. predicted labels.
print(confusion_matrix(y_test, y_pred))


# In[21]:


f1_score(y_test, y_pred)


# In[22]:


# Accuracy and recall as percentages, rounded to two decimals.
accuracy_pct = round(100 * accuracy_score(y_test, y_pred), 2)
print("Accuracy of Random_forest:", accuracy_pct, "%")
sensitivity_pct = round(100 * recall_score(y_test, y_pred), 2)
print("Sensitivity of Random_forest:", sensitivity_pct, "%")


# In[71]:


from sklearn.ensemble import RandomForestClassifier

# Experiment on sampling3.csv: load, clean, split, oversample, train.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling3.csv")

# Coerce the columns at positions 7..24 to numeric; unparsable values become NaN.
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")

# Label column vs. feature columns; the listed columns are removed from the
# feature set.
Y = data.Target
X = data.drop(columns='Target')
X = X.drop(columns=['eqpid', 'lotid', 'Chamber', 'slotid', 'Step', 'Recipie_Name', 'defect_count'])

# Hold out 20% of the rows for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

# Oversample the training split only. `ratio=` / `fit_sample()` were deprecated
# in imbalanced-learn 0.4 and later removed; `sampling_strategy=` /
# `fit_resample()` are the replacements.
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(y_train.value_counts(), np.bincount(y_train_smote))

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators=100, random_state=50, verbose=1, oob_score=True, n_jobs=-1)
# Train on the resampled training data
random_forest.fit(x_train_smote, y_train_smote)


# In[72]:


# Predict class labels for the held-out rows and print the per-class report.
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))


# In[53]:


f1_score(y_test, y_pred)


# In[54]:


# Accuracy and recall as percentages, rounded to two decimals.
accuracy_pct = round(100 * accuracy_score(y_test, y_pred), 2)
print("Accuracy of Random_forest:", accuracy_pct, "%")
sensitivity_pct = round(100 * recall_score(y_test, y_pred), 2)
print("Sensitivity of Random_forest:", sensitivity_pct, "%")


# In[55]:


from sklearn.ensemble import RandomForestClassifier

# Experiment on sampling2.csv: load, clean, split, oversample, train.
data = pd.read_csv(r"D:\Users\sgg91044\Desktop\impution\sampling2.csv")

# Coerce the columns at positions 7..24 to numeric; unparsable values become NaN.
data.iloc[:, 7:25] = data.iloc[:, 7:25].apply(pd.to_numeric, errors='coerce')
data.Target = data.Target.astype("category")

# Label column vs. feature columns; the listed columns are removed from the
# feature set.
Y = data.Target
X = data.drop(columns='Target')
X = X.drop(columns=['eqpid', 'lotid', 'Chamber', 'slotid', 'Step', 'Recipie_Name', 'defect_count'])

# Hold out 20% of the rows for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.2, random_state=0)

# Oversample the training split only. `ratio=` / `fit_sample()` were deprecated
# in imbalanced-learn 0.4 and later removed; `sampling_strategy=` /
# `fit_resample()` are the replacements.
sm = SMOTE(random_state=12, sampling_strategy=1.0)
x_train_smote, y_train_smote = sm.fit_resample(X_train, y_train)
print(y_train.value_counts(), np.bincount(y_train_smote))

# Make the random forest classifier
random_forest = RandomForestClassifier(n_estimators=100, random_state=50, verbose=1, oob_score=True, n_jobs=-1)
# Train on the resampled training data
random_forest.fit(x_train_smote, y_train_smote)


# In[57]:


# Predict class labels for the held-out rows and print the per-class report.
y_pred = random_forest.predict(X_test)
print(classification_report(y_test, y_pred))


# In[58]:


f1_score(y_test, y_pred)


# In[59]:


# Accuracy and recall as percentages, rounded to two decimals.
accuracy_pct = round(100 * accuracy_score(y_test, y_pred), 2)
print("Accuracy of Random_forest:", accuracy_pct, "%")
sensitivity_pct = round(100 * recall_score(y_test, y_pred), 2)
print("Sensitivity of Random_forest:", sensitivity_pct, "%")


# In[ ]:


import flask

 

我的代码-models

原文:https://www.cnblogs.com/aimee0207/p/10141651.html

(0)
(0)
   
举报
评论 一句话评论(0)
关于我们 - 联系我们 - 留言反馈 - 联系我们:wmxa8@hotmail.com
© 2014 bubuko.com 版权所有
打开技术之扣,分享程序人生!