关于决策树的示例

时间：2016-08-09 19:02:01 阅读：98 评论：0 收藏：0 [点我收藏+]

# -*- coding: utf-8 -*-
"""
Created on Tue Aug 09 16:15:03 2016

@author: Administrator
"""

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import train_test_split
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.grid_search import GridSearchCV

if __name__ == '__main__':
    # Demo: grid-search the hyper-parameters of a decision tree on a small
    # toy data set, then print the best cross-validated score, the winning
    # parameter values and a classification report for the held-out split.

    # Alternative data source, kept for reference (disabled): read the
    # 'ad.data' CSV, use its last column as the target and replace '?'
    # entries (with optional leading spaces) by -1.
    #
    # df = pd.read_csv('ad.data', header=None)
    # explanatory_variable_columns = set(df.columns.values)
    # response_variable_column = df[len(df.columns.values)-1]
    # # The last column describes the targets
    # explanatory_variable_columns.remove(len(df.columns.values)-1)
    # y = [1 if e == 'ad.' else 0 for e in response_variable_column]
    # X = df[list(explanatory_variable_columns)]
    # X.replace(to_replace=' *\?', value=-1, regex=True, inplace=True)

    # Toy features: every combination of four binary values (16 rows).
    X = np.array([[0, 0, 0, 0],
                  [0, 0, 0, 1],
                  [0, 0, 1, 0],
                  [0, 0, 1, 1],
                  [0, 1, 0, 0],
                  [0, 1, 0, 1],
                  [0, 1, 1, 0],
                  [0, 1, 1, 1],
                  [1, 0, 0, 0],
                  [1, 0, 0, 1],
                  [1, 0, 1, 0],
                  [1, 0, 1, 1],
                  [1, 1, 0, 0],
                  [1, 1, 0, 1],
                  [1, 1, 1, 0],
                  [1, 1, 1, 1]])
    # Labels must be a 1-D vector (a multi-row array raises an error).
    y = np.array([0, 1, 1, 0, 2, 1, 0, 0, 0, 2, 1, 0, 2, 1, 0, 0])

    X_train, X_test, y_train, y_test = train_test_split(X, y)

    pipeline = Pipeline([
        ('clf', DecisionTreeClassifier(criterion='entropy')),
    ])
    parameters = {
        'clf__max_depth': (150, 155, 160),
        # A split needs at least two samples; a value of 1 is rejected by
        # scikit-learn releases that validate this parameter.
        'clf__min_samples_split': (2, 3),
        'clf__min_samples_leaf': (1, 2, 3),
    }

    # y holds three classes, so the binary 'f1' scorer does not apply; use
    # the support-weighted average of the per-class F1 scores instead.
    grid_search = GridSearchCV(pipeline, parameters, n_jobs=-1, verbose=1,
                               scoring='f1_weighted')
    grid_search.fit(X_train, y_train)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('Best score: %0.3f' % grid_search.best_score_)
    print('Best parameters set:')
    best_parameters = grid_search.best_estimator_.get_params()
    for param_name in sorted(parameters):
        print('\t%s: %r' % (param_name, best_parameters[param_name]))

    predictions = grid_search.predict(X_test)
    print(classification_report(y_test, predictions))

关于决策树的示例

原文：http://www.cnblogs.com/qqhfeng/p/5754174.html

踩

(0)

评论一句话评论（0）

分享档案

更多>

2021年09月23日 (328)
2021年09月24日 (313)
2021年09月17日 (191)
2021年09月15日 (369)
2021年09月16日 (411)
2021年09月13日 (439)
2021年09月11日 (398)
2021年09月12日 (393)
2021年09月10日 (160)
2021年09月08日 (222)