# Greedy forward feature selection: every round scores each feature group that
# has not been selected yet (stacked together with the groups already kept)
# and keeps the best-scoring one.
# The single-argument print(...) form behaves the same on Python 2 and 3.
print("Performing greedy feature selection...")
score_hist = []  # (score, feature index) of each round's winner, in order
N = 10  # forwarded unchanged to cv_loop -- presumably the CV repeat count; confirm
good_features = set()

# Greedy feature selection loop
# Keep going while fewer than two rounds have run, or while the latest winner
# scored strictly higher than the previous one.  On exit, the last entry of
# score_hist is therefore the addition that failed to improve the score.
while len(score_hist) < 2 or score_hist[-1][0] > score_hist[-2][0]:
    scores = []
    for f in range(len(Xts)):
        if f not in good_features:
            feats = list(good_features) + [f]
            Xt = sparse.hstack([Xts[j] for j in feats]).tocsr()
            score = cv_loop(Xt, y, model, N)
            scores.append((score, f))
            print("Feature: %i Mean AUC: %f" % (f, score))
    # NOTE(review): scores is empty once every index of Xts is already in
    # good_features, so the lookup below raises IndexError in that case.
    # Sort once; tuples order by score first, then by feature index.
    best = sorted(scores)[-1]
    good_features.add(best[1])
    score_hist.append(best)
    print("Current features: %s" % sorted(list(good_features)))
注意,到这里还没有结束:循环退出时,最后加入的那个特征并没有提升得分,因此还需要把它从 good_features 中移除:
# Drop the most recently added feature: the selection loop only stops after an
# addition fails to raise the score, so the final entry of score_hist is that
# non-improving pick.
last_added = score_hist[-1][1]
good_features.remove(last_added)
(以上代码来自 Kaggle)
版权声明:本文为博主原创文章,未经博主允许不得转载。
machine learning in coding(python):使用贪心搜索【进行特征选择】
原文:http://blog.csdn.net/mmc2015/article/details/47426437