In [6]:
# Remove features whose values barely vary (low-variance filter).
from sklearn.feature_selection import VarianceThreshold

# Six samples, three 0/1 features each.
X = [
    [0, 0, 1],
    [0, 1, 0],
    [1, 0, 0],
    [0, 1, 1],
    [0, 1, 0],
    [0, 1, 1],
]

# The cut-off is written as p * (1 - p) with p = 0.8, i.e. the variance of a
# Bernoulli variable; columns whose variance does not exceed it are dropped.
sel = VarianceThreshold(threshold=(0.8 * (1 - 0.8)))
sel.fit(X)
X_new = sel.transform(X)
print(X_new)

[[0 1]
 [1 0]
 [0 0]
 [1 1]
 [1 0]
 [1 1]]


In [7]:
# Univariate feature selection.
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2

# Load the iris dataset and pull out the feature matrix and the class labels.
iris = load_iris()
X = iris.data
y = iris.target
print(X.shape)

# Score every feature against the labels with the chi-squared statistic and
# keep only the two highest-scoring columns.
X_new = SelectKBest(score_func=chi2, k=2).fit_transform(X, y)
print(X_new.shape)


(150, 4)
(150, 2)


In [8]:
# Tree-based feature selection.
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectFromModel

# X, y are the iris features/labels loaded by the univariate-selection cell.
# A fixed random_state makes the randomized forest (and therefore the printed
# importances) reproducible under Restart & Run All.
clf = ExtraTreesClassifier(random_state=0)
clf.fit(X, y)
print(clf.feature_importances_)

# fit() returns the estimator itself, so it must not be bound to X_new.
# Use the fitted forest's importances to actually reduce the feature matrix.
X_new = SelectFromModel(clf, prefit=True).transform(X)

[0.09433547 0.05914429 0.38053572 0.46598453]


In [9]:
# Recursive feature elimination (RFE).
from sklearn.datasets import make_friedman1
from sklearn.feature_selection import RFE
from sklearn.svm import SVR

# Synthetic regression problem: 50 samples, 10 features, fixed seed.
X, y = make_friedman1(n_samples=50, n_features=10, random_state=0)

# A linear-kernel SVR is the estimator RFE refits while pruning features.
estimator = SVR(kernel="linear")

# Eliminate one feature per iteration (step=1) until five remain.
selector = RFE(estimator=estimator, n_features_to_select=5, step=1)
selector.fit(X, y)

# Boolean mask over the 10 input features: True marks a retained feature.
print(selector.support_)


[ True  True  True  True  True False False False False False]


In [10]:
# Model-based feature selection (SelectFromModel with a user-chosen estimator).
from sklearn.svm import LinearSVC
from sklearn.feature_selection import SelectFromModel

# Reload iris, since X and y were rebound to the Friedman data by the RFE cell.
# load_iris is the name imported in the univariate-selection cell.
iris = load_iris()
X = iris.data
y = iris.target
print(X.shape)

# Train an L1-penalised linear SVM; dual=False is passed together with the
# "l1" penalty.
lsvc = LinearSVC(penalty="l1", C=0.01, dual=False)
lsvc.fit(X, y)

# prefit=True: reuse the already-trained estimator instead of refitting it.
model = SelectFromModel(estimator=lsvc, prefit=True)
X_new = model.transform(X)
print(X_new.shape)


(150, 4)
(150, 3)
