In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from sklearn import preprocessing

# use seaborn plotting defaults
import seaborn as sns; sns.set()


# Load the labelled feature table; the first CSV column is used as the index.
features = pd.read_csv("new_labeled_features.csv", index_col=0)

# Normalizer rescales every *row* to unit norm (L2 by default), so the table
# is transposed first: each original column (the "label" column included)
# becomes one unit-norm row of `nfeatures2`.
tfeatures = features.transpose()
normalizer = preprocessing.Normalizer()
nfeatures2 = normalizer.transform(tfeatures)

# Quick visual check: the first normalised column plotted across all samples.
plt.plot(nfeatures2[0])

In [None]:
# Recursive Feature Elimination
from sklearn.svm import SVC
from sklearn.feature_selection import RFE, RFECV
from sklearn.linear_model import LogisticRegression
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.model_selection import StratifiedKFold

# Feature matrix (first 28 columns) and class labels shared by every
# selector in this cell.
feature_cols = features.iloc[:, :28]
labels = features["label"]

# --- Recursive Feature Elimination: keep 3 attributes ---------------------
# `n_features_to_select` is passed by keyword: scikit-learn >= 1.0 rejects it
# as a positional argument, while the keyword form works on every version.

# Base classifier 1: logistic regression.
model = LogisticRegression()
rfe = RFE(model, n_features_to_select=3)
rfe.fit(feature_cols, labels)
# ranking_ == 1 marks a selected attribute; larger values were dropped earlier.
print("RFE NUMBERS:")
print(rfe.ranking_)

# Base classifier 2: linear-kernel SVM.
model = SVC(kernel="linear")
rfe = RFE(model, n_features_to_select=3)
rfe.fit(feature_cols, labels)
print("RFE NUMBERS:")
print(rfe.ranking_)

# --- RFE with cross-validation --------------------------------------------
# The number of attributes to keep is chosen by 2-fold stratified CV using
# the estimator's default score (no explicit `scoring` is given).

model = SVC(kernel="linear")
rfecv = RFECV(estimator=model, step=1, cv=StratifiedKFold(2))
rfecv.fit(feature_cols, labels)
print("\n\nRFECV NUMBERS:")
print(rfecv.n_features_)
print(rfecv.ranking_)

model = LogisticRegression()
rfecv = RFECV(estimator=model, step=1, cv=StratifiedKFold(2))
rfecv.fit(feature_cols, labels)
print("RFECV NUMBERS:")
print(rfecv.n_features_)
print(rfecv.ranking_)

# --- Univariate selection: 6 best attributes by ANOVA F-score --------------
selK = SelectKBest(f_classif, k=6)
selK.fit(feature_cols, labels)
print("\n\nSELECT K BEST NUMBERS:")
print(selK.get_support())
print(selK.scores_)

In [None]:
# Feature Importance
from sklearn import datasets
from sklearn import metrics
from sklearn.ensemble import ExtraTreesClassifier
# Fit an Extra Trees model to the data (first 28 columns as attributes,
# "label" as the target). The ensemble is randomised, so the seed is pinned
# to make the reported importances reproducible across kernel restarts.
model = ExtraTreesClassifier(random_state=0)
model.fit(features.iloc[:, :28], features["label"])
# Display the relative importance of each attribute.
print(model.feature_importances_)

In [None]:
#print(__doc__)

import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.feature_selection import RFECV
from sklearn.feature_selection import RFE
from sklearn.datasets import make_classification

# Build a synthetic 8-class classification task with 25 features, of which
# 3 are informative and 2 are redundant; the seed makes it reproducible.
X, y = make_classification(n_samples=1000, n_features=25, n_informative=3,
                           n_redundant=2, n_repeated=0, n_classes=8,
                           n_clusters_per_class=1, random_state=0)

# Linear SVM used as the base estimator for both selectors below.
svc = SVC(kernel="linear")

# RFECV picks the number of features by 2-fold stratified cross-validation,
# using the estimator's default score (no explicit `scoring` is given).
rfecv = RFECV(estimator=svc, step=1, cv=StratifiedKFold(2))
rfecv.fit(X, y)

# Plain RFE with the number of features fixed at 3, for comparison.
rfe = RFE(estimator=svc, n_features_to_select=3)
rfe.fit(X, y)


print("Optimal number of features : %d" % rfecv.n_features_)

# Mean CV score per candidate feature count. `grid_scores_` was deprecated in
# scikit-learn 1.0 and removed in 1.2 in favour of `cv_results_`, so prefer
# the new attribute and fall back only on releases that lack it.
cv_scores = (
    rfecv.cv_results_["mean_test_score"]
    if hasattr(rfecv, "cv_results_")
    else rfecv.grid_scores_
)

# Plot number of features VS. cross-validation scores
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score (nb of correct classifications)")
plt.plot(range(1, len(cv_scores) + 1), cv_scores)
print("rfecv ranking:")
print(rfecv.ranking_)
print("rfe ranking:")
print(rfe.ranking_)

In [19]:
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.preprocessing import Normalizer, MinMaxScaler, StandardScaler

# Scalers whose effect on the univariate ranking is being compared.
norm_list = [Normalizer(), StandardScaler(with_mean=False), MinMaxScaler()]
selK = SelectKBest(f_classif, k=4)


def _report_k_best(selector, table):
    """Fit `selector` on `table` and print the selection mask and its scores.

    The first 28 columns of `table` are used as the attributes and the last
    column as the target. Prints the boolean support mask followed by the
    scores of the selected attributes only.
    """
    selector.fit(table[:, :28], table[:, -1])
    chosen = selector.get_support()
    print("\n\nSELECT K BEST NUMBERS:")
    print(chosen)
    print(selector.scores_[chosen])


# Normalizer works row by row, so it is applied to the transposed table and
# the result is transposed back to one row per sample.
tnfeatures = norm_list[0].transform(tfeatures)
nfeatures = tnfeatures.T
_report_k_best(selK, nfeatures)

# The remaining scalers operate column-wise and are fitted on the table as is.
for norm in norm_list[1:]:
    nfeatures = norm.fit_transform(features)
    _report_k_best(selK, nfeatures)

# Scores printed for the four selected attributes in the recorded run:
#[330.9079015 422.54146593 335.18651221 382.78688292]



SELECT K BEST NUMBERS:
[False False False False False False False False False False  True False
 False False False False  True  True  True False False False False False
 False False False False]
[330.9079015  422.54146593 335.18651221 382.78688292]


SELECT K BEST NUMBERS:
[False False False False False False False False False False  True False
 False False False False  True  True  True False False False False False
 False False False False]
[330.9079015  422.54146593 335.18651221 382.78688292]


SELECT K BEST NUMBERS:
[False False False False False False False False False False  True False
 False False False False  True  True  True False False False False False
 False False False False]
[330.9079015  422.54146592 335.18651221 382.78688292]
