In [1]:
%matplotlib notebook

In [27]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn import datasets, svm
from sklearn.preprocessing import StandardScaler 

# #############################################################################
# Import some data to play with

# The iris dataset
iris = datasets.load_iris()

In [28]:
X = iris.data

In [29]:
# Some noisy data not correlated to the target

# Select random seed
np.random.seed(0)

E = np.random.uniform(0, 1, size=(len(iris.data), 50))

# Add the noisy data to the informative features
X = np.hstack((iris.data, E))

# normalize
scaler = StandardScaler()
X= scaler.fit_transform(X)

X.shape

(150, 54)

In [30]:
from sklearn import linear_model, svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
import scipy as sp
from sklearn.metrics import accuracy_score
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis, LinearDiscriminantAnalysis


# Init classifier
classifiers = [linear_model.LogisticRegression(solver='lbfgs',multi_class='multinomial'),svm.SVC(kernel='linear'), KNeighborsClassifier(), QuadraticDiscriminantAnalysis(), LinearDiscriminantAnalysis(solver="lsqr")]
names = ["Logistic Regression", "K Nearest Neighbors", "Support Vector Machines", "Quadratic Discrim Analysis", "Linear Discrim Analysis"]

# Split data -> 2/3 for learning & 1/3 for validation
X_train, X_test, y_train, y_test = train_test_split(X, iris.target, test_size=0.75, random_state=0)

for clf,name in zip(classifiers,names):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test) # predict the label of X_test from X_train and y_train
    error = accuracy_score(y_test, y_pred) # Compute the overall accuracy
    print('Errors for {1}: \t {0:.2f}'.format(error,name)) # \t means "tabular" space, and {0:.2f} 
                                                           # means we print only two first decimal

Errors for Logistic Regression: 	 0.65
Errors for K Nearest Neighbors: 	 0.69
Errors for Support Vector Machines: 	 0.68
Errors for Quadratic Discrim Analysis: 	 0.51
Errors for Linear Discrim Analysis: 	 0.37




In [31]:
neighbors = sp.arange(1,30)
errors_train, errors_validation = [], []
for n_ in neighbors:
    knn = KNeighborsClassifier(n_neighbors=n_)
    knn.fit(X_train, y_train)
    
    y_pred = knn.predict(X_train)
    errors_train.append(accuracy_score(y_train, y_pred))
    
    y_pred = knn.predict(X_test)
    errors_validation.append(accuracy_score(y_test, y_pred))
    
plt.plot(neighbors,errors_train)
plt.plot(neighbors,errors_validation)
plt.legend(["Train", "Test"])
plt.xlabel("Number of neighbors")
plt.ylabel("Overall accuracy")
plt.grid()

<IPython.core.display.Javascript object>

In [13]:
from sklearn.linear_model import LogisticRegressionCV

clf= LogisticRegressionCV( cv=10, penalty='l1', solver='liblinear', multi_class='auto' )
clf.fit(X_train,y_train)

y_pred = clf.predict(X_test) # predict the label of X_test from X_train and y_train
error = accuracy_score(y_test, y_pred) # Compute the overall accuracy
print('Errors for penalized Log Reg: \t {0:.2f}\n'.format(error,name)) 
    
print(clf.coef_)

Errors for penalized Log Reg: 	 0.93

[[ 0.          0.43137808 -1.89687058  0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.        ]
 [ 0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.
   0.          0.          0.