In [46]:
# Importing the required libraries.
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
%matplotlib inline

In [2]:
# Load scikit-learn's digits dataset and pull out the feature matrix (x)
# and the label vector (y) in one step.
digit = load_digits()
x, y = digit.data, digit.target

In [3]:
# Sanity check: dimensions of the features and of the labels.
(x.shape, y.shape)

((1797, 64), (1797,))

- Split the dataset into training and testing sets.

In [7]:
# Hold out 30% of the samples for testing. A fixed random_state makes the
# partition (and every metric derived from it) reproducible across kernel
# restarts; without it each run draws a different split.
X_train, X_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=42
)

- Use Logistic Regression.

In [10]:
# Create the classifier with the library's default hyper-parameters and
# train it on the training split. The fit call is kept as the last
# expression so the cell still echoes the fitted estimator.
reg = LogisticRegression()
reg.fit(X=X_train, y=y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [11]:
# Predicted label for every sample in the held-out test split.
pred = reg.predict(X=X_test)

In [19]:
# Side-by-side table of predicted and true labels for the test split.
result = pd.DataFrame(dict(Predicted=pred, Actual=y_test))

In [20]:
# Peek at the first five prediction/label pairs.
result.head(n=5)

Unnamed: 0,Predicted,Actual
0,6,6
1,2,2
2,4,4
3,7,7
4,6,6


In [21]:
# Mean accuracy on the test split, expressed as a percentage.
100 * reg.score(X_test, y_test)

95.0

A. Classification accuracy

In [22]:
from sklearn.metrics import classification_report

In [23]:
# Per-class precision, recall, F1 and support on the test split.
# The report is a multi-line string, so it is printed rather than echoed.
print(classification_report(y_true=y_test, y_pred=pred))

             precision    recall  f1-score   support

          0       1.00      0.97      0.98        60
          1       0.93      0.95      0.94        57
          2       0.98      1.00      0.99        54
          3       0.93      0.95      0.94        58
          4       0.98      0.96      0.97        55
          5       0.96      0.91      0.93        47
          6       0.98      1.00      0.99        51
          7       0.95      1.00      0.98        60
          8       0.84      0.88      0.86        49
          9       0.93      0.86      0.89        49

avg / total       0.95      0.95      0.95       540



In [24]:
from sklearn import model_selection

In [25]:
# 10-fold cross-validation splitter shared by the metric cells that follow.
# random_state only has an effect together with shuffle=True: with the
# default shuffle=False older scikit-learn silently ignored it and 0.24+
# raises ValueError. It is dropped here, which keeps the same contiguous,
# deterministic folds. (Use shuffle=True, random_state=7 instead if a
# seeded shuffled split is what is wanted.)
kfold = model_selection.KFold(n_splits=10)

In [27]:
# Accuracy of the classifier on each cross-validation fold of the full dataset.
result_2 = model_selection.cross_val_score(
    estimator=reg, X=x, y=y, scoring='accuracy', cv=kfold
)

In [28]:
# Per-fold accuracy scores, one entry per cross-validation fold.
result_2

array([0.90555556, 0.95      , 0.89444444, 0.91666667, 0.94444444,
       0.97222222, 0.97777778, 0.95530726, 0.8603352 , 0.93854749])

In [29]:
# Mean accuracy across the folds. Using .mean() ties the divisor to the
# actual number of scores instead of a hard-coded 10 that would silently
# give a wrong average if the number of folds is ever changed.
result_2.mean()

0.931530105524519

B. Logarithmic loss

In [30]:
# Negated log loss on each cross-validation fold. scikit-learn negates the
# loss so that "greater is better" holds for every scorer; values closer
# to zero are better.
lts = model_selection.cross_val_score(
    estimator=reg, X=x, y=y, scoring='neg_log_loss', cv=kfold
)

In [31]:
# Per-fold negated log loss, one entry per cross-validation fold.
lts

array([-0.45798961, -0.14435058, -0.61749705, -0.27323142, -0.27594697,
       -0.12815811, -0.11024468, -0.1233365 , -0.62592303, -0.68553964])

In [33]:
# Mean negated log loss across the folds. .mean() follows the actual number
# of scores rather than a hard-coded 10 that must be kept in sync with the
# splitter by hand.
lts.mean()

-0.3442217575018163

C. R2 metrics

In [34]:
# R^2 score on each cross-validation fold.
# NOTE(review): R^2 is a regression metric; here it is computed on predicted
# vs. true class labels treated as numbers, so it measures numeric distance
# between digit labels rather than classification quality. Interpret with care.
r2_met = model_selection.cross_val_score(
    estimator=reg, X=x, y=y, scoring='r2', cv=kfold
)

In [35]:
# Per-fold R^2 scores, one entry per cross-validation fold.
r2_met

array([0.73184699, 0.86942354, 0.81023661, 0.79423609, 0.82434177,
       0.94343263, 0.92524112, 0.90725247, 0.60169024, 0.82555495])

In [36]:
# Mean R^2 across the folds. .mean() divides by the actual number of scores
# instead of a hard-coded 10 duplicated from the splitter configuration.
r2_met.mean()

0.8233256404108781

D. Mean absolute error

In [39]:
# Mean absolute difference between true and predicted labels on the test split.
# NOTE(review): the labels are digit classes, so this is a numeric distance
# between class ids, not a misclassification rate.
mean_absolute_error(y_true=y_test, y_pred=pred)

0.22407407407407406

E. Mean squared error

In [41]:
# Mean squared difference between true and predicted labels on the test split.
# NOTE(review): as with the absolute error, this treats class ids as numbers,
# so confusing 0 with 9 is penalised far more than confusing 0 with 1.
mean_squared_error(y_true=y_test, y_pred=pred)

1.2351851851851852

F. Confusion matrix

In [43]:
# Confusion matrix for the test split: rows are the true labels, columns the
# predicted labels, so off-diagonal entries are the misclassifications.
print(confusion_matrix(y_true=y_test, y_pred=pred))

[[58  0  0  0  1  0  0  0  1  0]
 [ 0 54  0  0  0  0  0  0  3  0]
 [ 0  0 54  0  0  0  0  0  0  0]
 [ 0  0  0 55  0  0  0  1  1  1]
 [ 0  1  0  0 53  0  1  0  0  0]
 [ 0  0  1  0  0 43  0  1  0  2]
 [ 0  0  0  0  0  0 51  0  0  0]
 [ 0  0  0  0  0  0  0 60  0  0]
 [ 0  3  0  1  0  2  0  0 43  0]
 [ 0  0  0  3  0  0  0  1  3 42]]


In [45]:
# Fraction of test samples whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=pred)

0.95

G. Classification report

In [48]:
# Text summary of precision, recall, F1 and support for each digit class,
# computed on the test split.
print(classification_report(y_true=y_test, y_pred=pred))

             precision    recall  f1-score   support

          0       1.00      0.97      0.98        60
          1       0.93      0.95      0.94        57
          2       0.98      1.00      0.99        54
          3       0.93      0.95      0.94        58
          4       0.98      0.96      0.97        55
          5       0.96      0.91      0.93        47
          6       0.98      1.00      0.99        51
          7       0.95      1.00      0.98        60
          8       0.84      0.88      0.86        49
          9       0.93      0.86      0.89        49

avg / total       0.95      0.95      0.95       540

