## Importing libraries

In [1]:
# Install the packages this notebook imports (no versions are pinned).
%pip install numpy pandas matplotlib scikit-learn

Note: you may need to restart the kernel to use updated packages.


In [24]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss


## Loading and preprocessing the dataset

In [9]:
# Read the CSV into a DataFrame, then separate the label column from the pixel columns.
df = pd.read_csv("data/data.csv")

# Target vector: one label per row of the file.
labels = df["label"].to_numpy()

# Every remaining column holds pixel data; view each row as a 28x28 image.
images = (
    df.drop(columns="label")
    .to_numpy()
    .reshape(-1, 28, 28)
)

In [None]:
# Normalize the images, i.e. bring the pixel values from the 0-255 range into the 0-1 range.
# The scaled array is rebuilt from `df` every time instead of rescaling `images` itself,
# so re-running this cell cannot shrink the pixel values a second time.
images = df.drop(columns="label").to_numpy().reshape(-1, 28, 28) / 255.0

# Sanity check: every scaled pixel must lie inside [0, 1].
assert 0.0 <= images.min() and images.max() <= 1.0, "pixel values fall outside [0, 1]"

images.max(), images.min()
# Expected output: (1.0, 0.0), i.e. the max value is 1 and the min value is 0.

(1.0, 0.0)

In [11]:
# Flatten each 28x28 image into a 784-long feature vector, then do a standard
# 80% train / 20% test split.
images = images.reshape(-1, 28*28)

# Fixed seed so the shuffled split is the same on every run; without it the
# train/test membership (and every score computed afterwards) changes per run.
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    images, labels, shuffle=True, test_size=0.2, random_state=SPLIT_SEED
)
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((48000, 784), (48000,), (12000, 784), (12000,))

In [19]:
# Build a fresh, untrained Logistic Regression classifier.
# scikit-learn applies L2 regularization by default, so no extra step is needed
# to keep the weights from exploding.
MAX_ITER = 1000  # upper bound on the number of solver iterations

model = LogisticRegression(max_iter=MAX_ITER)


In [20]:
# Fit the classifier on the training split (features as X, labels as y)
model.fit(X=X_train, y=y_train)

In [22]:
# Predict a label for every test image, then report the percentage predicted correctly
preds = model.predict(X_test)
accuracy_pct = accuracy_score(y_test, preds) * 100
print(f"Accuracy: {accuracy_pct:.4f}%")

Accuracy: 84.7167%


That's a solid result: the logistic regression model reaches roughly 85% accuracy on the test set with at most 1000 solver iterations. Now let's calculate the cross-entropy loss.

In [25]:
# Get the probability of each class for each test image.
# The columns of the result follow the order of model.classes_.
preds_probs = model.predict_proba(X_test)

# Pass the classifier's label order explicitly: by default log_loss infers the
# labels from y_test, which no longer matches the probability columns if some
# class happens to be absent from the test split.
test_log_loss = log_loss(y_test, preds_probs, labels=model.classes_)
print(f"Cross-Entropy loss: {test_log_loss}")

Cross-Entropy loss: 0.43667873772465926
