In [1]:
import numpy as np
import pandas as pd
import matplotlib as mpl
from matplotlib import pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import check_random_state
from sklearn.metrics import accuracy_score
from joblib import dump, load
np.random.seed(1)


In [None]:
from sklearn.datasets import fetch_openml

# Fetch the 'mnist_784' dataset (version 1) as plain arrays instead of a
# DataFrame, and report how many samples/features and labels were loaded.
X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
print(X.shape)
print(y.shape)

# Shuffle samples and labels with one shared, fixed-seed permutation so that
# every row of X stays aligned with its entry in y.
random_state = check_random_state(0)
permutation = random_state.permutation(len(X))
X, y = X[permutation], y[permutation]

In [None]:
# Hold out 20% of the samples as the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [None]:
train_df = pd.read_csv('emnist-digits-train.csv', header=None)
X_train = train_df.loc[:, 1:].to_numpy()
y_train = train_df.loc[:, 0].to_numpy()
X_train = X_train.reshape(X_train.shape[0],28,28)
X_train = np.fliplr(X_train)
X_train = np.rot90(X_train,3,axes=(1,2))
X_train = X_train.reshape(X_train.shape[0],784)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
print(X_train.shape)
print(y_train.shape)

In [None]:
test_df = pd.read_csv('emnist-digits-test.csv', header=None)
X_test = test_df.loc[:, 1:].to_numpy()
y_test = test_df.loc[:, 0].to_numpy()
X_test = X_test.reshape(X_test.shape[0],28,28)
X_test = np.fliplr(X_test)
X_test = np.rot90(X_test,3,axes=(1,2))
X_test = X_test.reshape(X_test.shape[0],784)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
print(X_test.shape)
print(y_test.shape)

In [None]:
# Keep the first few training rows and their labels for a quick visual check.
cnt = 5
sampleX, sampley = X_train[:cnt], y_train[:cnt]

In [None]:
# Show the selected training digits side by side, labelled with their class.
# X_train is stored flattened as (n_samples, 784), so each row is reshaped to
# 28x28 before plotting; indexing a 2-D array as sampleX[i, :, :] raises
# IndexError. The reshape is a no-op for rows that are already 28x28.
n_samples = sampleX.shape[0]
for i in range(n_samples):
    subplt = plt.subplot(1, n_samples, i + 1)
    subplt.imshow(sampleX[i].reshape(28, 28))
    subplt.set_xticks(())
    subplt.set_yticks(())
    subplt.set_xlabel(sampley[i])
plt.show()

In [None]:
# Learn the per-feature statistics from the training split only, then apply
# that same transform to both splits.
# NOTE: X_train and X_test are overwritten, so running this on data that is
# already standardised re-fits the scaler on the standardised values.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# L1-penalised logistic regression fitted with the SAGA solver. C is set to
# 50 divided by the number of training samples, and tol=0.1 is passed as the
# solver's stopping tolerance.
n_train = X_train.shape[0]
clf = LogisticRegression(
    penalty='l1',
    solver='saga',
    C=50. / n_train,
    tol=0.1,
)
clf.fit(X_train, y_train)

In [None]:
# Percentage of fitted coefficients that are exactly zero.
sparsity = 100 * np.mean(clf.coef_ == 0)
# Score of the fitted classifier on the test split.
score = clf.score(X_test, y_test)
print("Sparsity with L1 penalty: %.2f%%" % sparsity)
print("Test score with L1 penalty: %.4f" % score)

In [None]:
# Draw the coefficient vector of each of the first ten classes as a 28x28
# image on a 2x5 grid.
coef = clf.coef_.copy()
# One symmetric colour range shared by every panel so they are comparable.
scale = np.abs(coef).max()
plt.figure(figsize=(10, 5))
for i in range(10):
    ax = plt.subplot(2, 5, i + 1)
    ax.imshow(
        coef[i].reshape(28, 28),
        interpolation='nearest',
        cmap=plt.cm.RdBu,
        vmin=-scale,
        vmax=scale,
    )
    ax.set_xticks(())
    ax.set_yticks(())
    ax.set_xlabel('Class %i' % i)
plt.suptitle('Classification vector for...')

In [None]:
# Training score: accuracy of the fitted classifier on the data it was
# trained on (shown as the cell output).
y_pred_tuned = clf.predict(X_train)
accuracy_score(y_true=y_train, y_pred=y_pred_tuned)

In [None]:
# Test score: accuracy of the fitted classifier on the test split (shown as
# the cell output).
y_pred_tuned = clf.predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred_tuned)

In [None]:
# Classify the first test sample and compare the prediction with its label.
A = X_test[0, :]
# The estimator expects a 2-D (n_samples, n_features) input, so present the
# single sample as one row.
first_row = A.reshape(1, -1)
predictions = clf.predict_proba(first_row)
y_pred_tuned = clf.predict(first_row)
print(predictions)
print(y_pred_tuned)
print(y_test[0])

In [None]:
# Probe the classifier with a single all-ones 784-feature input.
A = np.ones(shape=(1, 784))
predictions = clf.predict_proba(A)
print(predictions)

In [None]:
from joblib import dump, load

# Persist the fitted classifier, then the scaler's fitted statistics as
# separate .npy files.
dump(clf, "logres.m5")
for filename, stats in (
    ("logres_mean.npy", scaler.mean_),
    ("logres_var.npy", scaler.var_),
    ("logres_scale.npy", scaler.scale_),
):
    np.save(filename, stats)

In [None]:
# Read the persisted classifier back from disk.
joblib_model = load("logres.m5")

# Score the reloaded model on the test split and report it as a percentage.
score = joblib_model.score(X_test, y_test)
score_percent = 100 * score
print("Test score: {0:.2f} %".format(score_percent))

In [None]:
# Take the first test sample as a single-row 2-D array.
# ndarray.reshape returns a new array instead of modifying A in place, so the
# result has to be assigned; otherwise the printed shape stays (784,).
A = X_test[0, :].reshape(1, -1)
print(A.shape)

In [None]:
A=np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 255, 128, 22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 22, 128, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 255, 255, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 65, 0, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 92, 255, 255, 135, 0, 0, 0, 0, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 136, 255, 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 65, 163, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 97, 255, 184, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184, 255, 98, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255, 0, 0, 0, 0, 0]]).reshape(1,-1)
# View the flat 784-value vector A as a stack containing one 28x28 image and
# draw every image in that stack on a single row.
B = A.reshape(1, 28, 28)
print(B.shape)
n_images = B.shape[0]
for idx, image in enumerate(B):
    subplt = plt.subplot(1, n_images, idx + 1)
    subplt.imshow(image)
    subplt.set_xticks(())
    subplt.set_yticks(())
plt.show()

In [None]:
# Reload the persisted classifier and score the hand-entered digit A after
# passing it through the fitted scaler.
joblib_model = load("logres.m5")

probabilities = joblib_model.predict_proba(scaler.transform(A))
print(probabilities)
print(joblib_model.classes_)
# Pair the class labels with the probabilities of the single input row.
C = {
    "class": joblib_model.classes_,
    "prob": probabilities[0],
}
print(C)

In [None]:
# Re-check the shape of the image stack B built in an earlier cell.
print(B.shape)

In [None]:
# Start building a second scaler by hand from the fitted scaler's statistics,
# and print the fitted scaler's per-feature scale for inspection.
# NOTE(review): only mean_ is copied; scale_ and var_ are not set on scaler2
# in this cell -- confirm whether the reconstruction is meant to be completed.
scaler2=StandardScaler()
scaler2.mean_ = scaler.mean_
print(scaler.scale_)

In [None]:
# Display the var_ attribute of the fitted scaler as the cell output.
scaler.var_

In [None]:
# start of new model

In [None]:
X_train = X_train.astype('float32') / 255