In [87]:
import numpy as np
import pandas as pd
import warnings
import copy
import random
from scipy.special import softmax
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings("ignore")

### Read In Data

In [88]:
# Load the training split from CSV and summarise its columns, dtypes and memory use
training_set = pd.read_csv("./fashion-mnist_train.csv")
training_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 359.3 MB


In [89]:
# Load the test split from CSV and summarise its columns, dtypes and memory use
testing_set = pd.read_csv("./fashion-mnist_test.csv")
testing_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 59.9 MB


In [90]:
training_set.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [91]:
testing_set.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,9,8,...,103,87,56,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,34,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,14,53,99,...,0,0,0,0,63,53,31,0,0,0
3,2,0,0,0,0,0,0,0,0,0,...,137,126,140,0,133,224,222,56,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Split into class labels and data points

In [92]:
# Column 0 holds the class label; every remaining column is a pixel feature
x_train = training_set.iloc[:, 1:]
y_train = training_set.iloc[:, 0]

In [93]:
# Same layout as the training frame: label in column 0, pixel features after it
x_test = testing_set.iloc[:, 1:]
y_test = testing_set.iloc[:, 0]

In [94]:
y_test[0]

0

### Shuffle Data

In [95]:
np.random.seed(42)

In [96]:
# switch from pandas objects to plain numpy arrays (features and labels, per split)
x_train, y_train = x_train.to_numpy(), y_train.to_numpy()
x_test, y_test = x_test.to_numpy(), y_test.to_numpy()

In [97]:
# shuffle training data: draw one random row order and apply it to both the
# features and the labels so that each point keeps its own label
arr = np.arange(x_train.shape[0])
np.random.shuffle(arr)
x_train, y_train = x_train[arr], y_train[arr]

In [98]:
# shuffle testing data: draw one random row order and apply it to both the
# features and the labels so that each point keeps its own label
arr = np.arange(x_test.shape[0])
np.random.shuffle(arr)
x_test, y_test = x_test[arr], y_test[arr]

### Sample Data

In [99]:
# take the first 10K points as the training data
# (the rows were shuffled above, so this prefix is a random subsample;
#  .copy() detaches the sample from the full shuffled array)
x_train = x_train[:10000].copy()
y_train = y_train[:10000].copy()

In [100]:
# take the first 5K points as the test data
# (slice the labels as well as the features so x_test and y_test stay the
#  same length and remain aligned row-for-row)
x_test = x_test[:5000].copy()
y_test = y_test[:5000].copy()

# Multiclass Logistic Regression

In [101]:
def LogReg(x_train, y_train, eta, epsilon, max_iter=50, n_classes=None):
    """Fit a multiclass (softmax) logistic regression by stochastic gradient ascent.

    Parameters
    ----------
    x_train : array-like, shape (n, d)
        Training points, one per row.
    y_train : array of int, shape (n,)
        Class labels, used directly as column indices of the one-hot matrix,
        so they must be integers in ``0 .. K-1``.
    eta : float
        Step size of each per-point weight update.
    epsilon : float
        Convergence threshold: training stops once the summed L2 change of
        the trained weight vectors over one full pass is ``<= epsilon``.
    max_iter : int, default 50
        Value of the pass counter after which training stops; at most
        ``max_iter + 1`` passes over the data are made.
    n_classes : int or None, default None
        Number of classes K. When None it is taken as ``y_train.max() + 1``.

    Returns
    -------
    w : numpy.ndarray, shape (K, d + 1)
        One weight vector per class; column 0 is the bias weight.

    Notes
    -----
    Only the first K-1 rows are trained; the last row keeps its random
    initial value and acts as the reference class. Softmax is unchanged when
    the same vector is added to every row, so fixing one row loses nothing.

    All randomness (initial weights and visiting order) comes from
    ``np.random``, so a single ``np.random.seed`` call makes a run repeatable.
    """
    # map x data to d+1 dimensions by prepending a constant 1 (bias feature)
    x = np.insert(x_train, 0, 1, axis=1)

    if n_classes is None:
        n_classes = int(y_train.max()) + 1

    # create a one hot encoding of the target labels
    y = np.zeros((y_train.size, n_classes))
    y[np.arange(y_train.size), y_train] = 1

    # create initial weights matrix: K rows and d+1 cols
    w = np.random.rand(n_classes, x.shape[1])

    t = 0
    while t <= max_iter:
        # keep the weights from the start of this pass for the stopping test
        w_old = w.copy()

        # iterate through dataset in random order
        for i in np.random.permutation(len(x)):
            # softmax over the K dot products between the weights and the point
            sm = softmax(np.dot(w, x[i]))

            # calculate gradient and update weights of the K-1 trained classes;
            # sm is computed once per point, so updating all rows at once is
            # the same as updating them one after another
            w[:-1] += eta * ((y[i][:-1] - sm[:-1])[:, None] * x[i])

        # check the break condition: total movement of the trained weight
        # vectors during this pass, summed over classes
        acc = np.linalg.norm(w[:-1] - w_old[:-1], axis=1).sum()
        if acc <= epsilon:
            break
        t += 1

    return w

In [102]:
# train with step size eta = 1e-6 and convergence threshold epsilon = 1e-1
weights = LogReg(x_train, y_train, 1e-6, 1e-1)

In [103]:
# print the learned weight vector of every class in full (one row of `weights` per class)
for w in weights:
    print(w)

[ 0.72252134  0.9241446   0.03501995  0.01414506  0.18159713  0.68530803
  0.94709296  0.71907356  0.56562028  0.74324109  0.34625626  0.75518116
  0.47828824  0.2468362   0.18741539  0.60331859  0.63686391  0.60659743
  0.25385648  0.29284099  0.35530193  0.32925419  0.3719674   0.04662639
  0.72044848  0.7149477   0.34619272  0.56280621  0.10091594  0.41525657
  0.85745007  0.66120582  0.10873508  0.84260724  0.65566211  0.46410481
  0.71095476  0.29718088  0.69101227  0.72107816  0.92541922  0.20371047
  0.57150383  1.07000314  0.28685584  0.91040339  0.20165212  0.28498611
  0.52297777  0.90292381  0.2193033   0.40757875  0.14066103  0.23893193
  0.51410725  0.32092911  0.21164939  0.39354686  0.70093188  0.12789411
  0.48856453  0.29732202  0.39921835  0.78832361  0.90173938  0.2338399
  0.59867146  0.64579769  0.69043717  0.81161561  0.5296312   0.9120716
  0.93895784  0.42509947  0.41989819  0.67518025  0.38050716  0.57852521
  0.50089167  0.06448819  0.89506039  0.96749809  0.0

In [104]:
def predict(w, x_test):
    """Predict a class index for every row of ``x_test``.

    Each test point gets a leading constant 1 (the bias feature), is scored
    against every row of ``w`` with a dot product, and is assigned the class
    whose softmax probability is largest.

    Parameters
    ----------
    w : array of shape (K, d + 1)
        One weight vector per class, bias weight in column 0.
    x_test : array of shape (m, d)
        Test points, one per row.

    Returns
    -------
    numpy.ndarray of shape (m,)
        Predicted class index of each test point.
    """
    # augment test data with the bias feature
    augmented = np.array(
        [np.insert(x_test[k], 0, 1) for k in range(len(x_test))]
    )

    # per point: dot products with all weight vectors -> softmax -> argmax
    labels = [
        np.argmax(softmax(np.dot(w, augmented[k])))
        for k in range(len(augmented))
    ]

    return np.array(labels)
    
    

In [105]:
y_pred = predict(weights, x_test)

In [106]:
# calculate accuracy: the share of predictions that equal the true label
correct = sum(1 for k in range(len(y_pred)) if y_pred[k] == y_test[k])

acc_score = correct / len(y_pred)

In [107]:
acc_score

0.7326

In [108]:
# computing f1 score
prec = []
rec = []

# first compute the class specific precision:
# of the points predicted as class i, the fraction whose true label is i
for i in range(10):
    correct = 0
    total = 0
    for j in range(len(y_pred)):
        if y_pred[j] == i:
            total += 1
            if y_test[j] == i:
                correct += 1

    # a class that is never predicted has no defined precision; count it as 0
    if total == 0:
        prec.append(0)
    else:
        prec.append(correct / total)


# next compute the class specific recall:
# of the points whose true label is i, the fraction predicted as class i
for i in range(10):
    correct = 0
    total = 0
    for j in range(len(y_pred)):
        if y_test[j] == i:
            total += 1
            if y_pred[j] == i:
                correct += 1

    # a class absent from the test labels has no defined recall; count it as 0
    # (same convention as precision, and avoids a ZeroDivisionError)
    if total == 0:
        rec.append(0)
    else:
        rec.append(correct / total)


In [109]:
# next compute the overall precision and recall
# (unweighted mean over the per-class values, i.e. every class counts equally)
overall_prec = np.mean(prec)
overall_recall = np.mean(rec)

# finally compute the f1 score
# (harmonic mean of the averaged precision and averaged recall; note this is
#  not the same quantity as the mean of the per-class F1 scores)
f1 = (2 * overall_prec * overall_recall) / (overall_prec + overall_recall)

In [110]:
print(f1)

0.7320699383836805
