In [106]:
import numpy as np
import pandas as pd
import warnings
import copy
import random
from scipy.special import softmax
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
warnings.filterwarnings("ignore")

### Read In Data

In [2]:
training_set = pd.read_csv("./fashion-mnist_train.csv")
training_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60000 entries, 0 to 59999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 359.3 MB


In [3]:
testing_set = pd.read_csv("./fashion-mnist_test.csv")
testing_set.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Columns: 785 entries, label to pixel784
dtypes: int64(785)
memory usage: 59.9 MB


In [4]:
training_set.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,6,0,0,0,0,0,0,0,5,0,...,0,0,0,30,43,0,0,0,0,0
3,0,0,0,0,1,2,0,0,0,0,...,3,0,0,0,0,1,0,0,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [5]:
testing_set.head()

Unnamed: 0,label,pixel1,pixel2,pixel3,pixel4,pixel5,pixel6,pixel7,pixel8,pixel9,...,pixel775,pixel776,pixel777,pixel778,pixel779,pixel780,pixel781,pixel782,pixel783,pixel784
0,0,0,0,0,0,0,0,0,9,8,...,103,87,56,0,0,0,0,0,0,0
1,1,0,0,0,0,0,0,0,0,0,...,34,0,0,0,0,0,0,0,0,0
2,2,0,0,0,0,0,0,14,53,99,...,0,0,0,0,63,53,31,0,0,0
3,2,0,0,0,0,0,0,0,0,0,...,137,126,140,0,133,224,222,56,0,0
4,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Split into class labels and data points

In [8]:
x_train = training_set.iloc[:, 1:]
y_train = training_set.iloc[:, 0]

In [10]:
x_test = testing_set.iloc[:, 1:]
y_test = testing_set.iloc[:, 0]

### Shuffle Data

In [13]:
np.random.seed(42)

In [14]:
# convert to numpy objects
x_train = x_train.to_numpy()
y_train = y_train.to_numpy()
x_test = x_test.to_numpy()
y_test = y_test.to_numpy()

In [17]:
# shuffle training data
arr = np.arange(len(x_train))
np.random.shuffle(arr)
x_train = x_train[arr]
y_train = y_train[arr]

In [19]:
# shuffle testing data
arr = np.arange(len(x_test))
np.random.shuffle(arr)
x_test = x_test[arr]
y_test = y_test[arr]

### Sample Data

In [20]:
# take the first 10K points as the training data
x_train = x_train[:10000].copy()
y_train = y_train[:10000].copy()

In [21]:
# take the first 5K points as the test data
x_test = x_test[:5000].copy()
x_test = x_test[:5000].copy()

# Multiclass Logistic Regression

In [137]:
def LogReg(x_train, y_train, eta, epsilon):
    # map x data to d+1 dimensions
    x = []
    for i in range(len(x_train)):
        x.append(np.insert(x_train[i], 0, 1))
    x = np.array(x)
    
    # create a one hot encoding of the target labels
    y = np.zeros( (y_train.size, y_train.max() + 1) )
    y[np.arange(y_train.size), y_train] = 1
    
    t = 0
    
    # create initial weights matrix
    # K rows and d+1 cols
    w = np.zeros((10, 785) )
    
    while(True):
        # make a copy of w
        w_old = copy.deepcopy(w)
    
    
        # iterate through dataset in random order
        r = list(range(len(x)))
        random.shuffle(r)
        for i in r:
            
            # get vector of dot products
            prods = np.dot(w, x[i])
            
            # get the softmax for the K dot products
            sm = softmax(prods)
            
            # claculate gradient and update wieghts
            for j in range(len(w)-1):
                grad = (y[i][j] - sm[j]) * x[i]
                new_w = w[j] + eta * grad
                w[j] = new_w
            
        
        # check the break condition
        acc = 0
        for i in range(len(w)-1):
            n = np.linalg.norm(w[j] - w_old[j])
            acc += n
        
        
        if acc <= epsilon:
            break
        else: 
            t += 1
    
    return w

In [138]:
weights = LogReg(x_train, y_train, 1e-6, 1e-1)

In [163]:
def predict(w, x_test):
    # augment test data
    x = []
    for i in range(len(x_test)):
        x.append(np.insert(x_test[i], 0, 1))
    x = np.array(x)
    
    predictions = []
    
    # for all test points
    # compute dot products between weights and data points
    # then compute the softmax and take the argmax
    for i in range(len(x)):
        prods = np.dot(w, x[0])
        sm = softmax(prods)
        index = np.argmax(sm)
        predictions.append(index)
    
    predictions = np.array(predictions)
    print(predictions)
    
    

In [164]:
predict(weights, x_test)

[4 4 4 ... 4 4 4]
