In [1]:
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import confusion_matrix

In [2]:
df_train = pd.read_csv('Dataset/fmnist/fashion-mnist_train.csv')
df_test = pd.read_csv('Dataset/fmnist/fashion-mnist_test.csv')
classes = {0:"T-shirt/top",
1 :"Trouser",
2 :"Pullover",
3 :"Dress",
4 :"Coat",
5 :"Sandal",
6 :"Shirt",
7 :"Sneaker",
8 :"Bag",
9 :"Ankle boot"}

In [3]:
len(classes)

10

In [4]:
train_x = np.array(df_train.iloc[:,1:]).reshape(df_train.shape[0],784)
train_y = np.array(df_train.iloc[:,0])
test_x = np.array(df_test.iloc[:,1:]).reshape(df_test.shape[0],784)
test_y = np.array(df_test.iloc[:,0])

In [5]:
class_wise_data = {}
for c in range(len(classes)):
    class_c_df = df_train.loc[df_train['label'] == c]
    class_wise_data[c] = np.array(class_c_df.iloc[:,1:]).reshape(class_c_df.shape[0],784)

In [6]:
class_wise_data[0].shape

(6000, 784)

In [7]:
class_wise_mean = {}
for c in range(len(classes)):
    class_wise_mean[c] = np.mean(class_wise_data[c], axis=0)

In [8]:
overall_mean = np.mean(train_x, axis=0)

In [9]:
overall_mean.shape

(784,)

In [10]:
class_wise_mean[0].shape

(784,)

In [11]:
class_wise_data[0].shape

(6000, 784)

In [12]:
print(train_x.shape,train_y.shape)
print(test_x.shape,test_y.shape)

(60000, 784) (60000,)
(10000, 784) (10000,)


In [13]:
df_train.value_counts('label')

label
0    6000
1    6000
2    6000
3    6000
4    6000
5    6000
6    6000
7    6000
8    6000
9    6000
dtype: int64

In [14]:
Sk_list = []
for class_c, mean_c in class_wise_mean.items():
    sub = np.subtract(class_wise_data[class_c], mean_c)
    Sk_list.append(np.dot(np.transpose(sub), sub))

In [15]:
Sk_array = np.asarray(Sk_list)

In [16]:
Sk_array.shape

(10, 784, 784)

In [17]:
Sw = np.sum(Sk_array, axis=0)

In [18]:
Sw.shape

(784, 784)

In [19]:
Nk = {}
for class_c in range(len(classes)):
    Nk[class_c] = class_wise_data[class_c].shape[0]

In [20]:
Sb = []
for class_c, mean_c in class_wise_mean.items():
    sub2 = mean_c - overall_mean
    Sb.append(np.multiply(Nk[class_c], np.outer(sub2, sub2.T)))
Sb = np.sum(Sb, axis=0)

In [21]:
Sb.shape

(784, 784)

In [22]:
u,s,v = np.linalg.svd(Sw, full_matrices=True)
Swinv = np.dot(v.transpose(),np.dot(np.diag(s**-1),u.transpose()))
mat = np.dot(Swinv,Sb)
eigen_values, eigen_vectors = np.linalg.eigh(mat)
eiglist = [(eigen_values[i], eigen_vectors[:, i]) for i in range(len(eigen_values))]
eiglist = sorted(eiglist, key=lambda x: x[0], reverse=True)
W = np.array([eiglist[i][1] for i in range(len(classes)-1)])
W = np.asarray(W).T

In [23]:
train_x_projected = np.dot(train_x, W)
test_x_projected = np.dot(test_x, W)

In [24]:
lda = LinearDiscriminantAnalysis()
lda.fit(train_x_projected,train_y)

LinearDiscriminantAnalysis()

In [26]:
pred = lda.predict(test_x_projected)

In [28]:
print('Overall accuracy : ',lda.score(test_x_projected,test_y)*100, '%')

Overall accuracy :  72.92999999999999 %


In [29]:
c_matrix = confusion_matrix(test_y, pred)
class_wise_accuracy = 100*c_matrix.diagonal()/c_matrix.sum(axis=1)

In [30]:
for y in range(len(class_wise_accuracy)):
    print('Class wise accuracy of class ',y,' : ',class_wise_accuracy[y], '%')

Class wise accuracy of class  0  :  73.4 %
Class wise accuracy of class  1  :  91.3 %
Class wise accuracy of class  2  :  58.1 %
Class wise accuracy of class  3  :  80.6 %
Class wise accuracy of class  4  :  59.6 %
Class wise accuracy of class  5  :  77.5 %
Class wise accuracy of class  6  :  36.1 %
Class wise accuracy of class  7  :  76.1 %
Class wise accuracy of class  8  :  86.6 %
Class wise accuracy of class  9  :  90.0 %
