In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import math

# Read the dataset

In [2]:
df = pd.read_excel('Raisin_Dataset.xlsx')
df.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.04,Kecimen
1,75166,406.690687,243.032436,0.801805,78789,0.68413,1121.786,Kecimen
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575,Kecimen
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162,Kecimen
4,79408,352.19077,290.827533,0.564011,81463,0.792772,1073.251,Kecimen


### Check the classes

We have 2 classes.

In [3]:
df["Class"].unique()

array(['Kecimen', 'Besni'], dtype=object)

## Separate two classes

In [4]:
dfs = [x for _, x in df.groupby('Class')]
df0 = dfs[1].reset_index(drop=True)
df1 = dfs[0].drop(columns=["Class"]).reset_index(drop=True)

In [5]:
df0.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter,Class
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.04,Kecimen
1,75166,406.690687,243.032436,0.801805,78789,0.68413,1121.786,Kecimen
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575,Kecimen
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162,Kecimen
4,79408,352.19077,290.827533,0.564011,81463,0.792772,1073.251,Kecimen


### Assign class  number to each class

In [6]:
df0["Class"] = 0
df1["Class"] = 1

# Prepare train and test sets

80\% in train and 20\% in test

In [7]:
train_df0 = df0.iloc[0: int(len(df0)*0.8)]
train_df1 = df1.iloc[0: int(len(df1)*0.8)]

In [8]:
X_train = pd.concat([train_df0.drop(columns=["Class"]), train_df1.drop(columns=["Class"])]).reset_index(drop=True)
X_train

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter
0,87524,442.246011,253.291155,0.819738,90546,0.758651,1184.040
1,75166,406.690687,243.032436,0.801805,78789,0.684130,1121.786
2,90856,442.267048,266.328318,0.798354,93717,0.637613,1208.575
3,45928,286.540559,208.760042,0.684989,47336,0.699599,844.162
4,79408,352.190770,290.827533,0.564011,81463,0.792772,1073.251
...,...,...,...,...,...,...,...
715,56244,398.802452,182.844046,0.888703,58530,0.656366,1008.134
716,142239,614.834478,297.735347,0.874928,148078,0.643516,1553.114
717,78632,407.940329,245.821198,0.798050,79715,0.689011,1068.727
718,93430,467.637119,258.947168,0.832693,98337,0.712988,1258.966


In [9]:
y_train = pd.concat([train_df0["Class"], train_df1["Class"]])
y_train

0      0
1      0
2      0
3      0
4      0
      ..
355    1
356    1
357    1
358    1
359    1
Name: Class, Length: 720, dtype: int64

In [10]:
test_df0 = df0.iloc[int(len(df0)*0.8):]
test_df1 = df1.iloc[int(len(df1)*0.8):]

In [11]:
X_test = pd.concat([test_df0.drop(columns=["Class"]), test_df1.drop(columns=["Class"])]).reset_index(drop=True)
y_test = pd.concat([test_df0["Class"], test_df1["Class"]])

# Standardize the dataset

Before preparing the model, the entire dataset need to be standardized. Only train set mean and standard deviation are used to standardize the dataset to avoid potential bias. Each feature/attribute $(X_i)$ is subtracted by its mean $\mu$ and divided by its standard deviation $\sigma$. 

$\mu = \frac{1}{N} * X_i$\
$X_i = (X_i - \mu) / \sigma$

In [12]:
mean_train = X_train.mean()
std_train = X_train.std()

In [13]:
mean_train

Area               88043.401389
MajorAxisLength      432.215772
MinorAxisLength      254.705362
Eccentricity           0.780984
ConvexArea         91495.638889
Extent                 0.698941
Perimeter           1169.716692
dtype: float64

In [14]:
std_train

Area               38921.340363
MajorAxisLength      117.019632
MinorAxisLength       49.308385
Eccentricity           0.093355
ConvexArea         40763.058741
Extent                 0.054117
Perimeter            274.748746
dtype: float64

In [15]:
X_train_standard = (X_train - mean_train)/ std_train
X_train_standard.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter
0,-0.013345,0.085714,-0.028681,0.415126,-0.023297,1.103335,0.052132
1,-0.330857,-0.218127,-0.236733,0.22303,-0.311719,-0.273694,-0.174453
2,0.072264,0.085894,0.23572,0.186058,0.054494,-1.133249,0.141432
3,-1.082065,-1.244878,-0.931795,-1.028276,-1.083325,0.012163,-1.184918
4,-0.221868,-0.68386,0.732577,-2.324163,-0.246121,1.733843,-0.351105


In [16]:
X_test_standard = (X_test - mean_train)/ std_train
X_test_standard.head()

Unnamed: 0,Area,MajorAxisLength,MinorAxisLength,Eccentricity,ConvexArea,Extent,Perimeter
0,-1.008043,-0.854138,-1.305133,0.413987,-0.972661,-0.694778,-0.879715
1,-1.002134,-1.254607,-0.687215,-1.579456,-0.996408,0.292508,-1.194003
2,-0.600555,-0.577963,-0.525359,-0.025955,-0.588073,-0.796336,-0.633396
3,0.393964,0.243161,0.675094,-0.005231,0.345248,0.517888,0.220261
4,-0.361534,-0.623341,0.174981,-1.078837,-0.396011,1.031061,-0.553115


## Prepare the dataset for the neural network

In [17]:
X_train = X_train_standard.to_numpy()
X_test = X_test_standard.to_numpy()
y_train = y_train.to_numpy().reshape(X_train.shape[0],1)
y_test = y_test.to_numpy().reshape(X_test.shape[0],1)

# Prepare the Neural Network

Steps to follow:

* Define the architecute of neural network: input layer, hidden layer, output layer
* Run the forward pass and get the prediction
* Run the backward pass, calculate the delta of each layer and then gradient
* Use the gradient to update the weights


In [18]:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        
        # Initialize weights with random values
        self.W1 = np.random.randn(self.input_size + 1, self.hidden_size)
        self.W2 = np.random.randn(self.hidden_size + 1, self.output_size)
        
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def sigmoid_derivative(self, x):
        return x * (1 - x)
    
    def forward(self, X): 
        # Add bias node to input
        X = np.hstack((X, np.ones((X.shape[0], 1))))
        
        # Compute activations of hidden layer
        self.hidden_activations = self.sigmoid(np.dot(X, self.W1))
        
        # Add bias node to hidden layer activations
        self.hidden_activations = np.hstack((self.hidden_activations, np.ones((self.hidden_activations.shape[0], 1))))
        
        # Compute output
        output_activations = self.sigmoid(np.dot(self.hidden_activations, self.W2))
        
        return output_activations
    
    def backward(self, X, y, output_activations, learning_rate):
        # Add bias node to input
        X = np.hstack((X, np.ones((X.shape[0], 1))))
        
        # Compute gradients
        output_error = output_activations - y
        output_delta = output_error * self.sigmoid_derivative(output_activations)
        
        hidden_error = np.dot(output_delta, self.W2.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_activations)
        
        # Update weights
        gradient2 = np.dot(self.hidden_activations.T, output_delta)
        gradient1 = np.dot(X.T, hidden_delta[:,1:])
        
        return gradient2, gradient1
    
    def train(self, X, y, epochs, batch_size=10, learning_rate=0.005):
        for epoch in range(epochs):
            # Shuffle data
            indices = np.random.permutation(len(X))
            X = X[indices]
            y = y[indices]
            
            # Split data into mini-batches
            mini_batches = [(X[k:k+batch_size], y[k:k+batch_size]) for k in range(0, len(X), batch_size)]
            loss = 0
            for mini_batch in mini_batches:
                mini_X, mini_y = mini_batch
                #mini_X = np.hstack((mini_X, np.ones((mini_X.shape[0], 1))))
                
                # Forward pass
                output_activations = self.forward(mini_X)
                
                # Backward pass
                gradient2, gradient1 = self.backward(mini_X, mini_y, output_activations, learning_rate)
                
                # Update the weights
                self.W2 -= learning_rate * gradient2
                self.W1 -= learning_rate * gradient1
                
                # Calculate loss using cross-entropy loss function
                batch_loss = -np.mean(mini_y * np.log(output_activations) + (1 - mini_y) * np.log(1 - output_activations))
                loss += batch_loss
            loss = loss/len(mini_batches)
            if epoch % 3 == 0:
                print(f'Epoch: {epoch} Loss: {loss}')
    
    def predict(self, X):
        output_activations = self.forward(X)
        predictions = (np.rint(output_activations)).astype(int)
        return predictions

In [19]:
num_samples, num_features = X_train_standard.shape

# Initialize and train the neural network
input_size = num_features
hidden_size = 10
output_size = 1

epochs = 40
batch_size = 10
learning_rate = 0.001

model = NeuralNetwork(input_size, hidden_size, output_size)
model.train(X_train, y_train, epochs, batch_size, learning_rate)

# Test the model
predictions = model.predict(X_test)

# Calculate accuracy
test_accuracy = np.sum(predictions == y_test) / len(y_test)
print(f"Test Accuracy: {test_accuracy}")


Epoch: 0 Loss: 0.9335641224785982
Epoch: 3 Loss: 0.9098120349739427
Epoch: 6 Loss: 0.8856751214529511
Epoch: 9 Loss: 0.8609679129765463
Epoch: 12 Loss: 0.8359042543287907
Epoch: 15 Loss: 0.8101793902610349
Epoch: 18 Loss: 0.7842258693885243
Epoch: 21 Loss: 0.757828785661919
Epoch: 24 Loss: 0.7314903237300004
Epoch: 27 Loss: 0.7053823091381327
Epoch: 30 Loss: 0.6799279339432879
Epoch: 33 Loss: 0.6552528745799716
Epoch: 36 Loss: 0.6317902012799597
Epoch: 39 Loss: 0.6094257655984205
Test Accuracy: 0.6444444444444445


# Evaluation Metrics

The following evaluation metrics are used to evaluate the LDA model:
* accuracy = (TN + TP)/ N
* sensitivity = TP / (TP + FN)
* specificity = TN / (TN + FP)
* precision = TP / (TP + FP)
* f1-score = $2*\frac{precision * sensitivity} {precision + sensitivity}$
* log loss = $- \frac{1}{N} * \sum_{i}^{N} [y_i*ln(p_i) + (1-y_i)*ln(1-p_i)]$

Here,

TP= True Positive\
TN= True Negative\
FP= False Positive\
FN= False Negative\
$y_i$ = ground truth of instance i\
$p_i$ = probability of instance of i

In [20]:
def compute_confusion_matrix(true, pred):
    '''Computes a confusion matrix using numpy.'''

    K = len(np.unique(true)) # Number of classes 
    result = np.zeros((K, K))

    for i in range(len(true)):
        result[true[i]][pred[i]] += 1

    return result

In [21]:
def accuracy(conf_matrix):
    tn, fp, fn, tp = conf_matrix.ravel()
    return (tp+tn)/(tp+tn+fp+fn) 

In [22]:
def sensitivity(conf_matrix):
    tn, fp, fn, tp = conf_matrix.ravel()
    return tp/(tp+fn)   

In [23]:
def specificity(conf_matrix):
    tn, fp, fn, tp = conf_matrix.ravel()
    return tn/(tn+fp) 

In [24]:
def precision(conf_matrix):
    tn, fp, fn, tp = conf_matrix.ravel()
    return tp/(tp+fp) 

In [25]:
def f1_score(conf_matrix):
    tn, fp, fn, tp = conf_matrix.ravel()
    precision = tp/(tp+fp)
    recall = tp/(tp+fn)
    f1_score = 2 * (precision * recall)/(precision + recall)
    return f1_score

## Evaluation metrics for train set

In [26]:
y_train_pred = model.predict(X_train)

conf_matrix_train = compute_confusion_matrix(y_train.reshape(y_train.shape[0]), y_train_pred.reshape(y_train.shape[0]))
conf_matrix_train

array([[235., 125.],
       [113., 247.]])

In [27]:
accuracy(conf_matrix_train)

0.6694444444444444

In [28]:
sensitivity(conf_matrix_train)

0.6861111111111111

In [29]:
specificity(conf_matrix_train)

0.6527777777777778

In [30]:
f1_score(conf_matrix_train)

0.6748633879781422

## Evaluation metrics for test set

In [31]:
y_test_pred = model.predict(X_test)

conf_matrix_test = compute_confusion_matrix(y_test.reshape(y_test.shape[0]), y_test_pred.reshape(y_test.shape[0]))
conf_matrix_test

array([[58., 32.],
       [32., 58.]])

In [32]:
accuracy(conf_matrix_test)

0.6444444444444445

In [33]:
sensitivity(conf_matrix_test)

0.6444444444444445

In [34]:
specificity(conf_matrix_test)

0.6444444444444445

In [35]:
f1_score(conf_matrix_test)

0.6444444444444445