# Classification Using Artificial Neural Networks with Hyperparameter Tuning on Alphabets Data

#Overview

In this assignment, you will be tasked with developing a classification model using Artificial Neural Networks (ANNs) to classify data points from the "Alphabets_data.csv" dataset into predefined categories of alphabets. This exercise aims to deepen your understanding of ANNs and the significant role hyperparameter tuning plays in enhancing model performance

In [2]:
# Data Exploration and Preprocessing

In [5]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
import warnings
warnings.filterwarnings("ignore")

In [6]:
df = pd.read_csv("Alphabets_data.csv")
df.head(5)

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [7]:
df.head()

Unnamed: 0,letter,xbox,ybox,width,height,onpix,xbar,ybar,x2bar,y2bar,xybar,x2ybar,xy2bar,xedge,xedgey,yedge,yedgex
0,T,2,8,3,5,1,8,13,0,6,6,10,8,0,8,0,8
1,I,5,12,3,7,2,10,5,5,4,13,3,9,2,8,4,10
2,D,4,11,6,8,6,10,6,2,6,10,3,7,3,7,3,9
3,N,7,11,6,6,3,5,9,4,6,4,4,10,6,10,2,8
4,G,2,1,3,1,1,8,6,6,6,6,5,9,1,7,5,10


In [8]:
df.shape

(20000, 17)

In [9]:
df.dtypes

letter    object
xbox       int64
ybox       int64
width      int64
height     int64
onpix      int64
xbar       int64
ybar       int64
x2bar      int64
y2bar      int64
xybar      int64
x2ybar     int64
xy2bar     int64
xedge      int64
xedgey     int64
yedge      int64
yedgex     int64
dtype: object

In [10]:
df.isna().sum()

letter    0
xbox      0
ybox      0
width     0
height    0
onpix     0
xbar      0
ybar      0
x2bar     0
y2bar     0
xybar     0
x2ybar    0
xy2bar    0
xedge     0
xedgey    0
yedge     0
yedgex    0
dtype: int64

In [11]:
# Assuming 'df' is your DataFrame
df = df.drop_duplicates().reset_index(drop=True)

In [12]:
df.duplicated().sum()

0

In [13]:
# Summarize key features
print("Number of samples:", len(df))
print("Number of features:", len(df.columns) - 1)  # Excluding the target column
print("Number of classes:", len(df['letter'].unique()))

Number of samples: 18668
Number of features: 16
Number of classes: 26


In [14]:
# Split features and target variable
X = df.iloc[:,1:16]
y = df['letter']

In [15]:
### Normalization
from sklearn.preprocessing import StandardScaler
# Scale the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

### label encoding
from sklearn.preprocessing import LabelEncoder
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

In [16]:
#### Split into training and testing
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.20, random_state=41)
X_train.shape, X_test.shape, y_train.shape, y_test.shape 

((14934, 15), (3734, 15), (14934,), (3734,))

In [17]:
# Model Implementation


In [18]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Initialize the ANN
model = Sequential()

# Add the input layer and the first hidden layer
model.add(Dense(units=16, activation='relu', input_dim=X_train.shape[1]))

# Add the second hidden layer (optional)
model.add(Dense(units=8, activation='relu'))

# Add the output layer
model.add(Dense(units=len(np.unique(y)), activation='softmax'))

# Compile the ANN
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Summary of the model
model.summary()

In [19]:
# Train the model on the training set
history = model.fit(X_train, y_train, epochs=50, batch_size=10, validation_split=0.1)

Epoch 1/50
[1m1344/1344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.1533 - loss: 2.8857 - val_accuracy: 0.5147 - val_loss: 1.6348
Epoch 2/50
[1m1344/1344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.5218 - loss: 1.5773 - val_accuracy: 0.6191 - val_loss: 1.3066
Epoch 3/50
[1m1344/1344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6040 - loss: 1.3182 - val_accuracy: 0.6566 - val_loss: 1.1775
Epoch 4/50
[1m1344/1344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6401 - loss: 1.2070 - val_accuracy: 0.6647 - val_loss: 1.1110
Epoch 5/50
[1m1344/1344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6591 - loss: 1.1350 - val_accuracy: 0.6774 - val_loss: 1.0434
Epoch 6/50
[1m1344/1344[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.6843 - loss: 1.0469 - val_accuracy: 0.6908 - val_loss: 1.0050
Epoch 7/50
[1m1

In [20]:
### evaluate the model
# Evaluate the model on the train set
loss, accuracy = model.evaluate(X_train, y_train)
print(f'train Accuracy: {accuracy * 100:.2f}%')

# Make predictions on the train set
yhat_train = model.predict(X_train)
y_pred_classes = yhat_train.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_train, y_pred_classes ))
print(classification_report(y_train, y_pred_classes ))

[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.7973 - loss: 0.6546
train Accuracy: 79.62%
[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[[527   3   0   0   0   3  10   2   0   2  13   2   0   0   6   2   6   5
   10   0   3   2   0   7   8   0]
 [  0 448   0  29   5   2   3   1   0   2   8   0   0   0   3  11   0  33
   18   0   0   1   1   1   2   0]
 [  0   0 478   0  12   0  15   3   0   0  18   5   0   0  14   0   2   0
    1   6  17   2   0   0   0   0]
 [  0  52   0 464   8   1   1  25   0   5   1   4   4   5   7   4   0  29
    0   1   6   0   0   0   0   0]
 [  0   6  17   7 396   0  46   2   0   0   8   5   0   0   0   0  14   2
   11   7   0   0   0  36   0  28]
 [  3  43   1   6   2 436   1   9  16   1   1   0   0   0   2  28   0   0
    8  15   0   6   0   5  13   0]
 [  2  30  19   0  13  11 417   5   0   0   3   1   1   0   9  11  31  12
    2   0  13   4   3   0   0   0]
 [ 10   9   0  32   1   5   7 

In [21]:
# Make predictions on the test set
yhat_test = model.predict(X_test)
y_pred_classes1 = yhat_test.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred_classes1 ))
print(classification_report(y_test, y_pred_classes1 ))

[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step
[[124   1   0   0   1   1   2   1   0   1   2   0   1   0   3   0   1   3
    0   0   1   0   0   3   0   0]
 [  1 133   0   9   3   0   0   0   0   1   1   0   0   0   0   1   0   9
    1   0   0   0   0   1   0   2]
 [  0   0 110   0   4   0   4   0   0   0   5   0   0   0   4   0   0   0
    0   2   8   0   0   0   0   0]
 [  1   9   0 114   1   1   1   4   0   1   1   0   0   1   2   1   0   5
    0   0   0   0   1   0   0   0]
 [  0   1   6   0  96   1  10   2   0   0   5   0   0   0   0   0   1   0
    2   0   0   0   0   9   0   7]
 [  0  12   0   1   0 115   0   5   2   3   0   0   0   0   0   6   0   0
    3   3   0   1   0   2   3   0]
 [  1   6   6   1   2   1 108   1   0   0   2   1   0   0   4   2  10   1
    3   0   4   2   1   0   0   0]
 [  5   2   0   3   0   1   2  88   0   0   9   0   1   7   5   3   2  10
    0   0   0   5   0   1   1   0]
 [  0   1   0   1   0   2   0   0  62   1   1   2   0

# Hyperparameter Tuning

In [28]:
import tensorflow as tf
import numpy as np
from sklearn.model_selection import ParameterGrid

def create_model(hidden_layers=1, units=16, activation='relu', learning_rate=0.001):  # Model Creation Function
    model = Sequential()
    model.add(Dense(units=units, activation=activation, input_dim=X_train.shape[1]))

    for _ in range(hidden_layers - 1):
        model.add(Dense(units=units, activation=activation))

    model.add(Dense(units=len(np.unique(y)), activation='softmax'))
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [29]:
def evaluate_model(hidden_layers, units, activation, learning_rate):        #Model Evaluation Function
    model = create_model(hidden_layers=hidden_layers, units=units, activation=activation, learning_rate=learning_rate)
    model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=0)
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)
    return accuracy

In [24]:
#Perform Hyperparameter Tuning
param_grid = {                                        
    'hidden_layers': [1, 2, 3],
    'units': [8, 16, 32],
    'activation': ['relu', 'tanh'],
    'learning_rate': [0.001, 0.01]
}

best_score = 0
best_params = {}

for params in ParameterGrid(param_grid):
    score = evaluate_model(**params)
    print(f"Params: {params} - Score: {score}")
    if score > best_score:
        best_score = score
        best_params = params

print(f"Best Score: {best_score}")
print(f"Best Params: {best_params}")

Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.001, 'units': 8} - Score: 0.6912158727645874
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.001, 'units': 16} - Score: 0.7793251276016235
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.001, 'units': 32} - Score: 0.8286020159721375
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.01, 'units': 8} - Score: 0.7062131762504578
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.01, 'units': 16} - Score: 0.7935190200805664
Params: {'activation': 'relu', 'hidden_layers': 1, 'learning_rate': 0.01, 'units': 32} - Score: 0.8476164937019348
Params: {'activation': 'relu', 'hidden_layers': 2, 'learning_rate': 0.001, 'units': 8} - Score: 0.6815747022628784
Params: {'activation': 'relu', 'hidden_layers': 2, 'learning_rate': 0.001, 'units': 16} - Score: 0.8012855052947998
Params: {'activation': 'relu', 'hidden_layers': 2, 'learning_rate': 0.001, 'un

In [30]:
# Best parameters from the tuning process
best_hidden_layers = best_params['hidden_layers']
best_units = best_params['units']
best_activation = best_params['activation']
best_learning_rate = best_params['learning_rate']    

# Create the final model with the best parameters
final_model = create_model(
    hidden_layers=best_hidden_layers,
    units=best_units,
    activation=best_activation,
    learning_rate=best_learning_rate
)

# Train the final model on the full training data
final_model.fit(X_train, y_train, epochs=10, batch_size=10, verbose=1)

Epoch 1/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.4160 - loss: 2.1684
Epoch 2/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.7237 - loss: 1.0097
Epoch 3/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.7789 - loss: 0.7738
Epoch 4/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.8164 - loss: 0.6480
Epoch 5/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.8365 - loss: 0.5667
Epoch 6/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.8536 - loss: 0.5020
Epoch 7/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.8728 - loss: 0.4388
Epoch 8/10
[1m1494/1494[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.8761 - loss: 0.4152
Epoch 9/10
[1m1494/149

<keras.src.callbacks.history.History at 0x1f8edd62c30>

In [31]:
# Evaluate the final model on the test data
loss, accuracy = final_model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.8710 - loss: 0.4236
Test Loss: 0.4190920889377594
Test Accuracy: 0.8727905750274658


In [32]:
# Make predictions on the train set
yhat_train = final_model.predict(X_train)
y_pred_classes = yhat_train.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_train, y_pred_classes ))
print(classification_report(y_train, y_pred_classes ))

[1m467/467[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step
[[578   5   0   0   2   0   1   0   0   1   0   0   1   1   0   1   4   4
    1   1   3   3   3   1   0   1]
 [  0 513   0  14   4   3   1   1   0   4   0   0   0   1   0   0   0  15
    6   3   0   2   0   0   1   0]
 [  0   0 530   0  13   0  10   2   0   0   0   2   0   0   8   0   1   2
    1   0   3   0   1   0   0   0]
 [  0  27   0 538   0   1   0  20   0   1   0   0   1   3   3   1   0  18
    2   0   2   0   0   0   0   0]
 [  0   2   1   0 535   2  12   1   0   0   1   4   0   0   0   0   4   2
    3   5   0   0   0   0   0  13]
 [  0   4   1   1   7 518   0   2   4   3   0   0   0   2   1  14   1   0
   15   7   0   2   2   0  10   2]
 [  0   4  11   4   5   2 523   2   0   0   2   6   2   0   3   5   2   2
    8   0   2   3   1   0   0   0]
 [  4   7   1  21   0   6   1 448   0   0  14   1   1   2   5   5   2  32
    0   0   2   3   0   1   3   0]
 [  0   2   1   3   1   7   1   0 382  23   0   1   0

In [33]:
# Make predictions on the test set
yhat_test = final_model.predict(X_test)
y_pred_classes1 = yhat_test.argmax(axis=-1)

from sklearn.metrics import confusion_matrix, classification_report
print(confusion_matrix(y_test, y_pred_classes1 ))
print(classification_report(y_test, y_pred_classes1 ))

[1m117/117[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
[[137   1   0   0   0   0   1   0   0   1   0   0   2   0   0   0   1   1
    0   0   0   0   0   1   0   0]
 [  0 136   0   5   1   1   1   0   0   3   0   1   0   0   0   0   0   6
    7   0   0   0   0   1   0   0]
 [  0   0 123   0   1   0   4   0   0   0   1   0   0   0   3   0   0   0
    0   0   5   0   0   0   0   0]
 [  0   4   0 131   0   0   0   4   0   1   0   0   0   0   0   0   0   2
    0   0   1   0   0   0   0   0]
 [  0   0   1   0 129   1   2   0   0   0   0   1   0   0   0   0   1   0
    1   1   0   0   0   0   0   3]
 [  0   1   0   0   2 131   0   0   1   2   0   0   0   1   0   9   0   0
    5   2   0   0   0   0   2   0]
 [  0   0   7   3   4   0 127   0   0   0   2   2   0   0   0   2   1   1
    1   0   2   3   1   0   0   0]
 [  2   5   1   2   0   2   1 105   0   0   9   1   1   2   0   1   1   6
    0   1   2   3   0   0   0   0]
 [  0   0   0   1   1   2   0   0  69   4   0   2   0

# Make predictions on the test set
Training Accuracy: 91%

Testing Accuracy: 89%

Default Model

Training Accuracy: 81.24%

Testing Accuracy: 80%

Overall Improvement: The tuned model outperforms the default model in both training and testing accuracy. The tuned model has higher precision, recall, and F1-scores across most classes.

Consistency: The performance improvements are consistent across various metrics, indicating a well-tuned model.