In [2]:
# # ANN Classification on "Alphabets_data.csv" Dataset

# ## 1. Import Necessary Libraries

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier  # Use scikeras KerasClassifier

# ## 2. Load and Explore the Dataset

# Load the dataset
data = pd.read_csv("Alphabets_data.csv")

# Display basic information about the dataset
print("Dataset Info:")
print(data.info())
print("\nDataset Head:")
print(data.head())

# ## 3. Data Preprocessing

# Separate features (X) and target (y)
X = data.drop("letter", axis=1)  # Drop the target column "letter" to get feature matrix
y = data["letter"]  # Target variable

# Encode the target labels as integers for ANN compatibility
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Normalize the feature data for better model performance
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the dataset into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y_encoded, test_size=0.2, random_state=42)

# ## 4. Define the ANN Model

# Function to create a customizable ANN model
def create_model(learning_rate=0.001, hidden_units=32, activation="relu"):
    model = Sequential()
    # Input layer with one hidden layer
    model.add(Dense(hidden_units, input_shape=(X_train.shape[1],), activation=activation))
    # Second hidden layer
    model.add(Dense(hidden_units, activation=activation))
    # Output layer with number of units equal to the number of classes and softmax activation
    model.add(Dense(len(np.unique(y_encoded)), activation="softmax"))
    
    # Compile the model with Adam optimizer and categorical crossentropy loss
    optimizer = Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
    return model

# ## 5. Hyperparameter Tuning

# Wrap the model in a KerasClassifier from scikeras
model = KerasClassifier(model=create_model, verbose=0)

# Define hyperparameters to tune
param_grid = {
    'epochs': [50, 100],
    'batch_size': [10, 20],
    'model__learning_rate': [0.001, 0.01],  # Use "model__" prefix for model hyperparameters
    'model__hidden_units': [32, 64],
    'model__activation': ['relu', 'tanh']
}

# Perform grid search with cross-validation to find the best hyperparameters
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='accuracy')
grid_result = grid.fit(X_train, y_train)

# Display the best hyperparameters and their corresponding cross-validation accuracy
print(f"Best Hyperparameters: {grid_result.best_params_}")
print(f"Best Cross-validation Accuracy: {grid_result.best_score_}")

# ## 6. Model Evaluation

# Use the best model from grid search to make predictions on the test set
best_model = grid_result.best_estimator_
y_pred = best_model.predict(X_test)

# Calculate and display evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

print("\nModel Evaluation on Test Set:")
print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 17 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   letter  20000 non-null  object
 1   xbox    20000 non-null  int64 
 2   ybox    20000 non-null  int64 
 3   width   20000 non-null  int64 
 4   height  20000 non-null  int64 
 5   onpix   20000 non-null  int64 
 6   xbar    20000 non-null  int64 
 7   ybar    20000 non-null  int64 
 8   x2bar   20000 non-null  int64 
 9   y2bar   20000 non-null  int64 
 10  xybar   20000 non-null  int64 
 11  x2ybar  20000 non-null  int64 
 12  xy2bar  20000 non-null  int64 
 13  xedge   20000 non-null  int64 
 14  xedgey  20000 non-null  int64 
 15  yedge   20000 non-null  int64 
 16  yedgex  20000 non-null  int64 
dtypes: int64(16), object(1)
memory usage: 2.6+ MB
None

Dataset Head:
  letter  xbox  ybox  width  height  onpix  xbar  ybar  x2bar  y2bar  xybar  \
0      T     2     8      3       5      1 

ValueError: 
All the 96 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
96 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\shakt\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\shakt\AppData\Local\Programs\Python\Python311\Lib\site-packages\scikeras\wrappers.py", line 1501, in fit
    super().fit(X=X, y=y, sample_weight=sample_weight, **kwargs)
  File "c:\Users\shakt\AppData\Local\Programs\Python\Python311\Lib\site-packages\scikeras\wrappers.py", line 770, in fit
    self._fit(
  File "c:\Users\shakt\AppData\Local\Programs\Python\Python311\Lib\site-packages\scikeras\wrappers.py", line 928, in _fit
    self._ensure_compiled_model()
  File "c:\Users\shakt\AppData\Local\Programs\Python\Python311\Lib\site-packages\scikeras\wrappers.py", line 439, in _ensure_compiled_model
    if not self.model_.compiled:
           ^^^^^^^^^^^^^^^^^^^^
AttributeError: 'Sequential' object has no attribute 'compiled'
