In [None]:
import os
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from keras.models import Sequential
from keras.layers import BatchNormalization
from keras.regularizers import l2
from keras.regularizers import l1
from keras.layers import Dropout
from keras.layers import Dense
from keras.optimizers import Adam
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import confusion_matrix, accuracy_score
from keras.models import load_model

In [None]:
# Read train.csv of the HMS harmful-brain-activity dataset from the Kaggle
# input directory into a DataFrame.
data = pd.read_csv(
    filepath_or_buffer='/kaggle/input/hms-harmful-brain-activity-classification/train.csv',
)

In [None]:
# Preview the first five rows. Ending the cell on the bare expression lets the
# notebook render the frame as a table; print() would fall back to plain text
# that wraps wide frames across several lines.
data.head()

In [None]:
# Count the missing values in each column.
print(data.isna().sum())

In [None]:
# List the dtype of every column as loaded from the CSV.
print(data.dtypes)

In [None]:
# Encode the 'expert_consensus' labels as integer category codes.
#
# The column only holds the codes afterwards, so the code -> original label
# mapping is captured in `consensus_labels` and shown here; without it the
# class indices in later reports cannot be read back as label names.
# Run this on freshly loaded data: re-running it on the already-encoded column
# would rebuild the mapping from the codes themselves.
consensus = data['expert_consensus'].astype('category')
consensus_labels = dict(enumerate(consensus.cat.categories))
print(consensus_labels)

# Overwrite the column with the codes (a missing value would be encoded as -1).
data['expert_consensus'] = consensus.cat.codes

In [None]:
# Re-check the column dtypes now that 'expert_consensus' holds category codes.
print(data.dtypes)

In [None]:
# Cast the encoded 'expert_consensus' column to 64-bit integers.
data['expert_consensus'] = data['expert_consensus'].astype(np.int64)

In [None]:
# Re-check the column dtypes after casting 'expert_consensus' to int64.
print(data.dtypes)

In [None]:
# Draw a 10-bin histogram for every int64/float64 column of the frame.
numeric_columns = [
    name for name in data.columns
    if data[name].dtype in ['int64', 'float64']
]

for name in numeric_columns:
    # One figure per column, titled and labelled with the column name.
    fig, ax = plt.subplots()
    ax.hist(data[name], bins=10, edgecolor='black')
    ax.set_title(f'Histogram of {name}')
    ax.set_xlabel(name)
    ax.set_ylabel('Frequency')
    plt.show()

In [None]:
# Prepare the data: y is the encoded target, X holds the candidate features.
#
# Besides the target itself, columns whose name ends in '_vote' are left out.
# NOTE(review): in the HMS train.csv these appear to be the per-class expert
# vote tallies that 'expert_consensus' is derived from, so keeping them as
# features would hand the answer to the model - confirm against the dataset
# description. If no such columns exist, this is the same as dropping only the
# target.
# NOTE(review): any remaining *_id columns are presumably identifiers rather
# than signal - consider excluding them as well.
vote_columns = [name for name in data.columns if str(name).endswith('_vote')]
X = data.drop(columns=['expert_consensus', *vote_columns])
y = data['expert_consensus']

In [None]:
# Fit a LabelEncoder on the target, then replace y with its encoded form.
encoder = LabelEncoder().fit(y)
y = encoder.transform(y)

In [None]:
# Split the data into training and testing sets (80/20, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features before they reach the network; previously they were
# passed in raw, leaving columns on whatever scales the CSV happened to use.
# The scaler is fitted on the training split only and then applied unchanged to
# the test split, so no test-set statistics leak into preprocessing.
# Both splits become NumPy arrays here; .shape and the Keras calls below accept
# either form.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Report the dimensions of each of the four split arrays.
for split_name, split in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"{split_name} shape:", split.shape)

In [None]:
# Fully connected classifier: four hidden stages (Dense + ReLU with L2 weight
# penalty, then BatchNormalization, then Dropout) followed by a softmax output
# with one unit per distinct class in y.
hidden_widths = (512, 512, 256, 128)
n_features = X_train.shape[1]
n_classes = len(np.unique(y))

model = Sequential()
for depth, width in enumerate(hidden_widths):
    # Only the first Dense layer declares the input size.
    input_spec = {'input_dim': n_features} if depth == 0 else {}
    model.add(Dense(width, activation='relu', kernel_regularizer=l2(0.01), **input_spec))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

# Output layer
model.add(Dense(n_classes, activation='softmax'))

# Integer class targets, hence the sparse variant of categorical cross-entropy.
model.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

model.summary()


In [None]:
# Train for 30 epochs in mini-batches of 32. The held-out split is passed as
# validation data, so its loss and accuracy are recorded after every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=30,
    validation_data=(X_test, y_test),
)

In [None]:
# Persist the trained model to 'my_model1.h5' in the current working directory.
model.save(filepath='my_model1.h5')

In [None]:
# Plot the per-epoch training and validation curves, one figure per metric.
# Each entry: (training key, validation key, figure title, y-axis label).
curve_specs = (
    ('accuracy', 'val_accuracy', 'Model accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Model loss', 'Loss'),
)

for train_key, val_key, heading, y_label in curve_specs:
    plt.figure(figsize=(12, 6))
    plt.plot(history.history[train_key])
    plt.plot(history.history[val_key])
    plt.title(heading)
    plt.ylabel(y_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Test'], loc='upper left')
    plt.show()

In [None]:
# Load the model back from disk.
# Uses the same relative path the model is saved under ('my_model1.h5'). The
# former absolute '/kaggle/working/...' path only pointed at that file when the
# notebook's working directory happened to be /kaggle/working.
model = load_model('my_model1.h5')

In [None]:
# Predict class probabilities for the test set and keep, for each row, the
# index of the most probable class.
y_pred = model.predict(X_test).argmax(axis=1)

In [None]:
# Build the confusion matrix of true vs. predicted classes on the test set.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

In [None]:
# Render the confusion matrix as an annotated heatmap with integer cell counts.
fig, ax = plt.subplots(figsize=(10, 7))
sns.heatmap(cm, annot=True, fmt='d', ax=ax)
ax.set_xlabel('Predicted')
ax.set_ylabel('Truth')
plt.show()

In [None]:
# Fraction of test rows whose predicted class matches the true class.
test_accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Test Accuracy: ", test_accuracy)

In [None]:
# Spot-check a handful of test predictions against the ground truth.
y_pred_probs = model.predict(X_test)
y_pred_classes = np.argmax(y_pred_probs, axis=1)

# Draw up to 10 distinct test positions. replace=False stops the same instance
# from being reported twice (np.random.choice samples with replacement by
# default), and min() keeps the draw valid for test sets smaller than 10 rows.
sample_size = min(10, len(y_test))
random_indices = np.random.choice(len(y_test), size=sample_size, replace=False)

# Map each class index to the value the encoder was fitted on.
# NOTE: 'expert_consensus' was already replaced by integer category codes
# before the encoder was fitted, so these "labels" are those integer codes,
# not the original label names.
class_labels = dict(enumerate(encoder.classes_))

# Check actual class and predicted class for the randomly selected instances
# (the reported instance number is the 1-based position within the test set).
for i in random_indices:
    actual_class = class_labels[y_test[i]]
    predicted_class = class_labels[y_pred_classes[i]]
    print(f"Instance {i+1}: Actual Class - {actual_class}, Predicted Class - {predicted_class}")