In [19]:
# Import required libraries and setup warnings
import numpy as np
import pandas as pd
from scipy.io import loadmat
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

warnings.filterwarnings('ignore')

In [21]:
# Load the MATLAB file (adjust the file path as necessary)
data = loadmat("Matlab_cancer.mat")

# Extract features (X) and labels (y)
X = data['x']  # Features matrix
y = data['t']  # Labels in binary pair format

# Transpose the matrices so that each row represents one sample
X_transposed = X.T
y_transposed = y.T

# Convert labels into a DataFrame and add column names
dfy = pd.DataFrame(y_transposed, columns=["First", "Second"])

# Define a function to map the binary pair to class labels
def map_binary_to_label(row):
    if row["First"] == 1 and row["Second"] == 0:
        return 'Cancer'
    elif row["First"] == 0 and row["Second"] == 1:
        return 'Normal'
    else:
        return 'Unknown'

# Apply the mapping function to create a new label column
dfy['Label'] = dfy.apply(map_binary_to_label, axis=1)

# Convert the categorical labels into numeric values:
# Cancer --> 1, Normal --> 0 (using get_dummies with drop_first)
y_numeric = pd.get_dummies(dfy['Label'], drop_first=True)  # This gives a column named 'Cancer'

# Convert features to a DataFrame for easier manipulation
dfX = pd.DataFrame(X_transposed)

# (Optional) Combine features and labels for a quick check
df = pd.concat([dfX, y_numeric], axis=1)
assert df.isna().sum().sum() == 0, "DataFrame contains NaN values!"


In [23]:
from sklearn.model_selection import train_test_split

# Check the columns of y_numeric to see available dummy variable names
print("Dummy columns:", y_numeric.columns)

# Option 1: Invert the 'Normal' dummy variable so that:
# - Cancer: 1 (since 'Normal' dummy is 0 for Cancer)
# - Normal: 0 (since 'Normal' dummy is 1 for Normal)
y_final = 1 - y_numeric['Normal']

# Define features
X_final = dfX  # All feature columns

# Split data into training (80%) and testing (20%) sets
X_train, X_test, y_train, y_test = train_test_split(
    X_final, y_final, test_size=0.20, random_state=1
)

print("X_train shape:", X_train.shape)
print("y_train distribution:\n", y_train.value_counts())

Dummy columns: Index(['Normal'], dtype='object')
X_train shape: (172, 100)
y_train distribution:
 Normal
1    91
0    81
Name: count, dtype: int64


In [25]:
import tensorflow as tf
from tensorflow import keras

# Build the ANN model architecture
model = keras.Sequential([
    keras.layers.Dense(60, input_dim=X_train.shape[1], activation='relu', name='Hidden_Layer'),
    keras.layers.Dense(1, activation='sigmoid', name='Output_Layer')
])

# Compile the model using binary crossentropy loss and Adam optimizer
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
# Train the model for 100 epochs
history = model.fit(X_train, y_train, epochs=100, verbose=1)


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - accuracy: 0.7435 - loss: 0.5975
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8298 - loss: 0.4535 
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.8707 - loss: 0.3942
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.8904 - loss: 0.3436  
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.8783 - loss: 0.3186 
Epoch 6/100
[1m3/6[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 26ms/step - accuracy: 0.8438 - loss: 0.3473

In [None]:
# Evaluate the model on the test set
loss, accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"\nTest Loss     : {loss:.4f}")
print(f"Test Accuracy : {accuracy:.4f}")

# Generate predictions on the test set
y_pred_prob = model.predict(X_test).reshape(-1)
y_pred = np.round(y_pred_prob)  # Convert probabilities to 0 or 1

# Display the first 10 predictions
print("Predicted probabilities (first 10):", y_pred_prob[:10])
print("Rounded predictions (first 10):", y_pred[:10])

# Generate a classification report and confusion matrix
from sklearn.metrics import confusion_matrix, classification_report

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Plot the confusion matrix
cm = confusion_matrix(y_test, y_pred)
plt.figure(figsize=(6, 4))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("Confusion Matrix")
plt.xlabel("Predicted Label")
plt.ylabel("True Label")
plt.show()
