In [None]:
# import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.callbacks import EarlyStopping
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'pandas'

In [8]:
def load_and_preprocess_data():
    """
    Load water quality dataset and preprocess it for neural network training
    """
    # Load the dataset
    df = pd.read_csv('water_potability.csv')
    
    # Handle missing values (if any)
    df = df.fillna(df.median())
    
    # Separate features and target
    X = df.drop('Potability', axis=1)
    y = df['Potability']
    
    # Scale features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    
    return X_scaled, y.values

In [10]:
def create_water_quality_model(input_shape, learning_rate=0.01, dropout_rate=0.3, l2_lambda=0.001):
    """
    Create a neural network model for water potability classification
    
    Args:
        input_shape (int): Number of input features
        learning_rate (float): Learning rate for SGD optimizer
        dropout_rate (float): Dropout rate for regularization
        l2_lambda (float): L2 regularization strength
    """
    model = Sequential([
        # Input layer with L2 regularization
        Dense(64, activation='relu', 
              input_shape=(input_shape,), 
              kernel_regularizer=l2(l2_lambda)),
        
        # Dropout layer for preventing overfitting
        Dropout(dropout_rate),
        
        # Hidden layer with L2 regularization
        Dense(32, activation='relu', 
              kernel_regularizer=l2(l2_lambda)),
        
        # Another dropout layer
        Dropout(dropout_rate),
        
        # Output layer (binary classification)
        Dense(1, activation='sigmoid')
    ])
    
    # Compile the model with Stochastic Gradient Descent
    optimizer = SGD(learning_rate=learning_rate, momentum=0.9)
    
    model.compile(
        optimizer=optimizer,
        loss='binary_crossentropy',
        metrics=['accuracy', 'precision', 'recall', tf.keras.metrics.AUC()]
    )
    
    return model

Training and Evaluation Function

In [11]:
def train_and_evaluate_model(X, y, test_size=0.3, random_state=42):
    """
   Split data, train the model, and evaluate its performance
    """
    # Split the data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )
    
    # Further split validation set
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=0.2, random_state=random_state, stratify=y_train
    )
    
    # Early Stopping Callback
    early_stopping = EarlyStopping(
        monitor='val_loss',  
        patience=30,         
        restore_best_weights=True  
    )
    
    # Create and train the model
    model = create_water_quality_model(
        input_shape=X_train.shape[1], 
        learning_rate=0.01,     
        dropout_rate=0.3,       
        l2_lambda=0.001         
    )
    
    # Train the model
    history = model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        epochs=200,             
        batch_size=32,          
        callbacks=[early_stopping],
        verbose=0               
    )
    
    # Evaluate the model
    train_metrics = model.evaluate(X_train, y_train, verbose=0)
    val_metrics = model.evaluate(X_val, y_val, verbose=0)
    test_metrics = model.evaluate(X_test, y_test, verbose=0)
    
    # Compute F1 Score
    y_pred = (model.predict(X_test) > 0.5).astype(int)
    f1_score = tf.keras.metrics.F1Score()(y_test, y_pred)
    
    # Plotting Training History
    plt.figure(figsize=(10, 5))
    plt.subplot(1, 2, 1)
    plt.plot(history.history['loss'], label='Training Loss')
    plt.plot(history.history['val_loss'], label='Validation Loss')
    plt.title('Model Loss')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    
    plt.subplot(1, 2, 2)
    plt.plot(history.history['accuracy'], label='Training Accuracy')
    plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
    plt.title('Model Accuracy')
    plt.xlabel('Epoch')
    plt.ylabel('Accuracy')
    plt.legend()
    plt.tight_layout()
    plt.show()
    
    # Return comprehensive results
    return {
        'model': model,
        'train_metrics': {
            'loss': train_metrics[0],
            'accuracy': train_metrics[1],
            'precision': train_metrics[2],
            'recall': train_metrics[3],
            'auc': train_metrics[4]
        },
        'validation_metrics': {
            'loss': val_metrics[0],
            'accuracy': val_metrics[1],
            'precision': val_metrics[2],
            'recall': val_metrics[3],
            'auc': val_metrics[4]
        },
        'test_metrics': {
            'loss': test_metrics[0],
            'accuracy': test_metrics[1],
            'precision': test_metrics[2],
            'recall': test_metrics[3],
            'auc': test_metrics[4]
        },
        'f1_score': f1_score.numpy(),
        'history': history
    }

In [12]:
def main():
    # Load and preprocess data
    X, y = load_and_preprocess_data('water_potability.csv')
    
    # Train and evaluate the model
    results = train_and_evaluate_model(X, y)
    
    # Print Metrics
    print("\n--- Model Performance Metrics ---")
    print("\nTraining Metrics:")
    for metric, value in results['train_metrics'].items():
        print(f"{metric.capitalize()}: {value:.4f}")
    
    print("\nValidation Metrics:")
    for metric, value in results['validation_metrics'].items():
        print(f"{metric.capitalize()}: {value:.4f}")
    
    print("\nTest Metrics:")
    for metric, value in results['test_metrics'].items():
        print(f"{metric.capitalize()}: {value:.4f}")
    
    print(f"\nF1 Score: {results['f1_score']:.4f}")

In [13]:
if __name__ == '__main__':
    main()

TypeError: load_and_preprocess_data() takes 0 positional arguments but 1 was given

Training Summary Table


<br>
| Parameter            | Value               |<br>
|---------------------|---------------------|<br>
| Regularizer         | L2 (lambda=0.001)   |<br>
| Optimizer           | Stochastic Gradient Descent |<br>
| Early Stopping      | Monitor val_loss, patience=30 |<br>
| Dropout Rate        | 0.3                 |<br>
| Learning Rate       | 0.01                |<br>
