In [None]:
# Step 1: Import Libraries
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix

# Step 2: Load and Explore Dataset
def load_data(file_path, target_column):
    """Read a CSV file and split it into training and testing subsets.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.
        target_column: Name of the column holding the label to predict; every
            other column is treated as a feature.

    Returns:
        The result of ``train_test_split`` on (features, target), i.e.
        ``X_train, X_test, y_train, y_test``.
    """
    frame = pd.read_csv(file_path)

    # Everything except the label column is used as model input.
    features = frame.drop(columns=[target_column])
    labels = frame[target_column]

    # Hold out 20% of the rows; the fixed seed keeps the split reproducible.
    split = train_test_split(features, labels, test_size=0.2, random_state=15)
    return split

# Step 3: Train Random Forest Model
def train_random_forest(X_train, y_train):
    """Fit a random forest classifier on the given training data.

    Args:
        X_train: Training features.
        y_train: Training labels aligned with ``X_train``.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    # Default hyper-parameters with a fixed seed; ``fit`` hands back the
    # estimator itself, so construction and training chain into one expression.
    return RandomForestClassifier(random_state=15).fit(X_train, y_train)

# Step 4: Evaluate Model
def _specificity_from_confusion(cm):
    """Return the specificity (true negative rate) for a confusion matrix.

    For a 2x2 matrix this is ``tn / (tn + fp)`` with the first label treated
    as the negative class. For any other square shape, each class is scored
    one-vs-rest and the per-class values are averaged, weighted by class
    support, to mirror the weighted averaging used for the other metrics.
    Classes with no negative samples are skipped; NaN is returned when the
    value is undefined for every class (for example a 1x1 matrix).
    """
    if cm.shape == (2, 2):
        tn, fp = cm[0]
        negatives = tn + fp
        return tn / negatives if negatives else float("nan")

    total = cm.sum()
    weighted_sum = 0.0
    weight = 0
    for k in range(cm.shape[0]):
        tp = cm[k, k]
        fp = cm[:, k].sum() - tp  # predicted as k but actually another class
        fn = cm[k, :].sum() - tp  # actually k but predicted as another class
        tn = total - tp - fp - fn
        negatives = tn + fp
        if negatives == 0:
            # Every sample belongs to class k: specificity is undefined here.
            continue
        support = tp + fn
        weighted_sum += support * (tn / negatives)
        weight += support
    return weighted_sum / weight if weight else float("nan")


def evaluate_model(model, X_test, y_test):
    """Print classification metrics for ``model`` on the held-out data.

    Reports accuracy, weighted F1, weighted precision, weighted recall,
    specificity and the full classification report.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True labels aligned with ``X_test``.

    Returns:
        None. All metrics are written to standard output.
    """
    y_pred = model.predict(X_test)  # Get the model's predictions on the test data

    # Calculate accuracy
    accuracy = accuracy_score(y_test, y_pred)

    # Calculate F1 score (weighted average for imbalanced classes)
    f1 = f1_score(y_test, y_pred, average='weighted')

    # Calculate precision
    precision = precision_score(y_test, y_pred, average='weighted')

    # Calculate recall
    recall = recall_score(y_test, y_pred, average='weighted')

    # Specificity (True Negative Rate). Unpacking ``ravel()`` into four values
    # only works for a 2x2 matrix, so the helper also covers multiclass and
    # single-class splits instead of raising ValueError.
    specificity = _specificity_from_confusion(confusion_matrix(y_test, y_pred))

    # Display metrics
    print("Accuracy:", accuracy)
    print("F1 Score (Weighted):", f1)
    print("Precision (Weighted):", precision)
    print("Recall (Weighted):", recall)
    print("Specificity:", specificity)
    print("\nClassification Report:\n", classification_report(y_test, y_pred))  # Detailed classification report

# Step 5: Main Workflow
def run_analysis(file_path, target_column):
    """Run the full load -> train -> evaluate pipeline for one dataset.

    Args:
        file_path: Path of the CSV file to analyse.
        target_column: Name of the label column in that file.

    Returns:
        None. The evaluation metrics are printed.
    """
    # Split the dataset, then fit the classifier on the training portion only.
    train_features, test_features, train_labels, test_labels = load_data(
        file_path, target_column
    )
    classifier = train_random_forest(train_features, train_labels)

    # Header naming the dataset, followed by the metrics on the held-out rows.
    print(f"\nEvaluation Metrics for {file_path}:\n")
    evaluate_model(classifier, test_features, test_labels)

# Step 6: Test with different datasets
# A bare './' is a directory and cannot be read by pd.read_csv; point at the
# CSV that the recorded run output refers to. Replace with another dataset
# path as needed.
file_path = './smoke_detection_iot.csv'
target_column = 'Fire Alarm'  # Replace with the actual target column name
# NOTE(review): the recorded run reports 1.0 for every metric. Before trusting
# these scores, confirm the CSV has no index- or timestamp-like columns that
# leak the target.
run_analysis(file_path, target_column)



Evaluation Metrics for ./smoke_detection_iot.csv:

Accuracy: 1.0
F1 Score (Weighted): 1.0
Precision (Weighted): 1.0
Recall (Weighted): 1.0
Specificity: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      3655
           1       1.00      1.00      1.00      8871

    accuracy                           1.00     12526
   macro avg       1.00      1.00      1.00     12526
weighted avg       1.00      1.00      1.00     12526

