In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
)
import mlflow
import mlflow.sklearn
import os

# Train a random forest on a CSV dataset, evaluate it on a held-out split,
# and record the model plus its metrics in an MLflow run.

# Path of the CSV file to load (change this to point at a different dataset).
dataset_path = "Frequency Analysis Dataset/Dataset.csv"
df = pd.read_csv(dataset_path)

# The last column is treated as the target; every other column is a feature.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a 100-tree random forest on the training portion.
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Hard class predictions for the threshold-based metrics.
y_pred = rf_classifier.predict(X_test)

# The metrics below treat "P" as the positive class.
positive_label = "P"
class_labels = ["P", "C"]

# predict_proba orders its columns like rf_classifier.classes_, so look the
# positive class up by label instead of assuming it sits at a fixed index.
# list.index raises ValueError if the model never saw the positive label,
# which is preferable to silently scoring the wrong class.
positive_column = list(rf_classifier.classes_).index(positive_label)

# Compute the probabilities once and reuse them for both ranking metrics.
positive_scores = rf_classifier.predict_proba(X_test)[:, positive_column]

# Threshold-based metrics on the hard predictions.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(
    y_test, y_pred, labels=class_labels, pos_label=positive_label
)
recall = recall_score(
    y_test, y_pred, labels=class_labels, pos_label=positive_label
)
f1 = f1_score(y_test, y_pred, labels=class_labels, pos_label=positive_label)

# Ranking metrics on the positive-class probabilities. roc_auc_score has no
# pos_label argument, so pass an explicit boolean target to make sure the
# same class counts as positive here as in the other metrics.
roc_auc = roc_auc_score(y_test == positive_label, positive_scores)
pr_auc = average_precision_score(
    y_test, positive_scores, pos_label=positive_label
)

# Record the run in MLflow: dataset location, fitted model, and metrics.
with mlflow.start_run():
    mlflow.log_param("dataset_path", dataset_path)

    mlflow.sklearn.log_model(rf_classifier, "random_forest_model")

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.log_metric("pr_auc", pr_auc)

# Display the results
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"AUC-ROC: {roc_auc:.4f}")
print(f"AUC-PR: {pr_auc:.4f}")



Accuracy: 0.6667
Precision: 0.6667
Recall: 1.0000
F1 Score: 0.8000
AUC-ROC: 0.3750
AUC-PR: 0.6667
