In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, precision_recall_curve, classification_report, roc_auc_score


results_dir = "results"
if not os.path.exists(results_dir):
    os.makedirs(results_dir)


df = load_iris()
iris_data = pd.DataFrame(df.data, columns=df.feature_names)
iris_data['target'] = df.target


iris_data = iris_data[iris_data['target'].isin([0, 1])]


X = iris_data.drop('target', axis=1)
y = iris_data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


model1 = LogisticRegression(solver='liblinear')
model1.fit(X_train, y_train)
y_pred1 = model1.predict(X_test)


model2 = LogisticRegression(C=0.5, max_iter=200, solver='liblinear')
model2.fit(X_train, y_train)
y_pred2 = model2.predict(X_test)


def plot_confusion_matrix(cm, model_name, file_path):
    plt.figure(figsize=(6, 4))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)
    plt.title(f"Confusion Matrix - {model_name}")
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.savefig(file_path)
    plt.close()


def plot_precision_recall_curve(y_test, y_scores, model_name, file_path):
    precision, recall, _ = precision_recall_curve(y_test, y_scores)
    plt.figure(figsize=(6, 4))
    plt.plot(recall, precision, color='b', label=f'{model_name} (AUC = {roc_auc_score(y_test, y_scores):.2f})')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title(f"Precision-Recall Curve - {model_name}")
    plt.legend(loc='best')
    plt.savefig(file_path)
    plt.close()


cm1 = confusion_matrix(y_test, y_pred1)
y_probs1 = model1.predict_proba(X_test)[:, 1]  

plot_confusion_matrix(cm1, "Model 1", os.path.join(results_dir, "confusion_matrix_model1.png"))
plot_precision_recall_curve(y_test, y_probs1, "Model 1", os.path.join(results_dir, "precision_recall_model1.png"))


cm2 = confusion_matrix(y_test, y_pred2)
y_probs2 = model2.predict_proba(X_test)[:, 1] 


plot_confusion_matrix(cm2, "Model 2", os.path.join(results_dir, "confusion_matrix_model2.png"))
plot_precision_recall_curve(y_test, y_probs2, "Model 2", os.path.join(results_dir, "precision_recall_model2.png"))


print("Model 1 Classification Report:\n", classification_report(y_test, y_pred1))
print("Model 2 Classification Report:\n", classification_report(y_test, y_pred2))


auc1 = roc_auc_score(y_test, y_probs1)
auc2 = roc_auc_score(y_test, y_probs2)

print(f"Model 1 AUC: {auc1:.2f}")
print(f"Model 2 AUC: {auc2:.2f}")


if auc1 > auc2:
    print("Model 1 is better based on AUC score.")
else:
    print("Model 2 is better based on AUC score.")


Model 1 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

Model 2 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        12
           1       1.00      1.00      1.00         8

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

Model 1 AUC: 1.00
Model 2 AUC: 1.00
Model 2 is better based on AUC score.


In [5]:
import os
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (confusion_matrix, 
                           precision_recall_curve,
                           PrecisionRecallDisplay,
                           classification_report)

# 1. Setup results directory
os.makedirs('results', exist_ok=True)

# 2. Load and prepare Iris data (binary classification)
iris = load_iris()
X = iris.data[iris.target != 2]  # Exclude virginica
y = iris.target[iris.target != 2]  # Binary: setosa(0) vs versicolor(1)

# 3. Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# 4. Train two models
model1 = LogisticRegression(solver='liblinear').fit(X_train, y_train)
model2 = LogisticRegression(C=0.1, solver='liblinear', max_iter=200).fit(X_train, y_train)

# 5. Evaluation function
def evaluate_model(model, X_test, y_test, model_name):
    # Predictions
    y_pred = model.predict(X_test)
    y_proba = model.predict_proba(X_test)[:, 1]
    
    # Confusion Matrix
    cm = confusion_matrix(y_test, y_pred)
    plt.figure()
    plt.imshow(cm, cmap='Blues')
    plt.colorbar()
    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.savefig(f'results/{model_name}_confusion.png')
    plt.close()
    
    # Precision-Recall Curve
    precision, recall, _ = precision_recall_curve(y_test, y_proba)
    disp = PrecisionRecallDisplay(precision=precision, recall=recall)
    disp.plot()
    plt.title(f'{model_name} Precision-Recall Curve')
    plt.savefig(f'results/{model_name}_pr_curve.png')
    plt.close()
    
    # Classification Report
    report = classification_report(y_test, y_pred)
    with open(f'results/{model_name}_report.txt', 'w') as f:
        f.write(report)
    
    return report

# 6. Evaluate both models
print("Model 1 Evaluation:")
print(evaluate_model(model1, X_test, y_test, 'model1'))

print("\nModel 2 Evaluation:")
print(evaluate_model(model2, X_test, y_test, 'model2'))

# 7. Save models
import joblib
joblib.dump(model1, 'results/model1.pkl')
joblib.dump(model2, 'results/model2.pkl')

Model 1 Evaluation:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30


Model 2 Evaluation:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



['results/model2.pkl']

In [15]:
plt.figure(figsize=(6, 4))
y_probs1 = model1.predict_proba(X_test)[:,1]
precision, recall,_ = precision_recall_curve(y_test, y_probs1)
plt.plot(recall, precision, color='b', label=f'Model 1 (AUC = {roc_auc_score(y_test, y_probs1):.2f})')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title(f"Precision-Recall Curve ")
plt.legend(loc='best')
plt.close()