In [3]:
# Import packages and data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Create a dataframe
from sklearn.datasets import load_iris
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['target'] = iris.target
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
# Train/Test split
from sklearn.model_selection import train_test_split
X = df.drop(columns="target").values
y = df["target"].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2,        # 20% of the data reserved for the test set
    random_state=412,      # Fix the random seed for reproducibility
)

In [None]:
# Find the best degree for LogisticRegression
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.metrics import accuracy_score, classification_report
from sklearn.pipeline import Pipeline

results = {}

for degree in range(1, 5):  # 1, 2, 3, 4
    pipeline = Pipeline([
        ("poly", PolynomialFeatures(degree=degree, include_bias=False)),
        ("scaler", StandardScaler()),
        ("log_reg", LogisticRegression(max_iter=10000))
    ])
    
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)
    
    accuracy = accuracy_score(y_test, y_pred)
    results[degree] = accuracy
    print(f"Accuracy for Degree {degree}: {accuracy:.4f}")

# Show the results
best_degree = max(results, key=results.get)
print("\nBest degree is", best_degree, "with accuracy:", results[best_degree])

Accuracy for Degree 1: 0.9333
Accuracy for Degree 2: 0.9667
Accuracy for Degree 3: 0.9667
Accuracy for Degree 4: 0.9667

Best degree is 2 with accuracy: 0.9666666666666667
