In [1]:
import pandas as pd
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import joblib

iris = load_iris()
X = pd.DataFrame(iris.data, columns=iris.feature_names)
y = pd.Series(iris.target, name='target')

print(f"Dataset shape: {X.shape}")
print(f"Missing values: {X.isnull().sum().sum()}")
print(f"Target distribution:\n{y.value_counts()}")

Dataset shape: (150, 4)
Missing values: 0
Target distribution:
target
0    50
1    50
2    50
Name: count, dtype: int64


In [2]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

models = {
    'LogisticRegression': LogisticRegression(max_iter=200, random_state=0),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=0),
    'SVM': SVC(kernel='rbf', random_state=0)
}

scores = {}
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    pred = model.predict(X_test_scaled)
    acc = accuracy_score(y_test, pred)
    scores[name] = acc
    print(f"{name}: {acc:.4f}")

best_model_name = max(scores, key=scores.get)
best_model = models[best_model_name]
print(f"\nBest model: {best_model_name} with accuracy {scores[best_model_name]:.4f}")

LogisticRegression: 1.0000
RandomForest: 1.0000
SVM: 1.0000

Best model: LogisticRegression with accuracy 1.0000


In [3]:
joblib.dump(best_model, 'model.pkl')
joblib.dump(scaler, 'scaler.pkl')
joblib.dump(iris.target_names, 'target_names.pkl')
print("Model, scaler, and target names saved!")

Model, scaler, and target names saved!
