In [20]:
from sklearn import datasets
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split, cross_val_score, KFold, LeaveOneOut
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from imblearn.over_sampling import RandomOverSampler


In [21]:

# Load the bundled Iris dataset and pull out the feature matrix and the label vector.
iris = datasets.load_iris()
X, y = iris.data, iris.target

In [22]:
# Hold out 30% of the samples for the final test; the fixed seed keeps the split reproducible.
# No `stratify` argument is passed, so class proportions may differ between the two sets.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)


In [23]:
# Standardize the features. The scaling statistics are learned from the training
# split only and the same fitted transform is then applied to both splits, so the
# test data never influences the scaler.
# X_train / X_test are overwritten with their scaled versions here.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [24]:
# Balance the training classes with random oversampling (for illustration).
# Only the training split is resampled; the test split is left untouched.
# NOTE(review): the search / cross-validation cells in this notebook split folds on
# these already-resampled arrays. Confirm whether duplicated rows can end up on both
# sides of a fold; if so, the CV scores may be optimistic.
ros = RandomOverSampler(random_state=0)
X_train_resampled, y_train_resampled = ros.fit_resample(
    X_train,
    y_train,
)


In [25]:
# Exhaustive hyperparameter search (GridSearchCV)
# Every combination of the values below is evaluated.
param_grid = dict(
    n_estimators=[10, 50, 100],
    max_depth=[None, 10, 20],
)

# Base estimator with a fixed seed.
rf = RandomForestClassifier(random_state=42)

# 5-fold cross-validated search, fitted on the oversampled training data.
grid_search = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5)
grid_search.fit(X_train_resampled, y_train_resampled)

print("Best parameters for GridSearchCV:", grid_search.best_params_, sep="\n")

Best parameters for GridSearchCV:
{'max_depth': None, 'n_estimators': 10}


In [26]:
# RandomizedSearchCV
# `param_grid` contains only finite lists, so the search space is a finite grid.
# Requesting more iterations than there are combinations makes scikit-learn emit a
# UserWarning and fall back to the grid size, so cap n_iter at the grid size up front.
n_candidates = 1
for candidate_values in param_grid.values():
    n_candidates *= len(candidate_values)

random_search = RandomizedSearchCV(
    rf,
    param_distributions=param_grid,
    n_iter=min(10, n_candidates),
    cv=5,
    random_state=42,
)
random_search.fit(X_train_resampled, y_train_resampled)

print("Best parameters for RandomizedSearchCV:")
print(random_search.best_params_)




Best parameters for RandomizedSearchCV:
{'n_estimators': 10, 'max_depth': None}


In [27]:
# Shuffled 5-fold cross-validation (seeded for reproducibility), scored by accuracy.
# The estimator evaluated here is `rf`, not the tuned model selected by the searches.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
cross_val_scores = cross_val_score(
    estimator=rf,
    X=X_train_resampled,
    y=y_train_resampled,
    scoring='accuracy',
    cv=kf,
)
print("Cross-Validation Scores (K-Fold):", cross_val_scores)


Cross-Validation Scores (K-Fold): [1.         0.81818182 0.86363636 1.         0.95454545]


In [28]:
# Cross-validation using LOOCV
# Each sample is held out exactly once, so every entry of `loo_scores` is the
# accuracy of a single prediction (1.0 for a hit, 0.0 for a miss).
loo = LeaveOneOut()
loo_scores = cross_val_score(rf, X_train_resampled, y_train_resampled, cv=loo, scoring='accuracy')
print("Cross-Validation Scores (LOOCV):", loo_scores)
# The per-sample hits/misses are hard to interpret on their own; their mean is the
# LOOCV accuracy estimate.
print("Mean accuracy (LOOCV):", loo_scores.mean())

Cross-Validation Scores (LOOCV): [1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 0.
 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [29]:
# Take the tuned model from the grid search.
# GridSearchCV is constructed with its default refit=True, so `best_estimator_` has
# already been refit with the best hyperparameters on all the data passed to
# `grid_search.fit` (X_train_resampled, y_train_resampled). Fitting it again on the
# same data would only repeat that work.
best_rf = grid_search.best_estimator_

In [30]:
# Evaluate the tuned model on the held-out test split.
y_pred = best_rf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)

print("Accuracy on the test set:", test_accuracy)
print("Classification Report on the test set:", test_report, sep="\n")

Accuracy on the test set: 1.0
Classification Report on the test set:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

