# Ensemble Methods and Hyperparameter Tuning.

**1. Implement Classification Models:**

• Train a Decision Tree Classifier and a Random Forest Classifier using scikit-learn.

• Compare the models based on their F1 scores.

In [None]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split

# Load the scikit-learn wine dataset and pull out the feature matrix and targets.
wine = load_wine()
X = wine.data
y = wine.target

# Hold out 20% of the samples for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import f1_score

# Fit a single decision tree on the training split (seeded so reruns match).
dt_classifier = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# Predict on the held-out split and summarise with the weighted-average F1.
y_pred_dt = dt_classifier.predict(X_test)
f1_dt = f1_score(y_true=y_test, y_pred=y_pred_dt, average='weighted')
print(f"Decision Tree F1 Score: {f1_dt:.4f}")

Decision Tree F1 Score: 0.9440


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Fit a random forest with default settings on the same training split
# (seeded so reruns match).
rf_classifier = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Same evaluation as the decision tree: weighted-average F1 on the held-out split.
y_pred_rf = rf_classifier.predict(X_test)
f1_rf = f1_score(y_true=y_test, y_pred=y_pred_rf, average='weighted')
print(f"Random Forest F1 Score: {f1_rf:.4f}")

Random Forest F1 Score: 1.0000


**2. Hyperparameter Tuning:**

• Identify three hyperparameters of the Random Forest Classifier.

• Perform hyperparameter tuning using GridSearchCV to optimize these parameters.

• Take hints from the scikit-learn documentation to guide the implementation.

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values for the three Random Forest hyperparameters being tuned
# (3 x 3 x 3 = 27 combinations).
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10],
}

# Exhaustive search over the grid, scoring each combination by weighted F1
# under 5-fold cross-validation on the training split only.
grid_search = GridSearchCV(
    rf_classifier,
    param_grid,
    scoring='f1_weighted',
    cv=5,
).fit(X_train, y_train)

# best_score_ is the mean cross-validated score of the winning combination.
print("Best Parameters:", grid_search.best_params_)
print("Best F1 Score:", grid_search.best_score_)

Best Parameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}
Best F1 Score: 0.9782952128219708


**3. Implement Regression Model:**

• Train a Decision Tree Regressor and a Random Forest Regressor using scikit-learn.

• Identify three hyperparameters of the Random Forest Regressor and perform hyperparameter tuning using
RandomizedSearchCV to optimize these parameters.

In [None]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_squared_error

# NOTE(review): X_train / y_train are reused from the wine split above, so the
# regression target here is the wine class label - confirm this is intended.

# Baseline 1: a single decision tree regressor, scored by MSE on the held-out split.
dt_regressor = DecisionTreeRegressor(random_state=42).fit(X_train, y_train)
y_pred_dt_reg = dt_regressor.predict(X_test)
mse_dt_reg = mean_squared_error(y_true=y_test, y_pred=y_pred_dt_reg)

# Baseline 2: a random forest regressor with default settings, scored the same way.
rf_regressor = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred_rf_reg = rf_regressor.predict(X_test)
mse_rf_reg = mean_squared_error(y_true=y_test, y_pred=y_pred_rf_reg)

print(f"Decision Tree Regressor MSE: {mse_dt_reg:.4f}")
print(f"Random Forest Regressor MSE: {mse_rf_reg:.4f}")

# Search space for the three Random Forest Regressor hyperparameters being tuned.
param_dist = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
}

# Randomly sample 10 combinations from the search space and score each with
# 5-fold cross-validation on the training split (seeded so the sample is repeatable).
random_search = RandomizedSearchCV(
    rf_regressor,
    param_dist,
    n_iter=10,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42,
).fit(X_train, y_train)

# The scorer is the negated MSE, so flip the sign to report a positive MSE.
# This is the mean cross-validated score, not a held-out test-set score.
print(f"Best Hyperparameters: {random_search.best_params_}")
print(f"Best MSE: {-random_search.best_score_:.4f}")

Decision Tree Regressor MSE: 0.1667
Random Forest Regressor MSE: 0.0648
Best Hyperparameters: {'n_estimators': 200, 'min_samples_split': 10, 'max_depth': 30}
Best MSE: 0.0468
