In [34]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, mean_squared_error, r2_score
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.datasets import load_wine


df_wine= load_wine()
X = pd.DataFrame(df_wine.data, columns=df_wine.feature_names)
y = pd.Series(df_wine.target, name='Wine')

In [35]:
X.head()


Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [36]:
y.head()

Unnamed: 0,Wine
0,0
1,0
2,0
3,0
4,0


In [37]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [38]:
df_clf = DecisionTreeClassifier(random_state=42)
df_clf.fit(X_train, y_train)


In [39]:
y_preds_t = df_clf.predict(X_test)
y_preds_t

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 1,
       2, 0, 1, 1, 2, 0, 1, 0, 0, 2])

In [40]:
f1_tree = f1_score(y_test, y_preds_t, average='weighted')
f1_tree

0.9628353590455226

In [41]:
rf_clf = RandomForestClassifier(random_state=42)
rf_clf.fit(X_train, y_train)
y_pred_rf = rf_clf.predict(X_test)
y_pred_rf

array([0, 0, 2, 0, 1, 0, 1, 2, 1, 2, 0, 2, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       1, 2, 2, 2, 1, 1, 1, 0, 0, 1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 1,
       2, 0, 1, 1, 2, 0, 1, 0, 0, 2])

In [42]:
f1_rf = f1_score(y_test, y_pred_rf, average='weighted')
f1_rf


1.0

In [43]:
rf_params = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10]
}
# Perform GridSearchCV on Random Forest Classifier
grid_search = GridSearchCV(estimator=RandomForestClassifier(random_state=42),
    param_grid=rf_params,
    scoring='f1_weighted',
    cv=5
)
grid_search.fit(X_train, y_train)

In [44]:


best_rf_clf = grid_search.best_estimator_
print("Best parameters for Random Forest Classifier:", grid_search.best_params_)
print("Best F1 score from GridSearchCV:", grid_search.best_score_)


y_pred_best_rf = best_rf_clf.predict(X_test)
f1_best_rf = f1_score(y_test, y_pred_best_rf, average='weighted')
print("F1 score for the best Random Forest Classifier on test set:", f1_best_rf)

Best parameters for Random Forest Classifier: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 50}
Best F1 score from GridSearchCV: 0.9680809081527346
F1 score for the best Random Forest Classifier on test set: 1.0


In [45]:

dt_reg = DecisionTreeRegressor(random_state=42)
dt_reg.fit(X_train, y_train)
y_pred_dt_reg = dt_reg.predict(X_test)


rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)
y_pred_rf_reg = rf_reg.predict(X_test)


mse_dt = mean_squared_error(y_test, y_pred_dt_reg)
r2_dt = r2_score(y_test, y_pred_dt_reg)
mse_rf = mean_squared_error(y_test, y_pred_rf_reg)
r2_rf = r2_score(y_test, y_pred_rf_reg)

print("Decision Tree Regressor - MSE:", mse_dt, "R2:", r2_dt)
print("Random Forest Regressor - MSE:", mse_rf, "R2:", r2_rf)

Decision Tree Regressor - MSE: 0.14814814814814814 R2: 0.7541263517359135
Random Forest Regressor - MSE: 0.057238888888888916 R2: 0.9050036425725668


In [46]:
#  hyperparameter space for Random Forest Regressor
rf_reg_params = {
    'n_estimators': [10, 50, 100, 200],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Perform RandomizedSearchCV
random_search = RandomizedSearchCV(estimator=RandomForestRegressor(random_state=42),param_distributions=rf_reg_params,
    n_iter=20,
    scoring='neg_mean_squared_error',
    cv=5,
    random_state=42
)
random_search.fit(X_train, y_train)

In [47]:

best_rf_reg = random_search.best_estimator_
print("Best parameters for Random Forest Regressor:", random_search.best_params_)
print("Best negative MSE from RandomizedSearchCV:", random_search.best_score_)

y_pred_best_rf_reg = best_rf_reg.predict(X_test)
mse_best_rf = mean_squared_error(y_test, y_pred_best_rf_reg)
r2_best_rf = r2_score(y_test, y_pred_best_rf_reg)
print("Best Random Forest Regressor - MSE:", mse_best_rf, "R2:", r2_best_rf)


Best parameters for Random Forest Regressor: {'n_estimators': 50, 'min_samples_split': 2, 'min_samples_leaf': 1, 'max_features': 'log2', 'max_depth': 20}
Best negative MSE from RandomizedSearchCV: -0.03402733333333333
Best Random Forest Regressor - MSE: 0.03526666666666666 R2: 0.9414697780307342
