In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('features.csv')
x = df.drop(columns=['target','Medu', 'health', 'Dalc', 'Walc', 'traveltime', 'Mjob', 'internet', 'Pstatus', 'goout', 'Fjob', 'guardian', 'activities'], axis=1)
y = df['target']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [4]:
rf = RandomForestRegressor(random_state=42)

param_grid = {
    'n_estimators': [20, 30, 40, 50, 100, 150, 200],
    'max_depth': [3, 5, 10, 15, 20],
    'min_samples_split': [1, 2, 5],
    'min_samples_leaf': [1, 2],
    'max_features': ['sqrt', 'log2', '0.3'],
    'bootstrap': [True]
}

grid_search = GridSearchCV(rf, param_grid, cv=5, scoring='r2', n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

print(f'Melhores parâmetros: {grid_search.best_params_}')
print(f'Melhor score (CV): {grid_search.best_score_:.4f}')

Fitting 5 folds for each of 120 candidates, totalling 600 fits
Melhores parâmetros: {'bootstrap': True, 'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 5, 'n_estimators': 50}
Melhor score (CV): 0.2707
