# Searching for the optimal parameters

In [None]:
from xgboost import XGBClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, Normalizer
from sklearn import metrics
import numpy as np
import pandas as pd
import pickle
from sklearn.datasets import fetch_openml

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Fetch the MNIST digits ("mnist_784", version 1) from OpenML.
mnist = fetch_openml(name='mnist_784', version=1)
# Left as the final expression so the notebook displays the available keys.
mnist.keys()

In [None]:
# Pull the feature matrix and the label vector out of the fetched bunch.
X = mnist["data"]
y = mnist["target"]
# Show both shapes, one per line.
print(X.shape, y.shape, sep="\n")

In [None]:
# Hold out 20% of the samples as a test set; the fixed seed keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

## Preprocessing

In [None]:
# Standardize the features: the scaler is fitted on the training split only,
# and that same fitted scaler is then applied to both splits so no
# information from the test set leaks into the preprocessing.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Random Forest

In [None]:
# modeling: random forest base estimator for the grid search
# Hyperparameters are left at the scikit-learn defaults here; the ones being
# tuned are supplied by the parameter grid defined below. random_state is
# fixed (same seed as the train/test split) so that repeated runs of the
# search build the same forests and give comparable scores.
model = RandomForestClassifier(random_state=42)

In [None]:
# Hyperparameter candidates for the random forest grid search.
# Every combination of the listed values is a candidate setting.
param_grid = {
    "max_depth": [5, 10, 15],              # maximum depth of each tree
    "criterion": ["entropy", "gini"],      # split-quality measure
    "min_samples_leaf": [6, 10, 20],       # minimum samples required in a leaf
    "class_weight": ["balanced", None],    # reweight classes, or leave unweighted
    "n_estimators": [50, 100, 200],        # number of trees in the forest
}

In [None]:
# Set up an exhaustive grid search over param_grid with 10-fold
# cross-validation. n_jobs=-1 uses all available cores; verbose=1 prints
# progress messages while fitting.
rf_grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=10,
    verbose=1,
    n_jobs=-1,
)

In [None]:
# Run the grid search on the standardized training data.
# Left as the final expression so the notebook displays the fitted search.
rf_grid.fit(X=X_train_scaled, y=y_train)

In [None]:
# see the best parameters and their score
# NOTE: the fitted search object is `rf_grid`; the previous `rf_search`
# name was never defined and raised a NameError.
print("Best parameters:")
print(rf_grid.best_params_)
# mean cross-validated score of the best parameter combination
print("Best score in grid search:")
print(rf_grid.best_score_)
# score of the refitted best model on the held-out test set
print("best model from grid search:")
print(rf_grid.score(X_test_scaled, y_test))

In [None]:
# predict on the held-out test set with the best model found by the search
# NOTE: the fitted search object is `rf_grid`; the previous `rf_search`
# name was never defined and raised a NameError.
y_preds = rf_grid.predict(X_test_scaled)
# eyeball the first ten predictions against the first ten true labels
print(list(y_preds[:10]))
print(list(y_test[:10]))

In [None]:
# Evaluate the test-set predictions.
print('Accuracy:', metrics.accuracy_score(y_test, y_preds))
# Precision, recall and F1 are macro-averaged: computed per class and then
# averaged with equal weight for every class.
for _label, _score_fn in (
    ('Precision:', metrics.precision_score),
    ('Recall:', metrics.recall_score),
    ('F1 Score:', metrics.f1_score),
):
    print(_label, _score_fn(y_test, y_preds, average='macro'))