In [None]:
# libraries
import pandas as pd
import optuna
from sklearn.ensemble import RandomForestClassifier
from results import Fold
from evaluation import Experiment, GoalOptimizationRandomForest
from method import ScikitLearnMachineLearning
from work_methods import parameters_graph, show_results

# global categories: map each digit class value (0-9) to its label string
numbers_values = list(range(10))
numbers_names = [str(digit) for digit in numbers_values]
numbers = {value: label for value, label in zip(numbers_values, numbers_names)}

## Use the classifier methods without any input filter (2ND step)

In [None]:
file_name = 'mnist_evaluation.csv'

# Load the dataset. read_csv already returns a DataFrame, so it is used
# directly instead of being re-wrapped in pd.DataFrame(...).
df_data = pd.read_csv(file_name)

# Generate the k folds (val_k=5) for the experiment, using the 'y_class'
# column as the category column and a fixed seed.
# NOTE(review): the meaning of the threshold / validation arguments is defined
# by the project's Fold.generate_k_folds helper -- confirm there.
folds = Fold.generate_k_folds(
    df_data,
    val_k=5,
    col_category='y_class',
    num_threshold=1,
    seed=1,
    num_folds_validation=3,
    num_threshold_validation=2,
)

### Random Forest Classifier 28x28

In [None]:
# Wrap a seeded Random Forest in the project's scikit-learn adapter.
clf_rf = RandomForestClassifier(random_state=1)
ml_method = ScikitLearnMachineLearning(clf_rf)

# Seeded TPE sampler for the hyper-parameter search (10 trials requested).
tpe_sampler = optuna.samplers.TPESampler(n_startup_trials=3, seed=1)

exp = Experiment(
    folds,
    ml_method,
    GoalOptimizationRandomForest,
    num_trials=10,
    sampler=tpe_sampler,
)
exp.calculate_results()

In [None]:
# Trials of the first fold's study as a table, highest "value" first.
studdy_fold_0 = exp.studies_per_fold[0]
trials_fold_0 = studdy_fold_0.trials_dataframe()
trials_fold_0.sort_values(by="value", ascending=False)

In [None]:
# Pass the trials of the first fold's study to the project's parameter graph helper.
first_fold_trials = exp.studies_per_fold[0].trials
parameters_graph(first_fold_trials)

In [None]:
# Report the experiment under its title, passing the digit label mapping.
# NOTE(review): show_results is a project helper -- confirm what it renders.
show_results(
    "Random Forest 28x28",
    numbers,
    exp,
)

## Use infogain as input filter (3RD step)

In [None]:
file_name = 'mnist_sample_feature_select.csv'

# Load the dataset. read_csv already returns a DataFrame, so it is used
# directly instead of being re-wrapped in pd.DataFrame(...).
df_data = pd.read_csv(file_name)

# Generate the k folds (val_k=5) for the experiment, using the 'y_class'
# column as the category column and a fixed seed.
# NOTE(review): the meaning of the threshold / validation arguments is defined
# by the project's Fold.generate_k_folds helper -- confirm there.
folds = Fold.generate_k_folds(
    df_data,
    val_k=5,
    col_category='y_class',
    num_threshold=1,
    seed=1,
    num_folds_validation=3,
    num_threshold_validation=2,
)

### Random Forest Classifier 20x20

In [None]:
# Wrap a seeded Random Forest in the project's scikit-learn adapter.
clf_rf = RandomForestClassifier(random_state=1)
ml_method = ScikitLearnMachineLearning(clf_rf)

# Seeded TPE sampler for the hyper-parameter search (10 trials requested).
tpe_sampler = optuna.samplers.TPESampler(n_startup_trials=3, seed=1)

exp = Experiment(
    folds,
    ml_method,
    GoalOptimizationRandomForest,
    num_trials=10,
    sampler=tpe_sampler,
)
exp.calculate_results()

In [None]:
# Trials of the first fold's study as a table, highest "value" first.
studdy_fold_0 = exp.studies_per_fold[0]
trials_fold_0 = studdy_fold_0.trials_dataframe()
trials_fold_0.sort_values(by="value", ascending=False)

In [None]:
# Pass the trials of the first fold's study to the project's parameter graph helper.
first_fold_trials = exp.studies_per_fold[0].trials
parameters_graph(first_fold_trials)

In [None]:
# Report the experiment under its title, passing the digit label mapping.
# NOTE(review): show_results is a project helper -- confirm what it renders.
show_results(
    "Random Forest 20x20",
    numbers,
    exp,
)

## Use feature transformation as input filter (4TH step)

In [None]:
file_name = 'mnist_sample_binary.csv'

# Load the dataset. read_csv already returns a DataFrame, so it is used
# directly instead of being re-wrapped in pd.DataFrame(...).
df_data = pd.read_csv(file_name)

# Generate the k folds (val_k=5) for the experiment, using the 'y_class'
# column as the category column and a fixed seed.
# NOTE(review): the meaning of the threshold / validation arguments is defined
# by the project's Fold.generate_k_folds helper -- confirm there.
folds = Fold.generate_k_folds(
    df_data,
    val_k=5,
    col_category='y_class',
    num_threshold=1,
    seed=1,
    num_folds_validation=3,
    num_threshold_validation=2,
)

### Random Forest Classifier 28x28 Binary Input

In [None]:
# Wrap a seeded Random Forest in the project's scikit-learn adapter.
clf_rf = RandomForestClassifier(random_state=1)
ml_method = ScikitLearnMachineLearning(clf_rf)

# Seeded TPE sampler for the hyper-parameter search (10 trials requested).
tpe_sampler = optuna.samplers.TPESampler(n_startup_trials=3, seed=1)

exp = Experiment(
    folds,
    ml_method,
    GoalOptimizationRandomForest,
    num_trials=10,
    sampler=tpe_sampler,
)
exp.calculate_results()

In [None]:
# Trials of the first fold's study as a table, highest "value" first.
studdy_fold_0 = exp.studies_per_fold[0]
trials_fold_0 = studdy_fold_0.trials_dataframe()
trials_fold_0.sort_values(by="value", ascending=False)

In [None]:
# Pass the trials of the first fold's study to the project's parameter graph helper.
first_fold_trials = exp.studies_per_fold[0].trials
parameters_graph(first_fold_trials)

In [None]:
# Report the experiment under its title, passing the digit label mapping.
# NOTE(review): show_results is a project helper -- confirm what it renders.
show_results(
    "Random Forest 28x28 Binary Input",
    numbers,
    exp,
)

## Mix input filters (5TH step)

In [None]:
file_name = 'mnist_sample_feature_select_binary.csv'

# Load the dataset. read_csv already returns a DataFrame, so it is used
# directly instead of being re-wrapped in pd.DataFrame(...).
df_data = pd.read_csv(file_name)

# Generate the k folds (val_k=5) for the experiment, using the 'y_class'
# column as the category column and a fixed seed.
# NOTE(review): the meaning of the threshold / validation arguments is defined
# by the project's Fold.generate_k_folds helper -- confirm there.
folds = Fold.generate_k_folds(
    df_data,
    val_k=5,
    col_category='y_class',
    num_threshold=1,
    seed=1,
    num_folds_validation=3,
    num_threshold_validation=2,
)

### Random Forest Classifier 20x20 Binary Input

In [None]:
# Wrap a seeded Random Forest in the project's scikit-learn adapter.
clf_rf = RandomForestClassifier(random_state=1)
ml_method = ScikitLearnMachineLearning(clf_rf)

# Seeded TPE sampler for the hyper-parameter search (10 trials requested).
tpe_sampler = optuna.samplers.TPESampler(n_startup_trials=3, seed=1)

exp = Experiment(
    folds,
    ml_method,
    GoalOptimizationRandomForest,
    num_trials=10,
    sampler=tpe_sampler,
)
exp.calculate_results()

In [None]:
# Trials of the first fold's study as a table, highest "value" first.
studdy_fold_0 = exp.studies_per_fold[0]
trials_fold_0 = studdy_fold_0.trials_dataframe()
trials_fold_0.sort_values(by="value", ascending=False)

In [None]:
# Pass the trials of the first fold's study to the project's parameter graph helper.
first_fold_trials = exp.studies_per_fold[0].trials
parameters_graph(first_fold_trials)

In [None]:
# Report the experiment under its title, passing the digit label mapping.
# NOTE(review): show_results is a project helper -- confirm what it renders.
show_results(
    "Random Forest 20x20 Binary Input",
    numbers,
    exp,
)