In [21]:
!mkdir bases

mkdir: cannot create directory ‘bases’: File exists


In [22]:
from sklearn.datasets import make_blobs
import pandas as pd
for i in range(5):
    X, y = make_blobs(n_samples=100, centers=3, n_features=5)
    X = pd.DataFrame(X)
    
    # Define the mapping of values to replacements
    replacement_mapping = {
        0: 'A',
        1: 'B',
        2: 'C'
    }

    # Replace values in the list
    y = [replacement_mapping.get(value, value) for value in y]
    X['class'] = y
    X.to_csv(f'bases/df{i}.csv',index=False)

In [23]:
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier as knn
from experimenter import ModelRunner

#Configurando

In [24]:
runner = ModelRunner()

#Bases
runner.data.add_datasets('bases/')

#Splits
runner.splits.add_holdout([.3,.25])
#runner.splits.add_fold([5,6])

#Modelos
runner.models.add_models(['DecisionTreeClassifier',
                         'KNeighborsClassifier',
                         'MLPClassifier',
                         'GaussianNB',
                         'LinearSVC',
                         'AdaBoostClassifier',
                         'RandomForestClassifier',
                         BaggingClassifier(estimator=knn())]
                         )

#Métricas
runner.metrics.add_score(['matrix',
                          'acc',
                          'f1_micro'] #Equivalente a  f1_score(y_true, y_pred, average='micro'), f1_score só funciona para binários
                          )

#Seeds
runner.random.add_model_seed([42,43])
#runner.random.add_split_seed([10,12])

runner.summarize()

Number of dataframes: 5
Metrics used: confusion_matrix, accuracy_score, f1_score
Split method:
     - Holdouts: [0.3, 0.25]
     - Folds: []
Seeds methods:
     - Each model will run 2 times witch seeds: [42, 43]
     - Each split will run 1 times witch seeds: [42]
Models used: DecisionTreeClassifier, KNeighborsClassifier, MLPClassifier, GaussianNB, LinearSVC, AdaBoostClassifier, RandomForestClassifier, BaggingClassifier


In [25]:
#Acessando um dataset
runner.data.datasets[0].get_target()

0     1
1     1
2     2
3     1
4     2
     ..
95    1
96    1
97    2
98    1
99    0
Name: class, Length: 100, dtype: int64

# Executando

Para os dados não serem perdidos ao acontecer um erro save_each_evaluation salva atualiza o arquivo result.pkl a cada execução.

In [26]:
runner.run(save_each_evaluation=True)



In [27]:
runner.results

Unnamed: 0,df_name,model_name,model_params,split_type,split_value,model_seed,split_seed,confusion_matrix,accuracy_score,f1_score
0,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.30,42.0,42,"[[10, 0, 0], [0, 11, 0], [0, 0, 9]]",1.0,1.0
1,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.30,43.0,42,"[[10, 0, 0], [0, 11, 0], [0, 0, 9]]",1.0,1.0
2,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.25,42.0,42,"[[9, 0, 0], [0, 8, 0], [0, 0, 8]]",1.0,1.0
3,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.25,43.0,42,"[[9, 0, 0], [0, 8, 0], [0, 0, 8]]",1.0,1.0
4,bases/df3.csv,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",holdout,0.30,,42,"[[10, 0, 0], [0, 11, 0], [0, 0, 9]]",1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
135,bases/df4.csv,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",holdout,0.25,43.0,42,"[[6, 0, 0], [0, 12, 0], [0, 0, 7]]",1.0,1.0
136,bases/df4.csv,BaggingClassifier,"{'base_estimator': 'deprecated', 'bootstrap': ...",holdout,0.30,42.0,42,"[[8, 0, 0], [0, 14, 0], [0, 0, 8]]",1.0,1.0
137,bases/df4.csv,BaggingClassifier,"{'base_estimator': 'deprecated', 'bootstrap': ...",holdout,0.30,43.0,42,"[[8, 0, 0], [0, 14, 0], [0, 0, 8]]",1.0,1.0
138,bases/df4.csv,BaggingClassifier,"{'base_estimator': 'deprecated', 'bootstrap': ...",holdout,0.25,42.0,42,"[[6, 0, 0], [0, 12, 0], [0, 0, 7]]",1.0,1.0


In [28]:
pd.read_pickle("result.pkl")

Unnamed: 0,df_name,model_name,model_params,split_type,split_value,model_seed,split_seed,confusion_matrix,accuracy_score,f1_score
0,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.30,42.0,42,"[[10, 0, 0], [0, 11, 0], [0, 0, 9]]",1.0,1.0
1,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.30,43.0,42,"[[10, 0, 0], [0, 11, 0], [0, 0, 9]]",1.0,1.0
2,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.25,42.0,42,"[[9, 0, 0], [0, 8, 0], [0, 0, 8]]",1.0,1.0
3,bases/df3.csv,DecisionTreeClassifier,"{'ccp_alpha': 0.0, 'class_weight': None, 'crit...",holdout,0.25,43.0,42,"[[9, 0, 0], [0, 8, 0], [0, 0, 8]]",1.0,1.0
4,bases/df3.csv,KNeighborsClassifier,"{'algorithm': 'auto', 'leaf_size': 30, 'metric...",holdout,0.30,,42,"[[10, 0, 0], [0, 11, 0], [0, 0, 9]]",1.0,1.0
...,...,...,...,...,...,...,...,...,...,...
135,bases/df4.csv,RandomForestClassifier,"{'bootstrap': True, 'ccp_alpha': 0.0, 'class_w...",holdout,0.25,43.0,42,"[[6, 0, 0], [0, 12, 0], [0, 0, 7]]",1.0,1.0
136,bases/df4.csv,BaggingClassifier,"{'base_estimator': 'deprecated', 'bootstrap': ...",holdout,0.30,42.0,42,"[[8, 0, 0], [0, 14, 0], [0, 0, 8]]",1.0,1.0
137,bases/df4.csv,BaggingClassifier,"{'base_estimator': 'deprecated', 'bootstrap': ...",holdout,0.30,43.0,42,"[[8, 0, 0], [0, 14, 0], [0, 0, 8]]",1.0,1.0
138,bases/df4.csv,BaggingClassifier,"{'base_estimator': 'deprecated', 'bootstrap': ...",holdout,0.25,42.0,42,"[[6, 0, 0], [0, 12, 0], [0, 0, 7]]",1.0,1.0
