# Find the appropriate parameters that improve a given score for a selected sampling method

Fill the `parameters.xlsx` file with the parameters needed by each sampling method for the corresponding score and dataset.


In [1]:
#import oversampling methods
from oversampling_methods.assembled_smote import assembled_smote
from oversampling_methods.cluster_smote import cluster_smote
from oversampling_methods.cure_smote import cure_smote
from oversampling_methods.smote import smote
from oversampling_methods.dbsmote import dbsmote
from oversampling_methods.de_oversampling import de_oversampling
from oversampling_methods.gsmote import gsmote
from oversampling_methods.kmeans_smote import kmeans_smote
from oversampling_methods.lee import lee
from oversampling_methods.polynom_fit_smote import polynom_fit_smote
from oversampling_methods.prowsyn import prowsyn
from oversampling_methods.smobd import smobd
from oversampling_methods.smote_ipf import smote_ipf
from oversampling_methods.wssmote import wssmote
## Add here the sampling method you want to try

#import algorithm
from tools.generate_tab import generate_tab_to_excel

#import machine learning classifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from smote_variants import MLPClassifierWrapper
from sklearn.ensemble import AdaBoostClassifier

#warnings
import warnings
warnings.filterwarnings("ignore")
import itertools

In [2]:
## Sampling strategies
# Oversampling methods to evaluate.
# NOTE(review): kmeans_smote is imported in the first cell but is not listed
# here -- confirm whether the omission is intentional.
sampling_strategies = [
    assembled_smote,
    cluster_smote,
    cure_smote,
    smote,
    dbsmote,
    de_oversampling,
    gsmote,
    lee,
    polynom_fit_smote,
    prowsyn,
    smobd,
    smote_ipf,
    wssmote,
]

## Scores to compare
score_strategies = ['auc', 'gacc', 'p20', 'accuracy', 'f1', 'ppv', 'npv']

## Given Datasets
name_file = 'presev_before'

## Classifier
# Candidate classifiers: one default AdaBoost model followed by small
# hyper-parameter grids for MLP, k-NN and decision trees, and finally a
# calibrated linear SVM. The list order matches the order of the grids below.
list_classifier = [AdaBoostClassifier()]

# MLP: every (activation, hidden_layer_fraction) combination.
list_classifier.extend(
    MLPClassifierWrapper(activation=activation, hidden_layer_fraction=fraction)
    for activation, fraction in itertools.product(
        ['relu', 'logistic'], [1.0, 0.5, 0.1]
    )
)

# k-NN: every (n_neighbors, weights, p) combination; weights is fixed to
# 'distance'.
list_classifier.extend(
    KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, p=p)
    for n_neighbors, weights, p in itertools.product(
        [3, 5, 7], ['distance'], [1, 2, 3]
    )
)

# Decision tree: every (criterion, max_depth) combination.
list_classifier.extend(
    DecisionTreeClassifier(criterion=criterion, max_depth=max_depth)
    for criterion, max_depth in itertools.product(['gini', 'entropy'], [None, 5])
)

# Linear SVM wrapped in CalibratedClassifierCV. The wrapped estimator is
# passed positionally on purpose: scikit-learn renamed the keyword from
# `base_estimator` to `estimator` in 1.2 and removed the old name in 1.4,
# while the first positional slot is the wrapped estimator in both cases.
list_classifier.append(
    CalibratedClassifierCV(
        LinearSVC(C=1.0, penalty='l2', loss='squared_hinge', dual=False)
    )
)

In [None]:
## Generate parameters
# Hand the sampling methods, scores, dataset name and classifiers configured
# in the previous cell to generate_tab_to_excel.
# NOTE(review): presumably this writes the results to an Excel file (see the
# notebook introduction) -- confirm in tools.generate_tab.
generate_tab_to_excel(
    sampling_strategies,
    score_strategies,
    name_file,
    list_classifier,
)