In [87]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
import warnings
import pickle

In [88]:
DATABSE_PATH = '/content/drive/MyDrive/dados/sonar.csv'
df = pd.read_csv(DATABSE_PATH)

In [89]:
df.head()

Unnamed: 0,0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032,R
0,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
1,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
2,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
3,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R
4,0.0286,0.0453,0.0277,0.0174,0.0384,0.099,0.1201,0.1833,0.2105,0.3039,...,0.0045,0.0014,0.0038,0.0013,0.0089,0.0057,0.0027,0.0051,0.0062,R


In [90]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 207 entries, 0 to 206
Data columns (total 61 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0.0200  207 non-null    float64
 1   0.0371  207 non-null    float64
 2   0.0428  207 non-null    float64
 3   0.0207  207 non-null    float64
 4   0.0954  207 non-null    float64
 5   0.0986  207 non-null    float64
 6   0.1539  207 non-null    float64
 7   0.1601  207 non-null    float64
 8   0.3109  207 non-null    float64
 9   0.2111  207 non-null    float64
 10  0.1609  207 non-null    float64
 11  0.1582  207 non-null    float64
 12  0.2238  207 non-null    float64
 13  0.0645  207 non-null    float64
 14  0.0660  207 non-null    float64
 15  0.2273  207 non-null    float64
 16  0.3100  207 non-null    float64
 17  0.2999  207 non-null    float64
 18  0.5078  207 non-null    float64
 19  0.4797  207 non-null    float64
 20  0.5783  207 non-null    float64
 21  0.5071  207 non-null    float64
 22  0.

In [91]:
df.isnull().sum().sum()

0

In [92]:
df.shape

(207, 61)

In [93]:
X = df.iloc[:,:60].values
y = df.iloc[:,60:].values

In [94]:
encoder = LabelEncoder()
y = encoder.fit_transform(y)

In [95]:
warnings.filterwarnings('ignore')

In [96]:
params = {
    'n_estimators':range(50,310,50),
    'learning_rate':np.logspace(3,-3,7),
    'algorithm':['SAMME']
}
cv = StratifiedKFold(
    n_splits = 10,
    shuffle=True,
    random_state=5
)
adaboost_test = AdaBoostClassifier()
searchCV = RandomizedSearchCV(
    estimator = adaboost_test,
    param_distributions = params,
    cv=cv,
    random_state = 5
)
searchCV.fit(X,y)
print(searchCV.best_score_)
print(searchCV.best_params_)
best_params = searchCV.best_params_

0.8790476190476191
{'n_estimators': 300, 'learning_rate': 1.0, 'algorithm': 'SAMME'}


In [97]:
X_train, X_test, y_train, y_test = train_test_split(X,y,shuffle=True,test_size=0.2,random_state=5)

In [98]:
adaboost = AdaBoostClassifier(**best_params)
adaboost.fit(X_train,y_train)
preds = adaboost.predict(X_test)
print(accuracy_score(y_test,preds))

0.8095238095238095


In [99]:
response = input('Do you want to save the model?: [Y/n]?').upper()
if response == 'Y':
  with open('model.pkl','wb') as f:
    pickle.dump(adaboost,f)

Do you want to save the model?: [Y/n]?y
