# Projeto3- Classificação Binária Breast Cancer com tuning dos parâmetros.ipynb

## Importação das bibliotecas

In [5]:
!pip install skorch #validação cruzada



In [6]:
import pandas as pd
import numpy as np
import torch.nn as nn
from skorch import NeuralNetBinaryClassifier
import torch
import torch.nn.functional as F
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from google import colab as cl
torch.__version__

'2.0.1+cu118'

## Base de dados

In [7]:
np.random.seed(123)
torch.manual_seed(123)

<torch._C.Generator at 0x7e4a08129df0>

In [8]:
file_upload = cl.files.upload() #importando arquivo para computador

previsores = pd.read_csv('/content/entradas_breast.csv')
classe = pd.read_csv('/content/saidas_breast.csv')

Saving saidas_breast.csv to saidas_breast.csv
Saving entradas_breast.csv to entradas_breast.csv


In [9]:
previsores = np.array(previsores, dtype='float32')
classe = np.array(classe, dtype='float32').squeeze(1)

In [10]:
previsores.shape

(569, 30)

In [11]:
classe.shape

(569,)

## Classe para estrutura da rede neural

In [12]:
class classificador_torch(nn.Module):
  def __init__(self, activation, neurons, initializer): #constructor
    super().__init__()

    #30 -> 16 -> 16 -> 1
    self.dense0 = nn.Linear(in_features=30, out_features=neurons)
    initializer(self.dense0.weight)
    self.activation0 = activation

    self.dense1 = nn.Linear(in_features=neurons, out_features=neurons)
    initializer(self.dense1.weight)
    self.activation1 = activation

    self.dense2 = nn.Linear(in_features=neurons, out_features=1)
    initializer(self.dense2.weight)
    self.output = nn.Sigmoid()


  def forward(self, X):
    X = self.dense0(X)
    X = self.activation0(X)

    X = self.dense1(X)
    X = self.activation1(X)

    X = self.dense2(X)
    X = self.output(X)

    return X

## Skorch

In [13]:
classificador_sklearn = NeuralNetBinaryClassifier(module = classificador_torch,
                                                  lr = 0.001,
                                                  optimizer__weight_decay = 0.0001,
                                                  train_split = False )

## Tuning dos Parâmetros

In [14]:
params = {
          'batch_size': [10, 30],
          'max_epochs': [50, 100],
          'optimizer': [torch.optim.Adam],# torch.optim.SGD],
          'criterion': [torch.nn.BCELoss],# torch.nn.HingeEmbeddingLoss],
          'module__activation': [F.relu],#, F.tanh],
          'module__neurons': [16],
          'module__initializer': [torch.nn.init.uniform_]#, torch.nn.init.normal_]
          }

In [15]:
params

{'batch_size': [10, 30],
 'max_epochs': [50, 100],
 'optimizer': [torch.optim.adam.Adam],
 'criterion': [torch.nn.modules.loss.BCELoss],
 'module__activation': [<function torch.nn.functional.relu(input: torch.Tensor, inplace: bool = False) -> torch.Tensor>],
 'module__neurons': [16],
 'module__initializer': [<function torch.nn.init.uniform_(tensor: torch.Tensor, a: float = 0.0, b: float = 1.0) -> torch.Tensor>]}

In [16]:
grid_search = GridSearchCV(estimator=classificador_sklearn, param_grid=params, scoring='accuracy', cv=2)
grid_search = grid_search.fit(previsores, classe)

  epoch    train_loss     dur
-------  ------------  ------
      1       [36m37.3239[0m  0.0448
      2       37.3239  0.0449
      3       37.3239  0.0447
      4       37.3239  0.0501
      5       37.3239  0.0451
      6       37.3239  0.0520
      7       37.3239  0.0520
      8       37.3239  0.0472
      9       37.3239  0.0455
     10       37.3239  0.0439
     11       37.3239  0.0441
     12       37.3239  0.0465
     13       37.3239  0.0436
     14       37.3239  0.0441
     15       37.3239  0.0448
     16       37.3239  0.0797
     17       37.3239  0.0666
     18       37.3239  0.0463
     19       37.3239  0.0464
     20       37.3239  0.0465
     21       37.3239  0.0424
     22       37.3239  0.0430
     23       37.3239  0.0488
     24       37.3239  0.0549
     25       37.3239  0.0442
     26       37.3239  0.0437
     27       37.3239  0.0444
     28       37.3239  0.0439
     29       37.3239  0.0441
     30       37.3239  0.0451
     31       37.3239  0.0444
 

In [19]:
melhores_parametros = grid_search.best_params_
melhor_precisao = grid_search.best_score_

In [20]:
melhores_parametros

{'batch_size': 10,
 'criterion': torch.nn.modules.loss.BCELoss,
 'max_epochs': 100,
 'module__activation': <function torch.nn.functional.relu(input: torch.Tensor, inplace: bool = False) -> torch.Tensor>,
 'module__initializer': <function torch.nn.init.uniform_(tensor: torch.Tensor, a: float = 0.0, b: float = 1.0) -> torch.Tensor>,
 'module__neurons': 16,
 'optimizer': torch.optim.adam.Adam}

In [21]:
melhor_precisao

0.7541759327897208

In [22]:
params = {
          'batch_size': [10],
          'max_epochs': [100],
          'optimizer': [torch.optim.Adam, torch.optim.SGD],
          'criterion': [torch.nn.BCELoss, torch.nn.HingeEmbeddingLoss],
          'module__activation': [F.relu, F.tanh],
          'module__neurons': [8, 16],
          'module__initializer': [torch.nn.init.uniform_, torch.nn.init.normal_]
          }

In [23]:
params

{'batch_size': [10],
 'max_epochs': [100],
 'optimizer': [torch.optim.adam.Adam, torch.optim.sgd.SGD],
 'criterion': [torch.nn.modules.loss.BCELoss,
  torch.nn.modules.loss.HingeEmbeddingLoss],
 'module__activation': [<function torch.nn.functional.relu(input: torch.Tensor, inplace: bool = False) -> torch.Tensor>,
  <function torch.nn.functional.tanh(input)>],
 'module__neurons': [8, 16],
 'module__initializer': [<function torch.nn.init.uniform_(tensor: torch.Tensor, a: float = 0.0, b: float = 1.0) -> torch.Tensor>,
  <function torch.nn.init.normal_(tensor: torch.Tensor, mean: float = 0.0, std: float = 1.0) -> torch.Tensor>]}

In [24]:
grid_search = GridSearchCV(estimator=classificador_sklearn, param_grid=params, scoring='accuracy', cv=5)
grid_search = grid_search.fit(previsores, classe)

[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
      1       [36m37.5824[0m  0.0990
      2       38.0220  0.0983
      3       38.2418  0.0977
      4       40.2198  0.0970
      5       41.9780  0.1071
      6       41.9792  0.1167
      7       [36m37.1981[0m  0.1019
      8       37.3626  0.0688
      9       37.3626  0.0677
     10       [36m37.1502[0m  0.0785
     11       37.3626  0.0692
     12       37.3626  0.0677
     13       37.3626  0.0739
     14       37.3626  0.0682
     15       37.3626  0.0757
     16       37.3626  0.0770
     17       37.3626  0.0756
     18       37.3626  0.0700
     19       37.3626  0.0787
     20       37.3626  0.0765
     21       37.3626  0.0700
     22       37.3626  0.0769
     23       37.3626  0.0702
     24       37.3626  0.0704
     25       37.3626  0.0689
     26       37.3626  0.0772
     27       37.3626  0.0711
     28       37.3626  0.0754
     29       37.3626  0.0714
     30       37.3626  0.0695


Traceback (most recent call last):
  File "/usr/local/lib/python3.10/dist-packages/sklearn/model_selection/_validation.py", line 767, in _score
    scores = scorer(estimator, X_test, y_test)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 234, in __call__
    return self._score(
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 276, in _score
    y_pred = method_caller(estimator, "predict", X)
  File "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_scorer.py", line 73, in _cached_call
    return getattr(estimator, method)(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/skorch/classifier.py", line 359, in predict
    return (y_proba[:, 1] > self.threshold).astype('uint8')
IndexError: too many indices for array: array is 1-dimensional, but 2 were indexed



[1;30;43mA saída de streaming foi truncada nas últimas 5000 linhas.[0m
-------  ------------  ------
      1        [36m0.9934[0m  0.0767
      2        [36m0.9912[0m  0.0826
      3        [36m0.9824[0m  0.0902
      4        [36m0.9714[0m  0.0827
      5        [36m0.9516[0m  0.0798
      6        [36m0.9341[0m  0.0763
      7        [36m0.8967[0m  0.0774
      8        [36m0.8923[0m  0.0840
      9        0.8923  0.0811
     10        0.8923  0.0849
     11        0.8923  0.1047
     12        0.8923  0.1070
     13        0.8923  0.0782
     14        0.8923  0.0758
     15        0.8923  0.0772
     16        0.8923  0.0822
     17        0.8923  0.0736
     18        0.8923  0.0779
     19        0.8923  0.0796
     20        0.8923  0.0765
     21        0.8945  0.0818
     22        0.8945  0.0816
     23        0.8945  0.0782
     24        0.8945  0.0889
     25        0.8945  0.0782
     26        [36m0.8909[0m  0.0795
     27        [36m0.8198[0m  0.07

 0.57654091 0.49583916 0.6274181  0.6274181  0.63990064 0.6274181
 0.69267195 0.61156653 0.68191275 0.62029188        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan        nan        nan        nan        nan
        nan        nan]


      2       37.2583  0.1323
      3       37.2583  0.1243
      4       37.2583  0.1371
      5       37.2583  0.1235
      6       37.2583  0.1147
      7       37.2583  0.1279
      8       37.2583  0.1186
      9       37.2583  0.1346
     10       37.2583  0.0876
     11       37.2583  0.0897
     12       37.2583  0.0922
     13       37.2583  0.1212
     14       37.2583  0.1133
     15       37.2583  0.0893
     16       37.2583  0.0904
     17       37.2583  0.0886
     18       [36m19.0251[0m  0.0890
     19        [36m0.6455[0m  0.0896
     20        [36m0.5671[0m  0.0868
     21        [36m0.5472[0m  0.0906
     22        [36m0.5346[0m  0.0922
     23        [36m0.5199[0m  0.0973
     24        [36m0.5049[0m  0.0883
     25        [36m0.4929[0m  0.0910
     26        [36m0.4808[0m  0.0871
     27        [36m0.4620[0m  0.0905
     28        [36m0.4475[0m  0.0975
     29        [36m0.4356[0m  0.0877
     30        [36m0.4205[0m  0.0866
     31      

In [25]:
melhores_parametros = grid_search.best_params_
melhor_precisao = grid_search.best_score_

In [26]:
melhores_parametros

{'batch_size': 10,
 'criterion': torch.nn.modules.loss.BCELoss,
 'max_epochs': 100,
 'module__activation': <function torch.nn.functional.relu(input: torch.Tensor, inplace: bool = False) -> torch.Tensor>,
 'module__initializer': <function torch.nn.init.uniform_(tensor: torch.Tensor, a: float = 0.0, b: float = 1.0) -> torch.Tensor>,
 'module__neurons': 8,
 'optimizer': torch.optim.adam.Adam}

In [27]:
melhor_precisao

0.8103400093153237