In [1]:
import torch
from torch import nn
from torch import optim
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import Dataset, DataLoader

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [3]:
# Load the breast-cancer data as a (features, target) pair instead of a Bunch object.
X, y = load_breast_cancer(return_X_y=True)

In [4]:
# Hold out 20% of the rows for evaluation.
# random_state pins the shuffle so the split (and everything computed from it)
# is the same after a kernel restart; stratify=y keeps the class ratio equal
# in the training and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42, stratify=y
)

In [5]:
# Size of the training part: (rows, features).
X_train.shape

(455, 30)

In [6]:
# Size of the held-out part: (rows, features).
X_test.shape

(114, 30)

In [7]:
# First training row before scaling: the raw features differ by orders of magnitude.
X_train[0]

array([1.270e+01, 1.217e+01, 8.088e+01, 4.950e+02, 8.785e-02, 5.794e-02,
       2.360e-02, 2.402e-02, 1.583e-01, 6.275e-02, 2.253e-01, 6.457e-01,
       1.527e+00, 1.737e+01, 6.131e-03, 1.263e-02, 9.075e-03, 8.231e-03,
       1.713e-02, 4.414e-03, 1.365e+01, 1.692e+01, 8.812e+01, 5.669e+02,
       1.314e-01, 1.607e-01, 9.385e-02, 8.224e-02, 2.775e-01, 9.464e-02])

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
# Estimate the per-feature scaling statistics on the training part only, then
# apply that same fitted transform to both parts, so nothing about the test
# rows influences the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# The same first row after standardisation.
X_train[0]

array([-0.39613435, -1.6852843 , -0.4502001 , -0.44555548, -0.64680697,
       -0.90331658, -0.84225878, -0.65686789, -0.86024337, -0.02873597,
       -0.63815091, -1.04754445, -0.6540412 , -0.4926319 , -0.32189669,
       -0.7439685 , -0.74698374, -0.59137547, -0.41836691,  0.21064109,
       -0.54102011, -1.44001225, -0.57089973, -0.55044548, -0.08841847,
       -0.62466071, -0.87599259, -0.51326743, -0.23342987,  0.52589063])

In [11]:
class OurDataset(Dataset):
    """Map-style dataset pairing a feature matrix with a target vector.

    Both inputs are copied into ``float32`` tensors once, at construction time,
    so indexing later only slices tensors.
    """

    def __init__(self, X, y):
        """Store ``X`` and ``y`` as float32 tensors.

        Args:
            X: Array-like of features, one row per sample.
            y: Array-like of targets, one entry per sample.

        Raises:
            ValueError: If ``X`` and ``y`` do not contain the same number of samples.
        """
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # Fail early: a mismatch would otherwise show up as an IndexError deep
        # inside a DataLoader, or as trailing labels that are silently ignored.
        if len(self.X) != len(self.y):
            raise ValueError(
                f'X and y must have the same number of samples, '
                f'got {len(self.X)} and {len(self.y)}'
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``(features, target)`` pair at index ``i``."""
        return self.X[i], self.y[i]

In [12]:
# Wrap the scaled training features and their labels; the cell's value is the sample count.
train_dataset = OurDataset(X=X_train, y=y_train)
len(train_dataset)

455

In [13]:
# One item is a (features, target) pair of float32 tensors.
train_dataset[0]

(tensor([-0.3961, -1.6853, -0.4502, -0.4456, -0.6468, -0.9033, -0.8423, -0.6569,
         -0.8602, -0.0287, -0.6382, -1.0475, -0.6540, -0.4926, -0.3219, -0.7440,
         -0.7470, -0.5914, -0.4184,  0.2106, -0.5410, -1.4400, -0.5709, -0.5504,
         -0.0884, -0.6247, -0.8760, -0.5133, -0.2334,  0.5259]),
 tensor(1.))

In [14]:
# Held-out data, served in batches of 16 in its stored order.
test_dataset = OurDataset(X=X_test, y=y_test)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=16)

In [15]:
# shuffle=True draws a new sample order every epoch, so SGD does not see the
# exact same mini-batches in the exact same order on every pass.
train_dataloader = DataLoader(train_dataset, batch_size=16, shuffle=True)

# Peek at a single batch to confirm the shapes fed to the model.
for batch_x, batch_y in train_dataloader:
    print(batch_x.shape)
    print(batch_y.shape)
    break

torch.Size([16, 30])
torch.Size([16])


In [16]:
from torch import nn

In [17]:
class Net(nn.Module):
    """Two-layer perceptron producing one raw logit per sample.

    Architecture: ``Linear(30, 10) -> ReLU -> Linear(10, 1)``. No sigmoid is
    applied; the output is a logit.
    """

    def __init__(self):
        super().__init__()

        self.layer1 = nn.Linear(30, 10)
        self.layer2 = nn.Linear(10, 1)
        self.activation = nn.ReLU()

    def forward(self, x):
        """Return logits of shape ``(batch,)`` for ``x`` of shape ``(batch, 30)``."""
        # g(Wx + b)
        h1 = self.activation(self.layer1(x))
        h2 = self.layer2(h1)
        # Drop only the trailing size-1 feature axis. A bare squeeze() would also
        # remove the batch axis when the batch holds a single sample, yielding a
        # 0-d tensor that no longer matches the target's shape.
        return h2.squeeze(-1)

In [18]:
# Model, loss and optimizer used by the training cells below.
model = Net()
loss_fn = BCEWithLogitsLoss()  # takes the model's raw outputs together with the targets
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [19]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(X, y)`` batches of tensors.
        model: Module being trained; switched to training mode.
        loss_fn: Callable ``loss_fn(preds, y)`` returning a scalar tensor.
        optimizer: Optimizer that holds ``model``'s parameters.
        device: Device each batch is moved to before the forward pass.
    """
    model.train()
    # Discard any gradients already stored on the parameters (e.g. from a
    # backward() call made outside this function); otherwise they would be
    # added to the first batch's gradients and distort the first step.
    optimizer.zero_grad()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        preds = model(X)
        loss = loss_fn(preds, y)

        # backpropagation - compute the gradients
        loss.backward()
        # x_new = x - lr * grad
        optimizer.step()
        # reset, so gradients are not summed across batches
        optimizer.zero_grad()

In [20]:
def test_loop(dataloader, model, loss_fn, device):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        num_same = 0
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            loss = loss_fn(outputs, y)
            total_loss += loss.item()

            if len(outputs.shape) == 1: # accuracy za dve klase
                preds = outputs > 0
                num_same += sum(preds == y).item()
            else:
                _, indices = torch.max(outputs, 1) # vise klasa - maks po dimenziji 1 jer je outputs.shape = (batch_size, num_classes)
                num_same += sum(indices == y).item()

        print(f'Average loss: {total_loss / len(dataloader.dataset)}')
        print(f'Accuracy: {num_same / len(dataloader.dataset)}')

In [21]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model.to(device)

num_epochs = 10
for epoch in range(num_epochs):
    # One optimisation pass over the training batches, then a report on the held-out batches.
    train_loop(train_dataloader, model, loss_fn, optimizer, device)
    test_loop(test_dataloader, model, loss_fn, device)

Average loss: 0.049660921619649516
Accuracy: 0.3157894736842105
Average loss: 0.04471501498891596
Accuracy: 0.5614035087719298
Average loss: 0.04067986994458918
Accuracy: 0.7982456140350878
Average loss: 0.03702828601786965
Accuracy: 0.8771929824561403
Average loss: 0.0335262982468856
Accuracy: 0.9210526315789473
Average loss: 0.030169856130031116
Accuracy: 0.9122807017543859
Average loss: 0.027043190441633527
Accuracy: 0.9298245614035088
Average loss: 0.0242208671151546
Accuracy: 0.9385964912280702
Average loss: 0.021776639578635234
Accuracy: 0.9385964912280702
Average loss: 0.019719417942197698
Accuracy: 0.9385964912280702
