In [1]:
import torch
from torch import nn
from torch import optim
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import Dataset, DataLoader

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

In [3]:
X, y = load_breast_cancer(return_X_y=True)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [5]:
X_train.shape

(455, 30)

In [6]:
X_test.shape

(114, 30)

In [7]:
X_train[0]

array([1.385e+01, 1.721e+01, 8.844e+01, 5.887e+02, 8.785e-02, 6.136e-02,
       1.420e-02, 1.141e-02, 1.614e-01, 5.890e-02, 2.185e-01, 8.561e-01,
       1.495e+00, 1.791e+01, 4.599e-03, 9.169e-03, 9.127e-03, 4.814e-03,
       1.247e-02, 1.708e-03, 1.549e+01, 2.358e+01, 1.003e+02, 7.259e+02,
       1.157e-01, 1.350e-01, 8.115e-02, 5.104e-02, 2.364e-01, 7.182e-02])

In [8]:
from sklearn.preprocessing import StandardScaler

In [9]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
X_train[0]

array([-0.06311549, -0.50460789, -0.13094595, -0.17364446, -0.60119443,
       -0.82855984, -0.93351952, -0.96577715, -0.74542199, -0.56629043,
       -0.70821954, -0.6821818 , -0.71162563, -0.52803363, -0.83329326,
       -0.92566762, -0.72324232, -1.15703714, -1.02599761, -0.76747021,
       -0.14991844, -0.36191715, -0.19429025, -0.26442652, -0.71596219,
       -0.76443944, -0.90271605, -0.95495613, -0.8865722 , -0.67406481])

In [11]:
class OurDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, i):
        return self.X[i], self.y[i]

In [12]:
train_dataset = OurDataset(X_train, y_train)
len(train_dataset)

455

In [13]:
train_dataset[0]

(tensor([-0.0631, -0.5046, -0.1309, -0.1736, -0.6012, -0.8286, -0.9335, -0.9658,
         -0.7454, -0.5663, -0.7082, -0.6822, -0.7116, -0.5280, -0.8333, -0.9257,
         -0.7232, -1.1570, -1.0260, -0.7675, -0.1499, -0.3619, -0.1943, -0.2644,
         -0.7160, -0.7644, -0.9027, -0.9550, -0.8866, -0.6741]),
 tensor(1.))

In [14]:
test_dataset = OurDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=16)

In [15]:
train_dataloader = DataLoader(train_dataset, batch_size=16)
for batch_x, batch_y in train_dataloader:
    print(batch_x.shape)
    print(batch_y.shape)
    break

torch.Size([16, 30])
torch.Size([16])


In [16]:
from torch import nn

In [17]:
class Net(nn.Module):
    def __init__(self):

        super().__init__()

        # self.net = nn.Sequential([layer1, layer2])

        self.layer1 = nn.Linear(30, 10)
        self.layer2 = nn.Linear(10, 1)
        self.activation = nn.ReLU()

    def forward(self, x):
        # g(Wx+b)
        h1 = self.activation(self.layer1(x))
        h2 = self.layer2(h1)
        return h2

In [18]:
model = Net()
loss_fn = BCEWithLogitsLoss()
# optimizer = SGD(model.parameters(), lr=0.1)
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [19]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    model.train()
    for X, y in dataloader:
        X, y = X.to(device), y.to(device)
        preds = model(X)
        loss = loss_fn(preds, y.unsqueeze(1))

        # backpropagation - racunanje gradijenta
        loss.backward()
        # x_new = x - lr * grad
        optimizer.step()
        # necemo da sabiramo sve gradijente
        optimizer.zero_grad()

In [20]:
def test_loop(dataloader, model, loss_fn, device):
    model.eval()
    with torch.no_grad():
        total_loss = 0
        num_same = 0
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            loss = loss_fn(outputs, y.unsqueeze(1))
            total_loss += loss.item()
            # accuracy
            # preds = outputs > 0 # dve klase
            _, indices = torch.max(outputs, 1) # vise klasa - maks po dimenziji 1
                                               # jer je outputs.shape = (batch_size, num_classes)
            num_same += sum(indices == y).item()
        print(total_loss)
        print(num_same / len(dataloader.dataset))

In [22]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model.to(device)
num_epochs = 10
for epoch in range(num_epochs):
    train_loop(train_dataloader, model, loss_fn, optimizer, device)
    test_loop(test_dataloader, model, loss_fn, device)

4.2924840450286865
0.40350877192982454
3.899208754301071
0.40350877192982454
3.516589045524597
0.40350877192982454
3.1519010066986084
0.40350877192982454
2.8246245980262756
0.40350877192982454
2.5408855229616165
0.40350877192982454
2.29860121011734
0.40350877192982454
2.093602254986763
0.40350877192982454
1.9197643101215363
0.40350877192982454
1.7728595435619354
0.40350877192982454
