In [1]:
import torch
from torch import nn
from torch import optim
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import Dataset, DataLoader

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

Ucitaj podatke za breast cancer

In [3]:
# Fetch the breast-cancer data as a (features, labels) pair.
X, y = load_breast_cancer(return_X_y=True)
# Shown as the cell output: the shapes of the feature matrix and label vector.
X.shape, y.shape

((569, 30), (569,))

Podeli podatke na trening i test skup tako da trening skup cini 80% podataka

In [4]:
# Keep 80% of the samples for training and the rest for testing.
# A fixed random_state makes the split (and every metric computed from it)
# identical after Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, random_state=42
)

Skaliraj podatke

In [5]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then apply the
# same fitted scaler to both splits so no test information is used for fitting.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

Napravi rucni dataset (batch_size od 16 se zadaje kasnije, pri pravljenju DataLoader-a)

In [6]:
class Dataset(torch.utils.data.Dataset):
    """In-memory dataset that pairs feature rows with labels as float32 tensors.

    The base class is written out as ``torch.utils.data.Dataset`` because this
    class reuses the name ``Dataset``. Inheriting from the bare name would make
    the class subclass its own previous definition whenever the cell is re-run.
    """

    def __init__(self, X, y):
        """Convert the features and labels to float32 tensors.

        Args:
            X: Array-like of features, one row per sample.
            y: Array-like of labels, one entry per sample.

        Raises:
            ValueError: If ``X`` and ``y`` hold a different number of samples.
        """
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        # Fail early: a mismatch would otherwise surface as an IndexError in
        # the middle of an epoch, or silently drop trailing labels.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``(features, label)`` pair stored at index ``i``."""
        return self.X[i], self.y[i]

Napravi klasu za nn koja sadrzi linearan sloj sa ulazom 30 i izlazom 10, aktivacionu funkciju ReLU i linearan sloj sa ulazom 10 i izlazom 1

In [7]:
class Net(nn.Module):
    """Small feed-forward binary classifier that outputs raw logits.

    Architecture: Linear(30 -> 10) -> ReLU -> Linear(10 -> 1). No sigmoid is
    applied, so the output is meant for a logit-based loss such as
    ``BCEWithLogitsLoss``.
    """

    def __init__(self):
        super().__init__()
        self.lin1 = nn.Linear(30, 10)
        self.act = nn.ReLU()
        self.lin2 = nn.Linear(10, 1)

    def forward(self, x):
        """Compute one logit per sample.

        Args:
            x: Float tensor whose last dimension has size 30.

        Returns:
            The logits with the trailing size-1 dimension removed, e.g. shape
            ``(batch,)`` for an input of shape ``(batch, 30)``.
        """
        x = self.lin1(x)
        x = self.act(x)
        x = self.lin2(x)
        # Squeeze only the trailing feature dimension. A bare squeeze() would
        # also remove a batch dimension of size 1 and return a 0-d tensor,
        # which no longer matches a target of shape (1,).
        return x.squeeze(-1)

Inicijalizuj model, funkciju greske i optimizator

In [14]:
# Seed PyTorch's RNG before building the model so the random weight
# initialisation is the same on every fresh run of the notebook.
torch.manual_seed(42)

model = Net()
# The network emits raw logits, so use the loss that applies the sigmoid itself.
loss_fn = BCEWithLogitsLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01)

Konstruisi funkcije za train i test loop

In [16]:
def train_loop(dataloader, model, loss_fn, optimizer, device):
    """Run one optimisation pass over every batch yielded by ``dataloader``.

    Args:
        dataloader: Iterable yielding ``(inputs, targets)`` batches.
        model: Module being trained; it is switched to training mode.
        loss_fn: Callable mapping ``(predictions, targets)`` to a scalar loss.
        optimizer: Optimizer that updates the model's parameters.
        device: Device each batch is moved to before the forward pass.
    """
    model.train()
    for batch in dataloader:
        inputs, targets = batch
        inputs = inputs.to(device)
        targets = targets.to(device)

        batch_loss = loss_fn(model(inputs), targets)
        batch_loss.backward()

        # Apply the update, then clear the gradients so the next batch
        # starts from zero instead of accumulating.
        optimizer.step()
        optimizer.zero_grad()

def test_loop(dataloader, model, loss_fn, optimizer):
    model.train()
    with torch.no_grad():
        total_loss = 0
        num_same = 0
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            outputs = model(X)
            loss = loss_fn(outputs, y)
            total_loss += loss.item()

            if len(outputs.shape) == 1:
                preds = outputs > 0
                num_same += sum(preds == y).item()
            else:
                _, indices = torch.max(outputs, 1)
                num_same += sum(indices == y).item()
        total_loss / len(dataloader.dataset)
        num_same /

Ucitaj podatke, batch_size je 16

In [10]:
# Wrap each split in the tensor-backed Dataset class.
train_dataset, test_dataset = Dataset(X_train, y_train), Dataset(X_test, y_test)

# One loader per split, both yielding batches of 16 without shuffling.
train_dataloader, test_dataloader = (
    DataLoader(split, batch_size=16) for split in (train_dataset, test_dataset)
)

Pokreni nn za 10 epoha

In [15]:
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
model.to(device)

# Alternate one training pass and one evaluation pass per epoch.
num_epochs = 10
for epoch in range(num_epochs):
    train_loop(train_dataloader, model, loss_fn, optimizer, device)
    test_loop(test_dataloader, model, loss_fn, device)

0.045998446251216685
0.7543859649122807
0.04243360172238266
0.8245614035087719
0.03887640959338138
0.8421052631578947
0.035183089605548924
0.868421052631579
0.031595012075022647
0.9385964912280702
0.028323241754582052
0.9473684210526315
0.025402666968211793
0.9649122807017544
0.022880145070845622
0.9736842105263158
0.020741252130583712
0.9649122807017544
0.018958758889583118
0.9649122807017544
