# Логистическая регрессия в PyTorch

In [1]:
import torch
import torch.nn as nn
import numpy as np
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


### Подготовка данных

In [27]:
# Load the breast cancer dataset bundled with scikit-learn.
bc = datasets.load_breast_cancer()
X = bc.data    # feature matrix
y = bc.target  # class labels

In [28]:
# X is laid out as (samples, features); n_features sizes the model input.
n_samples = X.shape[0]
n_features = X.shape[1]

In [29]:
# Split into training and test sets: 20% of the samples are held out,
# and the fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [30]:
# Standardize the features. The scaler is fitted on the training split only
# and the same fitted transform is then applied to both splits, so no
# statistics from the test split are used.
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [31]:
# Convert every NumPy array to a float32 torch tensor.
X_train, X_test, y_train, y_test = (
    torch.from_numpy(array.astype(np.float32))
    for array in (X_train, X_test, y_train, y_test)
)

In [32]:
# Reshape the targets into single-column tensors of shape (n, 1) so they
# line up with the model output, which has one value per sample.
y_train = y_train.view(y_train.size(0), 1)
y_test = y_test.view(y_test.size(0), 1)

### Определение модели

In [33]:
class LogisticRegression(nn.Module):
    """Binary logistic regression: a single linear layer followed by a sigmoid.

    Args:
        n_input_features: Number of input features per sample.
    """

    def __init__(self, n_input_features):
        super().__init__()
        # One affine layer mapping the feature vector to a single output value.
        self.linear = nn.Linear(in_features=n_input_features, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for ``x``."""
        logits = self.linear(x)
        return torch.sigmoid(logits)

In [34]:
# Build the classifier with one input per dataset feature.
model = LogisticRegression(n_input_features=n_features)

### Задание функции потерь и оптимизатора

In [35]:
# Binary cross-entropy applied to the model's sigmoid outputs.
loss_fn = nn.BCELoss()

# Stochastic gradient descent over all model parameters.
learning_rate = 0.01
optimizer = torch.optim.SGD(params=model.parameters(), lr=learning_rate)

### Цикл обучения

In [36]:
num_epochs = 100
for epoch in range(num_epochs):
    # Forward pass over the whole training tensor, then the loss against
    # the training targets.
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)

    # Backward pass, one optimizer update, then clear the gradients so
    # they do not accumulate into the next iteration.
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

    # Report the loss only on every 10th epoch.
    if (epoch + 1) % 10 != 0:
        continue
    print(f'epoch {epoch+1}: loss = {loss.item():.3f}')

epoch 10: loss = 0.499
epoch 20: loss = 0.435
epoch 30: loss = 0.390
epoch 40: loss = 0.357
epoch 50: loss = 0.331
epoch 60: loss = 0.310
epoch 70: loss = 0.293
epoch 80: loss = 0.278
epoch 90: loss = 0.266
epoch 100: loss = 0.255


In [37]:
with torch.no_grad():
    predicted_proba = model(X_test)
    predicted_labels = predicted_proba.round()
    accuracy = predicted_labels.eq(y_test).sum() / float(y_test.shape[0])
    print(f'accuracy = {accuracy:.3f}')

accuracy = 0.965
