In [31]:
import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import torch
from torch import nn
from torch.utils.data import TensorDataset, DataLoader

In [51]:
X = np.random.rand(3, 4)
X_test = X + 0.01
Y = np.zeros(3)
Y[1] = 1
Y_test = Y

In [53]:
cls = RandomForestClassifier(n_estimators=50, random_state=42)
cls.fit(X, Y)
score = cls.predict_proba(X_test)[:, 1]
predicted = (score > 0.5).astype(float)
roc_score = metrics.roc_auc_score(Y_test, score)
accuracy = (predicted == Y_test).sum() / len(Y_test)
print(f"ROC: {roc_score:.2}; accuracy: {accuracy:.3}")

ROC: 1.0; accuracy: 1.0


In [56]:
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
X_ts = torch.tensor(X, device=device, dtype=torch.float32)
Y_ts = torch.tensor(Y, device=device, dtype=torch.float32).unsqueeze(-1)

X_test_ts = torch.tensor(X_test, device=device, dtype=torch.float32)
Y_test_ts = torch.tensor(Y_test, device=device, dtype=torch.float32).unsqueeze(-1)

dataset = TensorDataset(X_ts, Y_ts)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [57]:
class MLP(nn.Module):
    def __init__(self, input_size, h_size=32, drop_ratio=0.5):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, h_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(h_size, 1)
        self.dropout = nn.Dropout(drop_ratio)
        self.sigm = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        x = self.dropout(x)
        x = self.sigm(x)
        return x 

In [60]:
input_size = 4
lr = 0.001
epochs = 3

model = MLP(input_size=input_size)
model.to(device)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(epochs):
    train_loss = val_loss = total_correct = 0
    eval_output_total = eval_y_total = np.empty((0, 1))

    model.train()
    for batch_data, batch_labels in dataloader:
        optimizer.zero_grad()
        outputs = model(batch_data)
        curr_train_loss = criterion(outputs, batch_labels)

        curr_train_loss.backward()
        train_loss += curr_train_loss.item()
        optimizer.step()

    train_loss /= len(dataloader)

    model.eval()
    with torch.no_grad():
        val_outputs = model(X_test_ts)
        curr_val_loss = criterion(val_outputs, Y_test_ts)
        val_loss = curr_val_loss.item()

        total_correct = ((val_outputs > 0.5).float() == Y_test_ts).float().sum().item()
        roc_auc = metrics.roc_auc_score(Y_test_ts.cpu().numpy(), val_outputs.cpu().numpy())

    accuracy = (total_correct / len(Y_test_ts))
    print(f"Epoch {epoch:<2}: train loss: {train_loss:.3}; accuracy {accuracy:.4}; ROC AUC: {roc_auc:.3}")

Epoch 0 : train loss: 0.835; accuracy 0.3333; ROC AUC: 0.5
Epoch 1 : train loss: 0.723; accuracy 0.3333; ROC AUC: 0.5
Epoch 2 : train loss: 0.834; accuracy 0.3333; ROC AUC: 0.5
