In [1]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset
import torch.optim as optim

In [2]:
df = pd.read_excel (r'data.ods')

In [3]:
df = df[~df['Fallnummer'].isnull()]

In [4]:
class PatientClassificationNet(nn.Module):
    def __init__(self, input_dim, hidden_size, activation=torch.relu):
        super(PatientClassificationNet, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden_size)
        self.activation = activation
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = self.activation(self.fc1(x))
        x = self.activation(self.fc2(x))
        x = self.fc3(x)
        x = torch.sigmoid(x)
        return x

In [5]:
X = df.loc[:,['AGE','Admission type 2','No. of times sent to ICU','FA ab para']]
X['FA ab para'] = X['FA ab para'].replace([1,5, 10, 14, 16, 21, 22], X['FA ab para'].max()+1)
X = torch.Tensor(X.to_numpy())
fa_val = X[:, 3].unique()
tmp = X[:,3].unsqueeze(1).expand((-1,len(fa_val))) == fa_val
X = torch.cat((X[:, :3], tmp.float()), dim=1)
X[:,2] = (X[:,2] > 1 )
X = (X - X.mean(0)) / X.std(0)

Y = df.loc[:,['Tod']]
Y = torch.Tensor(Y.to_numpy())

num_feature = X.shape[-1]

In [6]:
num_train = int(len(X) * 0.8)
shuffle_indices = np.arange(len(X))
np.random.seed(6)
np.random.shuffle(shuffle_indices)
X_train = X[shuffle_indices][:num_train]
y_train = Y[shuffle_indices][:num_train]
X_test = X[shuffle_indices][num_train:]
y_test = Y[shuffle_indices][num_train:]
dataset_train = TensorDataset(X_train, y_train)
dataset_test = TensorDataset(X_test, y_test)
trainloader = DataLoader(dataset_train, batch_size=128, shuffle=True)
testloader = DataLoader(dataset_test, batch_size=128, shuffle=True)

In [7]:
y_train.sum()

tensor(320.)

In [8]:
threshold = y_train.nonzero().size(0)/num_train

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  /tmp/pip-req-build-ojg3q6e4/torch/csrc/utils/python_arg_parser.cpp:882.)
  threshold = y_train.nonzero().size(0)/num_train


In [10]:
def train(net, trainloader, testloader, epoch=5):
    criterion = nn.BCELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.003, momentum=0.9, weight_decay=1e-6)
    for epoch in range(epoch):
        total_loss = 0
        net.train()
        for i, data in enumerate(trainloader):
            inputs, labels = data
            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, labels)
#             print(loss)
            loss.backward()
            optimizer.step()
            total_loss += loss
        
        net.eval()
        correct_true = 0
        predicted_true = 0
        target_true = 0
        total = 0
        for i, data in enumerate(testloader):
            inputs, labels = data
            outputs = net(inputs)
            prediction = outputs > threshold
            correct_true += ((prediction==1) * (labels==1)).sum().item()
            target_true += labels.sum().item()
            predicted_true += prediction.sum().item()
            total += labels.size(0)
        recall = correct_true / target_true
        precision = correct_true / predicted_true
        f1_score = 2 * precision * recall / (precision + recall)
        print(f'epoch {epoch}: recall: {recall} precision: {precision} f1_score: {f1_score} loss:{total_loss}')
    
        

In [11]:
net = PatientClassificationNet(num_feature,256)

In [12]:
train(net, trainloader, testloader, epoch=25)

epoch 0: recall: 0.9146341463414634 precision: 0.07197696737044146 f1_score: 0.13345195729537368 loss:18.78215217590332
epoch 1: recall: 0.6463414634146342 precision: 0.06495098039215687 f1_score: 0.1180400890868597 loss:10.713094711303711
epoch 2: recall: 0.6219512195121951 precision: 0.06623376623376623 f1_score: 0.11971830985915492 loss:9.920525550842285
epoch 3: recall: 0.6341463414634146 precision: 0.06806282722513089 f1_score: 0.12293144208037825 loss:9.595834732055664
epoch 4: recall: 0.6951219512195121 precision: 0.07519788918205805 f1_score: 0.1357142857142857 loss:9.194808959960938
epoch 5: recall: 0.7317073170731707 precision: 0.08275862068965517 f1_score: 0.14869888475836432 loss:9.010043144226074
epoch 6: recall: 0.7682926829268293 precision: 0.09143686502177069 f1_score: 0.16342412451361868 loss:8.914921760559082
epoch 7: recall: 0.7439024390243902 precision: 0.09486780715396578 f1_score: 0.1682758620689655 loss:8.862672805786133
epoch 8: recall: 0.7682926829268293 precis

In [13]:
correct_true = 0
predicted_true = 0
target_true = 0
total = 0
for i, data in enumerate(testloader):
    inputs, labels = data
    outputs = net(inputs)
    prediction = outputs > threshold
    correct_true += ((prediction==1) * (labels==1)).sum().item()
    target_true += labels.sum().item()
    predicted_true += prediction.sum().item()
    total += labels.size(0)
recall = correct_true / target_true
precision = correct_true / predicted_true
f1_score = 2 * precision * recall / (precision + recall)

In [14]:
print(recall, precision, f1_score)

0.6585365853658537 0.14713896457765668 0.24053452115812918


In [15]:
print(correct_true, target_true, predicted_true)

54 82.0 367


In [16]:
outputs = net(X_train)
v, _ = outputs.sort(0)
num_class = 5
class_boundary = torch.Tensor([v[int(num_train/num_class*i-1)] for i in range(1,num_class+1)])

In [17]:
out = net(X_train[0:2])
comp = out < class_boundary
# comp.nonzero()[0].item()
comp = 0

In [18]:
class PatientGroupNet(nn.Module):
    def __init__(self, patient_class_net, class_boundary):
        super(PatientGroupNet, self).__init__()
        self.patient_class_net = patient_class_net
        self.patient_class_net.eval()
        self.class_boundary = class_boundary
        self.num_class = self.class_boundary.size(0)

    def forward(self, x):
        x = self.patient_class_net(x)
        comp = x < self.class_boundary
        return self.num_class - comp.sum(-1) + 1

In [19]:
net2 = PatientGroupNet(net, class_boundary)
out = net2(X_train)
[torch.sum(out == i) for i in range(1, num_class+1)]

[tensor(847), tensor(851), tensor(854), tensor(848), tensor(854)]

In [20]:
net2 = PatientGroupNet(net, class_boundary)
out = net2(X_test)
[torch.sum(out == i) for i in range(1, num_class+1)]

[tensor(214), tensor(212), tensor(206), tensor(211), tensor(221)]

In [21]:
prediction = net2(X).numpy()
df['class'] = prediction
los_mean = [df[df['class']==i]['LOS (Days)'].mean() for i in range(1,num_class+1)]
los_mean

[0.9809600481707305,
 2.1084868558550465,
 2.5422137054486913,
 2.6860507816577064,
 3.3307932816514727]

In [24]:
PATH = 'model/patient_group_net.pth'#'D:\working\I2RL\model\test'
torch.save(net2.state_dict(), PATH)

In [488]:
X_train

tensor([[-1.4988,  1.1154, -0.3025,  ...,  1.9328, -0.1925, -0.0974],
        [-0.2893, -0.8964,  3.3051,  ..., -0.5173, -0.1925, -0.0974],
        [-1.1461,  1.1154, -0.3025,  ...,  1.9328, -0.1925, -0.0974],
        ...,
        [ 0.6681, -0.8964, -0.3025,  ..., -0.5173, -0.1925, -0.0974],
        [-1.3476,  1.1154, -0.3025,  ...,  1.9328, -0.1925, -0.0974],
        [ 0.6681,  1.1154, -0.3025,  ...,  1.9328, -0.1925, -0.0974]])

In [25]:
class_boundary


tensor([0.0262, 0.0464, 0.0650, 0.1080, 0.4479])