In [36]:
import numpy as np
from env.Environment import Environment, State
import pandas as pd
import torch
from torch.utils.data import TensorDataset, DataLoader
from network.network import PatientClassificationNet, PatientGroupNet
from DataProcess import DataProcess

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
# Upper bound on the row sum kept when the state table is enumerated further
# down: rows whose entries sum to more than this value are discarded there.
num_bed = 10
# Number of class boundaries requested from _calculate_class_boundary.
# NOTE(review): load_net is later called with 8 boundary values and the
# DataProcess output lists 8 classes, while this is 6 -- confirm which is intended.
num_class = 6

In [34]:
def _prepare_data():
    df = pd.read_excel(r'data.xlsx')
    df = df[~df['Fallnummer'].isnull()]

    X = df.loc[:, ['AGE', 'Admission type 2', 'No. of times sent to ICU', 'FA ab para']]
    X['FA ab para'] = X['FA ab para'].replace([1, 5, 10, 14, 16, 21, 22], X['FA ab para'].max() + 1)
    X = torch.Tensor(X.to_numpy())
    fa_val = X[:, 3].unique()
    tmp = X[:, 3].unsqueeze(1).expand((-1, len(fa_val))) == fa_val
    X = torch.cat((X[:, :3], tmp.float()), dim=1)
    X[:, 2] = (X[:, 2] > 1)
    X = (X - X.mean(0)) / X.std(0)

    Y = df.loc[:, ['Tod']]
    Y = torch.Tensor(Y.to_numpy())

    num_feature = X.shape[-1]

    num_train = int(len(X) * 0.8)
    shuffle_indices = np.arange(len(X))
    np.random.shuffle(shuffle_indices)
    X = X[shuffle_indices]
    Y = Y[shuffle_indices]
    X_train = X[:num_train]
    y_train = Y[:num_train]
    X_test = X[num_train:]
    y_test = Y[num_train:]
    dataset_train = TensorDataset(X_train, y_train)
    dataset_test = TensorDataset(X_test, y_test)
    trainloader = DataLoader(dataset_train, batch_size=128, shuffle=True)
    testloader = DataLoader(dataset_test, batch_size=128, shuffle=True)

    return num_train, num_feature, trainloader, testloader, X_train, y_train, X_test, y_test


def _calculate_class_boundary(net, X, num_train, num_class, percetage=None):
    outputs = net.patient_class_net(X)
    v, _ = outputs.sort(0)
    if percetage is None:
        class_boundary = torch.Tensor([v[int(num_train * i / float(num_class) - 1)] for i in range(1, num_class + 1)])
    else:
        class_boundary = torch.Tensor([v[int(num_train * percetage[i] - 1)] for i in range(0, num_class)])
    return class_boundary

def load_net(input_dim, hidden, class_boundary, path):
    """Build a PatientGroupNet and restore its weights from ``path``.

    Args:
        input_dim: First argument passed to ``PatientClassificationNet``.
        hidden: Second argument passed to ``PatientClassificationNet``.
        class_boundary: Sequence of boundary values; converted to a tensor and
            handed to ``PatientGroupNet``.
        path: File containing the state dict saved for the group network.

    Returns:
        The ``PatientGroupNet`` with the loaded state dict.
    """
    class_boundary = torch.Tensor(class_boundary)
    classifier = PatientClassificationNet(input_dim, hidden)
    net = PatientGroupNet(classifier, class_boundary)
    # The network is constructed on the CPU, so map the checkpoint there too;
    # otherwise a checkpoint saved from a GPU fails to load on CPU-only machines.
    # NOTE(security): torch.load unpickles the file -- only load trusted checkpoints.
    state_dict = torch.load(path, map_location='cpu')
    net.load_state_dict(state_dict)
    return net

In [32]:
# Build the train/test split once and keep the pieces as notebook globals.
# NOTE: _prepare_data shuffles with NumPy's global RNG, so the split differs
# between runs unless np.random is seeded beforehand.
num_train, num_feature, trainloader, testloader, X_train, y_train, X_test, y_test = _prepare_data()


In [35]:
# Restore the saved group network. The boundary list passed here is replaced
# on the net two lines below.
# NOTE(review): 21 is a hard-coded input width -- presumably it has to match
# num_feature returned by _prepare_data(); confirm, or pass num_feature.
net = load_net(21, 256, [0.0065, 0.0240, 0.0404, 0.0571, 0.0760, 0.1060, 0.1640, 0.6534], './model/patient_group_net.pth')
# Recompute the boundaries from the network's sorted outputs on the training
# tensors, taking num_class equally spaced positions.
class_boundary = _calculate_class_boundary(net, X_train, num_train, num_class)
net.class_boundary = class_boundary
class_boundary

tensor([[9.2742e-05],
        [9.7380e-05],
        [1.3648e-04],
        ...,
        [5.6844e-01],
        [5.9560e-01],
        [6.5335e-01]], grad_fn=<SortBackward>)


tensor([0.0131, 0.0345, 0.0555, 0.0810, 0.1381, 0.6534])

In [40]:
# Run the project's DataProcess with the grouped network.
# NOTE(review): the meaning of the second argument (60) is not visible in this
# notebook -- document it or promote it to a named constant.
d = DataProcess(net, 60)
d.process()

Processing file data.xlsx
Mean los per class [0.6414718959027855, 1.8064469473142297, 2.0702363927845457, 2.5320048754466398, 2.5903448836818734, 2.641957131408011, 2.8534830898960806, 3.719057824842895]
1 1.3123224251590295
2 2.3851740562669455
3 2.7601770500501117
4 2.7530320279274845
5 2.6493594182811093
6 3.2294760959960507
7 3.3351556343598316
8 4.247728018372573
Data process finished


([0.3928082191780822,
  0.07796803652968036,
  0.0776255707762557,
  0.07328767123287672,
  0.07819634703196347,
  0.07853881278538813,
  0.07146118721461187,
  0.07363013698630137,
  0.07648401826484018],
 [0.06495478123484495,
  0.023065535762683425,
  0.0201265260391947,
  0.016455997802656983,
  0.016085374163552442,
  0.015771136545452098,
  0.014602037353648414,
  0.011203554402498919],
 [0.03175032741029128,
  0.01746902560724625,
  0.015095649993144535,
  0.015134828161819039,
  0.015727072128892117,
  0.012901989495548698,
  0.012493170104988037,
  0.009809165390638726],
 [0.006847985891974875,
  0.0675641225513241,
  0.3645487971361824,
  0.026689814814905156,
  0.02455304182979476,
  0.6264421606256592,
  1.1699432862591812,
  0.8426338099430193],
 [0.0014641288433382138,
  0.025,
  0.03115264797507788,
  0.08029197080291971,
  0.07703488372093023,
  0.1134185303514377,
  0.15038759689922482,
  0.2626865671641791],
 [0.0021645021645021645,
  0.011363636363636364,
  0.0314465

In [47]:
# Extend every row of all_x with one extra column taking each value in
# 0..num_bed, then keep only the rows whose total is at most num_bed.
# NOTE(review): all_x must already exist (it is defined in a cell not shown
# here), and this cell overwrites it -- re-running the cell appends yet another
# column, so it is not idempotent.

# Stack num_bed + 1 copies of all_x on top of each other.
tmp = np.tile(all_x, (num_bed+1,1))
print(tmp.shape)
# Column holding k for every row of the k-th stacked copy (k = 0..num_bed).
tmp2 = np.tile(np.arange(num_bed + 1), (len(all_x), 1)).T.reshape(-1,1)
tmp = np.concatenate([tmp,tmp2], axis=-1)
# Drop combinations whose entries sum to more than num_bed.
all_x = tmp[tmp.sum(-1) <= num_bed]
all_x.shape

(18648630, 7)


(3108105, 8)