In [11]:
import torch
from torch_geometric.data import Data
from torch_geometric import seed_everything
import pandas as pd
from sklearn.preprocessing import KBinsDiscretizer
import torch_geometric
from tqdm import tqdm, trange
from torcheval.metrics import MulticlassAccuracy
from torcheval.metrics import BinaryAUROC
import numpy as np

DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# DEVICE = torch.device('cuda')
print(DEVICE)

cuda


In [12]:
# get feature from csv
RAW_data = pd.read_csv('data/adult.csv')
CAT = ['workclass','education','marital-status','occupation','relationship','race','gender','native-country']
NUM = ['age','fnlwgt','educational-num','capital-gain','capital-loss','hours-per-week']
LABEL = 'income'
# RAW_data = pd.read_csv('data/compass_old.csv')
# CAT=['sex','age_cat','race','c_charge_degree','decile_score.1','score_text','v_type_of_assessment','v_decile_score','v_score_text']
# NUM=['age','juv_fel_count','juv_misd_count','juv_other_count','priors_count','days_b_screening_arrest','c_days_from_compas','end']
# LABEL = 'is_recid'
# convert categorical data to ordinal data
from sklearn.preprocessing import OrdinalEncoder
enc = OrdinalEncoder()
data_pd = RAW_data.copy()
data_pd[CAT] = enc.fit_transform(RAW_data[CAT])
# data_pd = pd.get_dummies(RAW_data, columns=CAT, dtype=float)
# label to category
data_pd[LABEL] = data_pd[LABEL].astype('category').cat.codes

# realign data to num + cat
data_pd = data_pd[NUM + CAT + [LABEL]]

# caculate unique value of each categorical feature
cat_num = [len(data_pd[col].unique()) for col in CAT]

# normalize numerical data
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
data_pd[NUM] = scaler.fit_transform(data_pd[NUM])

# convert data to tensor
x = torch.tensor(data_pd.drop(columns=[LABEL]).values, dtype=torch.float, device=DEVICE)  # [48842, 108]
y = torch.tensor(data_pd[LABEL].values, dtype=torch.long, device=DEVICE) # [48842]
print(x.shape, y.shape)
print(cat_num)
data_pd

torch.Size([48842, 14]) torch.Size([48842])
[9, 16, 7, 15, 6, 5, 2, 42]


Unnamed: 0,age,fnlwgt,educational-num,capital-gain,capital-loss,hours-per-week,workclass,education,marital-status,occupation,relationship,race,gender,native-country,income
0,-0.995129,0.351675,-1.197259,-0.144804,-0.217127,-0.034087,4.0,1.0,4.0,7.0,3.0,2.0,1.0,39.0,0
1,-0.046942,-0.945524,-0.419335,-0.144804,-0.217127,0.772930,4.0,11.0,2.0,5.0,0.0,4.0,1.0,39.0,0
2,-0.776316,1.394723,0.747550,-0.144804,-0.217127,-0.034087,2.0,7.0,2.0,11.0,0.0,4.0,1.0,39.0,1
3,0.390683,-0.277844,-0.030373,0.886874,-0.217127,-0.034087,4.0,15.0,2.0,7.0,0.0,2.0,1.0,39.0,1
4,-1.505691,-0.815954,-0.030373,-0.144804,-0.217127,-0.841104,0.0,15.0,4.0,0.0,3.0,4.0,0.0,39.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,-0.849254,0.640492,0.747550,-0.144804,-0.217127,-0.195490,4.0,7.0,2.0,13.0,5.0,4.0,0.0,39.0,0
48838,0.098933,-0.334178,-0.419335,-0.144804,-0.217127,-0.034087,4.0,11.0,2.0,7.0,0.0,4.0,1.0,39.0,1
48839,1.411808,-0.357510,-0.419335,-0.144804,-0.217127,-0.034087,4.0,11.0,6.0,1.0,4.0,4.0,0.0,39.0,0
48840,-1.213941,0.111984,-0.419335,-0.144804,-0.217127,-1.648120,4.0,11.0,4.0,1.0,3.0,4.0,1.0,39.0,0


In [13]:
class feature_improtance_extractor():
    def __init__(self):
        self.feature_importance = []
        self.iter = 0
        pass
    def update(self, new_feature_importance):
        if self.iter == 0:
            self.feature_importance = new_feature_importance
        else:
            self.feature_importance += new_feature_importance
        self.iter += 1
        return
    
    def get(self):
        return (self.feature_importance / self.iter)
    
    def reset(self):
        self.feature_importance = []
        self.iter = 0
        return
extractor = feature_improtance_extractor()

In [14]:
class K_graph(torch.nn.Module):
    def __init__(self, NUM, CAT, LABEL, cat_num):
        super(K_graph, self).__init__()
        '''
        num_cols: number of numerical columns
        cat_cols: number of categorical columns
        label_cols: number of label columns
        cat_num: number of unique value of each categorical columns
        '''
        self.hidden_dim = 128
        # order: num -> cat -> label
        self.num_cols = len(NUM)
        self.cat_cols = len(CAT)
        self.label_cols = len(LABEL)
        self.number_of_columns = self.num_cols + self.cat_cols 
        self.K = round(self.number_of_columns*0.5)
        
        # numerical feature
        self.num_embeddings = torch.nn.ModuleList([torch.nn.Linear(1, self.hidden_dim) for i in range(self.num_cols)])
        # categorical feature
        self.cat_embeddings = torch.nn.ModuleList([torch.nn.Embedding(cat_num[i], self.hidden_dim) for i in range(self.cat_cols)])
        
        self.prediction = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_dim *( self.K + self.number_of_columns), self.hidden_dim),
            torch.nn.ReLU(),
            torch.nn.LayerNorm(self.hidden_dim),
            torch.nn.Linear(self.hidden_dim, self.label_cols + 1)
        )
        
        # feature importance learning
        self.feature_importance_learners = torch.nn.Sequential(
            torch.nn.Linear(self.hidden_dim, self.hidden_dim),
            torch.nn.ReLU(),
            torch.nn.LayerNorm(self.hidden_dim),
            torch.nn.Dropout(p=0.5),
            torch.nn.Linear(self.hidden_dim, 1),
        ) 
        
        # graph convolution layers
        self.conv_GCN_input = torch_geometric.nn.GCNConv(self.number_of_columns*self.hidden_dim, self.hidden_dim)
        # self.conv_GCN_input = torch_geometric.nn.GCNConv(self.hidden_dim, self.hidden_dim)
        # self.conv_1_input = torch_geometric.nn.GATConv(self.number_of_columns*self.hidden_dim, self.hidden_dim)
        self.conv_GCN_2 = torch_geometric.nn.GCNConv(self.hidden_dim, self.hidden_dim)
        
        # self.transform = torch.nn.Linear(self.number_of_columns*self.hidden_dim, self.hidden_dim)
        
    def forward(self, input_data, epoch = -1):
        
        # make feature embedding
        num_data = input_data[:,:self.num_cols].unsqueeze(-1).unsqueeze(-1) 
        feature_embedding_num = torch.cat([self.num_embeddings[i](num_data[:,i]) for i in range(self.num_cols)], dim=1).reshape(len(input_data), -1) # [batch_size, num_cols * hidden_dim]
        feature_embedding_num = torch.nn.ReLU()(feature_embedding_num)
        feature_embedding_num = torch.layer_norm(feature_embedding_num, feature_embedding_num.shape)
        # categorical feature
        feature_embedding_cat = torch.cat([self.cat_embeddings[i](input_data[:,self.num_cols+i].long()) for i in range(self.cat_cols)], dim=1) # [batch_size, cat_cols * hidden_dim]
        feature_embedding_cat = torch.layer_norm(feature_embedding_cat, feature_embedding_cat.shape)
        # concat
        feature_embedding = torch.cat((feature_embedding_num, feature_embedding_cat), dim=1) # [batch_size, (num_cols + cat_cols) * hidden_dim]
        # feature_embedding = feature_embedding.reshape((len(input_data), self.number_of_columns, -1)) # [batch_size, (num_cols + cat_cols), hidden_dim]
        
        # feature importance learning
        feature_importance = torch.cat([self.feature_importance_learners(feature_embedding[:,i*self.hidden_dim:(i+1)*self.hidden_dim]) for i in range(self.number_of_columns)], dim=1) # [batch_size, num_cols + cat_cols, 1]
        # print(feature_importance)
        feature_importance = torch.layer_norm(feature_importance, feature_importance.shape)
        # feature_importance = torch.softmax(feature_importance, dim=1) # [batch_size, num_cols + cat_cols, 1]
        # print(feature_importance.shape)
        # print(feature_importance.sum(dim=1))
        # print(feature_importance)
        
        # weighted feature embedding 
        feature_embedding = feature_embedding.reshape((len(input_data),self.number_of_columns, -1)) * feature_importance.unsqueeze(-1) # [batch_size, (num_cols + cat_cols) * hidden_dim]
        feature_embedding = feature_embedding.reshape((len(input_data), -1)) # [batch_size, (num_cols + cat_cols) * hidden_dim]
        
        # top K feature importance
        K = self.K
        value, indices = torch.topk(feature_importance, K) # (value: [batch_size, k], indices: [batch_size, k])
        mask = torch.zeros_like(feature_importance, device=DEVICE)
        mask.scatter_(1, indices, 1)
        # importance_topK = torch.where(mask > 0, feature_importance, torch.zeros(feature_importance.shape,device=DEVICE)) # [batch_size, cols]
        importance_topK = torch.where(mask > 0, feature_importance, torch.empty(feature_importance.shape,device=DEVICE).fill_(-1e9)) # [batch_size, cols]
        importance_topK = torch.softmax(importance_topK, dim=1) # [batch_size, cols]
        # importance_topK = torch.stack([importance_topK.clone() for _ in range(self.number_of_columns)], dim=0) # [cols, batch_size, cols]
        
        extractor.update(feature_importance.sum(dim=0)/len(input_data))
        del feature_embedding_num, feature_embedding_cat, num_data
        del mask, feature_importance, value, indices
        
        
        processed_data = []
        processed_indices = []
        for target_col in range(self.number_of_columns):
            importance_topK_current = importance_topK.clone()# [batch_size, cols] 
            indices = importance_topK_current.T[target_col].nonzero().T[0] # selected samples' indices  
            
            if indices.shape[0] == 0:
                continue
            
            importance_topK_current = importance_topK_current[importance_topK_current.T[target_col]>0]# [????, cols]
            
            # for target column, set its importance to 0. so that it will not be fully connected graph
            # copy target column
            tmp = torch.clone(importance_topK_current[:,target_col]) # [????], save for future weighted sum
            importance_topK_current[:,target_col] = 0 # [????, cols]
            # multiply to get weighted adj
            weighted_adj = torch.matmul(importance_topK_current, importance_topK_current.T) # [batch_size, cols] * [cols, batch_size] = [batch_size, batch_size]
            # prune the diagonal
            weighted_adj = weighted_adj - torch.diag(weighted_adj.diagonal())

            # construct graph
            edge_index = weighted_adj.nonzero().T  # [2, num_edges]
            edge_wight = weighted_adj[edge_index[0], edge_index[1]] # [num_edges]
            edge_wight = torch.softmax(edge_wight, dim=0)

            
            if False:
                print('in graph', target_col, 'nodes:', len(indices), 'edges:', len(edge_wight),'ratio', len(edge_wight)/(len(indices)**2+0.000001))
            
            # print(edge_wight)
            # importance_topK_current[:,target_col] = tmp # [????, cols]
            
            features = (feature_embedding[indices]) # [????, cols*hidden_dim]
            # features = (feature_embedding.reshape(len(input_data),self.number_of_columns,-1)[indices][:,target_col,:]) # [????, hidden_dim]
            # print(features.shape)

            # construct graph 
            data = Data(x=features, edge_index=edge_index, edge_weight=edge_wight, indices=indices) 
            
            del features, edge_index, edge_wight, weighted_adj, importance_topK_current, tmp
            
            # apply GCN
            x = self.conv_GCN_input(data.x, data.edge_index, data.edge_weight)  # [???, hidden_dim]
            # x = self.conv_1_input(data.x, data.edge_index)  # [???, hidden_dim]
            x = torch.relu(x)
            x = torch.layer_norm(x, x.shape) # [???, hidden_dim]
            x = torch.nn.Dropout(p=0.5)(x)
            x = self.conv_GCN_2(x, data.edge_index, data.edge_weight)  # [???, hidden_dim]
            x = torch.relu(x)
            x = torch.layer_norm(x, x.shape)

            processed_data.append(x)
            processed_indices.append(indices)
        
        processed_data = torch.cat(processed_data, dim=0) 
        processed_indices = torch.cat(processed_indices, dim=0)
        # print(processed_indices)
        # print(processed_indices.argsort())
        # print(processed_indices[processed_indices.argsort()])
        processed_data = processed_data[processed_indices.argsort()] # ???????
        processed_data = torch.split(processed_data, self.K) # ?????????
        processed_data = torch.stack(list(processed_data), dim=0) # ???????????
        # processed_data = torch.sum(list(processed_data), dim=0) # ???????????

        # cat residual
        processed_data = torch.cat((processed_data, feature_embedding.reshape((len(input_data),self.number_of_columns,-1))), dim=1) # [batch_size, K+cols , hidden_dim]
        
        # make prediction
        prediction = self.prediction(processed_data.reshape(processed_data.shape[0],-1))
        # prediction = self.prediction(feature_embedding)
        
        
        return prediction


In [15]:
the_model = K_graph(NUM, CAT, [LABEL], cat_num).to(DEVICE)
optimizer = torch.optim.SGD(the_model.parameters(), lr=0.001)

# optimizer.step()
data_count = 5
# random pick data
indices = torch.randperm(len(x))[:data_count]
train_data = x[indices]
train_label = y[indices]

for i in range(5):
    
    optimizer.zero_grad()
    output = the_model(train_data[:data_count], epoch=200)
    loss = torch.nn.functional.cross_entropy(output, train_label[:data_count])
    loss.backward()
    # print(((the_model.feature_importance_learners.grad).abs().max(dim=1)[0]))
    optimizer.step()
    
    print('-----------------------------------------')


-----------------------------------------
-----------------------------------------
-----------------------------------------
-----------------------------------------
-----------------------------------------


In [16]:
import torchviz
plot = torchviz.make_dot(loss, params=dict(the_model.named_parameters()))
plot.render("MLP+FI", format="png")

'MLP+FI.png'

In [17]:

def train_epoch(model, optimizer, datas, batch_size, epoch):
    train_data, train_label, validation_data, validation_label = datas
    
    # slice data into batch
    train_data = torch.split(train_data, batch_size)
    train_label = torch.split(train_label, batch_size)
    validation_data = torch.split(validation_data, batch_size)
    validation_label = torch.split(validation_label, batch_size)

    # losses and metrics
    batch_loss = 0
    train_acc = MulticlassAccuracy(num_classes=2).to(DEVICE)
    train_auc = BinaryAUROC().to(DEVICE)
    valid_acc = MulticlassAccuracy(num_classes=2).to(DEVICE)
    valid_auc = BinaryAUROC().to(DEVICE)
    
    # train the model
    stepper = trange(len(train_data))
    for i in stepper:
        stepper.set_description(f'Epoch {epoch}')
        
        optimizer.zero_grad()
        output = model(train_data[i], epoch=epoch)
        loss = torch.nn.functional.cross_entropy(output, train_label[i]) * model.number_of_columns
        loss.backward()
        optimizer.step()
        batch_loss += loss.item()
        
        # metrics
        preds = output.softmax(dim=1)
        true = torch.nn.functional.one_hot(train_label[i], num_classes=2).to(DEVICE)
        train_acc.update(torch.argmax(preds, 1),true.T[1])
        train_auc.update(preds.T[0],true.T[0])
        
        # at the end of epoch, print result and validate the model
        if i == len(train_data) - 1:
            train_acc = train_acc.compute()
            train_auc = train_auc.compute()
            stepper.set_postfix({'loss': round(batch_loss/(i+1), 3), 'acc': round(train_acc.item(), 3), 'AUC': round(train_auc.item(), 3)})
            stepper.update()
        
            with torch.no_grad():
                for i in range(len(validation_data)):
                    output = model(validation_data[i], epoch=200)
                    # loss = torch.nn.functional.cross_entropy(output, validation_label[i])
                    preds = output.softmax(dim=1)
                    true = torch.nn.functional.one_hot(validation_label[i], num_classes=2).to(DEVICE)
                    valid_acc.update(torch.argmax(preds,1),true.T[1])
                    valid_auc.update(preds.T[0],true.T[0])
                stepper.set_postfix({'loss': round(batch_loss/(i+1), 3), 'acc': round(train_acc.item(), 3), 'AUC': round(train_auc.item(), 3), 'val_acc': round(valid_acc.compute().item(), 3), 'val_AUC': round(valid_auc.compute().item(), 3)})



In [18]:
def overall_train(x, y, seed=0):
    # hyperparameter
    epoch = 50
    batch_size = 1000
    seed_everything(seed)
    
    # shuffle data
    indices = torch.randperm(len(x))
    x = x[indices]
    y = y[indices]
    # slice data into train and test and validation
    train_ratio = 0.7
    validation_ratio = 0.1
    train_data = x[:int(len(x)*train_ratio)]
    train_label = y[:int(len(x)*train_ratio)]
    validation_data = x[int(len(x)*train_ratio):int(len(x)*(train_ratio+validation_ratio))]
    validation_label = y[int(len(x)*train_ratio):int(len(x)*(train_ratio+validation_ratio))]
    test_data = x[int(len(x)*(train_ratio+validation_ratio)):]
    test_label = y[int(len(x)*(train_ratio+validation_ratio)):]

    # build model and optimizer
    the_model = K_graph(NUM, CAT, [LABEL], cat_num).to(DEVICE)
    optimizer = torch.optim.SGD(the_model.parameters(), lr=0.001)
    
    # train the model
    datas = (train_data, train_label, validation_data, validation_label)
    for i in range(epoch):
        train_epoch(the_model, optimizer, datas, batch_size, epoch=i+1)
        print(extractor.get())
        extractor.reset()
    
    # test the model
    with torch.no_grad():
        test_data = torch.split(test_data, batch_size)
        test_label = torch.split(test_label, batch_size)
        for i in range(len(test_data)):
            output = the_model(test_data[i], epoch=200)
            preds = output.softmax(dim=1)
            true = torch.nn.functional.one_hot(test_label[i], num_classes=2).to(DEVICE)
            test_acc = MulticlassAccuracy(num_classes=2).to(DEVICE)
            test_auc = BinaryAUROC().to(DEVICE)
            test_acc.update(torch.argmax(preds,1),true.T[1])
            test_auc.update(preds.T[0],true.T[0])

        print('test_acc:', test_acc.compute().item())
        print('test_auc:', test_auc.compute().item())
        print('-----------------------------------------')

In [19]:
seed_set = [9,90,900,9000,90000]
overall_train(x, y, seed=9)

Epoch 1: 100%|██████████| 35/35 [00:04<00:00,  8.40it/s, loss=43.4, acc=0.78, AUC=0.802, val_acc=0.824, val_AUC=0.873]


tensor([-0.6158,  0.4862, -0.3629, -0.5126,  0.2436,  0.2867, -0.3363, -0.3433,
         0.0320, -0.0995,  0.0014,  0.3815,  0.9984, -0.1594], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 2: 100%|██████████| 35/35 [00:04<00:00,  8.71it/s, loss=34.3, acc=0.835, AUC=0.884, val_acc=0.831, val_AUC=0.893]


tensor([-0.8969,  0.4658, -0.7856, -0.6236,  0.2363,  0.2527, -0.2119, -0.3694,
        -0.0438, -0.0415,  0.0961,  0.6376,  1.1630,  0.1212], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 3: 100%|██████████| 35/35 [00:03<00:00,  9.18it/s, loss=32.9, acc=0.844, AUC=0.894, val_acc=0.843, val_AUC=0.896]


tensor([-0.9682,  0.4335, -0.9045, -0.4166,  0.3172,  0.1491, -0.1696, -0.3776,
        -0.0901, -0.0327,  0.1213,  0.5759,  1.1719,  0.1905], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 4: 100%|██████████| 35/35 [00:03<00:00,  8.81it/s, loss=32.2, acc=0.846, AUC=0.899, val_acc=0.845, val_AUC=0.899]


tensor([-1.0442,  0.4291, -0.9681, -0.3003,  0.3534,  0.1060, -0.1519, -0.3845,
        -0.1191, -0.0416,  0.1807,  0.5464,  1.1755,  0.2186], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 5: 100%|██████████| 35/35 [00:03<00:00,  9.05it/s, loss=31.8, acc=0.851, AUC=0.902, val_acc=0.839, val_AUC=0.901]


tensor([-1.0865,  0.4205, -1.0094, -0.2468,  0.3537,  0.0775, -0.1309, -0.3836,
        -0.1009, -0.0452,  0.2285,  0.5215,  1.1726,  0.2289], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 6: 100%|██████████| 35/35 [00:03<00:00,  9.05it/s, loss=31.4, acc=0.852, AUC=0.904, val_acc=0.84, val_AUC=0.9]


tensor([-1.0903,  0.4269, -1.0458, -0.1562,  0.3534,  0.0542, -0.1180, -0.3705,
        -0.1130, -0.0452,  0.2465,  0.4706,  1.1678,  0.2197], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 7: 100%|██████████| 35/35 [00:03<00:00,  9.02it/s, loss=31, acc=0.853, AUC=0.907, val_acc=0.841, val_AUC=0.902]


tensor([-1.1039,  0.4377, -1.0676, -0.0884,  0.3922,  0.0096, -0.1211, -0.3630,
        -0.1190, -0.0506,  0.2609,  0.4651,  1.1306,  0.2175], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 8: 100%|██████████| 35/35 [00:03<00:00,  9.00it/s, loss=31, acc=0.853, AUC=0.907, val_acc=0.846, val_AUC=0.905]


tensor([-1.1277,  0.4405, -1.0762, -0.0086,  0.4018, -0.0081, -0.1252, -0.3633,
        -0.1238, -0.0412,  0.2644,  0.4550,  1.1041,  0.2082], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 9: 100%|██████████| 35/35 [00:03<00:00,  9.02it/s, loss=30.9, acc=0.853, AUC=0.907, val_acc=0.848, val_AUC=0.905]


tensor([-1.1453,  0.4240, -1.0866,  0.0724,  0.3756, -0.0113, -0.1276, -0.3650,
        -0.1102, -0.0330,  0.2739,  0.4305,  1.0754,  0.2271], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 10: 100%|██████████| 35/35 [00:03<00:00,  9.02it/s, loss=30.7, acc=0.856, AUC=0.909, val_acc=0.844, val_AUC=0.906]


tensor([-1.1458,  0.4296, -1.0964,  0.1361,  0.3657, -0.0170, -0.1409, -0.3617,
        -0.1195, -0.0320,  0.2788,  0.4068,  1.0608,  0.2354], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 11: 100%|██████████| 35/35 [00:03<00:00,  8.99it/s, loss=30.6, acc=0.856, AUC=0.909, val_acc=0.842, val_AUC=0.907]


tensor([-1.1448,  0.4272, -1.1043,  0.1865,  0.3421, -0.0341, -0.1525, -0.3629,
        -0.1313, -0.0208,  0.2730,  0.4086,  1.0634,  0.2500], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 12: 100%|██████████| 35/35 [00:03<00:00,  8.98it/s, loss=30.5, acc=0.856, AUC=0.91, val_acc=0.846, val_AUC=0.907]


tensor([-1.1515,  0.4046, -1.1287,  0.2105,  0.3363, -0.0411, -0.1407, -0.3556,
        -0.1289, -0.0176,  0.2835,  0.4066,  1.0606,  0.2621], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 13: 100%|██████████| 35/35 [00:03<00:00,  8.96it/s, loss=30.6, acc=0.856, AUC=0.91, val_acc=0.846, val_AUC=0.906]


tensor([-1.1495,  0.3901, -1.1323,  0.2453,  0.3434, -0.0477, -0.1427, -0.3595,
        -0.1386, -0.0245,  0.2859,  0.4102,  1.0651,  0.2548], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 14: 100%|██████████| 35/35 [00:03<00:00,  8.93it/s, loss=30.4, acc=0.856, AUC=0.911, val_acc=0.845, val_AUC=0.908]


tensor([-1.1648,  0.3909, -1.1483,  0.2859,  0.3548, -0.0643, -0.1242, -0.3721,
        -0.1376, -0.0241,  0.2873,  0.4327,  1.0405,  0.2432], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 15: 100%|██████████| 35/35 [00:03<00:00,  8.92it/s, loss=30.2, acc=0.857, AUC=0.912, val_acc=0.848, val_AUC=0.908]


tensor([-1.1699,  0.3921, -1.1505,  0.3263,  0.3547, -0.0785, -0.1226, -0.3715,
        -0.1473, -0.0199,  0.2809,  0.4286,  1.0221,  0.2556], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 16: 100%|██████████| 35/35 [00:03<00:00,  8.81it/s, loss=30.2, acc=0.858, AUC=0.912, val_acc=0.846, val_AUC=0.906]


tensor([-1.1683,  0.4005, -1.1619,  0.3542,  0.3476, -0.0877, -0.1155, -0.3805,
        -0.1436, -0.0208,  0.2696,  0.4317,  1.0034,  0.2713], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 17: 100%|██████████| 35/35 [00:03<00:00,  8.90it/s, loss=30.2, acc=0.857, AUC=0.912, val_acc=0.845, val_AUC=0.906]


tensor([-1.1723,  0.4228, -1.1593,  0.3655,  0.3470, -0.0798, -0.1318, -0.3769,
        -0.1495, -0.0167,  0.2597,  0.4220,  0.9997,  0.2697], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 18: 100%|██████████| 35/35 [00:03<00:00,  8.88it/s, loss=30.2, acc=0.858, AUC=0.912, val_acc=0.848, val_AUC=0.908]


tensor([-1.1789,  0.4127, -1.1594,  0.4075,  0.3418, -0.0669, -0.1374, -0.3809,
        -0.1447, -0.0162,  0.2533,  0.4187,  1.0137,  0.2368], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 19: 100%|██████████| 35/35 [00:03<00:00,  8.87it/s, loss=29.9, acc=0.858, AUC=0.913, val_acc=0.843, val_AUC=0.908]


tensor([-1.1833,  0.4117, -1.1693,  0.4176,  0.3356, -0.0591, -0.1359, -0.3836,
        -0.1347, -0.0198,  0.2520,  0.4155,  1.0184,  0.2349], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 20: 100%|██████████| 35/35 [00:03<00:00,  8.86it/s, loss=30, acc=0.857, AUC=0.913, val_acc=0.847, val_AUC=0.909]


tensor([-1.1821,  0.4153, -1.1741,  0.4390,  0.3306, -0.0493, -0.1469, -0.3996,
        -0.1200, -0.0239,  0.2603,  0.4146,  1.0015,  0.2346], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 21: 100%|██████████| 35/35 [00:03<00:00,  8.86it/s, loss=29.9, acc=0.859, AUC=0.913, val_acc=0.846, val_AUC=0.908]


tensor([-1.1849,  0.4171, -1.1683,  0.4758,  0.3224, -0.0484, -0.1567, -0.4032,
        -0.1102, -0.0307,  0.2639,  0.4063,  0.9809,  0.2359], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 22: 100%|██████████| 35/35 [00:03<00:00,  8.83it/s, loss=29.9, acc=0.86, AUC=0.913, val_acc=0.852, val_AUC=0.909]


tensor([-1.1979,  0.4235, -1.1745,  0.4800,  0.3259, -0.0350, -0.1723, -0.4130,
        -0.0797, -0.0316,  0.2688,  0.3994,  0.9798,  0.2266], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 23: 100%|██████████| 35/35 [00:03<00:00,  8.83it/s, loss=29.8, acc=0.858, AUC=0.914, val_acc=0.847, val_AUC=0.908]


tensor([-1.1913,  0.4137, -1.1691,  0.4691,  0.3240, -0.0457, -0.1554, -0.4147,
        -0.0948, -0.0306,  0.2598,  0.3911,  0.9975,  0.2464], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 24: 100%|██████████| 35/35 [00:03<00:00,  8.84it/s, loss=29.8, acc=0.86, AUC=0.914, val_acc=0.849, val_AUC=0.909]


tensor([-1.2018,  0.4094, -1.1601,  0.4749,  0.3122, -0.0478, -0.1607, -0.4134,
        -0.0823, -0.0312,  0.2645,  0.3931,  0.9877,  0.2557], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 25: 100%|██████████| 35/35 [00:04<00:00,  8.73it/s, loss=29.9, acc=0.859, AUC=0.914, val_acc=0.848, val_AUC=0.908]


tensor([-1.1896,  0.4071, -1.1402,  0.4873,  0.3023, -0.0411, -0.1922, -0.4130,
        -0.0878, -0.0315,  0.2533,  0.3945,  0.9935,  0.2573], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 26: 100%|██████████| 35/35 [00:03<00:00,  8.83it/s, loss=29.8, acc=0.86, AUC=0.914, val_acc=0.848, val_AUC=0.909]


tensor([-1.1939,  0.4144, -1.1435,  0.5060,  0.2860, -0.0373, -0.1771, -0.4162,
        -0.0758, -0.0350,  0.2620,  0.3815,  0.9877,  0.2412], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 27: 100%|██████████| 35/35 [00:03<00:00,  8.83it/s, loss=29.7, acc=0.86, AUC=0.914, val_acc=0.847, val_AUC=0.908]


tensor([-1.2031,  0.4157, -1.1363,  0.5145,  0.2767, -0.0329, -0.1725, -0.4277,
        -0.0726, -0.0387,  0.2722,  0.3786,  0.9789,  0.2473], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 28: 100%|██████████| 35/35 [00:03<00:00,  8.83it/s, loss=29.7, acc=0.86, AUC=0.914, val_acc=0.849, val_AUC=0.909]


tensor([-1.2019,  0.4149, -1.1322,  0.5150,  0.2813, -0.0299, -0.1825, -0.4224,
        -0.0833, -0.0439,  0.2704,  0.3902,  0.9659,  0.2582], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 29: 100%|██████████| 35/35 [00:03<00:00,  8.82it/s, loss=29.7, acc=0.859, AUC=0.915, val_acc=0.85, val_AUC=0.911]


tensor([-1.2037,  0.4085, -1.1272,  0.5316,  0.2873, -0.0347, -0.1998, -0.4233,
        -0.0820, -0.0440,  0.2626,  0.3824,  0.9568,  0.2855], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 30: 100%|██████████| 35/35 [00:03<00:00,  8.81it/s, loss=29.7, acc=0.86, AUC=0.915, val_acc=0.854, val_AUC=0.91]


tensor([-1.2088,  0.4188, -1.1119,  0.5294,  0.2807, -0.0327, -0.2212, -0.4182,
        -0.0913, -0.0432,  0.2620,  0.3906,  0.9412,  0.3047], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 31: 100%|██████████| 35/35 [00:03<00:00,  8.79it/s, loss=29.6, acc=0.859, AUC=0.915, val_acc=0.844, val_AUC=0.91]


tensor([-1.2004,  0.4160, -1.1035,  0.5517,  0.2679, -0.0350, -0.2417, -0.4187,
        -0.0926, -0.0414,  0.2657,  0.3908,  0.9371,  0.3041], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 32: 100%|██████████| 35/35 [00:03<00:00,  8.78it/s, loss=29.7, acc=0.86, AUC=0.915, val_acc=0.848, val_AUC=0.909]


tensor([-1.2078,  0.4232, -1.0935,  0.5624,  0.2636, -0.0404, -0.2416, -0.4252,
        -0.0960, -0.0448,  0.2754,  0.3837,  0.9346,  0.3065], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 33: 100%|██████████| 35/35 [00:03<00:00,  8.77it/s, loss=29.6, acc=0.861, AUC=0.915, val_acc=0.848, val_AUC=0.911]


tensor([-1.2167,  0.4206, -1.1041,  0.5710,  0.2681, -0.0364, -0.2605, -0.4228,
        -0.0782, -0.0477,  0.2801,  0.3894,  0.9337,  0.3036], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 34: 100%|██████████| 35/35 [00:03<00:00,  8.77it/s, loss=29.5, acc=0.86, AUC=0.916, val_acc=0.849, val_AUC=0.91]


tensor([-1.2082,  0.4364, -1.1070,  0.5737,  0.2726, -0.0508, -0.2726, -0.4234,
        -0.0787, -0.0401,  0.2930,  0.3911,  0.9284,  0.2857], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 35: 100%|██████████| 35/35 [00:04<00:00,  8.74it/s, loss=29.4, acc=0.862, AUC=0.916, val_acc=0.849, val_AUC=0.911]


tensor([-1.1978,  0.4475, -1.1051,  0.5816,  0.2674, -0.0521, -0.2853, -0.4212,
        -0.0691, -0.0436,  0.2849,  0.3927,  0.9265,  0.2736], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 36: 100%|██████████| 35/35 [00:04<00:00,  8.73it/s, loss=29.4, acc=0.86, AUC=0.916, val_acc=0.852, val_AUC=0.911]


tensor([-1.1943,  0.4423, -1.1036,  0.5806,  0.2782, -0.0454, -0.3097, -0.4236,
        -0.0673, -0.0449,  0.2780,  0.4134,  0.9296,  0.2667], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 37: 100%|██████████| 35/35 [00:04<00:00,  8.72it/s, loss=29.4, acc=0.862, AUC=0.916, val_acc=0.847, val_AUC=0.908]


tensor([-1.2052,  0.4468, -1.1026,  0.5900,  0.2704, -0.0425, -0.3095, -0.4292,
        -0.0517, -0.0477,  0.2863,  0.4191,  0.9151,  0.2608], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 38: 100%|██████████| 35/35 [00:04<00:00,  8.72it/s, loss=29.4, acc=0.861, AUC=0.916, val_acc=0.846, val_AUC=0.909]


tensor([-1.2096,  0.4585, -1.0989,  0.5963,  0.2667, -0.0359, -0.3063, -0.4268,
        -0.0487, -0.0464,  0.2813,  0.4085,  0.9092,  0.2522], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 39: 100%|██████████| 35/35 [00:04<00:00,  8.73it/s, loss=29.3, acc=0.86, AUC=0.917, val_acc=0.851, val_AUC=0.911]


tensor([-1.2199,  0.4499, -1.0993,  0.5960,  0.2775, -0.0346, -0.3100, -0.4294,
        -0.0457, -0.0454,  0.2828,  0.4104,  0.9097,  0.2582], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 40: 100%|██████████| 35/35 [00:04<00:00,  8.72it/s, loss=29.3, acc=0.862, AUC=0.917, val_acc=0.852, val_AUC=0.91]


tensor([-1.2150,  0.4558, -1.1030,  0.5853,  0.2873, -0.0344, -0.3198, -0.4323,
        -0.0430, -0.0524,  0.2843,  0.4060,  0.9104,  0.2708], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 41: 100%|██████████| 35/35 [00:04<00:00,  8.71it/s, loss=29.3, acc=0.861, AUC=0.917, val_acc=0.849, val_AUC=0.91]


tensor([-1.2202,  0.4512, -1.1100,  0.5786,  0.2907, -0.0395, -0.3102, -0.4330,
        -0.0360, -0.0529,  0.2876,  0.4102,  0.9091,  0.2744], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 42: 100%|██████████| 35/35 [00:04<00:00,  8.61it/s, loss=29.2, acc=0.86, AUC=0.917, val_acc=0.849, val_AUC=0.91]


tensor([-1.2207,  0.4612, -1.0952,  0.5925,  0.2931, -0.0456, -0.3158, -0.4332,
        -0.0483, -0.0567,  0.2834,  0.3995,  0.9119,  0.2740], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 43: 100%|██████████| 35/35 [00:04<00:00,  8.70it/s, loss=29.3, acc=0.86, AUC=0.917, val_acc=0.849, val_AUC=0.909]


tensor([-1.2259,  0.4592, -1.0840,  0.5915,  0.2992, -0.0510, -0.3248, -0.4399,
        -0.0441, -0.0540,  0.2807,  0.4088,  0.9090,  0.2752], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 44: 100%|██████████| 35/35 [00:04<00:00,  8.69it/s, loss=29.3, acc=0.861, AUC=0.917, val_acc=0.85, val_AUC=0.911]


tensor([-1.2185,  0.4583, -1.0799,  0.5862,  0.2964, -0.0539, -0.3419, -0.4373,
        -0.0476, -0.0548,  0.2771,  0.4135,  0.9193,  0.2831], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 45: 100%|██████████| 35/35 [00:04<00:00,  8.69it/s, loss=29.3, acc=0.861, AUC=0.917, val_acc=0.847, val_AUC=0.911]


tensor([-1.2238,  0.4581, -1.0797,  0.5832,  0.2901, -0.0619, -0.3377, -0.4371,
        -0.0414, -0.0557,  0.2777,  0.4168,  0.9198,  0.2917], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 46: 100%|██████████| 35/35 [00:04<00:00,  8.68it/s, loss=29.2, acc=0.861, AUC=0.918, val_acc=0.847, val_AUC=0.91]


tensor([-1.2205,  0.4493, -1.0731,  0.5964,  0.2811, -0.0743, -0.3351, -0.4437,
        -0.0370, -0.0611,  0.2774,  0.4024,  0.9320,  0.3061], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 47: 100%|██████████| 35/35 [00:04<00:00,  8.69it/s, loss=29.1, acc=0.862, AUC=0.918, val_acc=0.847, val_AUC=0.91]


tensor([-1.2146,  0.4569, -1.0602,  0.6150,  0.2879, -0.0750, -0.3490, -0.4417,
        -0.0427, -0.0628,  0.2668,  0.3960,  0.9147,  0.3086], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 48: 100%|██████████| 35/35 [00:04<00:00,  8.68it/s, loss=29.2, acc=0.863, AUC=0.918, val_acc=0.85, val_AUC=0.911]


tensor([-1.2143,  0.4476, -1.0602,  0.6178,  0.2960, -0.0762, -0.3455, -0.4459,
        -0.0381, -0.0652,  0.2724,  0.3961,  0.9107,  0.3047], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 49: 100%|██████████| 35/35 [00:04<00:00,  8.68it/s, loss=29, acc=0.862, AUC=0.918, val_acc=0.847, val_AUC=0.91]


tensor([-1.2245,  0.4541, -1.0634,  0.6016,  0.3044, -0.0656, -0.3535, -0.4488,
        -0.0321, -0.0665,  0.2744,  0.4052,  0.9055,  0.3092], device='cuda:0',
       grad_fn=<DivBackward0>)


Epoch 50: 100%|██████████| 35/35 [00:04<00:00,  8.68it/s, loss=29, acc=0.863, AUC=0.919, val_acc=0.85, val_AUC=0.909]


tensor([-1.2277,  0.4556, -1.0590,  0.6082,  0.2961, -0.0565, -0.3593, -0.4456,
        -0.0222, -0.0700,  0.2779,  0.4021,  0.8951,  0.3054], device='cuda:0',
       grad_fn=<DivBackward0>)
test_acc: 0.8543562889099121
test_auc: 0.9134344345472996
-----------------------------------------


In [20]:
sum([0.7051671743392944,
     0.7082067131996155,
     0.7051671743392944,
     0.7264437675476074,
     0.7325227856636047
     ])/5

0.7155015230178833

In [21]:
sum([0.7808573540280858,
     0.7863905325443787,
     0.7797353553374963,
     0.8075237670825907,
     0.8157019294743846
     ])/5

0.7940417876933872