# 前置設定

In [1]:
# Mount Google Drive inside the Colab VM. Every data, vocabulary and checkpoint
# path used later in this notebook lives under /content/drive.
# force_remount=True is passed so the mount is redone even if one already exists.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# import argparse
from torch.utils.data import DataLoader

from drive.MyDrive.iirlab.topic.LSAN.LSANori.trainer import *
from drive.MyDrive.iirlab.topic.LSAN.LSANori.transformer import *
from drive.MyDrive.iirlab.topic.LSAN.LSANori.dataset import *
from drive.MyDrive.iirlab.topic.LSAN.LSANori.LSAN import *

import pickle

In [3]:
# Sub-directory (under trimmed_pickle/) that holds the pre-split pickles.
time_interval = '12_with_gap_6/18_39'


def _load_split(split_name):
  """Read ``final_<split_name>.pickle`` and wrap its contents in a ``Dataset``.

  Items 0, 1 and 3 of the unpickled object are handed to ``Dataset`` as the
  visit, label and value arguments; item 2 is not used here.

  Returns:
    tuple: ``(raw, visit, label, value, dataset)``.
  """
  pickle_path = ('/content/drive/MyDrive/iirlab/topic/trimmed_pickle/'
                 + time_interval + '/final_' + split_name + '.pickle')
  # pickle.load executes arbitrary code on malicious input; only use on
  # files produced by this project.
  with open(pickle_path, 'rb') as handle:
    raw = pickle.load(handle)
  visit, label, value = raw[0], raw[1], raw[3]
  return raw, visit, label, value, Dataset(visit, label, value)


(train_data, train_visit, train_label,
 train_value, train_dataset) = _load_split('train')

(validate_data, validate_visit, validate_label,
 validate_value, validate_dataset) = _load_split('val')

(test_data, test_visit, test_label,
 test_value, test_dataset) = _load_split('test')
  

# 參數

In [4]:
# ---- File locations ---------------------------------------------------------
# Passed to LSAN_trainer as output_dir.
output_path = '/content/drive/MyDrive/iirlab/topic/LSAN/LSANori/output/result.txt'
# Where process() writes the trained state_dict (and reloads it for testing).
saving_path = '/content/drive/MyDrive/iirlab/topic/LSAN/LSANori/model_parameters/models.pth'

# ---- Model architecture -----------------------------------------------------
hidden = 256       # used both as embedding size and as transformer_hidden
layers = 8         # transformer_layers
attn_heads = 8
dropout = 0.1      # transformer_dropout

# ---- Training loop ----------------------------------------------------------
batch_size = 32
epochs = 12
num_workers = 2    # DataLoader worker processes
with_cuda = 1
lr = 1e-4

# ---- Adam settings ----------------------------------------------------------
# NOTE(review): in this notebook these three values are never handed to
# LSAN_trainer; confirm whether the trainer is supposed to receive them.
adam_weight_decay = 0.01
adam_beta1 = 0.9
adam_beta2 = 0.999

In [5]:
def process():
  """Train an LSAN model, checkpoint it, then evaluate the reloaded checkpoint.

  All configuration is read from module-level names at call time:
  ``train_dataset`` / ``validate_dataset`` / ``test_dataset``, ``batch_size``,
  ``num_workers``, ``hidden``, ``attn_heads``, ``dropout``, ``layers``,
  ``epochs``, ``with_cuda``, ``lr``, ``output_path`` and ``saving_path``.

  Side effects:
    Writes the trained ``state_dict`` to ``saving_path``.

  Returns:
    tuple: ``(model, test_loss)`` where ``model`` is the trained network and
    ``test_loss`` is whatever ``LSAN_trainer.test`` returns for a fresh model
    loaded from ``saving_path``.
  """
  # NOTE(review): the training loader is not shuffled (shuffle=False); confirm
  # that a fixed batch order during training is intended.
  train_data_loader = DataLoader(
      train_dataset, batch_size=batch_size, num_workers=num_workers,
      collate_fn=train_dataset.collate_fn, shuffle=False)
  # Each loader uses the collate_fn of its own dataset (the validation loader
  # previously borrowed test_dataset.collate_fn).
  validate_data_loader = DataLoader(
      validate_dataset, batch_size=batch_size, num_workers=num_workers,
      collate_fn=validate_dataset.collate_fn, shuffle=False)
  test_data_loader = DataLoader(
      test_dataset, batch_size=batch_size, num_workers=num_workers,
      collate_fn=test_dataset.collate_fn, shuffle=False)

  embedding_dim = hidden
  # NOTE(review): the vocabulary size is hard-coded. An earlier revision
  # derived it from the length of code2idx_new.pickle; that file was still
  # being loaded here without being used, so the dead load was removed.
  # Confirm that 51 matches the data.
  num_codes = 51

  def build_model():
    # Same constructor call for the training model and the evaluation model,
    # so the saved state_dict is guaranteed to fit the reloaded network.
    return LSAN(num_codes, embedding_dim, transformer_hidden=hidden,
                attn_heads=attn_heads, transformer_dropout=dropout,
                transformer_layers=layers)

  model = build_model()

  LSAN_train = LSAN_trainer(
      model, train_dataloader=train_data_loader,
      validate_dataloader=validate_data_loader,
      test_dataloader=test_data_loader,
      with_cuda=with_cuda, lr=lr, output_dir=output_path)

  # One training pass followed by one validation pass per epoch.
  for epoch in range(epochs):
    model.train()
    LSAN_train.train(epoch)

    model.eval()
    LSAN_train.validate(epoch)

  torch.save(model.state_dict(), saving_path)

  # Evaluate a fresh model restored from disk, so the reported test result
  # reflects exactly what was checkpointed.
  test_model = build_model()
  test_model.load_state_dict(torch.load(saving_path))
  test_model.eval()

  LSAN_test = LSAN_trainer(
      test_model, train_dataloader=train_data_loader,
      validate_dataloader=validate_data_loader,
      test_dataloader=test_data_loader,
      with_cuda=with_cuda, lr=lr, output_dir=output_path)

  # Index of the final epoch. Computed explicitly because the loop variable is
  # unbound when epochs == 0.
  last_epoch = epochs - 1
  test_loss = LSAN_test.test(last_epoch, False)
  return model, test_loss

# 暫定版


In [6]:
# Run the full train/test cycle 20 times and keep one checkpoint per run, named
# "<run index>_<test loss x 100>_models.pth".
for num in range(20):
  model, loss = process()
  # Scale first, then round: rounding before multiplying by 100 can reintroduce
  # float noise (e.g. 56.89999999999999) into the file name.
  loss = round(loss * 100, 1)
  # Use a dedicated name instead of rebinding `saving_path`: process() saves to
  # the module-level `saving_path`, so overwriting it here made the next run
  # clobber this run's checkpoint file.
  checkpoint_path = '/content/drive/MyDrive/iirlab/topic/LSAN/LSANori/model_parameters/' + str(num) + '_' + str(loss) + '_models.pth'
  torch.save(model.state_dict(), checkpoint_path)

Done for epoch 0
0.7762383983916595
Validation epoch:0 auc:0.7661040770026357 precision:0.7735849056603774 recall:0.9288025889967637
Validation loss : 0.5312646941134804
Done for epoch 1
0.596820535028682
Validation epoch:1 auc:0.7871723439984876 precision:0.7977044476327116 recall:0.8996763754045307
Validation loss : 0.5126597415982631
Done for epoch 2
0.578893859095934
Validation epoch:2 auc:0.8018146887754535 precision:0.7972779369627507 recall:0.9004854368932039
Validation loss : 0.4966082907559579
Done for epoch 3
0.5650881209543773
Validation epoch:3 auc:0.8089780802722448 precision:0.8048069919883467 recall:0.8940129449838188
Validation loss : 0.4899330034590604
Done for epoch 4
0.5562887409654986
Validation epoch:4 auc:0.8166363616143417 precision:0.8076358296622613 recall:0.889967637540453
Validation loss : 0.4830516953217356
Done for epoch 5
0.5470481268247637
Validation epoch:5 auc:0.8208276337592721 precision:0.8101545253863135 recall:0.8907766990291263
Validation loss : 0.

KeyboardInterrupt: ignored

# 儲存模型

# 實驗5
將 hidden、attn_heads、layers 三個參數同時調高

In [None]:
# ---- File locations ---------------------------------------------------------
# Passed to LSAN_trainer as output_dir.
output_path = '/content/drive/MyDrive/iirlab/topic/LSAN/LSANori/output/result.txt'
# Where process() writes the trained state_dict (and reloads it for testing).
saving_path = '/content/drive/MyDrive/iirlab/topic/LSAN/LSANori/model_parameters/models.pth'

# ---- Model architecture -----------------------------------------------------
hidden = 256       # used both as embedding size and as transformer_hidden
layers = 8         # transformer_layers
attn_heads = 8
dropout = 0.1      # transformer_dropout

# ---- Training loop ----------------------------------------------------------
batch_size = 32
epochs = 200
num_workers = 2    # DataLoader worker processes
with_cuda = 1
lr = 5e-5

# ---- Adam settings ----------------------------------------------------------
# NOTE(review): in this notebook these three values are never handed to
# LSAN_trainer; confirm whether the trainer is supposed to receive them.
adam_weight_decay = 0.01
adam_beta1 = 0.9
adam_beta2 = 0.999

def process():
  """Train an LSAN model, checkpoint it, then test the reloaded checkpoint.

  NOTE: this redefinition replaces the earlier ``process`` in this notebook.
  Unlike that version it returns nothing and calls ``LSAN_trainer.test`` with
  the epoch index only.

  All configuration is read from module-level names at call time:
  ``train_dataset`` / ``validate_dataset`` / ``test_dataset``, ``batch_size``,
  ``num_workers``, ``hidden``, ``attn_heads``, ``dropout``, ``layers``,
  ``epochs``, ``with_cuda``, ``lr``, ``output_path`` and ``saving_path``.

  Side effects:
    Writes the trained ``state_dict`` to ``saving_path``.

  Returns:
    None.
  """
  # NOTE(review): the training loader is not shuffled (shuffle=False); confirm
  # that a fixed batch order during training is intended.
  train_data_loader = DataLoader(
      train_dataset, batch_size=batch_size, num_workers=num_workers,
      collate_fn=train_dataset.collate_fn, shuffle=False)
  # Each loader uses the collate_fn of its own dataset (the validation loader
  # previously borrowed test_dataset.collate_fn).
  validate_data_loader = DataLoader(
      validate_dataset, batch_size=batch_size, num_workers=num_workers,
      collate_fn=validate_dataset.collate_fn, shuffle=False)
  test_data_loader = DataLoader(
      test_dataset, batch_size=batch_size, num_workers=num_workers,
      collate_fn=test_dataset.collate_fn, shuffle=False)

  embedding_dim = hidden
  # NOTE(review): the vocabulary size is hard-coded. An earlier revision
  # derived it from the length of code2idx_new.pickle; that file was still
  # being loaded here without being used, so the dead load was removed.
  # Confirm that 51 matches the data.
  num_codes = 51

  def build_model():
    # Same constructor call for the training model and the evaluation model,
    # so the saved state_dict is guaranteed to fit the reloaded network.
    return LSAN(num_codes, embedding_dim, transformer_hidden=hidden,
                attn_heads=attn_heads, transformer_dropout=dropout,
                transformer_layers=layers)

  model = build_model()

  LSAN_train = LSAN_trainer(
      model, train_dataloader=train_data_loader,
      validate_dataloader=validate_data_loader,
      test_dataloader=test_data_loader,
      with_cuda=with_cuda, lr=lr, output_dir=output_path)

  # One training pass followed by one validation pass per epoch.
  for epoch in range(epochs):
    model.train()
    LSAN_train.train(epoch)

    model.eval()
    LSAN_train.validate(epoch)

  torch.save(model.state_dict(), saving_path)

  # Evaluate a fresh model restored from disk, so the reported test result
  # reflects exactly what was checkpointed.
  test_model = build_model()
  test_model.load_state_dict(torch.load(saving_path))
  test_model.eval()

  LSAN_test = LSAN_trainer(
      test_model, train_dataloader=train_data_loader,
      validate_dataloader=validate_data_loader,
      test_dataloader=test_data_loader,
      with_cuda=with_cuda, lr=lr, output_dir=output_path)

  # Index of the final epoch. Computed explicitly because the loop variable is
  # unbound when epochs == 0.
  last_epoch = epochs - 1
  LSAN_test.test(last_epoch)

In [None]:
# Experiment 5, run A: hidden 2048, 64 layers, 64 heads, 100 epochs.
# process() reads these module-level names when it is called.
layers = attn_heads = 64
hidden = 2048
epochs = 100
process()

In [None]:
# Experiment 5, run B: same model size as the 2048/64/64 run, 200 epochs.
# process() reads these module-level names when it is called.
layers = attn_heads = 64
hidden = 2048
epochs = 200
process()

In [None]:
# Experiment 5, run C: hidden 4096, 64 layers, 64 heads, 100 epochs.
# process() reads these module-level names when it is called.
layers = attn_heads = 64
hidden = 4096
epochs = 100
process()

In [None]:
# Experiment 5, run D: same model size as the 4096/64/64 run, 200 epochs.
# process() reads these module-level names when it is called.
layers = attn_heads = 64
hidden = 4096
epochs = 200
process()

# 一次跑很多種

In [None]:
from itertools import product

# Search space. The insertion order of the keys fixes the order in which the
# loop below unpacks each combination.
parameters = {
    'lr_value': [0.0005, 0.0001],
    'batch_size_value': [32, 64],
    'hidden_value': [128, 256, 512],
    # 'awd_value': [0.01, 0.005, 0.001],
    'layers_value': [2, 4, 8],
}

param_values = list(parameters.values())

# Exhaustive sweep: set the module-level knobs that process() reads, print the
# combination being run, then train and test with it.
for lr_value, batch_size_value, hidden_value, layers_value in product(*param_values):
  lr, batch_size, hidden, layers = (
      lr_value, batch_size_value, hidden_value, layers_value)
  comment = f' lr = {lr} batch_size = {batch_size} hidden = {hidden} layers = {layers}'
  print(comment)
  process()

 lr = 0.0005 batch_size = 32 hidden = 128 layers = 2
Testing epoch:19 auc:0.7757754527640222 precision:0.827996340347667 recall:0.7375713121434393 F1:0.7801724137931034
 lr = 0.0005 batch_size = 32 hidden = 128 layers = 4
Testing epoch:19 auc:0.7830828358548563 precision:0.8160919540229885 recall:0.752241238793806 F1:0.7828668363019509
 lr = 0.0005 batch_size = 32 hidden = 128 layers = 8
Testing epoch:19 auc:0.772555460403747 precision:0.8151875571820677 recall:0.726161369193154 F1:0.7681034482758621
 lr = 0.0005 batch_size = 32 hidden = 256 layers = 2
Testing epoch:19 auc:0.7773268409461676 precision:0.8090366581415175 recall:0.7734311328443357 F1:0.7908333333333334
 lr = 0.0005 batch_size = 32 hidden = 256 layers = 4
Testing epoch:19 auc:0.7764952968805376 precision:0.8031173092698933 recall:0.797881010594947 F1:0.8004905968928863
 lr = 0.0005 batch_size = 32 hidden = 256 layers = 8
Testing epoch:19 auc:0.7771324002940053 precision:0.82277318640955 recall:0.7302363488182559 F1:0.7737

In [None]:
from itertools import product

# Same search space as the preceding sweep cell. The insertion order of the
# keys fixes the order in which the loop below unpacks each combination.
parameters = {
    'lr_value': [0.0005, 0.0001],
    'batch_size_value': [32, 64],
    'hidden_value': [128, 256, 512],
    # 'awd_value': [0.01, 0.005, 0.001],
    'layers_value': [2, 4, 8],
}

param_values = list(parameters.values())

# Exhaustive sweep: set the module-level knobs that process() reads, print the
# combination being run, then train and test with it.
for lr_value, batch_size_value, hidden_value, layers_value in product(*param_values):
  lr, batch_size, hidden, layers = (
      lr_value, batch_size_value, hidden_value, layers_value)
  comment = f' lr = {lr} batch_size = {batch_size} hidden = {hidden} layers = {layers}'
  print(comment)
  process()

 lr = 0.0005 batch_size = 32 hidden = 128 layers = 2
Testing epoch:19 auc:0.7807040406422333 precision:0.8879456706281834 recall:0.4262428687856561 F1:0.5759911894273128
 lr = 0.0005 batch_size = 32 hidden = 128 layers = 4
Testing epoch:19 auc:0.7782549158320198 precision:0.8927973199329984 recall:0.4343928280358598 F1:0.5844298245614036
 lr = 0.0005 batch_size = 32 hidden = 128 layers = 8
Testing epoch:19 auc:0.7870240513433644 precision:0.9034267912772586 recall:0.47269763651181745 F1:0.6206527554842162
 lr = 0.0005 batch_size = 32 hidden = 256 layers = 2
Testing epoch:19 auc:0.7820913264300007 precision:0.8978930307941653 recall:0.4515077424612877 F1:0.6008676789587853
 lr = 0.0005 batch_size = 32 hidden = 256 layers = 4
Testing epoch:19 auc:0.7871178241401517 precision:0.8956521739130435 recall:0.5036674816625917 F1:0.6447574334898278
 lr = 0.0005 batch_size = 32 hidden = 256 layers = 8
Testing epoch:19 auc:0.7657059092031105 precision:0.8757861635220126 recall:0.4539527302363488 F

KeyboardInterrupt: ignored