In [None]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import random
import math
import copy
import matplotlib.pyplot as plt

from torch.utils.data import DataLoader, Dataset, ConcatDataset
from torch.utils.data.dataset import Subset
from sklearn.model_selection import KFold
from torch.nn.utils.rnn import pad_sequence

from torch.utils.tensorboard import SummaryWriter

from rdkit import Chem
from rdkit.Chem import AllChem

from rdkit import RDLogger
RDLogger.DisableLog('rdApp.*')

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# read symbol dictionary
The symbol dictionary is saved as 'OdorCode-40 Symbol Dictionary' by running 'datasetMake_pretrain.ipynb'.

In [None]:
# SMILES strings longer than this many characters are skipped when datasets are read.
LIMIT_SMILES_LENGTH = 100

import pickle

# Load the symbol dictionary: symbol -> ID mapping, ID -> symbol lookup, and sID
# (used further down as the vocabulary size of the embedding).
# NOTE(review): pickle.load can execute arbitrary code; only load a dictionary
# file that was produced locally by a trusted notebook.
# The `with` block guarantees the handle is closed even if unpickling fails.
with open('CHEMBL/OdorCode-40 Symbol Dictionary', 'rb') as f:
    symbol_ID, ID_symbol, sID = pickle.load(f)

# IDs reserved for the special tokens.
PAD_ID = 0
CLS_ID = 1
BOS_ID = 2
EOS_ID = 3
MSK_ID = 4

# some functions
(1) positive, negative, seek_TP, seek_TN: helper functions used to compute the F1 score

(2) smiles_str2smiles: convert a SMILES string into a list of symbol IDs

(3) smiles2smiles_str: convert a list of symbol IDs back into a SMILES string

In [None]:
Threshold = torch.tensor([0.5]).to(device) # decision threshold that separates 0 from 1

def positive(proba):
    """Return a boolean mask that is True where `proba` is at or above `Threshold`."""
    return proba >= Threshold

def negative(proba):
    """Return a boolean mask that is True where `proba` is strictly below `Threshold`."""
    return proba < Threshold

def seek_TP(pred_y, y):
  """Return an int mask with 1 where both the prediction and the target are positive."""
  return (positive(pred_y) & positive(y)).int()

def seek_TN(pred_y, y):
  """Return an int mask with 1 where both the prediction and the target are negative."""
  return (negative(pred_y) & negative(y)).int()


#----------------------------------#
#           smiles_str2smiles      #
#----------------------------------#
# Convert a SMILES string into a list of symbol IDs (symbols may be 1, 2 or 3 characters long)

# Length of the longest entry of ID_symbol; smiles_str2smiles uses it as the widest matching window.
max_length_symbol = max([len(s) for s in ID_symbol])

def smiles_str2smiles(smiles_str, flag=False):
  """Tokenise a SMILES string into a list of symbol IDs.

  At every position the longest substring (at most `max_length_symbol`
  characters) that is a key of `symbol_ID` is taken as the next symbol, so
  multi-character symbols such as two-letter elements map to a single ID.
  If no symbol matches at some position, a message is printed and the IDs
  collected so far are returned. `flag` is accepted but not used.
  """
  token_ids = []
  total = len(smiles_str)
  pos = 0
  while pos < total:
    # Try the widest window first so longer symbols win over their prefixes.
    for width in range(max_length_symbol, 0, -1):
      end = pos + width
      if end <= total and smiles_str[pos:end] in symbol_ID:
        token_ids.append(symbol_ID[smiles_str[pos:end]])
        pos = end
        break
    else:
      # No window matched at this position: report and stop tokenising.
      print('something wrong on converting smiles_str to smiles')
      break
  return token_ids

#----------------------------------#
#           smiles2smiles_str      #
#----------------------------------#
def smiles2smiles_str(smiles):
  """Rebuild a SMILES string by concatenating the symbol of every ID in `smiles`."""
  return ''.join(ID_symbol[token] for token in smiles)


# process the data set of odor descriptors
The dataset used is the same as in our previous study; see the paper for details.

In [None]:
F_odor     = 'webScrapping/tgsc_odorant_1020.txt'           # SMILES file of odorous substances (it actually contains exactly one odorless entry)
F_odorless = 'webScrapping/tgsc_odorless_1020.txt'  # SMILES file of odorless substances

LIMIT_FREQ = 49   # a descriptor is kept only when its number of positive examples is strictly greater than this

#-----------------------------------------------------------------------------------------
# Count how often each odor descriptor is attached to a SMILES of at most
# LIMIT_SMILES_LENGTH characters
#-----------------------------------------------------------------------------------------
label_ID ={}    # label (odor descriptor) -> ID
freq_label = {} # label -> number of molecules carrying it

with open(F_odor,'r') as inF:
  for line in inF:
    x = line.split()
    # Blank or truncated lines have no SMILES column; skip them instead of failing on x[1].
    if len(x) < 2:
      continue
    smiles_str = x[1]

    # Skip SMILES that are too long or that RDKit cannot parse.
    if len(smiles_str) > LIMIT_SMILES_LENGTH:
      continue
    mol = Chem.MolFromSmiles(smiles_str)
    if mol is None:
      continue

    odor_descriptors = x[2:]
    for odd in odor_descriptors:
      if odd == 'odorless':
        continue
      freq_label[odd] = freq_label.get(odd, 0) + 1

#----------------------------------------------
# Assign IDs only to labels whose frequency exceeds LIMIT_FREQ
#----------------------------------------------
ID2label = []
lID = 0

freq_label_sorted = sorted(freq_label.items(), key=lambda x:x[1], reverse=True)
for label, freq in freq_label_sorted:
  if freq > LIMIT_FREQ:
    label_ID[label] = lID
    ID2label.append(label)
    lID += 1

print("number of odor descriptors: ", lID)
for label, freq in freq_label_sorted:
  # Same cut-off as above, so the report always matches the labels that received an ID.
  if freq > LIMIT_FREQ:
     print('%5d : '%freq, label)


In [None]:
#------------------------------------------------------------------------------------------------------
#   SMILESのリスト smiles_list と 
#   頻度条件を満たす odor descriptors に対する正例／負例を表すベクトルのリスト labels_vec_list の作成
#-----------------------------------------------------------------------------------------------------

# スペースで区切られた label の列(label_sequence)を multi-category 分類用のベクトル(labels_vec)に変換
def labels2vec(label_sequence):
  """Turn a list of odor descriptors into a multi-label target vector.

  The result has `lID` float entries; entry `label_ID[label]` is 1.0 for each
  listed label that has an ID. 'odorless' and labels without an ID are
  ignored, so an odorless molecule is a negative example for every label.
  """
  labels_vec = [0.0] * lID
  for label in label_sequence.copy():
    if label != 'odorless' and label in label_ID:
      labels_vec[label_ID[label]] = 1.0
  return labels_vec

# Parallel lists filled by make_data(); index k of each list describes the same molecule.
smiles_list = []            # canonical SMILES as lists of symbol IDs
labels_vec_list = []        # multi-label target vectors produced by labels2vec
canonical_smiles_list = []  # canonical SMILES strings

def make_data(filename):
  """Append the molecules of `filename` to the module-level dataset lists.

  Each line is split on whitespace; field 1 is read as the SMILES and the
  fields from index 2 onwards as its odor descriptors. Lines with fewer than
  two fields, SMILES longer than LIMIT_SMILES_LENGTH and SMILES RDKit cannot
  parse are skipped. For every remaining molecule the canonical SMILES is
  appended to `canonical_smiles_list`, its symbol-ID form to `smiles_list`
  and its label vector to `labels_vec_list`.
  """

  with open(filename,'r') as inF:
    for line in inF:
      x = line.split()
      # Blank or truncated lines have no SMILES column; skip them instead of failing on x[1].
      if len(x) < 2:
        continue
      smiles_str = x[1]

      if len(smiles_str) > LIMIT_SMILES_LENGTH:
        continue
      mol = Chem.MolFromSmiles(smiles_str)
      if mol is None:
        continue

      # Canonicalise so that one molecule always yields the same token sequence.
      canonical_smiles_str = Chem.MolToSmiles(mol)
      smiles_list.append(smiles_str2smiles(canonical_smiles_str))
      labels_vec_list.append(labels2vec(x[2:]))
      canonical_smiles_list.append(canonical_smiles_str)

# Load the odorant file first, then the odorless file, into the shared lists.
for source_file in (F_odor, F_odorless):
  make_data(source_file)

# Show the first two samples as a quick sanity check.
for sample_idx in (0, 1):
  print(smiles_list[sample_idx])
  print(labels_vec_list[sample_idx])

# read MoleculeNet data
The dataset files were downloaded from https://moleculenet.org/datasets-1

In [None]:
# Supported datasets. read_file() treats the last three entries as
# classification tasks and the others as regression tasks.
dataset_list = ['ESOL', 'FreeSolv', 'Lipo', 'BACE', 'BBBP', 'Tox21']

# Index of the CSV column that read_file() reads as the SMILES string.
smiles_col = {'ESOL': -1, 'FreeSolv': 1, 'Lipo': 2, 'BACE': 0, 'BBBP': 3, 'Tox21': -1}

# Indices of the CSV columns that read_file() reads as target values.
target_col = {'ESOL': [-2], 
            'FreeSolv': [2], 
            'Lipo': [1], 
            'BACE': [2], 
            'BBBP': [2],
            'Tox21': list(range(12))}

# Target names per dataset; read_file() uses them as keys of label_ID and freq_dict.
label_name = {
            'ESOL': ['ESOL'], 
            'FreeSolv': ['FreeSolv'], 
            'Lipo': ['Lipo'], 
            'BACE': ['BACE'], 
            'BBBP': ['BBBP'],
            'Tox21': ['NR-AR','NR-AR-LBD','NR-AhR','NR-Aromatase','NR-ER','NR-ER-LBD','NR-PPAR-gamma','SR-ARE','SR-ATAD5','SR-HSE','SR-MMP','SR-p53']
}

def read_file(which_data, filepath=None):
    """Read one MoleculeNet CSV file and convert it into model inputs.

    Args:
        which_data: dataset name; must be a key of `smiles_col`, `target_col`
            and `label_name`.
        filepath: path of the CSV file. Defaults to
            'all_data/moleculeNET/<which_data>.csv'.

    Returns:
        A tuple `(canonical_smiles_list, smiles_list, labels_vec_list, lID,
        label_ID, freq_dict, task_type)`:
        canonical SMILES strings, their symbol-ID lists, the target vectors
        (lists of floats), the number of targets, a target-name -> index
        dict, the number of positive examples per target (None for
        regression), and 'classification' or 'regression'.

    The first row is treated as a header. Rows whose SMILES cannot be parsed
    by RDKit, or whose canonical SMILES is longer than LIMIT_SMILES_LENGTH,
    are skipped. An empty target field is stored as -1.0.
    """
    import csv  # local import so the block does not depend on the notebook's import cell

    if filepath is None:
        filepath = 'all_data/moleculeNET/'+which_data+'.csv'

    # csv.reader handles quoted fields that contain commas (e.g. compound
    # names), and the file no longer needs a trailing newline for its last
    # record to be kept.
    with open(filepath, newline='') as f:
        reader = csv.reader(f)
        next(reader, None)  # skip the header row
        all_line = [row for row in reader if any(field.strip() for field in row)]

    smiles_list = []
    labels_vec_list = []
    lID = len(target_col[which_data])
    canonical_smiles_list = []

    label_ID = {name: idx for idx, name in enumerate(label_name[which_data])}

    if which_data in dataset_list[-3:]:
        task_type = 'classification'
        freq_dict = {}
    else:
        task_type = 'regression'
        freq_dict = None

    for line in all_line:
        smi = line[smiles_col[which_data]]
        mol = Chem.MolFromSmiles(smi)
        if mol is None:
            continue
        smi = Chem.MolToSmiles(mol)
        if len(smi) > LIMIT_SMILES_LENGTH:
            continue

        canonical_smiles_list.append(smi)
        smiles_list.append(smiles_str2smiles(smi))

        # NOTE(review): a missing value is encoded as -1.0 and is not masked
        # later on; confirm this is intended, in particular for Tox21.
        labels_vec_list.append([
            -1.0 if line[i] == '' else float(line[i])
            for i in target_col[which_data]
        ])

    if task_type == 'classification':
        for i in range(lID):
            temp_name = label_name[which_data][i]
            freq_dict[temp_name] = sum(1 for s in labels_vec_list if s[i] == 1)

    return (canonical_smiles_list, smiles_list, labels_vec_list, lID, label_ID, freq_dict, task_type)


In [None]:
# Replace the module-level dataset variables with the ESOL data.
(canonical_smiles_list, smiles_list, labels_vec_list,
 lID, label_ID, freq_dict, task_type) = read_file('ESOL')

# Target names in the iteration order of label_ID.
ID2label = list(label_ID)

# Quick look at what was loaded.
print(len(smiles_list))
print(smiles_list[0])
print(labels_vec_list[0])
print(lID)
print(label_ID)
print(freq_dict)
print(task_type)
print(ID2label)

# model

In [None]:
# Half-width of the uniform range SymbolEncoder uses to re-initialise its embedding.
InitRange = 0.1
# Vocabulary size given to the symbol embedding; sID is loaded with the symbol dictionary.
NumToken = sID
# Number of attention heads passed to MyTransformerEncoder by Model.
NumHead = 8
# Activation of the transformer feed-forward sub-layer.
Activation = 'gelu'
# Passed as norm_first to nn.TransformerEncoderLayer.
NormFirst = True
# NOTE(review): not referenced by any code visible in this notebook section.
DimOdorCode = 100

#--------------------------------------------------------------------------------
class PositionalEncoder(nn.Module):
    """Fixed sinusoidal positional encoding.

    A table of shape (1, max_len, d_model) is pre-computed and stored as the
    buffer `pe`: even feature columns hold sines and odd columns cosines of
    the position scaled by geometrically spaced frequencies. The table is a
    buffer, not a parameter, so it is never trained.
    """

    def __init__(self, d_model, max_len=2048):  # d_model: embedding size of one symbol, max_len: longest sequence length supported
        super().__init__()

        # torch.zeros does not track gradients, so no requires_grad handling is needed.
        pe = torch.zeros(max_len, d_model).float()
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        # An odd d_model has one cosine column fewer than sine columns, so
        # trim the frequencies; for an even d_model the slice is a no-op.
        pe[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
        pe = pe.unsqueeze(0)  # leading axis of size 1 broadcasts over the batch
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the encodings of the first `x.size(1)` positions, shape (1, x.size(1), d_model)."""
        return self.pe[:, :x.size(1)]

#----------------------------------------------------------------------------
class SymbolEncoder(nn.Module):
    """Embedding of symbol IDs, scaled by sqrt(d_model)."""

    def __init__(self, num_token, d_model):
        super().__init__()
        self.d_model = d_model
        self.embed = nn.Embedding(num_token, d_model, padding_idx=PAD_ID)
        # Re-initialise the whole table uniformly in [-InitRange, InitRange];
        # this also overwrites the row of the padding index.
        nn.init.uniform_(self.embed.weight, -InitRange, InitRange)

    def forward(self, src):
        """Look up the embeddings of `src` and multiply them by sqrt(d_model)."""
        scale = math.sqrt(self.d_model)
        return self.embed(src) * scale
#----------------------------------------------------------------------------
class MyTransformerEncoder(nn.Module):
    """Stack of `NumLayers` batch-first transformer encoder layers followed by a LayerNorm.

    Dropout, activation, norm order and depth are read from the module-level
    settings `Dropout`, `Activation`, `NormFirst` and `NumLayers` when the
    module is constructed.
    """

    def __init__(self, d_model, num_head, d_hidden):
        super().__init__()

        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_head,
            dim_feedforward=d_hidden,
            dropout=Dropout,
            activation=Activation,
            batch_first=True,
            norm_first=NormFirst,
        )
        final_norm = nn.LayerNorm(d_model)
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=NumLayers, norm=final_norm)

    def forward(self, x, padding_mask):
        """Encode `x`; positions where `padding_mask` is True are treated as padding."""
        return self.transformer_encoder(x, src_key_padding_mask=padding_mask)
#--------------------------------------------------------------------------------


class Model(nn.Module):
    """SMILES encoder with a prediction head on top of the CLS position.

    A CLS token is prepended to the input IDs, the sequence is embedded
    (symbol embedding plus positional encoding) and passed through the
    transformer encoder. The encoder output at the CLS position goes through
    Linear+ReLU (`fnn2`, `act2`) and then the output layer `fnn` with `lID`
    units. When the module-level `task_type` is 'classification' a sigmoid is
    applied to the output; otherwise the raw linear output is returned.
    """

    def __init__(self):
        super().__init__()
        self.positional_encoder = PositionalEncoder(DimEmbed)
        self.symbol_encoder = SymbolEncoder(NumToken, DimEmbed) # TODO (from the original author): check whether a single symbol_encoder is enough
        self.smiles_encoder = MyTransformerEncoder(DimEmbed, NumHead, DimTfHidden)
        self.drop0 = nn.Dropout(p=Dropout)

        self.drop = nn.Dropout(p=Dropout)
        self.fnn = nn.Linear(DimEmbed, lID)
        self.act = nn.Sigmoid()

        self.drop2 = nn.Dropout(p=Dropout)
        self.fnn2 = nn.Linear(DimEmbed, DimEmbed)
        self.act2 = nn.ReLU()

    def forward(self, smiles):
        """Return predictions of shape (batch, lID) for a batch of padded symbol-ID sequences."""
        # Prepend the CLS token. It is built from CLS_ID (instead of a literal
        # tensor of ones) and on the same device as the input, so the forward
        # pass does not depend on the module-level `device`.
        cls = torch.full((smiles.size(0), 1), CLS_ID, dtype=torch.long, device=smiles.device)
        x = torch.cat((cls, smiles), dim=1)
        padding_mask = (x == PAD_ID)
        x = self.drop0(self.symbol_encoder(x) + self.positional_encoder(x))
        x = self.smiles_encoder(x, padding_mask)  # x : OdorCode

        # Only the encoder output at the CLS position feeds the head.
        x = self.act2(self.fnn2(self.drop2(x[:,0,:])))

        if task_type == 'classification':
            return self.act(self.fnn(self.drop(x)))
        else:
            return self.fnn(self.drop(x))

# function for conducting experiment

In [None]:
from sklearn.metrics import roc_auc_score
from sklearn.metrics import average_precision_score
from sklearn.metrics import r2_score

def main_program(rate1, hpara):
    '''
    Fine-tune the pretrained encoder with 5-fold cross-validation and log to TensorBoard.

    For every fold a fresh Model is built, the pretrained symbol_encoder and
    smiles_encoder weights are loaded, everything in those two modules is
    frozen except the last transformer layer, and the last layer plus the two
    head layers (fnn2, fnn) are trained for NumEpoch epochs. After each epoch
    the model is evaluated on the held-out part of the fold.

    rate1: multiplier of the learning rate of the last transformer layer
           (its learning rate is hpara['LearningRate'] * rate1).
    hpara: hyper-parameter dict, e.g.
        {
            'pretrain_path': modelsave,
            'DimEmbed': 256,
            'DimTfHidden': 256,
            'NumHead': 8,
            'NumLayers': 10,
            'MaskRate': 0.5,
            'LearningRate': 0.0003,
            'Dropout': 0.3
        }

    Reads the module-level names task_type, smiles_list, canonical_smiles_list,
    labels_vec_list, lID, ID2label, NumEpoch, batch_size, TotalSize,
    ModelEpoch, device and writter. Returns nothing; results are written to
    `writter`.
    '''
    torch.cuda.empty_cache()

    # NOTE(review): '%.2f' renders small learning rates such as 0.0003 as
    # "0.00", so runs that differ only in learning rate share a name. The
    # format is kept so that existing TensorBoard run names stay comparable.
    para_name = '%d_%d_%d_%d_%.2f_%.2f_%.2f' % (hpara['DimEmbed'], hpara['DimTfHidden'], 
    hpara['NumHead'], hpara['NumLayers'], hpara['MaskRate'], hpara['LearningRate'], hpara['Dropout'])

    list_train_loss = []
    list_train_metrics = [] # micro f1 or rmse
    list_test_loss = []
    list_test_metrics = [] # micro f1 or rmse

    # The original compared against the misspelt 'classfication', so BCELoss
    # was never selected.
    # NOTE(review): read_file() stores missing labels as -1.0 and they are not
    # masked here; confirm the handling of such labels before using BCELoss
    # on a dataset with missing values.
    if task_type == 'classification':
        criterion_label = nn.BCELoss()
    else:
        criterion_label = nn.MSELoss()

    fold_cv = 5
    kf = KFold(n_splits=fold_cv, shuffle=True)

    dataset_idxs = list(range(len(smiles_list)))

    # Data for TensorBoard.
    # Classification records F1 scores, regression records the RMSE.
    avgOD_metrics = torch.zeros(fold_cv, NumEpoch).to(device) # micro F1 or RMSE
    eachODFold = torch.zeros(fold_cv, NumEpoch, lID).to(device) # mean over lID gives the macro F1

    avgOD_metrics_test = torch.zeros(fold_cv, NumEpoch).to(device) # micro F1 or RMSE
    eachODFold_test = torch.zeros(fold_cv, NumEpoch, lID).to(device) # mean over lID gives the macro F1

    # Best test score of every fold, with the predictions and labels of that epoch.
    if task_type == 'classification':
        best_metrics = [0 for _ in range(fold_cv)] # macro f1 (higher is better)
    else:
        best_metrics = [1e9 for _ in range(fold_cv)] # rmse (lower is better)
    best_logits_pred = [None for _ in range(fold_cv)]
    best_labels = [None for _ in range(fold_cv)]

    eps = 1e-9

    def f1_from_counts(tps, pred_pos, real_pos):
        "Return (per-label F1 tensor, micro-averaged F1 float) from per-label counts."
        precision = tps/(pred_pos + eps)
        recall    = tps/(real_pos + eps)
        per_label_f1 = 2.0*precision*recall/(precision+recall+eps) # (lID, )
        # eps also guards the micro average, which would otherwise be NaN
        # when nothing is predicted positive.
        micro_precision = tps.sum()/(pred_pos.sum() + eps)
        micro_recall    = tps.sum()/(real_pos.sum() + eps)
        micro_f1 = 2*micro_precision*micro_recall/(micro_precision+micro_recall+eps)
        return per_label_f1, micro_f1.item()

    # Re-root a single SMILES at a random atom (data augmentation).
    def change_smiles(smi):
        if smi.find('.') >= 0:
            return smiles_str2smiles(smi)

        mol = Chem.MolFromSmiles(smi)

        num_atoms = mol.GetNumAtoms()
        pos_list = list(range(num_atoms))

        pos = random.choice(pos_list)
        new_smi = Chem.MolToSmiles(mol, rootedAtAtom=pos)
        if len(new_smi) < LIMIT_SMILES_LENGTH:
            return smiles_str2smiles(new_smi)
        else:
            return smiles_str2smiles(smi)

    # Shared tail of the two pretrained-weight file names.
    pretrain_suffix = (' D'+str(hpara['DimEmbed'])+'.Hidden'+str(hpara['DimTfHidden'])+'.Head'+ str(hpara['NumHead'])
                       +'.L'+str(hpara['NumLayers'])+'.R'+str(hpara['MaskRate'])+'.S'+str(TotalSize)+'-epoch.'+str(ModelEpoch))

    for _fold, (train_idxs, test_idxs) in enumerate(kf.split(dataset_idxs)):
        #============================= train_dataloader ===========================
        train_dataset_idxs = Subset(dataset_idxs, train_idxs)
        train_dataloader = DataLoader(train_dataset_idxs, batch_size, shuffle=True)

        #============================== test_dataloader =====================================
        test_dataset_idxs  = Subset(dataset_idxs, test_idxs)
        # Evaluation does not depend on sample order, so no shuffling is needed.
        test_dataloader = DataLoader(test_dataset_idxs, batch_size, shuffle=False)

        #====================== model and optimizer ==============
        label_estimator = Model().to(device)

        # Load the pretrained parameters of the symbol and SMILES encoders.
        print('fold = ', _fold)
        pf_symbol_en = hpara['pretrain_path']+'/'+'OdorCode-40 symbol_encoder'+pretrain_suffix
        pf_smiles_en = hpara['pretrain_path']+'/'+'OdorCode-40 smiles_encoder'+pretrain_suffix
        print('reading '+pf_symbol_en)
        # map_location lets checkpoints saved on a GPU be loaded on a CPU-only machine.
        label_estimator.symbol_encoder.load_state_dict(torch.load(pf_symbol_en, map_location=device))
        print('reading '+pf_smiles_en)
        label_estimator.smiles_encoder.load_state_dict(torch.load(pf_smiles_en, map_location=device))

        # Freeze the pretrained modules except the last transformer layer.
        for param in label_estimator.symbol_encoder.parameters():
            param.requires_grad = False

        for param in label_estimator.smiles_encoder.parameters():
            param.requires_grad = False
        for param in label_estimator.smiles_encoder.transformer_encoder.layers[-1].parameters():
            param.requires_grad = True

        optimizer_label_estimator = optim.Adam([{'params': label_estimator.smiles_encoder.transformer_encoder.layers[-1].parameters(), 'lr': hpara['LearningRate']*rate1},
                                                    {'params': label_estimator.fnn2.parameters(), 'lr': hpara['LearningRate']},
                                                    {'params': label_estimator.fnn.parameters(), 'lr': hpara['LearningRate']}])

        for epoch in range(NumEpoch):
            #---------------- train step -----------------
            sample_num = 0
            total_loss = 0

            if task_type == 'classification':
                total_tps = torch.zeros(lID).to(device)
                total_real_pos = torch.zeros(lID).to(device)
                total_pred_pos = torch.zeros(lID).to(device)

            label_estimator.train()

            for idxs in train_dataloader:
                # Every training SMILES is re-rooted at a random atom.
                smiles = pad_sequence([torch.tensor([BOS_ID]+change_smiles(canonical_smiles_list[idx])+[EOS_ID]) for idx in idxs], batch_first=True).to(device)
                labels = torch.tensor([labels_vec_list[idx] for idx in idxs],dtype=torch.float).to(device)

                optimizer_label_estimator.zero_grad()

                estimated_labels = label_estimator(smiles)

                loss = criterion_label(estimated_labels, labels)

                loss.backward()     # back-propagation

                optimizer_label_estimator.step()  # parameter update

                total_loss += loss.item()*len(idxs)
                sample_num += len(idxs)

                if task_type == 'classification':
                    total_tps += seek_TP(estimated_labels, labels).sum(0)
                    total_real_pos += (positive(labels).int()).sum(0)
                    total_pred_pos += (positive(estimated_labels).int()).sum(0)

            mean_loss = total_loss / sample_num
            list_train_loss.append(mean_loss)

            # Metrics.
            if task_type == 'classification':
                f1, metrics = f1_from_counts(total_tps, total_pred_pos, total_real_pos)
                eachODFold[_fold, epoch] = f1
            else:
                metrics = math.sqrt(mean_loss)
                eachODFold[_fold, epoch, 0] = metrics

            list_train_metrics.append(metrics)

            avgOD_metrics[_fold, epoch] = metrics # kept for TensorBoard

            # TensorBoard curves. The fold-averaged curves are written while
            # the last fold is running, when all folds have a value for this epoch.
            writter.add_scalars('fold%s-microF1orRMSE/train' % _fold, {para_name: metrics}, epoch)
            if _fold == fold_cv-1:
                macro_f1 = torch.sum(eachODFold[:, epoch, :]).item() / (fold_cv*lID)
                writter.add_scalars('AvgFold-macroF1orRMSE/train', {para_name: macro_f1}, epoch) 
                for i in range(lID):
                    od_f1 = torch.sum(eachODFold[:, epoch, i]).item() / fold_cv
                    writter.add_scalars('AvgFold-%s/train' % ID2label[i], {para_name:od_f1}, epoch)


            #--------------- test step ------------------
            sample_num = 0
            total_loss = 0

            if task_type == 'classification':
                total_tps = torch.zeros(lID).to(device)
                total_real_pos = torch.zeros(lID).to(device)
                total_pred_pos = torch.zeros(lID).to(device)

            label_estimator.eval()

            labels_batches = []
            preds_batches = []

            # No gradients are needed for evaluation; this also keeps the
            # collected predictions free of autograd graphs.
            with torch.no_grad():
                for idxs in test_dataloader:
                    smiles = pad_sequence([torch.tensor([BOS_ID]+smiles_list[idx]+[EOS_ID]) for idx in idxs], batch_first=True).to(device)
                    labels = torch.tensor([labels_vec_list[idx] for idx in idxs],dtype=torch.float).to(device)

                    estimated_labels = label_estimator(smiles)

                    loss = criterion_label(estimated_labels, labels)

                    total_loss += loss.item()*len(idxs)
                    sample_num += len(idxs)

                    if task_type == 'classification':
                        total_tps += seek_TP(estimated_labels, labels).sum(0)
                        total_real_pos += (positive(labels).int()).sum(0)
                        total_pred_pos += (positive(estimated_labels).int()).sum(0)

                    labels_batches.append(labels)
                    preds_batches.append(estimated_labels)

            labels_test = torch.cat(labels_batches, dim=0)
            preds_test = torch.cat(preds_batches, dim=0)

            mean_loss = total_loss / sample_num
            list_test_loss.append(mean_loss)

            # Metrics.
            if task_type == 'classification':
                f1, metrics = f1_from_counts(total_tps, total_pred_pos, total_real_pos)
                eachODFold_test[_fold, epoch] = f1
            else:
                metrics = math.sqrt(mean_loss)
                eachODFold_test[_fold, epoch, 0] = metrics

            list_test_metrics.append(metrics)

            avgOD_metrics_test[_fold, epoch] = metrics # kept for TensorBoard

            # TensorBoard curves.
            writter.add_scalars('fold%s-microF1orRMSE/test' % _fold, {para_name: metrics}, epoch)
            if _fold == fold_cv-1:
                macro_f1 = torch.sum(eachODFold_test[:, epoch, :]).item() / (fold_cv*lID)
                writter.add_scalars('AvgFold-macroF1orRMSE/test', {para_name: macro_f1}, epoch) 
                f1_eachod = torch.sum(eachODFold_test[:,epoch,:], dim=0) / fold_cv # shape=(lID, )
                for i in range(lID):
                    writter.add_scalars('AvgFold-%s/test' % ID2label[i], {para_name: f1_eachod[i].item()}, epoch)

            # Record the best epoch of THIS fold. The original read `macro_f1`
            # and `i`, which are only assigned while the last fold runs, so it
            # failed on the first fold and would have indexed by label rather
            # than by fold.
            if task_type == 'classification':
                fold_score = eachODFold_test[_fold, epoch].mean().item() # macro F1 of this fold
                improved = fold_score > best_metrics[_fold]
            else:
                fold_score = metrics # RMSE of this fold
                improved = fold_score < best_metrics[_fold]
            # Always keep at least one epoch so the final scores can be computed.
            if improved or best_logits_pred[_fold] is None:
                best_metrics[_fold] = fold_score
                best_logits_pred[_fold] = preds_test.tolist() # shape=(samples, lID)
                best_labels[_fold] = labels_test.tolist()

    # AUC scores (classification) or R^2 (regression) at the best epoch of every fold.
    rocauc_macro = 0
    pr_auc_macro = 0
    rocauc_micro = 0
    pr_auc_micro = 0
    r_2 = 0
    if task_type == 'classification':
        for i in range(fold_cv):
            rocauc_macro += roc_auc_score(best_labels[i], best_logits_pred[i], average='macro')
            pr_auc_macro += average_precision_score(best_labels[i], best_logits_pred[i], average='macro')
            rocauc_micro += roc_auc_score(best_labels[i], best_logits_pred[i], average='micro')
            pr_auc_micro += average_precision_score(best_labels[i], best_logits_pred[i], average='micro')
    else:
        for i in range(fold_cv):
            r_2 += r2_score(best_labels[i], best_logits_pred[i])

    # Hyper-parameter summary for TensorBoard (averages over the folds).
    result_dict = {'macroF1 or RMSE': sum(best_metrics)/fold_cv, 
    'ROCAUC_macro': rocauc_macro/fold_cv, 'ROCAUC_micro': rocauc_micro/fold_cv,
    'PRAUC macro': pr_auc_macro/fold_cv, 'PRAUC micro': pr_auc_micro/fold_cv, 
    'r^2': r_2/fold_cv}

    writter.add_hparams(hpara,
                            result_dict, 
                            run_name=para_name
                            )

# conduct experiment

In [None]:
torch.cuda.empty_cache()

hpara = {
        'pretrain_path': 'modelsave0203',
        'DimEmbed': 256,
        'DimTfHidden': 512,
        'NumHead': 8,
        'NumLayers': 10,
        'MaskRate': 0.5,
        'LearningRate': 0.0003, # learning rate of the downstream model, not of the pretraining model (0.0005 was used for odor-descriptor prediction)
        'Dropout': 0.1
    } 

# Model parameters
DimEmbed = hpara['DimEmbed']     # dimension of the symbol embedding
DimTfHidden = hpara['DimTfHidden']  # hidden dimension of the feed-forward part of the TransformerEncoder
NumHead = hpara['NumHead']        # number of attention heads of the TransformerEncoder
NumLayers = hpara['NumLayers']      # number of transformer layers of the TransformerEncoder
NormFirst = True   # chosen to match Vision Transformer
Activation = 'gelu' # chosen to match BERT and Vision Transformer

# These three values are part of the pretrained-weight file names read by main_program.
MaskRate = hpara['MaskRate']
TotalSize = 100000
ModelEpoch = 600

# Training parameters
LearningRate = hpara['LearningRate']
NumEpoch = 200
Dropout = hpara['Dropout']
batch_size = 32
InitRange = 0.1

# TensorBoard writer
record_dict = 'tensorboard_logs/torch/0307-5fold/ESOL/'
writter = SummaryWriter(record_dict)

# Close the writer even if the experiment fails, so that everything logged so
# far is flushed to disk.
try:
    main_program(1.0, hpara)
finally:
    writter.close()