# 0 Setting

In [43]:
# Parameter Setting
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
folder_name = 'Research-(D5) Synthesized input model'
pretrained_model_name = 'model_microsoft.ckpt'

config = {
    'learning_rate': 1e-4,
    'batch_size': 32,
    'seq_length': 5,

    'shuffle': False,
    'criterion': torch.nn.CrossEntropyLoss(),
    'seed': 42,
    'valid_ratio': 0.2,
    'test_ratio': 0.2,

    'n_epochs': 3000,
    'early_stop': 50,
    'device': device,

    'h_text_size': 64,
    'h_c_size': 1,
    'h_news_size': 1,
    'h_tech_size': 6,
    'h_size': 32,
}


sector_id_list = [ # Not confirmed
    "XLK",  # Information Technology
    "XLV",  # Health Care
    "XLF",  # Financials
    "XLI",  # Industrials
    "XLY",  # Consumer Discretionary
    "XLE",  # Energy
    "XLB",  # Materials
    "XLC",  # Communication Services
    "XLU",  # Utilities
    "XLRE",  # Real Estate
    "XLP"  # Consumer Staples
]

company_list = [
    "Information Technology",
    "Health Care",
    "Financials",
    "Industrials",
    "Consumer Discretionary",
    "Energy",
    "Materials",
    "Communication Services",
    "Utilities",
    "Real Estate",
    "Consumer Staples"
]

process_id = 0  #26

company_name = company_list[process_id]

config_2 = {'input_path': '/content/drive/MyDrive/Colab Notebooks/'+folder_name+'/data/2_'+company_name+'_for_model.csv',
            'save_path': '/content/drive/MyDrive/Colab Notebooks/'+folder_name+'/model_saved/model_reproduce.ckpt',
            # 'pretrained_model_path': '/content/drive/MyDrive/Colab Notebooks/'+folder_name+'/premodel/' + pretrained_model_name,
            # 'continue_model_path': '/content/drive/MyDrive/Colab Notebooks/'+folder_name+'/model_saved/model_1.ckpt'
            }

feature = [
    # X_1
    'input_ids',
    'attention_mask',
    'section_dummy',
    'publication_dummy',

    # X_2
    # 1. tech indicator
    # 'Open',
    # 'High',
    # 'Low',
    # 'Close',
    # 'Volume',
    # 'Dividends',
    # 'Stock Splits',
    'today_return',
    # 'today_return_cate',
    # 'Sma',
    # 'Rsi',
    # 'Kd',
    # 'Ema_12',
    # 'Ema_26',
    # 'Macd',
    # 'sentiment',

    # 2. market index
    '^DJI',
    '^GSPC',
    '^NDX',
    '^IXIC',
    '^SOX',
    '^NYA',

    # y
    # '1_day_return',
    # '2_day_return',
    # '3_day_return',
    # '4_day_return',
    # '5_day_return',
    # '1_day_return_cate',
    # '2_day_return_cate',
    # '3_day_return_cate',
    # '4_day_return_cate',
    # '5_day_return_cate',
    # '^DJI', '^DJI_1_day_return', '^GSPC', '^GSPC_1_day_return',
    #    '^NDX', '^NDX_1_day_return', '^IXIC', '^IXIC_1_day_return', '^SOX',
    #    '^SOX_1_day_return',
    # 'excess_return_^DJI',
    # 'excess_return_^DJI_cate',
    # 'excess_return_^GSPC',
    'excess_return_^GSPC_cate',
    # 'excess_return_^NDX',
    # 'excess_return_^NDX_cate',
    # 'excess_return_^IXIC',
    # 'excess_return_^IXIC_cate',
    # 'excess_return_^SOX',
    # 'excess_return_^SOX_cate',

    # Do not mark the datetime, it's for operation
    'datetime',
    ]

# All the news dataset
# time_start = '2016-01-01T00:00:00'
# time_end = '2020-04-02T00:00:00'

time_start = '2016-01-01T00:00:00'
time_end = '2019-12-31T00:00:00'

print(len(feature)-2)

11


## (1) Import

In [44]:
# Google
from google.colab import drive
drive.mount('/content/drive')

# pip installation
!pip install transformers

# Basic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# PyTorch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from transformers import BertTokenizer, BertModel, BertConfig

# others
from datetime import datetime, timedelta
from tqdm import tqdm
from torchsummary import summary
import ast

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [45]:
def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Set seed for reproducibility
same_seed(config['seed'])


In [46]:
df = pd.read_csv(config_2['input_path'])
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits',
       'Capital Gains', 'today_return', 'today_return_cate', '1_day_return',
       '2_day_return', '3_day_return', '4_day_return', '5_day_return',
       '1_day_return_cate', '2_day_return_cate', '3_day_return_cate',
       '4_day_return_cate', '5_day_return_cate', 'Sma', 'Rsi', 'Kd', 'Ema_12',
       'Ema_26', 'Macd', 'datetime', '^DJI', '^DJI_1_day_return', '^GSPC',
       '^GSPC_1_day_return', '^NDX', '^NDX_1_day_return', '^IXIC',
       '^IXIC_1_day_return', '^SOX', '^SOX_1_day_return', '^NYA',
       '^NYA_1_day_return', 'excess_return_^DJI', 'excess_return_^DJI_cate',
       'excess_return_^GSPC', 'excess_return_^GSPC_cate', 'excess_return_^NDX',
       'excess_return_^NDX_cate', 'excess_return_^IXIC',
       'excess_return_^IXIC_cate', 'excess_return_^SOX',
       'excess_return_^SOX_cate', 'excess_return_^NYA',
       'excess_return_^NYA_cate', 'input_ids', 'attention_mask',
       'section_dummy', '

In [47]:
df = df.sort_values(by='datetime', ascending=True)
df.shape

(1070, 56)

In [48]:
# Only contain selected features
df = df[feature]
df.columns

Index(['input_ids', 'attention_mask', 'section_dummy', 'publication_dummy',
       'today_return', '^DJI', '^GSPC', '^NDX', '^IXIC', '^SOX', '^NYA',
       'excess_return_^GSPC_cate', 'datetime'],
      dtype='object')

## (2) check nan

In [49]:
df[df.isna().any(axis=1)]

Unnamed: 0,input_ids,attention_mask,section_dummy,publication_dummy,today_return,^DJI,^GSPC,^NDX,^IXIC,^SOX,^NYA,excess_return_^GSPC_cate,datetime


In [50]:
df.isnull().sum()

input_ids                   0
attention_mask              0
section_dummy               0
publication_dummy           0
today_return                0
^DJI                        0
^GSPC                       0
^NDX                        0
^IXIC                       0
^SOX                        0
^NYA                        0
excess_return_^GSPC_cate    0
datetime                    0
dtype: int64

In [51]:
df = df.dropna()
df = df.reset_index(drop=True)
df.isnull().sum()

input_ids                   0
attention_mask              0
section_dummy               0
publication_dummy           0
today_return                0
^DJI                        0
^GSPC                       0
^NDX                        0
^IXIC                       0
^SOX                        0
^NYA                        0
excess_return_^GSPC_cate    0
datetime                    0
dtype: int64

## (2) Time Period Selection

In [52]:
# We use index to filter for time periods
df = df[(df['datetime']> time_start) & (df['datetime'] < time_end)]

# Drop datetime after using it
df.drop(columns=['datetime'], inplace=True)
df.shape

(1006, 12)

## (3) Transform str back to list

In [53]:
# 将字符串转换回列表的函数
def string_to_list(s):
    return ast.literal_eval(s)

# 将列中的字符串转换回列表
df['input_ids'] = df['input_ids'].apply(string_to_list)
df['attention_mask'] = df['attention_mask'].apply(string_to_list)
df['section_dummy'] = df['section_dummy'].apply(string_to_list)
df['publication_dummy'] = df['publication_dummy'].apply(string_to_list)

In [54]:
df['input_ids'][0][0]

[101,
 6522,
 1005,
 1055,
 7069,
 8654,
 1042,
 29401,
 2003,
 2066,
 1037,
 6097,
 8654,
 2008,
 3216,
 3645,
 102,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [55]:
df['attention_mask'][0][0]

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [56]:
df['section_dummy'][0][0]

[0, 0, 0, 0, 0, 1, 0, 0, 0]

## (3) List: Same amount of elements

In [57]:
input_ids_list_length = len(df['input_ids'][0][0])
attention_mask_list_length = len(df['attention_mask'][0][0])
section_dummy_list_length = len(df['section_dummy'][0][0])
publication_dummy_list_length = len(df['publication_dummy'][0][0])

In [58]:

# 找到最大的內部列表長度
max_inner_length = max(df['input_ids'].apply(len))

# 定義一個函數來填充內部列表，使其長度達到最大值
def pad_inner_list(lst, zero_list):
    while len(lst) < max_inner_length:
        lst.append(zero_list)  # 這裡可以填充任何你想要的值，例如 None

# 將 "input_ids" 列中的每個內部列表填充到最大長度
df['input_ids'].apply(pad_inner_list, zero_list=[0] * input_ids_list_length)
df['attention_mask'].apply(pad_inner_list, zero_list=[0] * attention_mask_list_length)
df['section_dummy'].apply(pad_inner_list, zero_list=[0] * section_dummy_list_length)
df['publication_dummy'].apply(pad_inner_list, zero_list=[0] * publication_dummy_list_length)
df

Unnamed: 0,input_ids,attention_mask,section_dummy,publication_dummy,today_return,^DJI,^GSPC,^NDX,^IXIC,^SOX,^NYA,excess_return_^GSPC_cate
0,"[[101, 6522, 1005, 1055, 7069, 8654, 1042, 294...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, ...",0.004993,-0.014739,-0.012531,0.002854,0.001111,0.008979,-0.001845,0
1,"[[101, 13938, 1005, 1055, 8292, 2015, 2355, 25...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, ...",-0.005660,0.000651,0.001455,-0.006175,-0.005370,-0.012003,0.002104,1
2,"[[101, 2013, 2745, 4027, 2000, 2887, 1043, 102...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, ...",0.002407,-0.014475,-0.010663,0.007758,0.004570,-0.011092,-0.003948,1
3,"[[101, 2129, 2146, 2097, 1005, 27830, 3201, 10...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, ...",-0.009316,-0.022161,-0.021271,-0.009984,-0.009917,-0.010667,-0.010350,0
4,"[[101, 2131, 5736, 5222, 7485, 1005, 1055, 358...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 1, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, ...",-0.016438,-0.010456,-0.012302,-0.015069,-0.016601,-0.019801,-0.016923,1
...,...,...,...,...,...,...,...,...,...,...,...,...
1001,"[[101, 22950, 3049, 2758, 2009, 1521, 2222, 30...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, ...",-0.001423,-0.001999,-0.000642,-0.000665,-0.000238,0.001204,-0.000562,1
1002,"[[101, 2025, 3243, 5306, 1010, 2021, 3056, 205...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, ...",0.005359,0.002871,0.003938,0.006924,0.005817,-0.000499,0.002103,0
1003,"[[101, 2182, 2024, 2184, 1997, 1996, 5476, 100...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,...","[[0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",-0.004117,-0.001049,-0.002220,-0.003961,-0.004735,-0.006431,-0.002340,1
1004,"[[101, 1996, 3595, 2369, 9733, 7427, 6959, 200...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [1, ...",-0.005550,-0.006722,-0.005802,-0.006463,-0.006492,-0.006593,-0.005384,1


In [59]:
df['section_dummy'][0]

[[0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1],
 [1, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 1],
 [0, 0, 1, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 1, 0],
 [0, 0, 0, 0, 0, 1, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0, 0, 0, 0, 0, 0, 0],
 [0, 0, 0,

In [60]:
# 使用 apply 方法計算每個列表中元素的數量
temp = df['input_ids'].apply(len)

# 打印 DataFrame
temp

0       61
1       61
2       61
3       61
4       61
        ..
1001    61
1002    61
1003    61
1004    61
1005    61
Name: input_ids, Length: 1006, dtype: int64

In [61]:
# 計算"input_ids"列中所有list的平均長度
average_length = df['input_ids'].apply(len).mean()

# 計算"input_ids"列中最長的list的長度
max_length = df['input_ids'].apply(len).max()

# 計算"input_ids"列中最短的list的長度
min_length = df['input_ids'].apply(len).min()

# 打印結果
print(f"平均長度: {average_length}")
print(f"最長長度: {max_length}")
print(f"最短長度: {min_length}")


平均長度: 61.0
最長長度: 61
最短長度: 61


## int to float (section, publication)

In [62]:
type(df.input_ids[0][0])

list

In [63]:
def recursive_convert_to_float(item):
    if isinstance(item, list):
        return [recursive_convert_to_float(x) if x is not None else None for x in item]
    else:
        return float(item) if item is not None else None

# 使用 apply 方法將函數應用於每個元素
df['section_dummy'] = df['section_dummy'].apply(recursive_convert_to_float)
df['publication_dummy'] = df['publication_dummy'].apply(recursive_convert_to_float)

In [64]:
df['section_dummy'][0]

[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0],
 [0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0

## (4) Train_test_split

In [65]:
# 1. Set up X, y
to_remove_list = ['datetime', 'excess_return_^GSPC_cate']

# Filter out values in to_remove_list
filtered_list = [x for x in feature if x not in to_remove_list]

X = df[filtered_list]
y = df['excess_return_^GSPC_cate']

In [66]:
# Check X, y shape
print('X:', X.shape)
print('y:', y.shape)

X: (1006, 11)
y: (1006,)


In [67]:
# 2. train_test_split
# val dataset for final examination

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=config['test_ratio'], random_state=config['seed'], shuffle=config['shuffle'])
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=config['valid_ratio'], random_state=config['seed'], shuffle=config['shuffle'])
X_train


Unnamed: 0,input_ids,attention_mask,section_dummy,publication_dummy,today_return,^DJI,^GSPC,^NDX,^IXIC,^SOX,^NYA
0,"[[101, 6522, 1005, 1055, 7069, 8654, 1042, 294...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.004993,-0.014739,-0.012531,0.002854,0.001111,0.008979,-0.001845
1,"[[101, 13938, 1005, 1055, 8292, 2015, 2355, 25...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",-0.005660,0.000651,0.001455,-0.006175,-0.005370,-0.012003,0.002104
2,"[[101, 2013, 2745, 4027, 2000, 2887, 1043, 102...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.002407,-0.014475,-0.010663,0.007758,0.004570,-0.011092,-0.003948
3,"[[101, 2129, 2146, 2097, 1005, 27830, 3201, 10...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",-0.009316,-0.022161,-0.021271,-0.009984,-0.009917,-0.010667,-0.010350
4,"[[101, 2131, 5736, 5222, 7485, 1005, 1055, 358...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,...",-0.016438,-0.010456,-0.012302,-0.015069,-0.016601,-0.019801,-0.016923
...,...,...,...,...,...,...,...,...,...,...,...
638,"[[101, 1996, 22098, 1060, 2629, 16014, 8231, 2...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.014753,0.003434,0.007245,0.015417,0.013306,0.022504,0.003750
639,"[[101, 25130, 6187, 10175, 8017, 2072, 1998, 6...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,...",-0.001372,0.002606,0.001519,-0.002311,-0.000635,0.001301,0.002891
640,"[[101, 1996, 2381, 2369, 1996, 4182, 15950, 90...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,...",-0.001102,-0.002969,-0.001737,-0.001670,-0.000575,-0.000095,0.000009
641,"[[101, 19102, 1521, 1055, 6745, 9088, 1055, 26...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",-0.003571,0.000678,-0.000970,-0.003916,-0.002917,-0.004205,0.000915


## (5) Scaler

In [68]:
scale_feature = [
    # X_2
    # 1. tech indicator
    # 'Open',
    # 'High',
    # 'Low',
    # 'Close',
    # 'Volume',
    # 'Dividends',
    # 'Stock Splits',
    'today_return',
    # 'Today_trend_cate',
    # 'Sma',
    # 'Rsi',
    # 'Kd',
    # 'Ema_12',
    # 'Ema_26',
    # 'Macd',
    # 'sentiment',

    # 2. market index
    '^DJI',
    '^GSPC',
    '^NDX',
    '^IXIC',
    '^SOX',
    # 'datetime'
    ]

def CustomScaler(X_train, X_val, X_test):
  scaler = MinMaxScaler()
  for i in scale_feature:

    # 對特定欄位進行標準化
    X_train_scaled = scaler.fit_transform(X_train[[i]])
    X_val_scaled = scaler.transform(X_val[[i]])
    X_test_scaled = scaler.transform(X_test[[i]])

    # 將標準化後的值重新賦值給 DataFrame
    X_train[i] = X_train_scaled
    X_val[i] = X_val_scaled
    X_test[i] = X_test_scaled

  return X_train, X_val, X_test

X_train, X_val, X_test = CustomScaler(X_train, X_val, X_test)

X_train

Unnamed: 0,input_ids,attention_mask,section_dummy,publication_dummy,today_return,^DJI,^GSPC,^NDX,^IXIC,^SOX,^NYA
0,"[[101, 6522, 1005, 1055, 7069, 8654, 1042, 294...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.564830,0.355624,0.377236,0.525722,0.515721,0.591670,-0.001845
1,"[[101, 13938, 1005, 1055, 8292, 2015, 2355, 25...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.442231,0.557596,0.578553,0.424253,0.436452,0.374338,0.002104
2,"[[101, 2013, 2745, 4027, 2000, 2887, 1043, 102...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.535075,0.359087,0.404126,0.580837,0.558035,0.383766,-0.003948
3,"[[101, 2129, 2146, 2097, 1005, 27830, 3201, 10...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.400162,0.258224,0.251419,0.381444,0.380842,0.388167,-0.010350
4,"[[101, 2131, 5736, 5222, 7485, 1005, 1055, 358...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,...",0.318206,0.411837,0.380522,0.324291,0.299087,0.293563,-0.016923
...,...,...,...,...,...,...,...,...,...,...,...
638,"[[101, 1996, 22098, 1060, 2629, 16014, 8231, 2...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.677151,0.594123,0.661905,0.666913,0.664887,0.731776,0.003750
639,"[[101, 25130, 6187, 10175, 8017, 2072, 1998, 6...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0,...",0.491584,0.583255,0.579473,0.467672,0.494370,0.512147,0.002891
640,"[[101, 1996, 2381, 2369, 1996, 4182, 15950, 90...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0,...",0.494692,0.510084,0.532605,0.474877,0.495106,0.497680,0.000009
641,"[[101, 19102, 1521, 1055, 6745, 9088, 1055, 26...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0]...","[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",0.466278,0.557952,0.543649,0.449631,0.466455,0.455104,0.000915


## (6) Check number

In [69]:
def calculate_class_stats(y):
    class_counts = y.value_counts()
    total_samples = len(y)
    class_ratios = class_counts / total_samples
    return class_counts, class_ratios

# 計算類別數量和比例
train_class_counts, train_class_ratios = calculate_class_stats(y_train)
val_class_counts, val_class_ratios = calculate_class_stats(y_val)
test_class_counts, test_class_ratios = calculate_class_stats(y_test)

# 創建包含數量和比例的 DataFrame
class_stats = pd.DataFrame({
    'Train Count': train_class_counts,
    'Train Ratio': train_class_ratios,
    'Validation Count': val_class_counts,
    'Validation Ratio': val_class_ratios,
    'Test Count': test_class_counts,
    'Test Ratio': test_class_ratios
})

# 打印 DataFrame
print(class_stats)


   Train Count  Train Ratio  Validation Count  Validation Ratio  Test Count  \
1          344     0.534992                91          0.565217         114   
0          299     0.465008                70          0.434783          88   

   Test Ratio  
1    0.564356  
0    0.435644  


In [70]:
# Time period
print('Time Period')
print('From:', time_start)
print('To:', time_end, '\n')

# Sample size
print('Sample size:', X.shape[0])
print('Feature:', X.columns, '\n')
print('Target:', y.name, '\n')
print('Train: Val: Test =', X_train.shape[0], X_test.shape[0], X_val.shape[0])

Time Period
From: 2016-01-01T00:00:00
To: 2019-12-31T00:00:00 

Sample size: 1006
Feature: Index(['input_ids', 'attention_mask', 'section_dummy', 'publication_dummy',
       'today_return', '^DJI', '^GSPC', '^NDX', '^IXIC', '^SOX', '^NYA'],
      dtype='object') 

Target: excess_return_^GSPC_cate 

Train: Val: Test = 643 202 161


# Model

## (1) Dataset & Dataloader

In [71]:
# Dataset
X_1 =['input_ids', 'attention_mask', 'section_dummy', 'publication_dummy']


class CustomDataset(Dataset):
    def __init__(self, X, y, config):
        # X_1
        self.input_ids = X['input_ids']
        self.attention_mask = X['attention_mask']
        self.section = X['section_dummy']
        self.publication = X['publication_dummy']

        # X_2
        self.X_2 = torch.tensor(X.drop(columns=X_1).values, dtype=torch.float)

        # y
        self.y = torch.tensor(y.values, dtype=torch.long)

        # other setting
        self.len = X.shape[0]
        self.seq_length = config['seq_length']

    def __getitem__(self,idx):
        # X_1
        input_ids_list = self.input_ids[idx : idx + self.seq_length].tolist() # All to list
        input_ids = torch.tensor(input_ids_list) # Then to tensor
        attention_mask_list = self.attention_mask[idx : idx + self.seq_length].tolist()
        attention_mask = torch.tensor(attention_mask_list)
        section_list = self.section[idx : idx + self.seq_length].tolist()
        section = torch.tensor(section_list)
        publication_list = self.publication[idx : idx + self.seq_length].tolist()
        publication = torch.tensor(publication_list)

        # X_2
        X_2 = self.X_2[idx : idx + self.seq_length]

        # 3. y
        y = self.y[idx + self.seq_length - 1]

        return input_ids, attention_mask, section, publication, X_2, y

    def __len__(self):
        return self.len - self.seq_length

In [72]:
# DataLoader
train_dataset = CustomDataset(X_train, y_train, config)
val_dataset = CustomDataset(X_val, y_val, config)
test_dataset = CustomDataset(X_test, y_test, config)

train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=config['shuffle'], drop_last=True, pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=config['shuffle'], drop_last=True, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=config['shuffle'], drop_last=True, pin_memory=True)

# Check loader output
for batch in train_loader:
    input_ids, attention_mask, section, publication, X_2, y = batch

    # 打印批次数据的形状，以确保它们符合预期
    print("Input IDs shape:", input_ids.shape)
    print("Attention Mask shape:", attention_mask.shape)
    print("Section shape:", section.shape)
    print("Publication shape:", publication.shape)
    print("X_2 shape:", X_2.shape)
    print("Labels shape:", y.shape)

    # print("Input IDs:", input_ids)
    # print("Attention Mask:", attention_mask)
    # print("Section:", section)
    # print("Publication:", publication)
    # print("X_2:", X_2)
    # print("Labels:", y)

    break


Input IDs shape: torch.Size([32, 5, 61, 32])
Attention Mask shape: torch.Size([32, 5, 61, 32])
Section shape: torch.Size([32, 5, 61, 9])
Publication shape: torch.Size([32, 5, 61, 13])
X_2 shape: torch.Size([32, 5, 7])
Labels shape: torch.Size([32])


## (2) Model Architecture

### 1 Premodel

In [73]:
# # model_microsoft
# class PreModel(nn.Module):
#     def __init__(self, base_model):
#         super(PreModel, self).__init__()
#         self.base_model = BertModel.from_pretrained('bert-base-uncased')

#         # 2. FC layers
#         self.fc1 = nn.Linear(base_model.config.hidden_size, 256)
#         self.fc2 = nn.Linear(256, 64)
#         self.fc3 = nn.Linear(64, 1)
#         # self.fc4 = nn.Linear(256, 64)
#         # self.fc5 = nn.Linear(64, 5)

#     def forward(self, input_ids, attention_mask):
#         out = self.base_model(input_ids=input_ids, attention_mask=attention_mask)

#         out = out.pooler_output
#         out = self.fc1(out)
#         out = self.fc2(out)
#         out = self.fc3(out)
#         # out = self.fc4(out)
#         # out = self.fc5(out)

#         return out

### 2 Prediction model

In [74]:
# # New structure: With C
# class MyModel(nn.Module):
#     def __init__(self, base_model, config, element_size, section_length, publication_length, X_2_length, batch_size):
#         super(MyModel, self).__init__()
#         self.seq_length = config['seq_length']
#         self.batch_size = batch_size
#         self.element_size = element_size
#         self.abandon_tensor = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                               0, 0, 0, 0, 0, 0, 0, 0], device=device)

#         self.section_length = section_length
#         self.publication_length = publication_length
#         self.config = config

#         # 1. News
#         # text
#         self.base_model = base_model
#         self.fc1 = nn.Linear(768, 256)
#         self.fc2 = nn.Linear(256, config['h_text_size'])

#         # c
#         self.fc_h_c = nn.Linear(section_length + publication_length, config['h_c_size'])

#         # news (concated)
#         # self.fc_h_news = nn.Linear(config['h_text_size'] + config['h_c_size'], config['h_news_size'])
#         self.fc_h_news = nn.Linear(config['h_text_size'] + config['h_c_size'], config['h_news_size'])

#         # 2. Indicator
#         # tech
#         # self.fc_h_tech = nn.Linear(X_2_length, config['h_tech_size'])

#         # 1&2. converge
#         # news + tech
#         # self.fc_h = nn.Linear(config['h_news_size'] + X_2_length, config['h_size'])

#         # 3. LSTM
#         self.lstm_1 = nn.LSTM(config['h_news_size'] + X_2_length, 64, dropout=0.2, num_layers=2, batch_first=True, bidirectional=False)
#         self.sequential = nn.Sequential(
#             nn.Linear(64, 2)
#         )
#         self.dropout = nn.Dropout(0.2)


#     def forward(self, input_ids, attention_mask, section, publication, X_2):
#         # 1. News
#         flattened_input_ids = input_ids.view(-1, 32)
#         flattened_attention_mask = attention_mask.view(-1, 32)
#         flattened_section = section.view(-1, self.section_length)
#         flattened_publication = publication.view(-1, self.publication_length)

#         e_list = []
#         for i in range(0, flattened_input_ids.size(0), self.element_size):
#           # 获取当前组的子张量
#           sub_input_ids = flattened_input_ids[i:i+self.element_size]
#           sub_attention_mask = flattened_attention_mask[i:i+self.element_size]
#           sub_section = flattened_section[i:i+self.element_size]
#           sub_publication = flattened_publication[i:i+self.element_size]

#           non_zero_mask = (sub_input_ids != 0).any(dim=1)
#           non_zero_input_ids = sub_input_ids[non_zero_mask]
#           non_zero_attention_mask = sub_attention_mask[non_zero_mask]
#           non_zero_section = sub_section[non_zero_mask]
#           non_zero_publication = sub_publication[non_zero_mask]

#           # input_ids, attention_mask
#           out = self.base_model(input_ids=non_zero_input_ids, attention_mask=non_zero_attention_mask)
#           out = out.pooler_output
#           out = self.fc1(out)
#           h_text = self.fc2(out)

#           # section, publication
#           out = torch.cat([non_zero_section, non_zero_publication], dim=1)
#           h_c = self.fc_h_c(out)

#           # h_news
#           out = torch.cat([h_text, h_c], dim=1)
#           out = self.fc_h_news(out)
#           h_news = self.dropout(out)
#           element_mean = torch.mean(h_news, dim=0)
#           e_list.append(element_mean)

#         temp_tensor = torch.stack(e_list)
#         b_tensor = temp_tensor.view(self.batch_size, self.seq_length, self.config['h_news_size'])

#         # 2. Indicator
#         # h_tech
#         # h_tech = self.fc_h_tech(X_2)
#         h_tech = X_2

#         # h
#         h = torch.cat([b_tensor, h_tech], dim=2)
#         # h = self.fc_h(out)

#         # 3. LSTM
#         out, _ = self.lstm_1(h)
#         out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term

#         final_out = self.sequential(out)

#         return final_out


2. New method: C as a scaler

In [75]:
# # New structure: With C
# class MyModel(nn.Module):
#     def __init__(self, base_model, config, element_size, section_length, publication_length, X_2_length, batch_size):
#         super(MyModel, self).__init__()
#         self.seq_length = config['seq_length']
#         self.batch_size = batch_size
#         self.element_size = element_size
#         self.abandon_tensor = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                               0, 0, 0, 0, 0, 0, 0, 0], device=device)

#         self.section_length = section_length
#         self.publication_length = publication_length
#         self.config = config

#         # 1. News
#         # text
#         self.base_model = base_model
#         self.fc1 = nn.Linear(768, 128)
#         self.fc2 = nn.Linear(128, config['h_text_size'])

#         # c
#         self.fc_h_c = nn.Linear(section_length + publication_length, config['h_c_size'])

#         # text -> news
#         self.fc_h_news = nn.Linear(config['h_text_size'], config['h_news_size'])

#         # 3. LSTM
#         self.lstm_1 = nn.LSTM(config['h_news_size'] + X_2_length, 32, dropout=0.2, num_layers=2, batch_first=True, bidirectional=False)
#         self.sequential = nn.Sequential(
#             nn.Linear(32, 2)
#         )
#         self.relu = nn.ReLU()



#     def forward(self, input_ids, attention_mask, section, publication, X_2):
#         # 1. News
#         flattened_input_ids = input_ids.view(-1, 32)
#         flattened_attention_mask = attention_mask.view(-1, 32)
#         flattened_section = section.view(-1, self.section_length)
#         flattened_publication = publication.view(-1, self.publication_length)

#         e_list = []
#         for i in range(0, flattened_input_ids.size(0), self.element_size):
#           # 获取当前组的子张量
#           sub_input_ids = flattened_input_ids[i:i+self.element_size]
#           sub_attention_mask = flattened_attention_mask[i:i+self.element_size]
#           sub_section = flattened_section[i:i+self.element_size]
#           sub_publication = flattened_publication[i:i+self.element_size]

#           non_zero_mask = (sub_input_ids != 0).any(dim=1)
#           non_zero_input_ids = sub_input_ids[non_zero_mask]
#           non_zero_attention_mask = sub_attention_mask[non_zero_mask]
#           non_zero_section = sub_section[non_zero_mask]
#           non_zero_publication = sub_publication[non_zero_mask]

#           # input_ids, attention_mask
#           out = self.base_model(input_ids=non_zero_input_ids, attention_mask=non_zero_attention_mask)
#           out = out.pooler_output
#           out = self.fc1(out)
#           h_text = self.fc2(out)

#           # section, publication
#           out = torch.cat([non_zero_section, non_zero_publication], dim=1)
#           out = self.fc_h_c(out)
#           h_c = self.relu(out)

#           # text * C
#           out = h_text * h_c
#           h_news = self.fc_h_news(out)
#           element_mean = torch.mean(h_news, dim=0)
#           e_list.append(element_mean)

#         temp_tensor = torch.stack(e_list)
#         b_tensor = temp_tensor.view(self.batch_size, self.seq_length, self.config['h_news_size'])

#         h_tech = X_2

#         h = torch.cat([b_tensor, h_tech], dim=2)

#         # 3. LSTM
#         out, _ = self.lstm_1(h)
#         out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term

#         final_out = self.sequential(out)

#         return final_out


In [76]:
# New method: C as a scaler + activation function
class MyModel(nn.Module):
    def __init__(self, base_model, config, element_size, section_length, publication_length, X_2_length, batch_size):
        super(MyModel, self).__init__()
        self.seq_length = config['seq_length']
        self.batch_size = batch_size
        self.element_size = element_size
        self.abandon_tensor = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                              0, 0, 0, 0, 0, 0, 0, 0], device=device)

        self.section_length = section_length
        self.publication_length = publication_length
        self.config = config

        # 1. News
        # text
        self.base_model = base_model
        self.fc1 = nn.Linear(768, config['h_text_size'])
        # self.fc2 = nn.Linear(64, config['h_text_size'])

        # c
        self.fc_h_c = nn.Linear(section_length + publication_length, config['h_c_size'])

        # text -> news
        self.fc_h_news = nn.Linear(config['h_text_size'], config['h_news_size'])

        # 3. LSTM
        self.lstm_1 = nn.LSTM(config['h_news_size'] + X_2_length, 32, dropout=0.2, num_layers=2, batch_first=True, bidirectional=False)
        self.sequential = nn.Sequential(
            nn.Linear(32, 2)
        )
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()



    def forward(self, input_ids, attention_mask, section, publication, X_2):
        # 1. News
        flattened_input_ids = input_ids.view(-1, 32)
        flattened_attention_mask = attention_mask.view(-1, 32)
        flattened_section = section.view(-1, self.section_length)
        flattened_publication = publication.view(-1, self.publication_length)

        e_list = []
        for i in range(0, flattened_input_ids.size(0), self.element_size):
          # 获取当前组的子张量
          sub_input_ids = flattened_input_ids[i:i+self.element_size]
          sub_attention_mask = flattened_attention_mask[i:i+self.element_size]
          sub_section = flattened_section[i:i+self.element_size]
          sub_publication = flattened_publication[i:i+self.element_size]

          non_zero_mask = (sub_input_ids != 0).any(dim=1)
          non_zero_input_ids = sub_input_ids[non_zero_mask]
          non_zero_attention_mask = sub_attention_mask[non_zero_mask]
          non_zero_section = sub_section[non_zero_mask]
          non_zero_publication = sub_publication[non_zero_mask]

          # input_ids, attention_mask
          out = self.base_model(input_ids=non_zero_input_ids, attention_mask=non_zero_attention_mask)
          out = out.pooler_output
          h_text = self.fc1(out)
          # h_text = self.fc2(out)

          # section, publication
          out = torch.cat([non_zero_section, non_zero_publication], dim=1)
          out = self.fc_h_c(out)
          h_c = self.relu(out)

          # text * C
          out = h_text * h_c
          out = self.fc_h_news(out)
          h_news = self.relu(out)

          element_mean = torch.mean(h_news, dim=0)
          e_list.append(element_mean)

        temp_tensor = torch.stack(e_list)
        b_tensor = temp_tensor.view(self.batch_size, self.seq_length, self.config['h_news_size'])

        h_tech = X_2

        h = torch.cat([b_tensor, h_tech], dim=2)

        # 3. LSTM
        out, _ = self.lstm_1(h)
        out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term

        final_out = self.sequential(out)

        return final_out

3. Simplified model
without c

In [77]:
# # New structure: Save Computation
# class MyModel(nn.Module):
#     def __init__(self, base_model, config, element_size, section_length, publication_length, X_2_length, batch_size):
#         super(MyModel, self).__init__()
#         self.seq_length = config['seq_length']
#         self.batch_size = batch_size
#         self.element_size = element_size
#         self.abandon_tensor = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                               0, 0, 0, 0, 0, 0, 0, 0], device=device)

#         self.section_length = section_length
#         self.publication_length = publication_length
#         self.config = config

#         # 1. News
#         # text
#         self.base_model = base_model
#         self.fc1 = nn.Linear(768, 128)
#         self.fc2 = nn.Linear(128, config['h_text_size'])


#         # 1&2. converge
#         # news + tech
#         # self.fc_h = nn.Linear(config['h_text_size'] + config['h_tech_size'], config['h_size'])

#         # 3. LSTM
#         self.lstm_1 = nn.LSTM(config['h_text_size']+X_2_length, 32, dropout=0.2, num_layers=3, batch_first=True, bidirectional=False)
#         self.sequential = nn.Sequential(
#             nn.Linear(32, 2)
#         )
#         self.dropout = nn.Dropout(0.2)


#     def forward(self, input_ids, attention_mask, section, publication, X_2):
#         # 1. News
#         flattened_input_ids = input_ids.view(-1, 32)
#         flattened_attention_mask = attention_mask.view(-1, 32)
#         flattened_section = section.view(-1, self.section_length)
#         flattened_publication = publication.view(-1, self.publication_length)

#         e_list = []
#         for i in range(0, flattened_input_ids.size(0), self.element_size):
#           # 获取当前组的子张量
#           sub_input_ids = flattened_input_ids[i:i+self.element_size]
#           sub_attention_mask = flattened_attention_mask[i:i+self.element_size]
#           sub_section = flattened_section[i:i+self.element_size]
#           sub_publication = flattened_publication[i:i+self.element_size]

#           non_zero_mask = (sub_input_ids != 0).any(dim=1)
#           non_zero_input_ids = sub_input_ids[non_zero_mask]
#           non_zero_attention_mask = sub_attention_mask[non_zero_mask]
#           non_zero_section = sub_section[non_zero_mask]
#           non_zero_publication = sub_publication[non_zero_mask]

#           # input_ids, attention_mask
#           out = self.base_model(input_ids=non_zero_input_ids, attention_mask=non_zero_attention_mask)
#           out = out.pooler_output
#           out = self.fc1(out)
#           out = self.fc2(out)

#           element_mean = torch.mean(out, dim=0)
#           e_list.append(element_mean)

#         temp_tensor = torch.stack(e_list)
#         b_tensor = temp_tensor.view(self.batch_size, self.seq_length, self.config['h_text_size'])

#         # 2. Indicator
#         # h_tech
#         h_tech = X_2

#         # h
#         h = torch.cat([b_tensor, h_tech], dim=2)

#         # 3. LSTM
#         out, _ = self.lstm_1(h)
#         out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term

#         final_out = self.sequential(out)

#         return final_out


4. Sequence classifiction model

In [78]:
# # New method: C as a scaler + activation function
# class MyModel(nn.Module):
#     def __init__(self, base_model, config, element_size, section_length, publication_length, X_2_length, batch_size):
#         super(MyModel, self).__init__()
#         self.seq_length = config['seq_length']
#         self.batch_size = batch_size
#         self.element_size = element_size
#         self.abandon_tensor = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
#                               0, 0, 0, 0, 0, 0, 0, 0], device=device)

#         self.section_length = section_length
#         self.publication_length = publication_length
#         self.config = config

#         # 1. News
#         # text
#         self.base_model = base_model
#         # self.fc1 = nn.Linear(768, 64)
#         # self.fc2 = nn.Linear(64, config['h_text_size'])

#         # c
#         self.fc_h_c = nn.Linear(section_length + publication_length, config['h_c_size'])

#         # text -> news
#         self.fc_h_news = nn.Linear(3, config['h_news_size'])

#         # 3. LSTM
#         self.lstm_1 = nn.LSTM(config['h_news_size'] + X_2_length, 32, dropout=0.2, num_layers=2, batch_first=True, bidirectional=False)
#         self.sequential = nn.Sequential(
#             nn.Linear(32, 2)
#         )
#         self.relu = nn.ReLU()



#     def forward(self, input_ids, attention_mask, section, publication, X_2):
#         # 1. News
#         flattened_input_ids = input_ids.view(-1, 32)
#         flattened_attention_mask = attention_mask.view(-1, 32)
#         flattened_section = section.view(-1, self.section_length)
#         flattened_publication = publication.view(-1, self.publication_length)

#         e_list = []
#         for i in range(0, flattened_input_ids.size(0), self.element_size):
#           # 获取当前组的子张量
#           sub_input_ids = flattened_input_ids[i:i+self.element_size]
#           sub_attention_mask = flattened_attention_mask[i:i+self.element_size]
#           sub_section = flattened_section[i:i+self.element_size]
#           sub_publication = flattened_publication[i:i+self.element_size]

#           non_zero_mask = (sub_input_ids != 0).any(dim=1)
#           non_zero_input_ids = sub_input_ids[non_zero_mask]
#           non_zero_attention_mask = sub_attention_mask[non_zero_mask]
#           non_zero_section = sub_section[non_zero_mask]
#           non_zero_publication = sub_publication[non_zero_mask]

#           # input_ids, attention_mask
#           h_text = self.base_model(input_ids=non_zero_input_ids, attention_mask=non_zero_attention_mask)
#           # h_text = out.pooler_output
#           # out = self.fc1(out)
#           # h_text = self.fc2(out)

#           # section, publication
#           out = torch.cat([non_zero_section, non_zero_publication], dim=1)
#           out = self.fc_h_c(out)
#           h_c = self.relu(out)

#           # text * C
#           out = h_text.logits * h_c
#           h_news = self.fc_h_news(out)
#           h_news = self.relu(h_news)
#           element_mean = torch.mean(h_news, dim=0)
#           e_list.append(element_mean)

#         temp_tensor = torch.stack(e_list)
#         b_tensor = temp_tensor.view(self.batch_size, self.seq_length, self.config['h_news_size'])

#         h_tech = X_2

#         h = torch.cat([b_tensor, h_tech], dim=2)

#         # 3. LSTM
#         out, _ = self.lstm_1(h)
#         out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term

#         final_out = self.sequential(out)

#         return final_out


## (4) Load Model

### 1. Load pretrain model

In [79]:
# 載入預訓練模型
from transformers import AutoModelForSequenceClassification
# base_model = PreModel(base_model)
# base_model.load_state_dict(torch.load(config_2['pretrained_model_path']))
# bert_config = BertConfig(hidden_dropout_prob=0.2)
# model_name = 'ProsusAI/finbert'
model_name = 'bert-base-uncased'
base_model = BertModel.from_pretrained(model_name)
# base_model = BertModel.from_pretrained(model_name, config=bert_config)
# base_model = AutoModelForSequenceClassification.from_pretrained(model_name)

# Parameter
element_size = len(df['input_ids'][0])  # 114
section_length = len(df['section_dummy'][0][0])
publication_length = len(df['publication_dummy'][0][0])
X_2_length = len(feature) - 6


### 2. Initiate Model

In [80]:
model = MyModel(base_model, config, element_size, section_length, publication_length, X_2_length, config['batch_size'])

model.to(device)

MyModel(
  (base_model): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_aff

### Extra: Contunue training

In [81]:
# model = MyModel(base_model, config, section_length, publication_length, X_2_length)
model.load_state_dict(torch.load(config_2['save_path']))
# model.to(device)

# # 分段訓練
# trainer2(model, train_loader, val_loader, config, device)
# trainer1(model, train_loader, val_loader, config, device)

## (5) Require_grad

In [82]:
# Freeze all layers
for param in model.base_model.parameters():
  param.requires_grad = False

# Unfreeze part of layers
# for param in model.base_model.encoder.layer[6].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[7].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[8].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[9].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[10].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[11].parameters():
#     param.requires_grad = True

# for param in model.base_model.bert.encoder.layer[11].parameters():
#     param.requires_grad = True

# for param in model.base_model.classifier.parameters():
#     param.requires_grad = True

# for param in model.base_model.fc1.parameters():
#     param.requires_grad = True

# for param in model.base_model.fc2.parameters():
#     param.requires_grad = True

# for param in model.base_model.fc3.parameters():
    # param.requires_grad = True

# Check requires_grad status
for name, param in model.named_parameters():
    print(name, param.requires_grad)

base_model.embeddings.word_embeddings.weight False
base_model.embeddings.position_embeddings.weight False
base_model.embeddings.token_type_embeddings.weight False
base_model.embeddings.LayerNorm.weight False
base_model.embeddings.LayerNorm.bias False
base_model.encoder.layer.0.attention.self.query.weight False
base_model.encoder.layer.0.attention.self.query.bias False
base_model.encoder.layer.0.attention.self.key.weight False
base_model.encoder.layer.0.attention.self.key.bias False
base_model.encoder.layer.0.attention.self.value.weight False
base_model.encoder.layer.0.attention.self.value.bias False
base_model.encoder.layer.0.attention.output.dense.weight False
base_model.encoder.layer.0.attention.output.dense.bias False
base_model.encoder.layer.0.attention.output.LayerNorm.weight False
base_model.encoder.layer.0.attention.output.LayerNorm.bias False
base_model.encoder.layer.0.intermediate.dense.weight False
base_model.encoder.layer.0.intermediate.dense.bias False
base_model.encoder.la

# Training

In [83]:
def trainer(model, train_loader, val_loader, config, device):

    criterion = config['criterion']

    # ----------------------Learning Rate-----------------------
    # optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])
    # optimizer = torch.optim.AdamW(model.parameters(), lr=config['learning_rate'])

    learning_rates = {
        'base_model': 1e-5,  # 设置base_model的学习率
        'base_model_fc': 1e-5,
        'other_params': 1e-4  # 设置其他参数的学习率
    }

    param_groups = [
        {'params': model.base_model.parameters(), 'lr': learning_rates['base_model']},
        {'params': model.fc1.weight, 'lr': learning_rates['base_model_fc']},
        {'params': model.fc1.bias, 'lr': learning_rates['base_model_fc']},
        # {'params': model.fc2.weight, 'lr': learning_rates['base_model_fc']},
        # {'params': model.fc2.bias, 'lr': learning_rates['base_model_fc']},
        {'params': model.fc_h_c.weight, 'lr': learning_rates['base_model_fc']},
        {'params': model.fc_h_c.bias, 'lr': learning_rates['base_model_fc']},
        {'params': model.fc_h_news.weight, 'lr': learning_rates['base_model_fc']},
        {'params': model.fc_h_news.bias, 'lr': learning_rates['base_model_fc']},
        {'params': model.lstm_1.weight_ih_l0, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.weight_hh_l0, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.bias_ih_l0, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.bias_hh_l0, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.weight_ih_l1, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.weight_hh_l1, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.bias_ih_l1, 'lr': learning_rates['other_params']},
        {'params': model.lstm_1.bias_hh_l1, 'lr': learning_rates['other_params']},
        {'params': model.sequential[0].weight, 'lr': learning_rates['other_params']},
        {'params': model.sequential[0].bias, 'lr': learning_rates['other_params']},
    ]
    optimizer = torch.optim.AdamW(param_groups)
    # ----------------------------------------------

    writer = SummaryWriter()  # Writer of tensoboard.
    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    # 1. Training
    for epoch in range(n_epochs):
      model.train()  # Set the model to training mode
      loss_record = []

      train_pbar = tqdm(train_loader, position=0, leave=True)  # tqdm is a package to visualize your training progress.
      for input_ids, attention_mask, section, publication, X_2, y in train_loader:
        optimizer.zero_grad()  # Set gradient to zero

        # Forward pass
        input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
        pred = model(input_ids, attention_mask, section, publication, X_2)
        loss = criterion(pred, y)
        loss.backward()                     # Compute gradient(backpropagation).
        optimizer.step()                    # Update parameters.
        step += 1
        loss_record.append(loss.detach().item())

        # Display current epoch number and loss on tqdm progress bar.
        train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
        train_pbar.set_postfix({'loss': loss.detach().item()})

      mean_train_loss = sum(loss_record)/len(loss_record)
      writer.add_scalar('Loss/train', mean_train_loss, step)

      # 2. Evaluation
      model.eval() # Set your model to evaluation mode.
      loss_record = []
      predicted_labels_list = []
      targets_list = []
      for input_ids, attention_mask, section, publication, X_2, y in val_loader:
          input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
          with torch.no_grad():
              pred = model(input_ids, attention_mask, section, publication, X_2)
              _, predicted = torch.max(pred, 1)
              loss = criterion(pred, y)
              predicted_labels_list.extend(predicted.tolist())
              targets_list.extend(y.tolist())
              loss_record.append(loss.item())
      accuracy = accuracy_score(targets_list, predicted_labels_list)

      # Mean
      mean_valid_loss = sum(loss_record)/len(loss_record)
      print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}, Val Acc: {accuracy:.4f}')
      writer.add_scalar('Loss/valid', mean_valid_loss, step)

      # 3. Judge of saving model
      if mean_valid_loss < best_loss:
          best_loss = mean_valid_loss
          torch.save(model.state_dict(), config_2['save_path']) # Save your best model
          print('Saving model with loss {:.3f}...'.format(best_loss))
          early_stop_count = 0
      else:
          early_stop_count += 1

      if early_stop_count >= config['early_stop']:
          print('\nModel is not improving, so we halt the training session.')
          return


In [None]:
# 全部訓練
trainer(model, train_loader, val_loader, config, device)


Epoch [1/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.687]

Epoch [1/3000]: Train loss: 0.6914, Valid loss: 0.6898, Val Acc: 0.5547
Saving model with loss 0.690...


Epoch [1/3000]:   0%|          | 0/19 [03:00<?, ?it/s, loss=0.687]
Epoch [2/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.687]

Epoch [2/3000]: Train loss: 0.6911, Valid loss: 0.6895, Val Acc: 0.5547
Saving model with loss 0.690...


Epoch [2/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.687]
Epoch [3/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.686]

Epoch [3/3000]: Train loss: 0.6909, Valid loss: 0.6893, Val Acc: 0.5547
Saving model with loss 0.689...


Epoch [3/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.686]
Epoch [4/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.685]

Epoch [4/3000]: Train loss: 0.6908, Valid loss: 0.6890, Val Acc: 0.5547
Saving model with loss 0.689...


Epoch [4/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.685]
Epoch [5/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.685]

Epoch [5/3000]: Train loss: 0.6909, Valid loss: 0.6889, Val Acc: 0.5547
Saving model with loss 0.689...


Epoch [5/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.685]
Epoch [6/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.684]

Epoch [6/3000]: Train loss: 0.6908, Valid loss: 0.6887, Val Acc: 0.5547
Saving model with loss 0.689...


Epoch [6/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.684]
Epoch [7/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.684]

Epoch [7/3000]: Train loss: 0.6907, Valid loss: 0.6886, Val Acc: 0.5547
Saving model with loss 0.689...


Epoch [7/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.684]
Epoch [8/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.684]

Epoch [8/3000]: Train loss: 0.6906, Valid loss: 0.6885, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [8/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.684]
Epoch [9/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.684]

Epoch [9/3000]: Train loss: 0.6907, Valid loss: 0.6884, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [9/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.684]
Epoch [10/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.683]

Epoch [10/3000]: Train loss: 0.6908, Valid loss: 0.6883, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [10/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.683]
Epoch [11/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.684]

Epoch [11/3000]: Train loss: 0.6906, Valid loss: 0.6882, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [11/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.684]
Epoch [12/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [12/3000]: Train loss: 0.6905, Valid loss: 0.6882, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [12/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [13/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [13/3000]: Train loss: 0.6905, Valid loss: 0.6881, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [13/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [14/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [14/3000]: Train loss: 0.6906, Valid loss: 0.6881, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [14/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [15/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [15/3000]: Train loss: 0.6905, Valid loss: 0.6880, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [15/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [16/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [16/3000]: Train loss: 0.6903, Valid loss: 0.6880, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [16/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [17/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [17/3000]: Train loss: 0.6908, Valid loss: 0.6880, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [17/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [18/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [18/3000]: Train loss: 0.6906, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [18/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [19/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [19/3000]: Train loss: 0.6905, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [19/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [20/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [20/3000]: Train loss: 0.6905, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [20/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [21/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.681]

Epoch [21/3000]: Train loss: 0.6905, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [21/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.681]
Epoch [22/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [22/3000]: Train loss: 0.6907, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [22/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [23/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [23/3000]: Train loss: 0.6905, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [23/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [24/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [24/3000]: Train loss: 0.6906, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [24/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [25/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.681]

Epoch [25/3000]: Train loss: 0.6905, Valid loss: 0.6879, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [25/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.681]
Epoch [26/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [26/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [26/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [27/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [27/3000]: Train loss: 0.6907, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [27/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [28/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [28/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [28/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [29/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [29/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [29/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [30/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [30/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [30/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [31/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [31/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [31/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [32/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [32/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [32/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [33/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [33/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [33/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [34/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [34/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [34/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [35/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [35/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [35/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [36/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [36/3000]: Train loss: 0.6907, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [36/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [37/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [37/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547


Epoch [37/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [38/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [38/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547


Epoch [38/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.682]
Epoch [39/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [39/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [39/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [40/3000]:   0%|          | 0/19 [02:20<?, ?it/s, loss=0.682]

Epoch [40/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [40/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [41/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [41/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [41/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [42/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [42/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [42/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [43/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [43/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [43/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [44/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [44/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [44/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [45/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [45/3000]: Train loss: 0.6905, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [45/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [46/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [46/3000]: Train loss: 0.6906, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [46/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [47/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [47/3000]: Train loss: 0.6904, Valid loss: 0.6878, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [47/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [48/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [48/3000]: Train loss: 0.6903, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [48/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [49/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [49/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [49/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [50/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [50/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [50/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [51/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [51/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [51/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [52/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [52/3000]: Train loss: 0.6904, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [52/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.683]
Epoch [53/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [53/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [53/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.682]
Epoch [54/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [54/3000]: Train loss: 0.6907, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [54/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.683]
Epoch [55/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [55/3000]: Train loss: 0.6903, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [55/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [56/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [56/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [56/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [57/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [57/3000]: Train loss: 0.6907, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [57/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [58/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [58/3000]: Train loss: 0.6908, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [58/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.682]
Epoch [59/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [59/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [59/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [60/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [60/3000]: Train loss: 0.6904, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [60/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [61/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [61/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [61/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [62/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [62/3000]: Train loss: 0.6904, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [62/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [63/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [63/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [63/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [64/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [64/3000]: Train loss: 0.6904, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [64/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [65/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [65/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [65/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [66/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [66/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [66/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [67/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [67/3000]: Train loss: 0.6903, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [67/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [68/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [68/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [68/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [69/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [69/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [69/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [70/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [70/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [70/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.682]
Epoch [71/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [71/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [71/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [72/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [72/3000]: Train loss: 0.6903, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [72/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [73/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [73/3000]: Train loss: 0.6903, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [73/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [74/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [74/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [74/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [75/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [75/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [75/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [76/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [76/3000]: Train loss: 0.6902, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [76/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [77/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [77/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [77/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [78/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [78/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [78/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [79/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [79/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [79/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [80/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [80/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [80/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [81/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [81/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [81/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [82/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [82/3000]: Train loss: 0.6903, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [82/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [83/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [83/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [83/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [84/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [84/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [84/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [85/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [85/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [85/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [86/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [86/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [86/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [87/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [87/3000]: Train loss: 0.6906, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [87/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [88/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [88/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [88/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [89/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.682]

Epoch [89/3000]: Train loss: 0.6904, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [89/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [90/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [90/3000]: Train loss: 0.6907, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [90/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [91/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [91/3000]: Train loss: 0.6904, Valid loss: 0.6877, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [91/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.682]
Epoch [92/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.683]

Epoch [92/3000]: Train loss: 0.6903, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [92/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.683]
Epoch [93/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [93/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [93/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [94/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [94/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [94/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [95/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [95/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [95/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [96/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [96/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [96/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [97/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [97/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [97/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [98/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.682]

Epoch [98/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [98/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.682]
Epoch [99/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.682]

Epoch [99/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [99/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [100/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [100/3000]: Train loss: 0.6907, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [100/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [101/3000]:   0%|          | 0/19 [02:24<?, ?it/s, loss=0.681]

Epoch [101/3000]: Train loss: 0.6905, Valid loss: 0.6877, Val Acc: 0.5547


Epoch [101/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.681]
Epoch [102/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [102/3000]: Train loss: 0.6903, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [102/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [103/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [103/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [103/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [104/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [104/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [104/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [105/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [105/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [105/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [106/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [106/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [106/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [107/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [107/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [107/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.683]
Epoch [108/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [108/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [108/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [109/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [109/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [109/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [110/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [110/3000]: Train loss: 0.6908, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [110/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [111/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [111/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [111/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [112/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [112/3000]: Train loss: 0.6908, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [112/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [113/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [113/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [113/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [114/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [114/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [114/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [115/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [115/3000]: Train loss: 0.6907, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [115/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [116/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [116/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [116/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [117/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [117/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [117/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [118/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [118/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [118/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [119/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [119/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [119/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [120/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [120/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [120/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [121/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [121/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [121/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [122/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.681]

Epoch [122/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [122/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.681]
Epoch [123/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [123/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [123/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [124/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [124/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [124/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [125/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [125/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [125/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [126/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [126/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [126/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [127/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [127/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [127/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [128/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [128/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [128/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [129/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [129/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [129/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [130/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [130/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [130/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [131/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [131/3000]: Train loss: 0.6906, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [131/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.683]
Epoch [132/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [132/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [132/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [133/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [133/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [133/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.682]
Epoch [134/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [134/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [134/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [135/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.681]

Epoch [135/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [135/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.681]
Epoch [136/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [136/3000]: Train loss: 0.6907, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [136/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [137/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [137/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [137/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [138/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [138/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [138/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [139/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [139/3000]: Train loss: 0.6905, Valid loss: 0.6876, Val Acc: 0.5547


Epoch [139/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [140/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [140/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [140/3000]:   0%|          | 0/19 [02:59<?, ?it/s, loss=0.683]
Epoch [141/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [141/3000]: Train loss: 0.6904, Valid loss: 0.6876, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [141/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [142/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [142/3000]: Train loss: 0.6902, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [142/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [143/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [143/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [143/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [144/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [144/3000]: Train loss: 0.6904, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [144/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [145/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [145/3000]: Train loss: 0.6906, Valid loss: 0.6875, Val Acc: 0.5547


Epoch [145/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.683]
Epoch [146/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [146/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [146/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.683]
Epoch [147/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [147/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.688...


Epoch [147/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [148/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [148/3000]: Train loss: 0.6903, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [148/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [149/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [149/3000]: Train loss: 0.6904, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [149/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [150/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [150/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [150/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [151/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [151/3000]: Train loss: 0.6907, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [151/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [152/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [152/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547


Epoch [152/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [153/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [153/3000]: Train loss: 0.6904, Valid loss: 0.6875, Val Acc: 0.5547


Epoch [153/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [154/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [154/3000]: Train loss: 0.6904, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [154/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.682]
Epoch [155/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [155/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [155/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [156/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [156/3000]: Train loss: 0.6903, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [156/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [157/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [157/3000]: Train loss: 0.6903, Valid loss: 0.6875, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [157/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [158/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [158/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [158/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [159/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [159/3000]: Train loss: 0.6902, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [159/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [160/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [160/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547


Epoch [160/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [161/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [161/3000]: Train loss: 0.6906, Valid loss: 0.6875, Val Acc: 0.5547


Epoch [161/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [162/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [162/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547


Epoch [162/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [163/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [163/3000]: Train loss: 0.6905, Valid loss: 0.6875, Val Acc: 0.5547


Epoch [163/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [164/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [164/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [164/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.682]
Epoch [165/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [165/3000]: Train loss: 0.6903, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [165/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [166/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [166/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [166/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [167/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [167/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547


Epoch [167/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [168/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [168/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547


Epoch [168/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [169/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [169/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547


Epoch [169/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [170/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [170/3000]: Train loss: 0.6903, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [170/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [171/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [171/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [171/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [172/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.681]

Epoch [172/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [172/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.681]
Epoch [173/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [173/3000]: Train loss: 0.6906, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [173/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [174/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.684]

Epoch [174/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [174/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.684]
Epoch [175/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [175/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [175/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [176/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [176/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [176/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [177/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [177/3000]: Train loss: 0.6904, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [177/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [178/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [178/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [178/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [179/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.684]

Epoch [179/3000]: Train loss: 0.6905, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [179/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.684]
Epoch [180/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [180/3000]: Train loss: 0.6907, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [180/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [181/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [181/3000]: Train loss: 0.6903, Valid loss: 0.6874, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [181/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [182/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [182/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [182/3000]:   0%|          | 0/19 [02:56<?, ?it/s, loss=0.683]
Epoch [183/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [183/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [183/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [184/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.684]

Epoch [184/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [184/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.684]
Epoch [185/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [185/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [185/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [186/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [186/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [186/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [187/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [187/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [187/3000]:   0%|          | 0/19 [02:58<?, ?it/s, loss=0.682]
Epoch [188/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [188/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [188/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [189/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [189/3000]: Train loss: 0.6906, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [189/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [190/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [190/3000]: Train loss: 0.6906, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [190/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [191/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [191/3000]: Train loss: 0.6904, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [191/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.683]
Epoch [192/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [192/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [192/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [193/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [193/3000]: Train loss: 0.6902, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [193/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [194/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [194/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [194/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [195/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [195/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [195/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [196/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [196/3000]: Train loss: 0.6904, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [196/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [197/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [197/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [197/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [198/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [198/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [198/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [199/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [199/3000]: Train loss: 0.6906, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [199/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [200/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [200/3000]: Train loss: 0.6908, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [200/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.683]
Epoch [201/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [201/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [201/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [202/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [202/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [202/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [203/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [203/3000]: Train loss: 0.6904, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [203/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [204/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [204/3000]: Train loss: 0.6903, Valid loss: 0.6873, Val Acc: 0.5547


Epoch [204/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [205/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [205/3000]: Train loss: 0.6905, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [205/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.683]
Epoch [206/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [206/3000]: Train loss: 0.6902, Valid loss: 0.6873, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [206/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [207/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [207/3000]: Train loss: 0.6902, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [207/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [208/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.681]

Epoch [208/3000]: Train loss: 0.6903, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [208/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.681]
Epoch [209/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [209/3000]: Train loss: 0.6905, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [209/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [210/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [210/3000]: Train loss: 0.6904, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [210/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [211/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [211/3000]: Train loss: 0.6903, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [211/3000]:   0%|          | 0/19 [02:59<?, ?it/s, loss=0.683]
Epoch [212/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [212/3000]: Train loss: 0.6905, Valid loss: 0.6872, Val Acc: 0.5547


Epoch [212/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [213/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [213/3000]: Train loss: 0.6904, Valid loss: 0.6872, Val Acc: 0.5547


Epoch [213/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [214/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [214/3000]: Train loss: 0.6901, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [214/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [215/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [215/3000]: Train loss: 0.6903, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [215/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [216/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [216/3000]: Train loss: 0.6904, Valid loss: 0.6872, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [216/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [217/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [217/3000]: Train loss: 0.6906, Valid loss: 0.6872, Val Acc: 0.5547


Epoch [217/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [218/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [218/3000]: Train loss: 0.6903, Valid loss: 0.6872, Val Acc: 0.5547


Epoch [218/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [219/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [219/3000]: Train loss: 0.6903, Valid loss: 0.6872, Val Acc: 0.5547


Epoch [219/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [220/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [220/3000]: Train loss: 0.6902, Valid loss: 0.6871, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [220/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [221/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [221/3000]: Train loss: 0.6903, Valid loss: 0.6871, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [221/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [222/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [222/3000]: Train loss: 0.6900, Valid loss: 0.6871, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [222/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [223/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [223/3000]: Train loss: 0.6904, Valid loss: 0.6871, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [223/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [224/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [224/3000]: Train loss: 0.6905, Valid loss: 0.6871, Val Acc: 0.5547


Epoch [224/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [225/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.681]

Epoch [225/3000]: Train loss: 0.6903, Valid loss: 0.6871, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [225/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.681]
Epoch [226/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [226/3000]: Train loss: 0.6903, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [226/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [227/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [227/3000]: Train loss: 0.6902, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [227/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [228/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.684]

Epoch [228/3000]: Train loss: 0.6903, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [228/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.684]
Epoch [229/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [229/3000]: Train loss: 0.6901, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [229/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [230/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [230/3000]: Train loss: 0.6902, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [230/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [231/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [231/3000]: Train loss: 0.6905, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [231/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [232/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [232/3000]: Train loss: 0.6906, Valid loss: 0.6870, Val Acc: 0.5547


Epoch [232/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [233/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [233/3000]: Train loss: 0.6902, Valid loss: 0.6870, Val Acc: 0.5547


Epoch [233/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [234/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [234/3000]: Train loss: 0.6905, Valid loss: 0.6870, Val Acc: 0.5547


Epoch [234/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [235/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [235/3000]: Train loss: 0.6905, Valid loss: 0.6870, Val Acc: 0.5547


Epoch [235/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [236/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [236/3000]: Train loss: 0.6903, Valid loss: 0.6870, Val Acc: 0.5547


Epoch [236/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [237/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [237/3000]: Train loss: 0.6903, Valid loss: 0.6870, Val Acc: 0.5547


Epoch [237/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [238/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [238/3000]: Train loss: 0.6900, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [238/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [239/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [239/3000]: Train loss: 0.6903, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [239/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]
Epoch [240/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [240/3000]: Train loss: 0.6903, Valid loss: 0.6870, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [240/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.682]
Epoch [241/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [241/3000]: Train loss: 0.6901, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [241/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [242/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [242/3000]: Train loss: 0.6904, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [242/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [243/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [243/3000]: Train loss: 0.6899, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [243/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [244/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [244/3000]: Train loss: 0.6902, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [244/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [245/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [245/3000]: Train loss: 0.6898, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [245/3000]:   0%|          | 0/19 [02:57<?, ?it/s, loss=0.682]
Epoch [246/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [246/3000]: Train loss: 0.6902, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [246/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [247/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [247/3000]: Train loss: 0.6904, Valid loss: 0.6869, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [247/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [248/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [248/3000]: Train loss: 0.6902, Valid loss: 0.6869, Val Acc: 0.5547


Epoch [248/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [249/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [249/3000]: Train loss: 0.6902, Valid loss: 0.6869, Val Acc: 0.5547


Epoch [249/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [250/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [250/3000]: Train loss: 0.6903, Valid loss: 0.6868, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [250/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.683]
Epoch [251/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [251/3000]: Train loss: 0.6903, Valid loss: 0.6868, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [251/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [252/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [252/3000]: Train loss: 0.6903, Valid loss: 0.6868, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [252/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [253/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [253/3000]: Train loss: 0.6903, Valid loss: 0.6868, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [253/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [254/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [254/3000]: Train loss: 0.6903, Valid loss: 0.6868, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [254/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [255/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [255/3000]: Train loss: 0.6903, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [255/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [256/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [256/3000]: Train loss: 0.6905, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [256/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [257/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [257/3000]: Train loss: 0.6901, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [257/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [258/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.681]

Epoch [258/3000]: Train loss: 0.6897, Valid loss: 0.6868, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [258/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.681]
Epoch [259/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [259/3000]: Train loss: 0.6899, Valid loss: 0.6867, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [259/3000]:   0%|          | 0/19 [02:55<?, ?it/s, loss=0.683]
Epoch [260/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [260/3000]: Train loss: 0.6906, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [260/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [261/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [261/3000]: Train loss: 0.6905, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [261/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [262/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.683]

Epoch [262/3000]: Train loss: 0.6901, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [262/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [263/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [263/3000]: Train loss: 0.6901, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [263/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [264/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [264/3000]: Train loss: 0.6901, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [264/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.683]
Epoch [265/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.684]

Epoch [265/3000]: Train loss: 0.6903, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [265/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.684]
Epoch [266/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [266/3000]: Train loss: 0.6904, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [266/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.682]
Epoch [267/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.683]

Epoch [267/3000]: Train loss: 0.6905, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [267/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [268/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [268/3000]: Train loss: 0.6898, Valid loss: 0.6868, Val Acc: 0.5547


Epoch [268/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [269/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.681]

Epoch [269/3000]: Train loss: 0.6902, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [269/3000]:   0%|          | 0/19 [02:51<?, ?it/s, loss=0.681]
Epoch [270/3000]:   0%|          | 0/19 [02:21<?, ?it/s, loss=0.682]

Epoch [270/3000]: Train loss: 0.6902, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [270/3000]:   0%|          | 0/19 [02:52<?, ?it/s, loss=0.682]
Epoch [271/3000]:   0%|          | 0/19 [02:22<?, ?it/s, loss=0.682]

Epoch [271/3000]: Train loss: 0.6904, Valid loss: 0.6867, Val Acc: 0.5547
Saving model with loss 0.687...


Epoch [271/3000]:   0%|          | 0/19 [02:54<?, ?it/s, loss=0.682]
Epoch [272/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.685]

Epoch [272/3000]: Train loss: 0.6904, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [272/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.685]
Epoch [273/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.683]

Epoch [273/3000]: Train loss: 0.6905, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [273/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.683]
Epoch [274/3000]:   0%|          | 0/19 [02:23<?, ?it/s, loss=0.682]

Epoch [274/3000]: Train loss: 0.6899, Valid loss: 0.6867, Val Acc: 0.5547


Epoch [274/3000]:   0%|          | 0/19 [02:53<?, ?it/s, loss=0.682]


In [None]:
%reload_ext tensorboard
%tensorboard --logdir=./runs/

# Evaluate

In [None]:
# # Evaluation Dataloader
# con_train_loader = DataLoader(train_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)
# con_val_loader = DataLoader(val_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)
# con_test_loader = DataLoader(test_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)

# # Evaluation mode
# model.eval()
# accuracy_list = []

# # 1. Train part
# with torch.no_grad():
#     predicted_labels_list = []
#     targets_list = []
#     for input_ids, attention_mask, section, publication, X_2, y in con_train_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

#     # 计算准确率
#     accuracy = accuracy_score(targets_list, predicted_labels_list)
#     accuracy_list.append(accuracy)

# print('=====================================================================================================================')
# print('Training Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list), '\n')

# # 2. Val part
# with torch.no_grad():
#     predicted_labels_list = []
#     targets_list = []
#     for input_ids, attention_mask, section, publication, X_2, y in con_val_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

#     # 计算准确率
#     accuracy = accuracy_score(targets_list, predicted_labels_list)
#     accuracy_list.append(accuracy)

# print('=====================================================')
# print('Val Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list))


# # 3. Test part
# with torch.no_grad():
#     predicted_labels_list = []
#     targets_list = []
#     for input_ids, attention_mask, section, publication, X_2, y in con_test_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

#     # 计算准确率
#     accuracy = accuracy_score(targets_list, predicted_labels_list)
#     accuracy_list.append(accuracy)

# print('=====================================================')
# print('Testing Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list))

# print('=====================================================', '\n')
# print("Accuracy [Train, Val, Test]: ", accuracy_list, '\n')
# print(model, '\n')
# print('Config: ', config, '\n')
# print('Feature: ', feature)
# print('time_start: ', time_start, 'time_end: ', time_end)

In [None]:
# Evaluation Dataloader
con_train_loader = DataLoader(train_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)
con_val_loader = DataLoader(val_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)
con_test_loader = DataLoader(test_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)

model = MyModel(base_model, config, element_size, section_length, publication_length, X_2_length, batch_size=1)
model.load_state_dict(torch.load(config_2['save_path']))
model.to(device)

# Evaluation mode
model.eval()

accuracy_list = []

# 1. Train part
with torch.no_grad():
    predicted_labels_list = []
    targets_list = []
    for input_ids, attention_mask, section, publication, X_2, y in con_train_loader:
        input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
        outputs = model(input_ids, attention_mask, section, publication, X_2)
        _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

    # 计算准确率
    accuracy = accuracy_score(targets_list, predicted_labels_list)
    accuracy_list.append(accuracy)

print('=====================================================================================================================')
print('Training Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list), '\n')

# 2. Val part
with torch.no_grad():
    predicted_labels_list = []
    targets_list = []
    for input_ids, attention_mask, section, publication, X_2, y in con_val_loader:
        input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
        outputs = model(input_ids, attention_mask, section, publication, X_2)
        _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

    # 计算准确率
    accuracy = accuracy_score(targets_list, predicted_labels_list)
    accuracy_list.append(accuracy)

print('=====================================================')
print('Val Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list))


# 3. Test part
with torch.no_grad():
    predicted_labels_list = []
    targets_list = []
    for input_ids, attention_mask, section, publication, X_2, y in con_test_loader:
        input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
        outputs = model(input_ids, attention_mask, section, publication, X_2)
        _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

    # 计算准确率
    accuracy = accuracy_score(targets_list, predicted_labels_list)
    accuracy_list.append(accuracy)

print('=====================================================')
print('Testing Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list))

print('=====================================================', '\n')
print("Accuracy [Train, Val, Test]: ", accuracy_list, '\n')
# print(model, '\n')
print('Config: ', config, '\n')
print('Feature: ', feature)
print('time_start: ', time_start, 'time_end: ', time_end)

In [None]:
# # Evaluation Dataloader
# con_train_loader = DataLoader(train_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)
# con_val_loader = DataLoader(val_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)
# con_test_loader = DataLoader(test_dataset, batch_size=1, shuffle=config['shuffle'], drop_last=True, pin_memory=True)

# # Evaluation mode
# model.eval()
# accuracy_list = []
# threshold = 0.5

# # 1. Train part
# predicted_labels_list = []
# targets_list = []

# with torch.no_grad():
#     for input_ids, attention_mask, section, publication, X_2, y in con_train_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
#         predicted_labels = (outputs >= threshold).float()

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

# accuracy = accuracy_score(targets_list, predicted_labels_list)

# print('=====================================================================================================================')
# print('Training Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list), '\n')

# # 2. Val part
# predicted_labels_list = []
# targets_list = []

# with torch.no_grad():
#     for input_ids, attention_mask, section, publication, X_2, y in con_val_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
#         predicted_labels = (outputs >= 0.5).float()

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

# accuracy = accuracy_score(targets_list, predicted_labels_list)

# print('=====================================================')
# print('Val Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list))


# # 3. Test part
# predicted_labels_list = []
# targets_list = []

# with torch.no_grad():
#     for input_ids, attention_mask, section, publication, X_2, y in con_test_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
#         predicted_labels = (outputs >= 0.5).float()

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

# accuracy = accuracy_score(targets_list, predicted_labels_list)

# print('=====================================================')
# print('Testing Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list))

# print('=====================================================', '\n')
# print("Accuracy [Train, Val, Test]: ", accuracy_list, '\n')
# # print(model, '\n')
# print('Config: ', config, '\n')
# print('Feature: ', feature)
# print('time_start: ', time_start, 'time_end: ', time_end)

In [None]:
# # Evaluation Dataloader
# con_train_loader = DataLoader(train_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)
# con_val_loader = DataLoader(val_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)
# con_test_loader = DataLoader(test_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)

# model = MyModel(base_model, config, element_size, section_length, publication_length, X_2_length, batch_size=1)
# model.load_state_dict(torch.load(config_2['save_path']))
# model.to(device)

# # Evaluation mode
# model.eval()
# accuracy_list = []
# threshold = 0.5

# # 1. Train part
# predicted_labels_list = []
# targets_list = []

# with torch.no_grad():
#     for input_ids, attention_mask, section, publication, X_2, y in con_train_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
#         predicted_labels = (outputs >= threshold).float()

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

# accuracy = accuracy_score(targets_list, predicted_labels_list)

# print('=====================================================================================================================')
# print('Training Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list), '\n')

# # 2. Val part
# predicted_labels_list = []
# targets_list = []

# with torch.no_grad():
#     for input_ids, attention_mask, section, publication, X_2, y in con_val_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
#         predicted_labels = (outputs >= 0.5).float()

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

# accuracy = accuracy_score(targets_list, predicted_labels_list)

# print('=====================================================')
# print('Val Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list))


# # 3. Test part
# predicted_labels_list = []
# targets_list = []

# with torch.no_grad():
#     for input_ids, attention_mask, section, publication, X_2, y in con_test_loader:
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         outputs = model(input_ids, attention_mask, section, publication, X_2)
#         # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
#         predicted_labels = (outputs >= 0.5).float()

#         predicted_labels_list.extend(predicted_labels.tolist())
#         targets_list.extend(y.tolist())

# accuracy = accuracy_score(targets_list, predicted_labels_list)

# print('=====================================================')
# print('Testing Result:')
# print(classification_report(targets_list, predicted_labels_list))
# print(confusion_matrix(targets_list, predicted_labels_list))

# print('=====================================================', '\n')
# print("Accuracy [Train, Val, Test]: ", accuracy_list, '\n')
# print('Config: ', config, '\n')
# print('Feature: ', feature)
# print('time_start: ', time_start, 'time_end: ', time_end)

## (2) Thresholds

In [None]:
# # function of evaluation
# def evaluate_model(model, dataloader, threshold=0.5, device='cuda'):
#     predicted_labels_list = []
#     targets_list = []

#     with torch.no_grad():
#         for input_ids, attention_mask, section, publication, X_2, y in dataloader:
#             input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#             outputs = model(input_ids, attention_mask, section, publication, X_2)
#             predicted_labels = (outputs >= threshold).float()

#             predicted_labels_list.extend(predicted_labels.tolist())
#             targets_list.extend(y.tolist())

#     accuracy = accuracy_score(targets_list, predicted_labels_list)

#     print('=====================================================================================================================')
#     print(str(dataloader), ' Result:')
#     print(classification_report(targets_list, predicted_labels_list))
#     print(confusion_matrix(targets_list, predicted_labels_list), '\n')

In [None]:
# accuracy_list = []
# threshold = 0.51

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.52

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.53

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.54

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.55

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.56

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.57

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.58

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

# Benchmark

In [None]:
# class BenchmarkModel(nn.Module):
#     def __init__(self, config, section_length, publication_length, X_2_length):
#         super(BenchmarkModel, self).__init__()
#         # tech
#         self.fc_h_tech = nn.Linear(X_2_length, config['h_tech_size'])

#         # 3. LSTM
#         self.lstm_1 = nn.LSTM(config['h_size'], 2, num_layers=1, batch_first=True, bidirectional=False)
#         self.sequential = nn.Sequential(
#             nn.Linear(2, 1)
#         )
#         self.sigmoid = nn.Sigmoid()

#     def forward(self, input_ids, attention_mask, section, publication, X_2):
#         # h_tech
#         h_tech = self.fc_h_tech(X_2)

#         # 3. LSTM
#         out, _ = self.lstm_1(h_tech)
#         out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term
#         # print('out 1(last layer of LSTM): ', out.shape, out)
#         out = self.sequential(out)
#         # print('out 2(after sequnetial): ',out.shape , out)
#         final_out = self.sigmoid(out)
#         # print('final_out: ', final_out.shape, final_out)

#         return final_out
