# 0 Setting

In [1]:
# Parameter Setting
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
folder_name = 'Research-(D5) Synthesized input model'

config = {
    'learning_rate': 1e-3,
    'batch_size': 4,
    'seq_length': 5,

    'shuffle': False,
    'criterion': torch.nn.BCELoss(),
    'seed': 42,
    'valid_ratio': 0.2,
    'test_ratio': 0.2,
    'max_length': 512,
    'n_epochs': 3000,
    'early_stop': 50,
}

feature = [
    # X_1
    # 'input_ids',
    # 'attention_mask',
    # 'section_dummy',
    # 'publication_dummy',

    # X_2
    # 1. tech indicator
    # 'Open',
    # 'High',
    # 'Low',
    # 'Close',
    # 'Volume',
    # 'Dividends',
    # 'Stock Splits',
    'today_return',
    # 'today_return_cate',
    # 'Sma',
    # 'Rsi',
    # 'Kd',
    # 'Ema_12',
    # 'Ema_26',
    # 'Macd',

    # 2. market index
    '^DJI',
    '^GSPC',
    '^NDX',
    '^IXIC',
    '^SOX',

    # y
    # '1_day_return',
    # '2_day_return',
    # '3_day_return',
    # '4_day_return',
    # '5_day_return',
    '1_day_return_cate',
    # '2_day_return_cate',
    # '3_day_return_cate',
    # '4_day_return_cate',
    # '5_day_return_cate',

    # Do not mark the datetime, it's for operation
    'datetime',
    ]

# All the news dataset
time_start = '2016-01-01T00:00:00'
time_end = '2020-04-02T00:00:00'

# time_start = '2016-01-01T00:00:00'
# time_end = '2018-12-31T00:00:00'

print(len(feature)-2)

stock_id_list = [
    'MSFT',
    "AMZN",
    "GOOG",
    "TSLA",
    "UBER",
    "JNJ",
    "BABA",
    "INTC",
    "IBM",
    "SONY",
    "ORCL",
    "PYPL",
    "CSCO",
    "ABNB",
    "NVDA",
    "QCOM",
    "CRM",
    "BIDU",
    "ADBE",
    "DELL",
    "HPQ",
    "MU",
    "AMD",
    "AVGO",
    "SAP",
    "TXN",
    "AMAT"
]

company_list = [
    'microsoft',
    "amazon",
    "google",
    "tesla",
    "uber",
    "johnson johnson",
    "alibaba",
    "intel",
    "ibm",
    "sony",
    "oracle",
    "paypal",
    "cisco",
    "airbnb",
    "nvidia",
    "qualcomm",
    "salesforce",
    "baidu",
    "adobe",
    "dell",
    "hp",
    "micron",
    "amd",
    "broadcom",
    "sap",
    "texas instruments",
    "applied material"
]

process_id = 0 #26

stock_id = stock_id_list[process_id]
company_name = company_list[process_id]
config_2 = {'input_path': '/content/drive/MyDrive/Colab Notebooks/'+folder_name+'/data/2_'+company_name+'_for_model.csv',
            'save_path': '/content/drive/MyDrive/Colab Notebooks/'+folder_name+'/model_saved/model.ckpt',
            }

6


## (1) Import

In [2]:
# Google
from google.colab import drive
drive.mount('/content/drive')

# pip installation
!pip install transformers

# Basic
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import math

# Sklearn
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# PyTorch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter

from transformers import XLNetModel, XLNetTokenizer, BertTokenizer, BertModel

# others
from datetime import datetime, timedelta
from tqdm import tqdm
from torchsummary import summary
import ast

Mounted at /content/drive
Collecting transformers
  Downloading transformers-4.33.3-py3-none-any.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m56.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.15.1 (from transformers)
  Downloading huggingface_hub-0.17.3-py3-none-any.whl (295 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m295.0/295.0 kB[0m [31m33.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m119.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m89

In [3]:
def same_seed(seed):
    '''Fixes random number generator seeds for reproducibility.'''
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Set seed for reproducibility
same_seed(config['seed'])


In [4]:
df = pd.read_csv(config_2['input_path'])
df = df.sort_values(by='datetime', ascending=True)
df

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,today_return,today_return_cate,1_day_return,...,datetime,^DJI,^GSPC,^NDX,^IXIC,^SOX,input_ids,attention_mask,section_dummy,publication_dummy
0,48.370305,48.797729,47.542168,48.797729,53778000,0.0,0.0,0.008837,1,0.004562,...,2016-01-04,-0.014739,-0.012531,0.002854,0.001111,0.008979,"[[101, 19102, 1005, 1055, 9088, 21628, 21572, ...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, ..."
1,48.913500,49.323115,48.566217,49.020355,34079700,0.0,0.0,0.002185,1,-0.018165,...,2016-01-05,0.000651,0.001455,-0.006175,-0.005370,-0.012003,"[[101, 16485, 2024, 9881, 2075, 2070, 5621, 68...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, ..."
2,48.370310,48.441549,47.764790,48.129883,39518900,0.0,0.0,-0.004971,0,-0.034783,...,2016-01-06,-0.014475,-0.010663,0.007758,0.004570,-0.011092,"[[101, 2009, 1521, 1055, 2025, 3733, 2108, 105...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,...","[[0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, ..."
3,46.927750,47.631222,46.366753,46.455799,56564900,0.0,0.0,-0.010057,0,0.003067,...,2016-01-07,-0.022161,-0.021271,-0.009984,-0.009917,-0.010667,"[[101, 2003, 5082, 2633, 20727, 2046, 1996, 25...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, ..."
4,46.633883,47.444210,46.437982,46.598267,48754000,0.0,0.0,-0.000764,0,-0.000573,...,2016-01-08,-0.010456,-0.012302,-0.015069,-0.016601,-0.019801,"[[101, 2012, 2560, 2431, 1997, 2866, 5085, 203...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1065,146.929075,149.969321,144.460083,144.944199,57042300,0.0,0.0,-0.013509,0,0.070341,...,2020-03-27,-0.011950,-0.005634,-0.009020,-0.006866,-0.015872,"[[101, 2822, 23707, 12058, 2015, 13463, 21887,...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, ..."
1066,147.597165,155.497935,145.244355,155.139679,63420300,0.0,0.0,0.051102,1,-0.015727,...,2020-03-30,0.029950,0.026444,0.027371,0.025146,0.025789,"[[101, 2235, 2449, 10940, 2071, 2468, 4539, 19...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, ..."
1067,154.336067,159.545156,151.586293,152.699768,77927200,0.0,0.0,-0.010602,0,-0.035508,...,2020-03-31,-0.013115,-0.011512,-0.005377,-0.005163,-0.020954,"[[101, 2005, 1996, 4895, 5498, 10711, 3064, 19...","[[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...","[[0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, ..."
1068,148.139374,152.738472,146.028637,147.277649,57969900,0.0,0.0,-0.005817,0,0.020709,...,2020-04-01,-0.013373,-0.011041,-0.012725,-0.013261,-0.018769,,,,


In [5]:
df.columns

Index(['Open', 'High', 'Low', 'Close', 'Volume', 'Dividends', 'Stock Splits',
       'today_return', 'today_return_cate', '1_day_return', '2_day_return',
       '3_day_return', '4_day_return', '5_day_return', '1_day_return_cate',
       '2_day_return_cate', '3_day_return_cate', '4_day_return_cate',
       '5_day_return_cate', 'Sma', 'Rsi', 'Kd', 'Ema_12', 'Ema_26', 'Macd',
       'datetime', '^DJI', '^GSPC', '^NDX', '^IXIC', '^SOX', 'input_ids',
       'attention_mask', 'section_dummy', 'publication_dummy'],
      dtype='object')

In [6]:
# Only contain selected features
df = df[feature]
df.columns

Index(['today_return', '^DJI', '^GSPC', '^NDX', '^IXIC', '^SOX',
       '1_day_return_cate', 'datetime'],
      dtype='object')

## check data

In [7]:
df.isnull().sum()

today_return         0
^DJI                 0
^GSPC                0
^NDX                 0
^IXIC                0
^SOX                 0
1_day_return_cate    0
datetime             0
dtype: int64

In [8]:
df = df.dropna()
df = df.reset_index(drop=True)
df.isnull().sum()

today_return         0
^DJI                 0
^GSPC                0
^NDX                 0
^IXIC                0
^SOX                 0
1_day_return_cate    0
datetime             0
dtype: int64

## (2) Time Period Selection

In [9]:
# We use index to filter for time periods
df = df[(df['datetime']> time_start) & (df['datetime'] < time_end)]

# Drop datetime after using it
df.drop(columns=['datetime'], inplace=True)
df.shape

(1070, 7)

## (3) Transform str back to list

In [10]:
df.isnull().sum()

today_return         0
^DJI                 0
^GSPC                0
^NDX                 0
^IXIC                0
^SOX                 0
1_day_return_cate    0
dtype: int64

In [11]:
# # 将字符串转换回列表的函数
# def string_to_list(s):
#     return ast.literal_eval(s)

# # 将列中的字符串转换回列表
# df['input_ids'] = df['input_ids'].apply(string_to_list)
# df['attention_mask'] = df['attention_mask'].apply(string_to_list)
# df['section_dummy'] = df['section_dummy'].apply(string_to_list)
# df['publication_dummy'] = df['publication_dummy'].apply(string_to_list)

## (4) Train_test_split

In [12]:
# 1. Set up X, y
to_remove_list = ['datetime', '1_day_return_cate']

# Filter out values in to_remove_list
filtered_list = [x for x in feature if x not in to_remove_list]

X = df[filtered_list]
y = df['1_day_return_cate']

In [13]:
# print(X['section_dummy'])
# print(type(X['section_dummy'][0]))
# temp_array = np.array(X['section_dummy'][0])
# print(temp_array.dtype)

# def int_to_float(int_list):
#   float_list = np.array(int_list, dtype=np.float32)
#   return float_list
# X['section_dummy'] = X['section_dummy'].apply(int_to_float)
# X['publication_dummy'] = X['publication_dummy'].apply(int_to_float)\
# temp_array = np.array(X['section_dummy'][0])
# print(temp_array.dtype)


In [14]:
# def int_to_float(int_list):
#   float_list = np.array(int_list, dtype=np.float32)
#   return float_list
# X['section_dummy'] = X['section_dummy'].apply(int_to_float)
# X['publication_dummy'] = X['publication_dummy'].apply(int_to_float)

In [15]:
# Check X, y shape
print('X:', X.shape)
print('y:', y.shape)

X: (1070, 6)
y: (1070,)


In [16]:
# 2. train_test_split
# val dataset for final examination

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=config['test_ratio'], random_state=config['seed'], shuffle=config['shuffle'])
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=config['valid_ratio'], random_state=config['seed'], shuffle=config['shuffle'])
X_train


Unnamed: 0,today_return,^DJI,^GSPC,^NDX,^IXIC,^SOX
0,0.008837,-0.014739,-0.012531,0.002854,0.001111,0.008979
1,0.002185,0.000651,0.001455,-0.006175,-0.005370,-0.012003
2,-0.004971,-0.014475,-0.010663,0.007758,0.004570,-0.011092
3,-0.010057,-0.022161,-0.021271,-0.009984,-0.009917,-0.010667
4,-0.000764,-0.010456,-0.012302,-0.015069,-0.016601,-0.019801
...,...,...,...,...,...,...
679,0.007046,0.002379,0.002530,0.003512,0.001722,-0.001817
680,0.000088,-0.000569,-0.000482,-0.003944,-0.002008,0.005629
681,-0.013634,-0.003424,-0.005176,-0.011679,-0.012064,-0.009262
682,0.009092,0.006548,0.004694,0.007195,0.006648,0.003821


## (4.5) Resampling

In [17]:
# from sklearn.utils import resample

# # Train set
# train_df = pd.concat([X_train, y_train], axis=1)

# def undersample_majority_class(df, class_column):
#     minority_class = df[class_column].value_counts().idxmin()
#     majority_class = df[class_column].value_counts().idxmax()

#     minority_samples = df[df[class_column] == minority_class]
#     majority_samples = df[df[class_column] == majority_class]

#     majority_samples_undersampled = resample(majority_samples, replace=False, n_samples=len(minority_samples))
#     undersampled_df = pd.concat([minority_samples, majorit y_samples_undersampled])

#     return undersampled_df

# target_name = 't+1_day_trend_cate'
# # 下採樣多數類別
# undersampled_train_df = undersample_majority_class(train_df, target_name)

# # 分割回 X_train 和 y_train
# X_train = undersampled_train_df.drop(target_name, axis=1)
# y_train = undersampled_train_df[target_name]

# # 打印下採樣後的類別數量
# print("Undersampled Train Class Counts:\n", y_train.value_counts())

# # Val Set
# val_df = pd.concat([X_val, y_val], axis=1)
# undersampled_val_df = undersample_majority_class(val_df, target_name)
# X_val = undersampled_val_df.drop(target_name, axis=1)
# y_val = undersampled_val_df[target_name]

# # Test Set
# test_df = pd.concat([X_test, y_test], axis=1)
# undersampled_test_df = undersample_majority_class(test_df, target_name)
# X_test = undersampled_test_df.drop(target_name, axis=1)
# y_test = undersampled_test_df[target_name]

## (5) Scaler

In [18]:
scale_feature = [
    # X_2
    # 1. tech indicator
    # 'Open',
    # 'High',
    # 'Low',
    # 'Close',
    # 'Volume',
    # 'Dividends',
    # 'Stock Splits',
    # 'Today_trend',
    # 'Today_trend_cate',
    # 'Sma',
    # 'Rsi',
    # 'Kd',
    # 'Ema_12',
    # 'Ema_26',
    # 'Macd',

    # 2. market index
    # '^DJI',
    '^GSPC',
    # '^NDX',
    # '^IXIC',
    # '^SOX',
    # 'datetime'
    ]

def CustomScaler(X_train, X_val, X_test):
  scaler = MinMaxScaler()
  for i in scale_feature:

    # 對特定欄位進行標準化
    X_train_scaled = scaler.fit_transform(X_train[[i]])
    X_val_scaled = scaler.transform(X_val[[i]])
    X_test_scaled = scaler.transform(X_test[[i]])

    # 將標準化後的值重新賦值給 DataFrame
    X_train[i] = X_train_scaled
    X_val[i] = X_val_scaled
    X_test[i] = X_test_scaled

  return X_train, X_val, X_test

X_train, X_val, X_test = CustomScaler(X_train, X_val, X_test)

X_train

Unnamed: 0,today_return,^DJI,^GSPC,^NDX,^IXIC,^SOX
0,0.008837,-0.014739,0.377236,0.002854,0.001111,0.008979
1,0.002185,0.000651,0.578553,-0.006175,-0.005370,-0.012003
2,-0.004971,-0.014475,0.404126,0.007758,0.004570,-0.011092
3,-0.010057,-0.022161,0.251419,-0.009984,-0.009917,-0.010667
4,-0.000764,-0.010456,0.380522,-0.015069,-0.016601,-0.019801
...,...,...,...,...,...,...
679,0.007046,0.002379,0.594032,0.003512,0.001722,-0.001817
680,0.000088,-0.000569,0.550677,-0.003944,-0.002008,0.005629
681,-0.013634,-0.003424,0.483104,-0.011679,-0.012064,-0.009262
682,0.009092,0.006548,0.625183,0.007195,0.006648,0.003821


## (6) Check number

In [19]:
def calculate_class_stats(y):
    class_counts = y.value_counts()
    total_samples = len(y)
    class_ratios = class_counts / total_samples
    return class_counts, class_ratios

# 計算類別數量和比例
train_class_counts, train_class_ratios = calculate_class_stats(y_train)
val_class_counts, val_class_ratios = calculate_class_stats(y_val)
test_class_counts, test_class_ratios = calculate_class_stats(y_test)

# 創建包含數量和比例的 DataFrame
class_stats = pd.DataFrame({
    'Train Count': train_class_counts,
    'Train Ratio': train_class_ratios,
    'Validation Count': val_class_counts,
    'Validation Ratio': val_class_ratios,
    'Test Count': test_class_counts,
    'Test Ratio': test_class_ratios
})

# 打印 DataFrame
print(class_stats)


   Train Count  Train Ratio  Validation Count  Validation Ratio  Test Count  \
1          382      0.55848                95          0.552326         122   
0          302      0.44152                77          0.447674          92   

   Test Ratio  
1    0.570093  
0    0.429907  


In [20]:
# Time period
print('Time Period')
print('From:', time_start)
print('To:', time_end, '\n')

# Sample size
print('Sample size:', X.shape[0])
print('Feature:', X.columns, '\n')
print('Target:', y.name, '\n')
print('Train: Val: Test = 0.81: 0.1: 0.09=', X_train.shape[0], X_test.shape[0], X_val.shape[0])

Time Period
From: 2016-01-01T00:00:00
To: 2020-04-02T00:00:00 

Sample size: 1070
Feature: Index(['today_return', '^DJI', '^GSPC', '^NDX', '^IXIC', '^SOX'], dtype='object') 

Target: 1_day_return_cate 

Train: Val: Test = 0.81: 0.1: 0.09= 684 214 172


# Model

## (1) Dataset & Dataloader

In [21]:
# Dataset
# X_1 =['input_ids', 'attention_mask', 'section_dummy', 'publication_dummy']


class CustomDataset(Dataset):
    def __init__(self, X, y, config):
        # X_2
        self.X_2 = torch.tensor(X.values, dtype=torch.float)

        # y
        self.y = torch.tensor(y.values, dtype=torch.float)

        # other setting
        self.len = X.shape[0]
        self.seq_length = config['seq_length']

    def __getitem__(self,idx):
        # X_2
        X_2 = self.X_2[idx : idx + self.seq_length]

        # 3. y
        y = self.y[idx + self.seq_length - 1]

        return X_2, y

    def __len__(self):
        return self.len - self.seq_length

In [22]:
# DataLoader
train_dataset = CustomDataset(X_train, y_train, config)
val_dataset = CustomDataset(X_val, y_val, config)
test_dataset = CustomDataset(X_test, y_test, config)

train_loader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=config['shuffle'], pin_memory=True)
val_loader = DataLoader(val_dataset, batch_size=config['batch_size'], shuffle=config['shuffle'], pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=config['batch_size'], shuffle=config['shuffle'], pin_memory=True)

In [23]:
# Check loader output
for batch in train_loader:
    X_2, y = batch

    # 打印批次数据的形状，以确保它们符合预期
    # print("Input IDs shape:", input_ids.shape)
    # print("Attention Mask shape:", attention_mask.shape)
    # print("Section shape:", section.shape)
    # print("Publication shape:", publication.shape)
    print("X_2 shape:", X_2.shape)
    print("Labels shape:", y.shape)

    # print("Input IDs:", input_ids)
    # print("Attention Mask:", attention_mask)
    # print("Section:", section)
    # print("Publication:", publication)
    print("X_2:", X_2)
    print("Labels:", y)

    break


X_2 shape: torch.Size([4, 5, 6])
Labels shape: torch.Size([4])
X_2: tensor([[[ 8.8365e-03, -1.4739e-02,  3.7724e-01,  2.8539e-03,  1.1107e-03,
           8.9786e-03],
         [ 2.1846e-03,  6.5083e-04,  5.7855e-01, -6.1746e-03, -5.3702e-03,
          -1.2003e-02],
         [-4.9706e-03, -1.4475e-02,  4.0413e-01,  7.7579e-03,  4.5702e-03,
          -1.1092e-02],
         [-1.0057e-02, -2.2161e-02,  2.5142e-01, -9.9836e-03, -9.9168e-03,
          -1.0667e-02],
         [-7.6374e-04, -1.0456e-02,  3.8052e-01, -1.5069e-02, -1.6601e-02,
          -1.9801e-02]],

        [[ 2.1846e-03,  6.5083e-04,  5.7855e-01, -6.1746e-03, -5.3702e-03,
          -1.2003e-02],
         [-4.9706e-03, -1.4475e-02,  4.0413e-01,  7.7579e-03,  4.5702e-03,
          -1.1092e-02],
         [-1.0057e-02, -2.2161e-02,  2.5142e-01, -9.9836e-03, -9.9168e-03,
          -1.0667e-02],
         [-7.6374e-04, -1.0456e-02,  3.8052e-01, -1.5069e-02, -1.6601e-02,
          -1.9801e-02],
         [-3.9992e-03,  2.4366e-03,  5.

## (2) Model Architecture

In [24]:
class MyModel(nn.Module):
    def __init__(self, config):
        super(MyModel, self).__init__()
        self.seq_length = config['seq_length']
        self.lstm_1 = nn.LSTM(6, 32, num_layers=1, batch_first=True, bidirectional=False)
        self.sequential = nn.Sequential(
            nn.Linear(32, 1)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, X_2):
        h_tech_list = []
        # for i in range(self.seq_length):
        #   out = X_2[:, i, :]
        #   h_tech_list.append(out)
        # h_tech = torch.stack(h_tech_list, dim=1)  # 5 [64, 500] tensor to 1 [64, 5, 500] tensor
        h_tech = X_2


        # 3. LSTM
        out, _ = self.lstm_1(h_tech)
        out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term
        out = self.sequential(out)
        final_out = self.sigmoid(out)

        return final_out


## (3) Trainer

### 1 BCELoss

In [25]:
def trainer(model, train_loader, val_loader, config, device):

    criterion = config['criterion']
    optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])

    writer = SummaryWriter()  # Writer of tensoboard.
    n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

    # 1. Training
    for epoch in range(n_epochs):
      model.train()  # Set the model to training mode
      loss_record = []

      train_pbar = tqdm(train_loader, position=0, leave=True)  # tqdm is a package to visualize your training progress.
      for X_2, y in train_loader:
        optimizer.zero_grad()  # Set gradient to zero

        # Forward pass
        X_2, y = X_2.to(device), y.to(device)
        pred = model(X_2)

        y = y.reshape(-1, 1)
        loss = criterion(pred, y)
        loss.backward()                     # Compute gradient(backpropagation).
        optimizer.step()                    # Update parameters.
        step += 1
        loss_record.append(loss.detach().item())

        # Display current epoch number and loss on tqdm progress bar.
        train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
        train_pbar.set_postfix({'loss': loss.detach().item()})

      mean_train_loss = sum(loss_record)/len(loss_record)
      writer.add_scalar('Loss/train', mean_train_loss, step)

      # 2. Evaluation
      model.eval() # Set your model to evaluation mode.
      loss_record = []
      for X_2, y in val_loader:
          X_2, y = X_2.to(device), y.to(device)
          with torch.no_grad():
              pred = model(X_2)
              y = y.reshape(-1, 1)
              loss = criterion(pred, y)
          loss_record.append(loss.item())

      # Mean
      mean_valid_loss = sum(loss_record)/len(loss_record)
      print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
      writer.add_scalar('Loss/valid', mean_valid_loss, step)

      # 3. Judge of saving model
      if mean_valid_loss < best_loss:
          best_loss = mean_valid_loss
          torch.save(model.state_dict(), config_2['save_path']) # Save your best model
          print('Saving model with loss {:.3f}...'.format(best_loss))
          early_stop_count = 0
      else:
          early_stop_count += 1

      if early_stop_count >= config['early_stop']:
          print('\nModel is not improving, so we halt the training session.')
          return


### 2 Balanced Cross Entropy

In [26]:
print(train_class_counts)

print(train_class_counts[0])

1    382
0    302
Name: 1_day_return_cate, dtype: int64
302


In [27]:
X_train.shape[0]

684

In [28]:
# # # 2. training set-based

# # # smooth factor
# # smooth = 1e-15

# # total_samples = X_train.shape[0]
# # pos_weight = (total_samples) / (train_class_counts[1]*2 + smooth)
# # neg_weight = (total_samples) / (train_class_counts[0]*2 + smooth)

# # Beta is usealess, Alpha is for adjusting Balance power.
# beta = 2
# alpha = 1.28 # 1.25 < alpha < 1.27
# class BalancedBCELoss(nn.Module):
#     def __init__(self, beta):
#         super(BalancedBCELoss, self).__init__()

#     def forward(self, input, target):
#         # Number counts
#         num_pos = torch.sum(target == 1).float()
#         num_neg = torch.sum(target == 0).float()

#         # smooth factor
#         smooth = 1e-15

#         # Weight Calculation
#         # 1. Batch-based
#         total_samples = num_pos + num_neg
#         pos_weight = (total_samples) / (num_pos*beta + smooth) * alpha
#         neg_weight = (total_samples) / (num_neg*beta + smooth) * (1/alpha)

#         pos_weight = pos_weight

#         # 根据目标张量的值创建判断式
#         weights = torch.where(target == 1, torch.tensor(pos_weight), torch.tensor(neg_weight))

#         # 使用nn.BCEWithLogitsLoss，并应用平衡权重
#         criterion = nn.BCEWithLogitsLoss(weight=weights)

#         return criterion(input, target)

In [29]:
# def trainer(model, train_loader, val_loader, config, device):

#     criterion = BalancedBCELoss(beta=beta)
#     optimizer = torch.optim.Adam(model.parameters(), lr=config['learning_rate'])

#     writer = SummaryWriter()  # Writer of tensoboard.
#     n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

#     # 1. Training
#     for epoch in range(n_epochs):
#       model.train()  # Set the model to training mode
#       loss_record = []

#       train_pbar = tqdm(train_loader, position=0, leave=True)  # tqdm is a package to visualize your training progress.
#       for input_ids, attention_mask, section, publication, X_2, y in train_loader:
#         optimizer.zero_grad()  # Set gradient to zero

#         # Forward pass
#         input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#         pred = model(input_ids, attention_mask, section, publication, X_2)

#         y = y.reshape(-1, 1)
#         loss = criterion(pred, y)
#         loss.backward()                     # Compute gradient(backpropagation).
#         optimizer.step()                    # Update parameters.
#         step += 1
#         loss_record.append(loss.detach().item())

#         # Display current epoch number and loss on tqdm progress bar.
#         train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
#         train_pbar.set_postfix({'loss': loss.detach().item()})

#       mean_train_loss = sum(loss_record)/len(loss_record)
#       writer.add_scalar('Loss/train', mean_train_loss, step)

#       # 2. Evaluation
#       model.eval() # Set your model to evaluation mode.
#       loss_record = []
#       for input_ids, attention_mask, section, publication, X_2, y in val_loader:
#           input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#           with torch.no_grad():
#               pred = model(input_ids, attention_mask, section, publication, X_2)
#               y = y.reshape(-1, 1)
#               loss = criterion(pred, y)
#           loss_record.append(loss.item())

#       # Mean
#       mean_valid_loss = sum(loss_record)/len(loss_record)
#       print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
#       writer.add_scalar('Loss/valid', mean_valid_loss, step)

#       # 3. Judge of saving model
#       if mean_valid_loss < best_loss:
#           best_loss = mean_valid_loss
#           torch.save(model.state_dict(), config_2['save_path']) # Save your best model
#           print('Saving model with loss {:.3f}...'.format(best_loss))
#           early_stop_count = 0
#       else:
#           early_stop_count += 1

#       if early_stop_count >= config['early_stop']:
#           print('\nModel is not improving, so we halt the training session.')
#           return


### 3 Training by parts (分段訓練)

In [30]:
# # 嘗試分段訓練

# def trainer1(model, train_loader, val_loader, config, device):

#     criterion = config['criterion']

#     # Stage 1: Train base_model and fc1
#     optimizer_stage1 = torch.optim.Adam([
#         {'params': model.base_model.parameters(), 'lr': config['learning_rate']},
#         {'params': model.fc1.parameters(), 'lr': config['learning_rate']}
#     ])

#     # Writer of tensoboard.
#     writer = SummaryWriter()

#     # Not sure what's these for
#     n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

#     for epoch in range(n_epochs):
#       model.train()  # Set the model to training mode
#       loss_record = []

#       # tqdm is a package to visualize your training progress.
#       train_pbar = tqdm(train_loader, position=0, leave=True)
#       for input_ids_tensor, attention_mask_tensor, X_2, y in train_loader:
#         # Set gradient to zero
#         optimizer_stage1.zero_grad()

#         # Forward pass
#         input_ids_tensor, attention_mask_tensor, X_2, y = input_ids_tensor.to(device), attention_mask_tensor.to(device), X_2.to(device), y.to(device)
#         pred = model(input_ids_tensor, attention_mask_tensor, X_2)

#         y = y.reshape(-1, 1)

#         loss = criterion(pred, y)
#         loss.backward()                     # Compute gradient(backpropagation).
#         optimizer_stage1.step()                    # Update parameters.
#         step += 1
#         loss_record.append(loss.detach().item())

#         # Display current epoch number and loss on tqdm progress bar.
#         train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
#         train_pbar.set_postfix({'loss': loss.detach().item()})

#       mean_train_loss = sum(loss_record)/len(loss_record)
#       writer.add_scalar('Loss/train', mean_train_loss, step)

#       model.eval() # Set your model to evaluation mode.
#       loss_record = []
#       for input_ids_tensor, attention_mask_tensor, X_2, y in val_loader:
#           input_ids_tensor, attention_mask_tensor, X_2, y = input_ids_tensor.to(device), attention_mask_tensor.to(device), X_2.to(device), y.to(device)
#           with torch.no_grad():
#               pred = model(input_ids_tensor, attention_mask_tensor, X_2)
#               y = y.reshape(-1, 1)
#               loss = criterion(pred, y)

#           loss_record.append(loss.item())

#       mean_valid_loss = sum(loss_record)/len(loss_record)
#       print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
#       writer.add_scalar('Loss/valid', mean_valid_loss, step)

#       if mean_valid_loss < best_loss:
#           best_loss = mean_valid_loss
#           torch.save(model.state_dict(), config_2['save_path']) # Save your best model
#           print('Saving model with loss {:.3f}...'.format(best_loss))
#           early_stop_count = 0
#       else:
#           early_stop_count += 1

#       if early_stop_count >= config['early_stop']:
#           print('\nModel is not improving, so we halt the training session.')
#           return


# def trainer2(model, train_loader, val_loader, config, device):

#     criterion = config['criterion']

#     # Stage 2: Train lstm1 and fc2
#     optimizer_stage2 = torch.optim.Adam([
#         {'params': model.lstm1.parameters(), 'lr': config['learning_rate']},
#         {'params': model.fc2.parameters(), 'lr': config['learning_rate']}
#     ])

#     # Writer of tensoboard.
#     writer = SummaryWriter()

#     # Not sure what's these for
#     n_epochs, best_loss, step, early_stop_count = config['n_epochs'], math.inf, 0, 0

#     for epoch in range(n_epochs):
#       model.train()  # Set the model to training mode
#       loss_record = []

#       # tqdm is a package to visualize your training progress.
#       train_pbar = tqdm(train_loader, position=0, leave=True)
#       for input_ids_tensor, attention_mask_tensor, X_2, y in train_loader:
#         # Set gradient to zero
#         optimizer_stage2.zero_grad()

#         # Forward pass
#         input_ids_tensor, attention_mask_tensor, X_2, y = input_ids_tensor.to(device), attention_mask_tensor.to(device), X_2.to(device), y.to(device)
#         pred = model(input_ids_tensor, attention_mask_tensor, X_2)

#         y = y.reshape(-1, 1)

#         loss = criterion(pred, y)
#         loss.backward()                     # Compute gradient(backpropagation).
#         optimizer_stage2.step()                    # Update parameters.
#         step += 1
#         loss_record.append(loss.detach().item())

#         # Display current epoch number and loss on tqdm progress bar.
#         train_pbar.set_description(f'Epoch [{epoch+1}/{n_epochs}]')
#         train_pbar.set_postfix({'loss': loss.detach().item()})

#       mean_train_loss = sum(loss_record)/len(loss_record)
#       writer.add_scalar('Loss/train', mean_train_loss, step)

#       model.eval() # Set your model to evaluation mode.
#       loss_record = []
#       for input_ids_tensor, attention_mask_tensor, X_2, y in val_loader:
#           input_ids_tensor, attention_mask_tensor, X_2, y = input_ids_tensor.to(device), attention_mask_tensor.to(device), X_2.to(device), y.to(device)
#           with torch.no_grad():
#               pred = model(input_ids_tensor, attention_mask_tensor, X_2)
#               y = y.reshape(-1, 1)
#               loss = criterion(pred, y)

#           loss_record.append(loss.item())

#       mean_valid_loss = sum(loss_record)/len(loss_record)
#       print(f'Epoch [{epoch+1}/{n_epochs}]: Train loss: {mean_train_loss:.4f}, Valid loss: {mean_valid_loss:.4f}')
#       writer.add_scalar('Loss/valid', mean_valid_loss, step)

#       if mean_valid_loss < best_loss:
#           best_loss = mean_valid_loss
#           torch.save(model.state_dict(), config_2['save_path']) # Save your best model
#           print('Saving model with loss {:.3f}...'.format(best_loss))
#           early_stop_count = 0
#       else:
#           early_stop_count += 1

#       if early_stop_count >= config['early_stop']:
#           print('\nModel is not improving, so we halt the training session.')
#           return


## (4) Load Model

1. Load pretrain model

In [31]:
# 載入預訓練模型
# base_model = BertModel.from_pretrained('bert-base-uncased')
# base_model = PreModel(base_model)
# base_model.load_state_dict(torch.load(config_2['pretrained_model_path']))

# Parameter
# section_length = len(df['section_dummy'][0])
# publication_length = len(df['publication_dummy'][0])
# X_2_length = len(feature) - 6

2-1. Initial Model

In [32]:
model = MyModel(config)
model.to(device)

MyModel(
  (lstm_1): LSTM(6, 32, batch_first=True)
  (sequential): Sequential(
    (0): Linear(in_features=32, out_features=1, bias=True)
  )
  (sigmoid): Sigmoid()
)

2-2. Contunue training from model.ckpt

In [33]:
# model = MyModel(base_model, config, section_length, publication_length, X_2_length)
# model.load_state_dict(torch.load(config_2['continue_model_path']))
# model.to(device)

## (5) Require_grad

In [34]:

# # Freeze all layers
# for param in model.pretrained_model.parameters():
#   param.requires_grad = False

# for param in model.pretrained_model.base_model.encoder.layer[11].parameters():
#     param.requires_grad = True

# for param in model.pretrained_model.fc1.parameters():
#     param.requires_grad = True

# # for param in model.fc1.parameters():
# #     param.requires_grad = True

# # Check requires_grad status
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)

In [35]:
# Freeze all layers
# for param in model.base_model.parameters():
#   param.requires_grad = False

# Unfreeze part of layers
# for param in model.base_model.encoder.layer[6].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[7].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[8].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[9].parameters():
#     param.requires_grad = True

# for param in model.base_model.encoder.layer[10].parameters():
#     param.requires_grad = True

# for param in model.base_model.base_model.encoder.layer[11].parameters():
#     param.requires_grad = True

# for param in model.base_model.fc1.parameters():
#     param.requires_grad = True

# for param in model.base_model.fc2.parameters():
#     param.requires_grad = True

# for param in model.base_model.fc3.parameters():
    # param.requires_grad = True

# Check requires_grad status
for name, param in model.named_parameters():
    print(name, param.requires_grad)

lstm_1.weight_ih_l0 True
lstm_1.weight_hh_l0 True
lstm_1.bias_ih_l0 True
lstm_1.bias_hh_l0 True
sequential.0.weight True
sequential.0.bias True


# Training

In [None]:
# 全部訓練
trainer(model, train_loader, val_loader, config, device)

# # 分段訓練
# trainer2(model, train_loader, val_loader, config, device)
# trainer1(model, train_loader, val_loader, config, device)

Epoch [1/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.742]

Epoch [1/3000]: Train loss: 0.6892, Valid loss: 0.6885
Saving model with loss 0.688...


Epoch [1/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.742]
Epoch [2/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.747]

Epoch [2/3000]: Train loss: 0.6874, Valid loss: 0.6888


Epoch [2/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.747]
Epoch [3/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.746]

Epoch [3/3000]: Train loss: 0.6873, Valid loss: 0.6887


Epoch [3/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.746]
Epoch [4/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.745]

Epoch [4/3000]: Train loss: 0.6872, Valid loss: 0.6886


Epoch [4/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.745]
Epoch [5/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.744]

Epoch [5/3000]: Train loss: 0.6871, Valid loss: 0.6885
Saving model with loss 0.688...


Epoch [5/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.744]
Epoch [6/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.744]

Epoch [6/3000]: Train loss: 0.6870, Valid loss: 0.6884
Saving model with loss 0.688...


Epoch [6/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.744]
Epoch [7/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.743]

Epoch [7/3000]: Train loss: 0.6868, Valid loss: 0.6883
Saving model with loss 0.688...


Epoch [7/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.743]
Epoch [8/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.742]

Epoch [8/3000]: Train loss: 0.6867, Valid loss: 0.6881
Saving model with loss 0.688...


Epoch [8/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.742]
Epoch [9/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.741]

Epoch [9/3000]: Train loss: 0.6866, Valid loss: 0.6880
Saving model with loss 0.688...


Epoch [9/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.741]
Epoch [10/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.74] 

Epoch [10/3000]: Train loss: 0.6864, Valid loss: 0.6878
Saving model with loss 0.688...


Epoch [10/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.74]
Epoch [11/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.739]

Epoch [11/3000]: Train loss: 0.6862, Valid loss: 0.6875
Saving model with loss 0.687...


Epoch [11/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.739]
Epoch [12/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.737]

Epoch [12/3000]: Train loss: 0.6859, Valid loss: 0.6871
Saving model with loss 0.687...


Epoch [12/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.737]
Epoch [13/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.735]

Epoch [13/3000]: Train loss: 0.6855, Valid loss: 0.6865
Saving model with loss 0.686...


Epoch [13/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.735]
Epoch [14/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.732]

Epoch [14/3000]: Train loss: 0.6850, Valid loss: 0.6857
Saving model with loss 0.686...


Epoch [14/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.732]
Epoch [15/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.73] 

Epoch [15/3000]: Train loss: 0.6843, Valid loss: 0.6847
Saving model with loss 0.685...


Epoch [15/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.73]
Epoch [16/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.727]

Epoch [16/3000]: Train loss: 0.6836, Valid loss: 0.6840
Saving model with loss 0.684...


Epoch [16/3000]:   0%|          | 0/170 [00:03<?, ?it/s, loss=0.727]
Epoch [17/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.724]

Epoch [17/3000]: Train loss: 0.6827, Valid loss: 0.6836
Saving model with loss 0.684...


Epoch [17/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.724]
Epoch [18/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.72] 

Epoch [18/3000]: Train loss: 0.6819, Valid loss: 0.6838


Epoch [18/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.72]
Epoch [19/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.715]

Epoch [19/3000]: Train loss: 0.6811, Valid loss: 0.6843


Epoch [19/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.715]
Epoch [20/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.71] 

Epoch [20/3000]: Train loss: 0.6803, Valid loss: 0.6853


Epoch [20/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.71]
Epoch [21/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.705]

Epoch [21/3000]: Train loss: 0.6795, Valid loss: 0.6866


Epoch [21/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.705]
Epoch [22/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.699]

Epoch [22/3000]: Train loss: 0.6788, Valid loss: 0.6882


Epoch [22/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.699]
Epoch [23/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.693]

Epoch [23/3000]: Train loss: 0.6780, Valid loss: 0.6900


Epoch [23/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.693]
Epoch [24/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.688]

Epoch [24/3000]: Train loss: 0.6773, Valid loss: 0.6920


Epoch [24/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.688]
Epoch [25/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.684]

Epoch [25/3000]: Train loss: 0.6768, Valid loss: 0.6939


Epoch [25/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.684]
Epoch [26/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.68] 

Epoch [26/3000]: Train loss: 0.6763, Valid loss: 0.6957


Epoch [26/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.68]
Epoch [27/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.677]

Epoch [27/3000]: Train loss: 0.6759, Valid loss: 0.6973


Epoch [27/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.677]
Epoch [28/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.675]

Epoch [28/3000]: Train loss: 0.6755, Valid loss: 0.6987


Epoch [28/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.675]
Epoch [29/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.673]

Epoch [29/3000]: Train loss: 0.6752, Valid loss: 0.6999


Epoch [29/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.673]
Epoch [30/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.671]

Epoch [30/3000]: Train loss: 0.6749, Valid loss: 0.7011


Epoch [30/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.671]
Epoch [31/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.67] 

Epoch [31/3000]: Train loss: 0.6746, Valid loss: 0.7021


Epoch [31/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.67]
Epoch [32/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.669]

Epoch [32/3000]: Train loss: 0.6744, Valid loss: 0.7031


Epoch [32/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.669]
Epoch [33/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.668]

Epoch [33/3000]: Train loss: 0.6741, Valid loss: 0.7040


Epoch [33/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.668]
Epoch [34/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.668]

Epoch [34/3000]: Train loss: 0.6738, Valid loss: 0.7050


Epoch [34/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.668]
Epoch [35/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]

Epoch [35/3000]: Train loss: 0.6736, Valid loss: 0.7059


Epoch [35/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]
Epoch [36/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.667]

Epoch [36/3000]: Train loss: 0.6733, Valid loss: 0.7069


Epoch [36/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]
Epoch [37/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.667]

Epoch [37/3000]: Train loss: 0.6730, Valid loss: 0.7080


Epoch [37/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]
Epoch [38/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]

Epoch [38/3000]: Train loss: 0.6727, Valid loss: 0.7091


Epoch [38/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]
Epoch [39/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]

Epoch [39/3000]: Train loss: 0.6724, Valid loss: 0.7104


Epoch [39/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]
Epoch [40/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.668]

Epoch [40/3000]: Train loss: 0.6720, Valid loss: 0.7118


Epoch [40/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.668]
Epoch [41/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.669]

Epoch [41/3000]: Train loss: 0.6717, Valid loss: 0.7135


Epoch [41/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.669]
Epoch [42/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.67] 

Epoch [42/3000]: Train loss: 0.6712, Valid loss: 0.7154


Epoch [42/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.67]
Epoch [43/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.673]

Epoch [43/3000]: Train loss: 0.6707, Valid loss: 0.7177


Epoch [43/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.673]
Epoch [44/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.675]

Epoch [44/3000]: Train loss: 0.6702, Valid loss: 0.7204


Epoch [44/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.675]
Epoch [45/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.679]

Epoch [45/3000]: Train loss: 0.6696, Valid loss: 0.7236


Epoch [45/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.679]
Epoch [46/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.682]

Epoch [46/3000]: Train loss: 0.6689, Valid loss: 0.7273


Epoch [46/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.682]
Epoch [47/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.684]

Epoch [47/3000]: Train loss: 0.6682, Valid loss: 0.7316


Epoch [47/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.684]
Epoch [48/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.686]

Epoch [48/3000]: Train loss: 0.6675, Valid loss: 0.7363


Epoch [48/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.686]
Epoch [49/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.687]

Epoch [49/3000]: Train loss: 0.6667, Valid loss: 0.7415


Epoch [49/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.687]
Epoch [50/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.687]

Epoch [50/3000]: Train loss: 0.6660, Valid loss: 0.7467


Epoch [50/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.687]
Epoch [51/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.685]

Epoch [51/3000]: Train loss: 0.6652, Valid loss: 0.7517


Epoch [51/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.685]
Epoch [52/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.682]

Epoch [52/3000]: Train loss: 0.6644, Valid loss: 0.7561


Epoch [52/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.682]
Epoch [53/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.679]

Epoch [53/3000]: Train loss: 0.6636, Valid loss: 0.7598


Epoch [53/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.679]
Epoch [54/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.674]

Epoch [54/3000]: Train loss: 0.6629, Valid loss: 0.7628


Epoch [54/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.674]
Epoch [55/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.67] 

Epoch [55/3000]: Train loss: 0.6621, Valid loss: 0.7650


Epoch [55/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.67]
Epoch [56/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.667]

Epoch [56/3000]: Train loss: 0.6615, Valid loss: 0.7665


Epoch [56/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.667]
Epoch [57/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.664]

Epoch [57/3000]: Train loss: 0.6608, Valid loss: 0.7676


Epoch [57/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.664]
Epoch [58/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.663]

Epoch [58/3000]: Train loss: 0.6602, Valid loss: 0.7683


Epoch [58/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.663]
Epoch [59/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]

Epoch [59/3000]: Train loss: 0.6596, Valid loss: 0.7688


Epoch [59/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]
Epoch [60/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]

Epoch [60/3000]: Train loss: 0.6591, Valid loss: 0.7690


Epoch [60/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]
Epoch [61/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]

Epoch [61/3000]: Train loss: 0.6586, Valid loss: 0.7692


Epoch [61/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]
Epoch [62/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]

Epoch [62/3000]: Train loss: 0.6583, Valid loss: 0.7693


Epoch [62/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]
Epoch [63/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]

Epoch [63/3000]: Train loss: 0.6579, Valid loss: 0.7694


Epoch [63/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]
Epoch [64/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]

Epoch [64/3000]: Train loss: 0.6576, Valid loss: 0.7696


Epoch [64/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.662]
Epoch [65/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.661]

Epoch [65/3000]: Train loss: 0.6574, Valid loss: 0.7700


Epoch [65/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.661]
Epoch [66/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.66] 

Epoch [66/3000]: Train loss: 0.6572, Valid loss: 0.7705


Epoch [66/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.66]
Epoch [67/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.658]

Epoch [67/3000]: Train loss: 0.6570, Valid loss: 0.7712


Epoch [67/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.658]
Epoch [68/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.656]

Epoch [68/3000]: Train loss: 0.6568, Valid loss: 0.7722


Epoch [68/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.656]
Epoch [69/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.654]

Epoch [69/3000]: Train loss: 0.6565, Valid loss: 0.7733


Epoch [69/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.654]
Epoch [70/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.652]

Epoch [70/3000]: Train loss: 0.6563, Valid loss: 0.7745


Epoch [70/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.652]
Epoch [71/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.65] 

Epoch [71/3000]: Train loss: 0.6560, Valid loss: 0.7759


Epoch [71/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.65]
Epoch [72/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.648]

Epoch [72/3000]: Train loss: 0.6558, Valid loss: 0.7773


Epoch [72/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.648]
Epoch [73/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.647]

Epoch [73/3000]: Train loss: 0.6555, Valid loss: 0.7788


Epoch [73/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.647]
Epoch [74/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.645]

Epoch [74/3000]: Train loss: 0.6552, Valid loss: 0.7803


Epoch [74/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.645]
Epoch [75/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.644]

Epoch [75/3000]: Train loss: 0.6549, Valid loss: 0.7818


Epoch [75/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.644]
Epoch [76/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.643]

Epoch [76/3000]: Train loss: 0.6546, Valid loss: 0.7834


Epoch [76/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.643]
Epoch [77/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.642]

Epoch [77/3000]: Train loss: 0.6543, Valid loss: 0.7850


Epoch [77/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.642]
Epoch [78/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.64] 

Epoch [78/3000]: Train loss: 0.6539, Valid loss: 0.7867


Epoch [78/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.64]
Epoch [79/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.639]

Epoch [79/3000]: Train loss: 0.6536, Valid loss: 0.7884


Epoch [79/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.639]
Epoch [80/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.638]

Epoch [80/3000]: Train loss: 0.6533, Valid loss: 0.7901


Epoch [80/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.638]
Epoch [81/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.637]

Epoch [81/3000]: Train loss: 0.6530, Valid loss: 0.7920


Epoch [81/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.637]
Epoch [82/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.637]

Epoch [82/3000]: Train loss: 0.6526, Valid loss: 0.7938


Epoch [82/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.637]
Epoch [83/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.636]

Epoch [83/3000]: Train loss: 0.6523, Valid loss: 0.7958


Epoch [83/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.636]
Epoch [84/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.635]

Epoch [84/3000]: Train loss: 0.6520, Valid loss: 0.7978


Epoch [84/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.635]
Epoch [85/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.634]

Epoch [85/3000]: Train loss: 0.6516, Valid loss: 0.7999


Epoch [85/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.634]
Epoch [86/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.633]

Epoch [86/3000]: Train loss: 0.6513, Valid loss: 0.8021


Epoch [86/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.633]
Epoch [87/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.632]

Epoch [87/3000]: Train loss: 0.6509, Valid loss: 0.8044


Epoch [87/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.632]
Epoch [88/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.631]

Epoch [88/3000]: Train loss: 0.6506, Valid loss: 0.8068


Epoch [88/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.631]
Epoch [89/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.63] 

Epoch [89/3000]: Train loss: 0.6502, Valid loss: 0.8092


Epoch [89/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.63]
Epoch [90/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.629]

Epoch [90/3000]: Train loss: 0.6498, Valid loss: 0.8118


Epoch [90/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.629]
Epoch [91/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.629]

Epoch [91/3000]: Train loss: 0.6494, Valid loss: 0.8144


Epoch [91/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.629]
Epoch [92/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.628]

Epoch [92/3000]: Train loss: 0.6491, Valid loss: 0.8171


Epoch [92/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.628]
Epoch [93/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.627]

Epoch [93/3000]: Train loss: 0.6487, Valid loss: 0.8199


Epoch [93/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.627]
Epoch [94/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.626]

Epoch [94/3000]: Train loss: 0.6483, Valid loss: 0.8228


Epoch [94/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.626]
Epoch [95/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.625]

Epoch [95/3000]: Train loss: 0.6479, Valid loss: 0.8257


Epoch [95/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.625]
Epoch [96/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.624]

Epoch [96/3000]: Train loss: 0.6475, Valid loss: 0.8287


Epoch [96/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.624]
Epoch [97/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.623]

Epoch [97/3000]: Train loss: 0.6470, Valid loss: 0.8317


Epoch [97/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.623]
Epoch [98/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.622]

Epoch [98/3000]: Train loss: 0.6466, Valid loss: 0.8347


Epoch [98/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.622]
Epoch [99/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.622]

Epoch [99/3000]: Train loss: 0.6462, Valid loss: 0.8378


Epoch [99/3000]:   0%|          | 0/170 [00:02<?, ?it/s, loss=0.622]
Epoch [100/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.621]

Epoch [100/3000]: Train loss: 0.6458, Valid loss: 0.8409


Epoch [100/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.621]
Epoch [101/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.62] 

Epoch [101/3000]: Train loss: 0.6453, Valid loss: 0.8440


Epoch [101/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.62]
Epoch [102/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.619]

Epoch [102/3000]: Train loss: 0.6449, Valid loss: 0.8471


Epoch [102/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.619]
Epoch [103/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.618]

Epoch [103/3000]: Train loss: 0.6444, Valid loss: 0.8502


Epoch [103/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.618]
Epoch [104/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.618]

Epoch [104/3000]: Train loss: 0.6439, Valid loss: 0.8533


Epoch [104/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.618]
Epoch [105/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.617]

Epoch [105/3000]: Train loss: 0.6435, Valid loss: 0.8564


Epoch [105/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.617]
Epoch [106/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.616]

Epoch [106/3000]: Train loss: 0.6430, Valid loss: 0.8595


Epoch [106/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.616]
Epoch [107/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.616]

Epoch [107/3000]: Train loss: 0.6425, Valid loss: 0.8625


Epoch [107/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.616]
Epoch [108/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.615]

Epoch [108/3000]: Train loss: 0.6420, Valid loss: 0.8655


Epoch [108/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.615]
Epoch [109/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.615]

Epoch [109/3000]: Train loss: 0.6415, Valid loss: 0.8684


Epoch [109/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.615]
Epoch [110/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.614]

Epoch [110/3000]: Train loss: 0.6410, Valid loss: 0.8714


Epoch [110/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.614]
Epoch [111/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.614]

Epoch [111/3000]: Train loss: 0.6405, Valid loss: 0.8743


Epoch [111/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.614]
Epoch [112/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.613]

Epoch [112/3000]: Train loss: 0.6400, Valid loss: 0.8771


Epoch [112/3000]:   0%|          | 0/170 [00:01<?, ?it/s, loss=0.613]
Epoch [113/3000]:   0%|          | 0/170 [00:00<?, ?it/s, loss=0.43]

In [None]:
%reload_ext tensorboard
%tensorboard --logdir=./runs/

# Evaluate

In [None]:
# Evaluation Dataloader
con_train_loader = DataLoader(train_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)
con_val_loader = DataLoader(val_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)
con_test_loader = DataLoader(test_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)

# Evaluation mode
model.eval()
accuracy_list = []
threshold = 0.5

# 1. Train part
predicted_labels_list = []
targets_list = []

with torch.no_grad():
    for X_2, y in con_train_loader:
        X_2, y = X_2.to(device), y.to(device)
        outputs = model(X_2)
        # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
        predicted_labels = (outputs >= threshold).float()

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

accuracy = accuracy_score(targets_list, predicted_labels_list)

print('=====================================================================================================================')
print('Training Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list), '\n')

# 2. Val part
predicted_labels_list = []
targets_list = []

with torch.no_grad():
    for X_2, y in con_val_loader:
        X_2, y = X_2.to(device), y.to(device)
        outputs = model(X_2)
        # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
        predicted_labels = (outputs >= 0.5).float()

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

accuracy = accuracy_score(targets_list, predicted_labels_list)

print('=====================================================')
print('Val Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list))


# 3. Test part
predicted_labels_list = []
targets_list = []

with torch.no_grad():
    for X_2, y in con_test_loader:
        X_2, y = X_2.to(device), y.to(device)
        outputs = model(X_2)
        # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
        predicted_labels = (outputs >= 0.5).float()

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

accuracy = accuracy_score(targets_list, predicted_labels_list)

print('=====================================================')
print('Testing Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list))

print('=====================================================', '\n')
print("Accuracy [Train, Val, Test]: ", accuracy_list, '\n')
# print(model, '\n')
print('Config: ', config, '\n')
print('Feature: ', feature)
print('time_start: ', time_start, 'time_end: ', time_end)

In [None]:
# Evaluation Dataloader
con_train_loader = DataLoader(train_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)
con_val_loader = DataLoader(val_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)
con_test_loader = DataLoader(test_dataset, batch_size=1, shuffle=config['shuffle'], pin_memory=True)

model = MyModel(config)
model.load_state_dict(torch.load(config_2['save_path']))
model.to(device)

# Evaluation mode
model.eval()
accuracy_list = []
threshold = 0.5

# 1. Train part
predicted_labels_list = []
targets_list = []

with torch.no_grad():
    for X_2, y in con_train_loader:
        X_2, y = X_2.to(device), y.to(device)
        outputs = model(X_2)
        # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
        predicted_labels = (outputs >= threshold).float()

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

accuracy = accuracy_score(targets_list, predicted_labels_list)

print('=====================================================================================================================')
print('Training Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list), '\n')

# 2. Val part
predicted_labels_list = []
targets_list = []

with torch.no_grad():
    for X_2, y in con_val_loader:
        X_2, y = X_2.to(device), y.to(device)
        outputs = model(X_2)
        # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
        predicted_labels = (outputs >= 0.5).float()

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

accuracy = accuracy_score(targets_list, predicted_labels_list)

print('=====================================================')
print('Val Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list))


# 3. Test part
predicted_labels_list = []
targets_list = []

with torch.no_grad():
    for X_2, y in con_test_loader:
        X_2, y = X_2.to(device), y.to(device)
        outputs = model(X_2)
        # _, predicted_labels = torch.max(outputs, dim=1)  # 获取每个样本预测的类别索引
        predicted_labels = (outputs >= 0.5).float()

        predicted_labels_list.extend(predicted_labels.tolist())
        targets_list.extend(y.tolist())

accuracy = accuracy_score(targets_list, predicted_labels_list)

print('=====================================================')
print('Testing Result:')
print(classification_report(targets_list, predicted_labels_list))
print(confusion_matrix(targets_list, predicted_labels_list))

print('=====================================================', '\n')
print("Accuracy [Train, Val, Test]: ", accuracy_list, '\n')
# print(model, '\n')
print('Config: ', config, '\n')
print('Feature: ', feature)
print('time_start: ', time_start, 'time_end: ', time_end)

## (2) Thresholds

In [None]:
# # function of evaluation
# def evaluate_model(model, dataloader, threshold=0.5, device='cuda'):
#     predicted_labels_list = []
#     targets_list = []

#     with torch.no_grad():
#         for input_ids, attention_mask, section, publication, X_2, y in dataloader:
#             input_ids, attention_mask, section, publication, X_2, y = input_ids.to(device), attention_mask.to(device), section.to(device), publication.to(device), X_2.to(device), y.to(device)
#             outputs = model(input_ids, attention_mask, section, publication, X_2)
#             predicted_labels = (outputs >= threshold).float()

#             predicted_labels_list.extend(predicted_labels.tolist())
#             targets_list.extend(y.tolist())

#     accuracy = accuracy_score(targets_list, predicted_labels_list)

#     print('=====================================================================================================================')
#     print(str(dataloader), ' Result:')
#     print(classification_report(targets_list, predicted_labels_list))
#     print(confusion_matrix(targets_list, predicted_labels_list), '\n')

In [None]:
# accuracy_list = []
# threshold = 0.51

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.52

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.53

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.54

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.55

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.56

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.57

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

In [None]:
# accuracy_list = []
# threshold = 0.58

# evaluate_model(model, con_train_loader, threshold=threshold)
# evaluate_model(model, con_val_loader, threshold=threshold)
# evaluate_model(model, con_test_loader, threshold=threshold)

# Benchmark

In [None]:
class BenchmarkModel(nn.Module):
    def __init__(self, config, section_length, publication_length, X_2_length):
        super(BenchmarkModel, self).__init__()
        # tech
        self.fc_h_tech = nn.Linear(X_2_length, config['h_tech_size'])

        # 3. LSTM
        self.lstm_1 = nn.LSTM(config['h_size'], 2, num_layers=1, batch_first=True, bidirectional=False)
        self.sequential = nn.Sequential(
            nn.Linear(2, 1)
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids, attention_mask, section, publication, X_2):
        # h_tech
        h_tech = self.fc_h_tech(X_2)

        # 3. LSTM
        out, _ = self.lstm_1(h_tech)
        out = out[:, -1, :]  # Get the last one of LSTM output for prediction of next-term
        # print('out 1(last layer of LSTM): ', out.shape, out)
        out = self.sequential(out)
        # print('out 2(after sequnetial): ',out.shape , out)
        final_out = self.sigmoid(out)
        # print('final_out: ', final_out.shape, final_out)

        return final_out
