In [1]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

In [2]:
def set_seed(SEED=42):
    """Seed the torch (CPU and CUDA) and NumPy global RNGs and force deterministic cuDNN.

    Args:
        SEED: integer seed handed to every seeding function.
    """
    # Same seed for every generator, applied in the same order as before.
    for seed_fn in (torch.manual_seed, np.random.seed, torch.cuda.manual_seed_all):
        seed_fn(SEED)

    # Disable cuDNN autotuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Preprocess Stock Price Data

In [3]:
# Unique ticker codes listed in Page.csv, each suffixed with '.JK'.
page_df = pd.read_csv('Page.csv')
tickers = [symbol + '.JK' for symbol in page_df['ticker'].unique().tolist()]

In [15]:
start_date = '2005-04-01'
end_date   = '2025-03-31'
interval   = '1d'
# NOTE: this replaces the ticker list built from Page.csv; only MEDC.JK is downloaded.
tickers = ['MEDC.JK']

# Daily OHLCV bars. `auto_adjust` is pinned explicitly: yfinance changed its
# default to True (see the warning this cell used to print), so leaving it
# implicit makes the prices depend on the installed yfinance version.
raw = yf.download(
    tickers=tickers,
    start=start_date,
    end=end_date,
    interval=interval,
    group_by='ticker',
    auto_adjust=True,
    progress=False
)

# Move the ticker level of the column MultiIndex into the rows so the frame
# becomes long-form: one row per (Date, Ticker).
df = (
    raw
    .stack(level=0)
    .rename_axis(['Date','Ticker'])
    .reset_index()
)

df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

# Weekly bars ending on Friday, per ticker: first Open, highest High,
# lowest Low, last Close and summed Volume. Weeks with any missing
# aggregate are dropped.
weekly_ff = (
    df
    .groupby('Ticker')
    .resample('W-FRI')
    .agg({
        'Open':   'first',
        'High':   'max',
        'Low':    'min',
        'Close':  'last',
        'Volume': 'sum'
    })
    .dropna()
    .reset_index()
)

weekly_ff


YF.download() has changed argument auto_adjust default to True


  raw


Price,Ticker,Date,Open,High,Low,Close,Volume
0,MEDC.JK,2005-04-01,408.560736,429.405656,408.560736,421.067688,6336749
1,MEDC.JK,2005-04-08,429.405713,579.489169,429.405713,529.461365,206700352
2,MEDC.JK,2005-04-15,521.123391,550.306183,512.785332,521.123352,27473244
3,MEDC.JK,2005-04-22,516.954428,516.954428,462.757603,475.264557,42606369
4,MEDC.JK,2005-04-29,475.264542,475.264542,429.405670,429.405670,32741619
...,...,...,...,...,...,...,...
1032,MEDC.JK,2025-02-28,1060.000000,1060.000000,1000.000000,1000.000000,157145100
1033,MEDC.JK,2025-03-07,1020.000000,1035.000000,990.000000,1005.000000,155914900
1034,MEDC.JK,2025-03-14,1010.000000,1025.000000,985.000000,995.000000,148779500
1035,MEDC.JK,2025-03-21,1000.000000,1025.000000,900.000000,975.000000,175640800


In [16]:
# 1) week-over-week % change of the weekly Close, computed within each ticker
weekly_pct_change = weekly_ff.groupby('Ticker')['Close'].pct_change()

# 2) binary trend label: 1 when Close did not fall versus the previous week,
#    0 otherwise. The first row of each ticker has no previous Close
#    (NaN change), fails the comparison and is therefore labelled 0.
weekly_ff['Trend'] = (weekly_pct_change >= 0).astype(int)

# preview
weekly_ff


Price,Ticker,Date,Open,High,Low,Close,Volume,Trend
0,MEDC.JK,2005-04-01,408.560736,429.405656,408.560736,421.067688,6336749,0
1,MEDC.JK,2005-04-08,429.405713,579.489169,429.405713,529.461365,206700352,1
2,MEDC.JK,2005-04-15,521.123391,550.306183,512.785332,521.123352,27473244,0
3,MEDC.JK,2005-04-22,516.954428,516.954428,462.757603,475.264557,42606369,0
4,MEDC.JK,2005-04-29,475.264542,475.264542,429.405670,429.405670,32741619,0
...,...,...,...,...,...,...,...,...
1032,MEDC.JK,2025-02-28,1060.000000,1060.000000,1000.000000,1000.000000,157145100,0
1033,MEDC.JK,2025-03-07,1020.000000,1035.000000,990.000000,1005.000000,155914900,1
1034,MEDC.JK,2025-03-14,1010.000000,1025.000000,985.000000,995.000000,148779500,0
1035,MEDC.JK,2025-03-21,1000.000000,1025.000000,900.000000,975.000000,175640800,0


In [17]:
# Min-max scale the OHLC columns of `weekly_ff` in place, per ticker, with
# scalers fitted on the training period only.
# NOTE: this cell is not idempotent - re-running it rescales already-scaled
# values. Re-run the cell that builds `weekly_ff` first.
weekly_ff['Date'] = pd.to_datetime(weekly_ff['Date'])
train_start = '2005-04-01'
train_end   = '2024-03-31'
price_features = ['Open', 'High', 'Low', 'Close']

# scalers[ticker]                  -> fitted Close scaler (same final content as before)
# feature_scalers[ticker][feature] -> fitted scaler for every OHLC column, so each
#                                     one can be inverse_transformed later
scalers = {}
feature_scalers = {}

for ticker, _ in weekly_ff.groupby('Ticker'):
    # boolean masks for this ticker
    mask_all   = weekly_ff['Ticker'] == ticker
    mask_train = mask_all & weekly_ff['Date'].between(train_start, train_end)
    mask_test  = mask_all & (weekly_ff['Date'] > train_end)

    feature_scalers[ticker] = {}
    for feature in price_features:
        # fit on TRAIN rows only so no test-period information leaks into the scaling
        scaler = MinMaxScaler(feature_range=(0,1))
        scaler.fit(weekly_ff.loc[mask_train, [feature]])
        feature_scalers[ticker][feature] = scaler

        # transform both train and test with the train-fitted scaler
        weekly_ff.loc[mask_train, feature] = scaler.transform(weekly_ff.loc[mask_train, [feature]])
        # a ticker may have no rows after train_end; transform() rejects empty input
        if mask_test.any():
            weekly_ff.loc[mask_test, feature] = scaler.transform(weekly_ff.loc[mask_test, [feature]])

    scalers[ticker] = feature_scalers[ticker]['Close']

weekly_ff[['Ticker', 'Date', 'Open', 'High', 'Low', 'Close']]


Price,Ticker,Date,Open,High,Low,Close
0,MEDC.JK,2005-04-01,0.203515,0.206522,0.224854,0.214193
1,MEDC.JK,2005-04-08,0.217807,0.305919,0.240644,0.289248
2,MEDC.JK,2005-04-15,0.280692,0.286592,0.303806,0.283475
3,MEDC.JK,2005-04-22,0.277834,0.264504,0.265909,0.251721
4,MEDC.JK,2005-04-29,0.249250,0.236893,0.240644,0.219967
...,...,...,...,...,...,...
1032,MEDC.JK,2025-02-28,0.650163,0.624149,0.672880,0.615063
1033,MEDC.JK,2025-03-07,0.622737,0.607592,0.665305,0.618525
1034,MEDC.JK,2025-03-14,0.615881,0.600970,0.661517,0.611601
1035,MEDC.JK,2025-03-21,0.609025,0.600970,0.597128,0.597752


In [18]:
# Write the scaled weekly prices and the trend label to CSV (Volume is left out).
weekly_ff.to_csv(
    'Weekly_Stock_Price.csv',
    columns=['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Trend'],
    index=False,
)

# Preprocess Disclosure Tone Data

In [58]:
result_df = pd.read_csv('Result/zs_multilabel_qwen.csv')
drop_cols = [
    'Positive', 'Negative', 'Uncertainty',
    'Litigious', 'Strong_Modal', 'Weak_Modal', 'Constraining', 'Sentence'
]
cols_pred = [
    'Positive_Pred', 'Negative_Pred', 'Uncertainty_Pred',
    'Litigious_Pred', 'Strong_Modal_Pred', 'Weak_Modal_Pred', 'Constraining_Pred'
]

# Drop the columns that are not kept BEFORE aggregating, so the free-text
# 'Sentence' column is never summed (string concatenation), then total the
# remaining columns per (Ticker, Year).
grouped_df = result_df.drop(columns=drop_cols).groupby(['Ticker', 'Year']).sum()

# compute row-sums over the prediction columns
row_sums = grouped_df[cols_pred].sum(axis=1)

# replace each value with its fraction (0-1) of the row total;
# a (Ticker, Year) with no predicted label at all yields NaN (0 / 0)
grouped_df[cols_pred] = grouped_df[cols_pred].div(row_sums, axis=0)
grouped_df = grouped_df.reset_index()

# index=False: the RangeIndex carries no information and would come back as an
# 'Unnamed: 0' column when the file is read again
grouped_df.to_csv('DisclosureTone/tone_zs_multilabel_qwen.csv', index=False)

In [66]:
result_df = pd.read_csv('Result/zs_multiclass_fin-r1.csv')
drop_cols = [
    'Positive', 'Negative', 'Uncertainty',
    'Litigious', 'Strong_Modal', 'Weak_Modal', 'Constraining', 'Sentence'
]
cols_pred = [
    'Positive_Pred', 'Negative_Pred', 'Uncertainty_Pred',
    'Litigious_Pred', 'Strong_Modal_Pred', 'Weak_Modal_Pred', 'Constraining_Pred'
]
# replace(2,1) for 2 class classification and replace(2,0) for 1 class classification
# (here: a prediction value of 2 is counted like a 1)
result_df[cols_pred] = result_df[cols_pred].replace(2, 1)

# Drop the columns that are not kept BEFORE aggregating, so the free-text
# 'Sentence' column is never summed (string concatenation), then total the
# remaining columns per (Ticker, Year).
grouped_df = result_df.drop(columns=drop_cols).groupby(['Ticker', 'Year']).sum()

# compute row-sums over the prediction columns
row_sums = grouped_df[cols_pred].sum(axis=1)

# replace each value with its fraction (0-1) of the row total;
# a (Ticker, Year) with no predicted label at all yields NaN (0 / 0)
grouped_df[cols_pred] = grouped_df[cols_pred].div(row_sums, axis=0)
grouped_df = grouped_df.reset_index()

# index=False: the RangeIndex carries no information and would come back as an
# 'Unnamed: 0' column when the file is read again
grouped_df.to_csv('DisclosureTone/tone_zs_multiclass-2_fin-r1.csv', index=False)

In [55]:
# Preview one tone file with '.JK' appended to every ticker code.
tone_df = pd.read_csv('DisclosureTone/tone_icl_multilabel_qwen.csv')
tone_df = tone_df.assign(Ticker=tone_df['Ticker'] + '.JK')
tone_df

Unnamed: 0.1,Unnamed: 0,Ticker,Year,Positive_Pred,Negative_Pred,Uncertainty_Pred,Litigious_Pred,Strong_Modal_Pred,Weak_Modal_Pred,Constraining_Pred
0,0,MEDC.JK,2004,0.235294,0.147059,0.254902,0.0,0.058824,0.176471,0.127451
1,1,MEDC.JK,2005,0.142132,0.121827,0.299492,0.0,0.055838,0.177665,0.203046
2,2,MEDC.JK,2006,0.0625,0.147727,0.3125,0.0,0.045455,0.1875,0.244318
3,3,MEDC.JK,2007,0.125,0.244565,0.293478,0.0,0.016304,0.146739,0.173913
4,4,MEDC.JK,2008,0.257511,0.154506,0.240343,0.008584,0.064378,0.094421,0.180258
5,5,MEDC.JK,2009,0.134615,0.161538,0.288462,0.019231,0.073077,0.169231,0.153846
6,6,MEDC.JK,2010,0.458015,0.160305,0.152672,0.019084,0.034351,0.076336,0.099237
7,7,MEDC.JK,2011,0.361111,0.069444,0.208333,0.0,0.076389,0.111111,0.173611
8,8,MEDC.JK,2012,0.447619,0.171429,0.12381,0.0,0.07619,0.085714,0.095238
9,9,MEDC.JK,2013,0.446281,0.256198,0.082645,0.0,0.07438,0.07438,0.066116


# Load Data

In [19]:
def create_dataset(price_filename, tone_filename):
    """Join a price CSV with a yearly tone CSV.

    Each price row is matched to the tone row of the same ticker whose `Year`
    equals `Date.year - 1`, or `Date.year - 2` when the date falls in
    January-March. Tone tickers get '.JK' appended before matching.

    Args:
        price_filename: CSV with at least `Ticker` and `Date` columns.
        tone_filename: CSV with at least `Ticker` and `Year` columns.

    Returns:
        DataFrame with every price row (left join) plus the tone columns;
        all missing values are replaced by 0.
    """
    prices = pd.read_csv(price_filename)
    prices['Date'] = pd.to_datetime(prices['Date'])

    tones = pd.read_csv(tone_filename)
    tones['Ticker'] = tones['Ticker'] + '.JK'
    tones = tones.rename(columns={'Year': 'tone_year'})

    # Tone year assigned to each price row: previous calendar year, pushed one
    # further back for dates before April.
    dates = prices['Date']
    before_april = (dates.dt.month < 4).astype(int)
    prices['tone_year'] = dates.dt.year - 1 - before_april

    joined = prices.merge(tones, on=['Ticker', 'tone_year'], how='left')
    joined = joined.drop(columns=['tone_year'])
    return joined.fillna(0)

# Weekly prices joined with the fin-r1 ICL multilabel tone file.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_fin-r1.csv',
)

In [21]:
def create_dataloader(df, n_lags=12, tone=False, batch_size=64):
    """Build sliding-window train/test DataLoaders for next-step Close regression.

    For every ticker the rows are sorted by `Date`; each sample is the
    `n_lags` consecutive feature rows preceding a row, and its target is that
    row's `Close`. A sample goes to the train split when the target row's date
    is within 2005-04-01..2024-03-31 and to the test split when it is later;
    samples dated before the training window are discarded.

    Args:
        df: DataFrame with `Ticker`, `Date`, `Open`, `High`, `Low`, `Close`
            and, when `tone` is True, the seven `*_Pred` tone columns.
        n_lags: number of past rows in each input window.
        tone: include the tone columns as extra input features.
        batch_size: batch size of both loaders.

    Returns:
        `(n_features, train_loader, test_loader)`; the train loader shuffles,
        the test loader keeps chronological order.

    Raises:
        ValueError: if either split ends up without any sample.
    """
    train_start = pd.to_datetime('2005-04-01')
    train_end   = pd.to_datetime('2024-03-31')

    feature_cols = ['Open', 'High', 'Low', 'Close']
    if tone:
        feature_cols += [
            'Positive_Pred', 'Negative_Pred', 'Uncertainty_Pred', 'Litigious_Pred',
            'Strong_Modal_Pred', 'Weak_Modal_Pred', 'Constraining_Pred'
        ]
    # Resolve the target column by name instead of relying on a positional 3.
    target_idx = feature_cols.index('Close')

    X_train, X_test = [], []
    y_train, y_test = [], []
    for _, group in df.groupby('Ticker'):
        group = group.sort_values('Date')
        feats = group[feature_cols].values
        # Accept string dates as well; a no-op when the column is already datetime.
        dates = pd.DatetimeIndex(pd.to_datetime(group['Date']))
        in_train = (dates >= train_start) & (dates <= train_end)
        in_test  = dates > train_end

        # slide over windows: rows [i - n_lags, i) predict row i
        for i in range(n_lags, len(group)):
            window = feats[i - n_lags : i]
            target = feats[i][target_idx]

            if in_train[i]:
                X_train.append(window)
                y_train.append(target)
            elif in_test[i]:
                X_test.append(window)
                y_test.append(target)

    if not X_train or not X_test:
        raise ValueError(
            f"create_dataloader produced {len(X_train)} train and {len(X_test)} test "
            f"windows with n_lags={n_lags}; both splits need at least one sample"
        )

    X_train = np.stack(X_train, axis=0)
    X_test  = np.stack(X_test,  axis=0)
    y_train = np.array(y_train)
    y_test  = np.array(y_test)

    train_ds = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
    test_ds  = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=batch_size)

    return X_train.shape[2], train_loader, test_loader

# Default loaders (n_lags=12, price features only); also sets the global input_dim.
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df)

# Train Model

In [22]:
class BiLSTMModel(nn.Module):
    """Two stacked bidirectional LSTM blocks followed by a two-layer dense head.

    Input is a batch-first sequence tensor `(batch, steps, input_size)`;
    output is one scalar per sequence, shape `(batch,)`.

    Args:
        input_size: number of features per time step.
        hidden_size: hidden units per LSTM direction.
        num_layers: stacked layers inside each `nn.LSTM`.
        dropout_rate: dropout probability applied after each LSTM block.
    """

    def __init__(self, input_size, hidden_size, num_layers, dropout_rate):
        super().__init__()
        # A bidirectional LSTM concatenates both directions, doubling the width.
        bidir_width = hidden_size * 2

        # Block 1: ReLU -> BiLSTM -> dropout
        self.relu1 = nn.ReLU()
        self.lstm1 = nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.dropout1 = nn.Dropout(p=dropout_rate)

        # Block 2: ReLU -> BiLSTM -> dropout
        self.relu2 = nn.ReLU()
        self.lstm2 = nn.LSTM(
            input_size=bidir_width,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            bidirectional=True,
        )
        self.dropout2 = nn.Dropout(p=dropout_rate)

        # Dense head: bidir_width -> 16 -> 1
        self.fc1 = nn.Linear(in_features=bidir_width, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=1)

    def forward(self, x):
        """Return one prediction per sequence, read from the last time step."""
        seq, _ = self.lstm1(self.relu1(x))
        seq, _ = self.lstm2(self.relu2(self.dropout1(seq)))
        # Dropout is applied to the whole sequence before the last step is taken.
        last_step = self.dropout2(seq)[:, -1, :]
        return self.fc2(self.fc1(last_step)).squeeze(-1)

In [23]:
class RMSELoss(nn.Module):
    """Root-mean-squared-error loss: the square root of `nn.MSELoss`."""

    def __init__(self):
        super().__init__()
        self.mse = nn.MSELoss()

    def forward(self, predicted, actual):
        """Return sqrt(mean((predicted - actual) ** 2)) as a scalar tensor."""
        mean_squared = self.mse(predicted, actual)
        return torch.sqrt(mean_squared)

In [24]:
# Training configuration read as globals by train_model().
# Use the first GPU when one is present, otherwise fall back to the CPU so the
# notebook still runs on machines without CUDA.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
# input_dim is not set here: it is assigned by every
# `input_dim, train_loader, test_loader = create_dataloader(...)` call.
hidden_dim = 64     # hidden units per LSTM direction
layer_dim  = 2      # stacked layers inside each LSTM
dropout    = 0.2    # dropout probability after each LSTM block
lr         = 1e-3   # initial Adam learning rate
epochs     = 100    # maximum number of training epochs

In [25]:
def _collect_predictions(model, loader):
    """Run `model` over every batch of `loader` without gradients.

    Puts the model in eval mode, moves each input batch to the module-level
    `device`, and returns `(predictions, labels)` as concatenated NumPy arrays.
    """
    model.eval()
    batch_preds, batch_labels = [], []
    with torch.no_grad():
        for xb, yb in loader:
            batch_preds.append(model(xb.to(device)).cpu().numpy())
            batch_labels.append(yb.numpy())
    return np.concatenate(batch_preds), np.concatenate(batch_labels)


def train_model(train_loader, test_loader):
    """Train a BiLSTMModel with RMSE loss and report test RMSE / MAE.

    Reads the module-level `input_dim`, `hidden_dim`, `layer_dim`, `dropout`,
    `lr`, `epochs` and `device`. Seeds the RNGs via `set_seed()` first, so
    repeated calls start from the same initial weights.

    Training stops after `epochs` epochs, or earlier once the learning rate
    (cut by 10x after 3 epochs without improvement in train loss) drops
    below 1e-7.

    Args:
        train_loader: DataLoader yielding `(inputs, targets)` training batches.
        test_loader: DataLoader yielding `(inputs, targets)` evaluation batches.

    Returns:
        The trained model (left in eval mode).
    """
    set_seed()
    model = BiLSTMModel(input_dim, hidden_dim, layer_dim, dropout)
    model.to(device)

    # Regression setup with RMSE loss
    criterion = RMSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # `verbose=` is deprecated on torch LR schedulers; reductions are logged
    # explicitly below instead.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

    pbar = tqdm(range(1, epochs + 1), desc='Epoch', unit='epoch')
    for epoch in pbar:
        # --- Training ---
        model.train()
        train_loss_accum = 0.0
        train_count = 0
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

            train_loss_accum += loss.item() * xb.size(0)
            train_count += xb.size(0)

        # Batch-size-weighted mean of the per-batch RMSE values.
        train_loss = train_loss_accum / train_count

        lr_before = optimizer.param_groups[0]['lr']
        scheduler.step(train_loss)
        lr_after = optimizer.param_groups[0]['lr']
        if lr_after < lr_before:
            # tqdm.write keeps the message from breaking the progress bar.
            tqdm.write(f'Epoch {epoch}: reducing learning rate to {lr_after:.4e}')

        # --- Evaluation ---
        test_preds, test_labels = _collect_predictions(model, test_loader)
        test_rmse = np.sqrt(mean_squared_error(test_labels, test_preds))

        pbar.set_postfix({'Train RMSE': f'{train_loss:.4f}', 'Test RMSE': f'{test_rmse:.4f}'})

        # Learning rate has decayed to a negligible value: stop early.
        if lr_after < 1e-07:
            break

    # --- Final detailed report on test set ---
    all_preds, all_labels = _collect_predictions(model, test_loader)

    mse = mean_squared_error(all_labels, all_preds)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(all_labels, all_preds)

    print(f"RMSE: {rmse:.4f}")
    print(f"MAE:  {mae:.4f}")
    return model

In [67]:
# Baseline: price features only (tone=False), n_lags=12. The tone file is still
# merged by create_dataset, but its columns are not selected as inputs.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=False)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/ohlc_12.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s, Train RMSE=0.1835, Test RMSE=0.5011]

Epoch:  45%|████▌     | 45/100 [00:03<00:04, 12.45epoch/s, Train RMSE=0.0377, Test RMSE=0.0423]

RMSE: 0.0423
MAE:  0.0340





In [68]:
# Baseline: price features only (tone=False), n_lags=24. The tone file is still
# merged by create_dataset, but its columns are not selected as inputs.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=False)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/ohlc_24.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.59epoch/s, Train RMSE=0.0671, Test RMSE=0.0704]

Epoch:  39%|███▉      | 39/100 [00:03<00:04, 12.32epoch/s, Train RMSE=0.0392, Test RMSE=0.0426]

RMSE: 0.0426
MAE:  0.0337





# ICL Weekly

## ICL Multilabel

In [69]:
# Price + tone features (ICL multilabel, fin-r1), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multilabel_fin-r1_12.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.56epoch/s, Train RMSE=0.1621, Test RMSE=0.3847]

Epoch:  55%|█████▌    | 55/100 [00:04<00:03, 12.48epoch/s, Train RMSE=0.0382, Test RMSE=0.0460]

RMSE: 0.0460
MAE:  0.0345





In [70]:
# Price + tone features (ICL multilabel, fin-r1), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multilabel_fin-r1_24.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.46epoch/s, Train RMSE=0.1090, Test RMSE=0.1357]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.35epoch/s, Train RMSE=0.0404, Test RMSE=0.0472]

RMSE: 0.0472
MAE:  0.0354





In [71]:
# Price + tone features (ICL multilabel, qwen), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multilabel_qwen_12.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.64epoch/s, Train RMSE=0.0679, Test RMSE=0.0690]

Epoch:  61%|██████    | 61/100 [00:04<00:03, 12.60epoch/s, Train RMSE=0.0383, Test RMSE=0.0458]

RMSE: 0.0458
MAE:  0.0345





In [72]:
# Price + tone features (ICL multilabel, qwen), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multilabel_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multilabel_qwen_24.pt")

Epoch:   8%|▊         | 8/100 [00:00<00:07, 12.85epoch/s, Train RMSE=0.0585, Test RMSE=0.1142]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.62epoch/s, Train RMSE=0.0414, Test RMSE=0.0477]

RMSE: 0.0477
MAE:  0.0363





## ICL Multiclass-2

In [73]:
# Price + tone features (ICL multiclass-2, fin-r1), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-2_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-2_fin-r1_12.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.85epoch/s, Train RMSE=0.1617, Test RMSE=0.3818]

Epoch:  54%|█████▍    | 54/100 [00:04<00:03, 12.71epoch/s, Train RMSE=0.0375, Test RMSE=0.0443]

RMSE: 0.0443
MAE:  0.0330





In [74]:
# Price + tone features (ICL multiclass-2, fin-r1), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-2_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-2_fin-r1_24.pt")

Epoch:   6%|▌         | 6/100 [00:00<00:07, 12.81epoch/s, Train RMSE=0.0675, Test RMSE=0.0894]

Epoch:  39%|███▉      | 39/100 [00:03<00:04, 12.51epoch/s, Train RMSE=0.0450, Test RMSE=0.0456]

RMSE: 0.0456
MAE:  0.0348





In [75]:
# Price + tone features (ICL multiclass-2, qwen), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-2_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-2_qwen_12.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s, Train RMSE=0.2838, Test RMSE=0.5069]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.81epoch/s, Train RMSE=0.0377, Test RMSE=0.0417]

RMSE: 0.0417
MAE:  0.0317





In [76]:
# Price + tone features (ICL multiclass-2, qwen), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-2_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-2_qwen_24.pt")

Epoch:   8%|▊         | 8/100 [00:00<00:07, 12.91epoch/s, Train RMSE=0.0611, Test RMSE=0.0721]

Epoch:  39%|███▉      | 39/100 [00:03<00:04, 12.60epoch/s, Train RMSE=0.0452, Test RMSE=0.0441]

RMSE: 0.0441
MAE:  0.0339





## ICL Multiclass-1

In [77]:
# Price + tone features (ICL multiclass-1, fin-r1), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-1_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-1_fin-r1_12.pt")

Epoch:   6%|▌         | 6/100 [00:00<00:07, 12.92epoch/s, Train RMSE=0.0601, Test RMSE=0.0557]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.78epoch/s, Train RMSE=0.0378, Test RMSE=0.0448]

RMSE: 0.0448
MAE:  0.0335





In [78]:
# Price + tone features (ICL multiclass-1, fin-r1), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-1_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-1_fin-r1_24.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.92epoch/s, Train RMSE=0.0836, Test RMSE=0.1230]

Epoch:  54%|█████▍    | 54/100 [00:04<00:03, 12.73epoch/s, Train RMSE=0.0397, Test RMSE=0.0459]

RMSE: 0.0459
MAE:  0.0345





In [79]:
# Price + tone features (ICL multiclass-1, qwen), n_lags=12.
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
# Bind the result to `model` (was misspelled `mdoel`), so `model` refers to the
# network trained in this cell and not the one left over from the previous cell.
model = train_model(train_loader, test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-1_qwen_12.pt")

Epoch:   8%|▊         | 8/100 [00:00<00:07, 13.02epoch/s, Train RMSE=0.0549, Test RMSE=0.0628]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.77epoch/s, Train RMSE=0.0378, Test RMSE=0.0424]

RMSE: 0.0424
MAE:  0.0321





In [80]:
# Price + tone features (ICL multiclass-1, qwen), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_icl_multiclass-1_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/icl_multiclass-1_qwen_24.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.81epoch/s, Train RMSE=0.1076, Test RMSE=0.1341]

Epoch:  53%|█████▎    | 53/100 [00:04<00:03, 12.62epoch/s, Train RMSE=0.0415, Test RMSE=0.0422]

RMSE: 0.0422
MAE:  0.0330





# Zero-Shot Weekly

## ZS Multilabel

In [81]:
# Price + tone features (zero-shot multilabel, fin-r1), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multilabel_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multilabel_fin-r1_12.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.99epoch/s, Train RMSE=0.0767, Test RMSE=0.1498]

Epoch:  61%|██████    | 61/100 [00:04<00:03, 12.80epoch/s, Train RMSE=0.0381, Test RMSE=0.0498]

RMSE: 0.0498
MAE:  0.0388





In [82]:
# Price + tone features (zero-shot multilabel, fin-r1), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multilabel_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multilabel_fin-r1_24.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.73epoch/s, Train RMSE=0.1554, Test RMSE=0.3465]

Epoch:  70%|███████   | 70/100 [00:05<00:02, 12.62epoch/s, Train RMSE=0.0404, Test RMSE=0.0536]

RMSE: 0.0536
MAE:  0.0419





In [83]:
# Price + tone features (zero-shot multilabel, qwen), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multilabel_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multilabel_qwen_12.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.81epoch/s, Train RMSE=0.0766, Test RMSE=0.1517]

Epoch:  54%|█████▍    | 54/100 [00:04<00:03, 12.69epoch/s, Train RMSE=0.0376, Test RMSE=0.0493]

RMSE: 0.0493
MAE:  0.0378





In [84]:
# Price + tone features (zero-shot multilabel, qwen), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multilabel_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multilabel_qwen_24.pt")

Epoch:   6%|▌         | 6/100 [00:00<00:07, 12.84epoch/s, Train RMSE=0.0573, Test RMSE=0.0972]

Epoch:  70%|███████   | 70/100 [00:05<00:02, 12.64epoch/s, Train RMSE=0.0403, Test RMSE=0.0499]

RMSE: 0.0499
MAE:  0.0382





## ZS Multiclass-2

In [85]:
# Price + tone features (zero-shot multiclass-2, fin-r1), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-2_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-2_fin-r1_12.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.81epoch/s, Train RMSE=0.0680, Test RMSE=0.0684]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.68epoch/s, Train RMSE=0.0379, Test RMSE=0.0433]

RMSE: 0.0433
MAE:  0.0326





In [86]:
# Price + tone features (zero-shot multiclass-2, fin-r1), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-2_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-2_fin-r1_24.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.80epoch/s, Train RMSE=0.0836, Test RMSE=0.1248]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.68epoch/s, Train RMSE=0.0416, Test RMSE=0.0452]

RMSE: 0.0452
MAE:  0.0344





In [87]:
# Price + tone features (zero-shot multiclass-2, qwen), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-2_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-2_qwen_12.pt")

Epoch:   4%|▍         | 4/100 [00:00<00:07, 12.96epoch/s, Train RMSE=0.0766, Test RMSE=0.1475]

Epoch:  55%|█████▌    | 55/100 [00:04<00:03, 12.76epoch/s, Train RMSE=0.0383, Test RMSE=0.0433]

RMSE: 0.0433
MAE:  0.0326





In [88]:
# Price + tone features (zero-shot multiclass-2, qwen), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-2_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-2_qwen_24.pt")

Epoch:   6%|▌         | 6/100 [00:00<00:07, 12.83epoch/s, Train RMSE=0.0667, Test RMSE=0.0845]

Epoch:  39%|███▉      | 39/100 [00:03<00:04, 12.58epoch/s, Train RMSE=0.0452, Test RMSE=0.0448]

RMSE: 0.0448
MAE:  0.0345





## ZS Multiclass-1

In [89]:
# Price + tone features (zero-shot multiclass-1, fin-r1), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-1_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-1_fin-r1_12.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.92epoch/s, Train RMSE=0.1618, Test RMSE=0.3859]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.70epoch/s, Train RMSE=0.0378, Test RMSE=0.0440]

RMSE: 0.0440
MAE:  0.0331





In [90]:
# Price + tone features (zero-shot multiclass-1, fin-r1), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-1_fin-r1.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-1_fin-r1_24.pt")

Epoch:  10%|█         | 10/100 [00:00<00:06, 12.90epoch/s, Train RMSE=0.0528, Test RMSE=0.0755]

Epoch:  49%|████▉     | 49/100 [00:03<00:04, 12.61epoch/s, Train RMSE=0.0420, Test RMSE=0.0460]

RMSE: 0.0460
MAE:  0.0351





In [91]:
# Price + tone features (zero-shot multiclass-1, qwen), n_lags=12.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-1_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-1_qwen_12.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.86epoch/s, Train RMSE=0.1613, Test RMSE=0.3833]

Epoch:  57%|█████▋    | 57/100 [00:04<00:03, 12.81epoch/s, Train RMSE=0.0378, Test RMSE=0.0442]

RMSE: 0.0442
MAE:  0.0331





In [92]:
# Price + tone features (zero-shot multiclass-1, qwen), n_lags=24.
timeseries_df = create_dataset(
    price_filename='Weekly_Stock_Price.csv',
    tone_filename='DisclosureTone/tone_zs_multiclass-1_qwen.csv',
)
input_dim, train_loader, test_loader = create_dataloader(df=timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader=train_loader, test_loader=test_loader)
# torch.save(model.state_dict(), "Model/zs_multiclass-1_qwen_24.pt")

Epoch:   2%|▏         | 2/100 [00:00<00:07, 12.84epoch/s, Train RMSE=0.1074, Test RMSE=0.1363]

Epoch:  53%|█████▎    | 53/100 [00:04<00:03, 12.71epoch/s, Train RMSE=0.0409, Test RMSE=0.0436]

RMSE: 0.0436
MAE:  0.0333



