In [103]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, mean_squared_error, mean_absolute_error
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from tqdm import tqdm

In [124]:
def set_seed(SEED=42):
    torch.manual_seed(SEED)
    np.random.seed(SEED)
    torch.cuda.manual_seed_all(SEED)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

# Preprocess Stock Price Data

In [None]:
page_df = pd.read_csv('Page.csv')
tickers = page_df['ticker'].unique().tolist()
tickers = [ticker + '.JK' for ticker in tickers]

In [None]:
start_date = '2020-04-01'
end_date   = '2025-03-31'
interval   = '1d'

raw = yf.download(
    tickers=tickers,
    start=start_date,
    end=end_date,
    interval=interval,
    group_by='ticker',
    progress=False
)

df = (
    raw
    .stack(level=0)
    .rename_axis(['Date','Ticker'])
    .reset_index()
)

df['Date'] = pd.to_datetime(df['Date'])
df = df.set_index('Date')

weekly_ff = (
    df
    .groupby('Ticker')
    .resample('W-FRI')
    .agg({
        'Open':   'first',
        'High':   'max',
        'Low':    'min',
        'Close':  'last',
        'Volume': 'sum'
    })
    .dropna()
    .reset_index()
)

weekly_ff


  raw


Price,Ticker,Date,Open,High,Low,Close,Volume
0,ACES.JK,2020-04-03,1121.963865,1152.170585,1044.289313,1104.702881,4.355160e+07
1,ACES.JK,2020-04-10,1100.387580,1160.801021,992.506478,1027.028442,5.390500e+07
2,ACES.JK,2020-04-17,1027.028504,1178.062193,996.821783,1156.485962,1.038959e+08
3,ACES.JK,2020-04-24,1156.485959,1294.573709,1061.550425,1199.638428,6.470390e+07
4,ACES.JK,2020-05-01,1208.268928,1337.726191,1186.692697,1311.834717,6.664000e+07
...,...,...,...,...,...,...,...
7351,UNVR.JK,2025-02-28,1295.000000,1305.000000,1015.000000,1015.000000,1.123386e+09
7352,UNVR.JK,2025-03-07,985.000000,1240.000000,985.000000,1200.000000,1.019657e+09
7353,UNVR.JK,2025-03-14,1205.000000,1335.000000,1145.000000,1300.000000,4.406983e+08
7354,UNVR.JK,2025-03-21,1310.000000,1420.000000,1255.000000,1295.000000,4.444509e+08


In [163]:
# 1) compute month‐over‐month % change on Close
weekly_ff['Pct_Change'] = (
    weekly_ff
    .groupby('Ticker')['Close']
    .pct_change()
)

weekly_ff['Trend'] = np.where(
    weekly_ff['Pct_Change'] >=  0, 1,
    np.where(weekly_ff['Pct_Change'] < 0, 0, 0)
)

# 3) (optional) drop the helper column
weekly_ff.drop(columns='Pct_Change', inplace=True)

# preview
weekly_ff


Price,Ticker,Date,Open,High,Low,Close,Volume,Trend
0,ACES.JK,2020-04-03,1121.963865,1152.170585,1044.289313,1104.702881,4.355160e+07,0
1,ACES.JK,2020-04-10,1100.387580,1160.801021,992.506478,1027.028442,5.390500e+07,0
2,ACES.JK,2020-04-17,1027.028504,1178.062193,996.821783,1156.485962,1.038959e+08,1
3,ACES.JK,2020-04-24,1156.485959,1294.573709,1061.550425,1199.638428,6.470390e+07,1
4,ACES.JK,2020-05-01,1208.268928,1337.726191,1186.692697,1311.834717,6.664000e+07,1
...,...,...,...,...,...,...,...,...
7351,UNVR.JK,2025-02-28,1295.000000,1305.000000,1015.000000,1015.000000,1.123386e+09,0
7352,UNVR.JK,2025-03-07,985.000000,1240.000000,985.000000,1200.000000,1.019657e+09,1
7353,UNVR.JK,2025-03-14,1205.000000,1335.000000,1145.000000,1300.000000,4.406983e+08,1
7354,UNVR.JK,2025-03-21,1310.000000,1420.000000,1255.000000,1295.000000,4.444509e+08,0


In [164]:
# assume weekly_ff is your DataFrame
weekly_ff['Date'] = pd.to_datetime(weekly_ff['Date'])
train_start = '2020-04-01'
train_end   = '2024-03-31'

# store scalers if you need to inverse_transform later
scalers = {}

for ticker, grp in weekly_ff.groupby('Ticker'):
    # boolean masks for this ticker
    mask_all   = weekly_ff['Ticker'] == ticker
    mask_train = mask_all & weekly_ff['Date'].between(train_start, train_end)
    mask_test  = mask_all & (weekly_ff['Date'] > train_end)
    
    # fit scaler on TRAIN close prices
    for feature in ['Open', 'High', 'Low', 'Close']:
        scaler = MinMaxScaler(feature_range=(0,1))
        scaler.fit(weekly_ff.loc[mask_train, [feature]])
        scalers[ticker] = scaler
        
        # transform both train and test
        weekly_ff.loc[mask_train, feature] = scaler.transform(weekly_ff.loc[mask_train, [feature]])
        weekly_ff.loc[mask_test,  feature] = scaler.transform(weekly_ff.loc[mask_test,  [feature]])

weekly_ff[['Ticker', 'Date', 'Open', 'High', 'Low', 'Close']]


Price,Ticker,Date,Open,High,Low,Close
0,ACES.JK,2020-04-03,0.606009,0.617279,0.597809,0.592202
1,ACES.JK,2020-04-10,0.588750,0.624423,0.552645,0.530070
2,ACES.JK,2020-04-17,0.530070,0.638712,0.556409,0.633623
3,ACES.JK,2020-04-24,0.633623,0.735158,0.612864,0.668141
4,ACES.JK,2020-05-01,0.675044,0.770879,0.722010,0.757887
...,...,...,...,...,...,...
7351,UNVR.JK,2025-02-28,-0.255596,-0.273823,-0.324789,-0.326897
7352,UNVR.JK,2025-03-07,-0.320412,-0.287634,-0.331575,-0.286645
7353,UNVR.JK,2025-03-14,-0.274413,-0.267449,-0.295380,-0.264888
7354,UNVR.JK,2025-03-21,-0.252460,-0.249389,-0.270496,-0.265976


In [161]:
weekly_ff[['Ticker', 'Date', 'Open', 'High', 'Low', 'Close', 'Trend']].to_csv('Monthly_Stock_Price.csv', index=False)

# Preprocess Disclosure Tone Data

In [None]:
result_df = pd.read_csv('Result/icl_multilabel_fin-r1.csv')
grouped_df = result_df.groupby(['Ticker', 'Year']).sum()
drop_cols = [
    'Positive', 'Negative', 'Uncertainty',
    'Litigious', 'Strong_Modal', 'Weak_Modal', 'Constraining', 'Sentence'
]
cols_pred = [
    'Positive_Pred', 'Negative_Pred', 'Uncertainty_Pred',
    'Litigious_Pred', 'Strong_Modal_Pred', 'Weak_Modal_Pred', 'Constraining_Pred'
]

# compute row-sums over those columns
row_sums = grouped_df[cols_pred].sum(axis=1)

# replace each value with its percentage of the row total
grouped_df[cols_pred] = grouped_df[cols_pred].div(row_sums, axis=0)
grouped_df = grouped_df.drop(columns=drop_cols).reset_index()
grouped_df.to_csv('DisclosureTone/tone_icl_multilabel_fin-r1.csv')

In [None]:
result_df = pd.read_csv('Result/zs_multiclass_fin-r1.csv')
drop_cols = [
    'Positive', 'Negative', 'Uncertainty',
    'Litigious', 'Strong_Modal', 'Weak_Modal', 'Constraining', 'Sentence'
]
cols_pred = [
    'Positive_Pred', 'Negative_Pred', 'Uncertainty_Pred',
    'Litigious_Pred', 'Strong_Modal_Pred', 'Weak_Modal_Pred', 'Constraining_Pred'
]
# replace(2,1) for 2 class classification and replace(2,0) for 1 class classification
result_df[cols_pred] = result_df[cols_pred].replace(2, 1)
grouped_df = result_df.groupby(['Ticker', 'Year']).sum()

# compute row-sums over those columns
row_sums = grouped_df[cols_pred].sum(axis=1)

# replace each value with its percentage of the row total
grouped_df[cols_pred] = grouped_df[cols_pred].div(row_sums, axis=0)
grouped_df = grouped_df.drop(columns=drop_cols).reset_index()
grouped_df.to_csv('DisclosureTone/tone_zs_multiclass-2_fin-r1.csv')

In [10]:
tone_df = pd.read_csv('DisclosureTone/tone_icl_multilabel_fin-r1.csv')
tone_df['Ticker'] = tone_df['Ticker'] + '.JK'
tone_df

Unnamed: 0.1,Unnamed: 0,Ticker,Year,Positive_Pred,Negative_Pred,Uncertainty_Pred,Litigious_Pred,Strong_Modal_Pred,Weak_Modal_Pred,Constraining_Pred
0,0,ACES.JK,2020,0.393939,0.181818,0.161616,0.000000,0.060606,0.141414,0.060606
1,1,ACES.JK,2021,0.439024,0.195122,0.085366,0.000000,0.036585,0.170732,0.073171
2,2,ACES.JK,2022,0.450980,0.215686,0.058824,0.000000,0.078431,0.117647,0.078431
3,3,ACES.JK,2023,0.455696,0.088608,0.196203,0.000000,0.037975,0.139241,0.082278
4,4,ADRO.JK,2019,0.329060,0.158120,0.158120,0.000000,0.081197,0.222222,0.051282
...,...,...,...,...,...,...,...,...,...,...
139,139,UNVR.JK,2019,0.554430,0.078481,0.101266,0.000000,0.048101,0.172152,0.045570
140,140,UNVR.JK,2020,0.473538,0.116992,0.144847,0.002786,0.047354,0.130919,0.083565
141,141,UNVR.JK,2021,0.436242,0.098434,0.123043,0.000000,0.069351,0.210291,0.062640
142,142,UNVR.JK,2022,0.540650,0.060976,0.089431,0.000000,0.063008,0.203252,0.042683


# Load Data

In [113]:
def create_dataset(price_filename, tone_filename):
   weekly_ff = pd.read_csv(price_filename)
   weekly_ff['Date'] = pd.to_datetime(weekly_ff['Date'])
   tone_df = pd.read_csv(tone_filename)
   tone_df['Ticker'] = tone_df['Ticker'] + '.JK'

   d = weekly_ff['Date']
   weekly_ff['tone_year'] = d.dt.year - 1 - (d.dt.month < 4).astype(int)
   tone_df = tone_df.rename(columns={'Year':'tone_year'})

   merged = (
      weekly_ff
         .merge(
            tone_df,
            on=['Ticker','tone_year'],
            how='left'
         )
         .drop(columns=['tone_year'])
   )
   merged = merged.fillna(0)
   return merged

timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')

In [157]:
def create_dataloader(df, n_lags=12, tone=False, batch_size=64):
    train_start = pd.to_datetime('2020-04-01')
    train_end   = pd.to_datetime('2024-03-31')

    if tone:
        feature_cols = [
            'Open', 'High', 'Low', 'Close', 
            'Positive_Pred', 'Negative_Pred', 'Uncertainty_Pred', 'Litigious_Pred', 
            'Strong_Modal_Pred', 'Weak_Modal_Pred', 'Constraining_Pred'
        ]
    else:
        feature_cols = [
            'Open', 'High', 'Low', 'Close'
        ]
    X_train, X_test = [], []
    y_train, y_test = [], []
    for ticker, group in df.groupby('Ticker'):
        group = group.sort_values('Date')
        feats  = group[feature_cols].values
        dates = group['Date'].values

        # slide over windows
        for i in range(n_lags, len(group)):
            Xw = feats[i-n_lags : i]
            yw = feats[i][3]
            label_date = dates[i]

            if train_start <= label_date <= train_end:
                X_train.append(Xw)
                y_train.append(yw)
            elif label_date > train_end:
                X_test.append(Xw)
                y_test.append(yw)
    
    X_train = np.stack(X_train, axis=0)
    X_test  = np.stack(X_test,  axis=0)
    y_train = np.array(y_train)
    y_test  = np.array(y_test) 

    train_ds = TensorDataset(torch.from_numpy(X_train).float(), torch.from_numpy(y_train).float())
    test_ds  = TensorDataset(torch.from_numpy(X_test).float(), torch.from_numpy(y_test).float())
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader  = DataLoader(test_ds,  batch_size=batch_size)

    return X_train.shape[2], train_loader, test_loader

input_dim, train_loader, test_loader = create_dataloader(timeseries_df)

# Train Model

In [98]:
class BiLSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, dropout_rate):
        super(BiLSTMModel, self).__init__()
        self.relu1 = nn.ReLU()
        self.lstm1 = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        self.dropout1 = nn.Dropout(dropout_rate)
        self.relu2 = nn.ReLU()
        self.lstm2 = nn.LSTM(hidden_size * 2, hidden_size, num_layers, batch_first=True, bidirectional=True)  # Input size doubles because it's bidirectional
        self.dropout2 = nn.Dropout(dropout_rate)
        self.fc1 = nn.Linear(hidden_size * 2, 16)  # First dense layer after second LSTM
        self.fc2 = nn.Linear(16, 1)  # Second dense layer outputs the final prediction

    def forward(self, x):
        x = self.relu1(x)
        x, _ = self.lstm1(x)
        x = self.dropout1(x)
        x = self.relu2(x)
        x, _ = self.lstm2(x)
        x = self.dropout2(x)
        x = self.fc1(x[:, -1, :])  # Apply first dense layer to the last time step output
        x = self.fc2(x)  # Apply second dense layer
        return x.squeeze(-1)

In [99]:
class RMSELoss(nn.Module):
    def __init__(self):
        super(RMSELoss, self).__init__()
        self.mse = nn.MSELoss()
    
    def forward(self, predicted, actual):
        return torch.sqrt(self.mse(predicted, actual))

In [121]:
device = 'cuda:0'
input_dim = input_dim
hidden_dim = 64
layer_dim  = 2
dropout    = 0.2
lr         = 1e-3
epochs     = 100

In [None]:
def train_model(train_loader, test_loader):
    set_seed()
    model = BiLSTMModel(input_dim, hidden_dim, layer_dim, dropout)
    model.to(device)

    # Regression setup with RMSE loss
    criterion = RMSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    pbar = tqdm(range(1, epochs + 1), desc='Epoch', unit='epoch')
    for epoch in pbar:
        # --- Training ---
        model.train()
        train_loss_accum = 0.0
        train_count = 0
        for xb, yb in train_loader:
            xb = xb.to(device)
            yb = yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            loss.backward()
            optimizer.step()

            train_loss_accum += loss.item() * xb.size(0)
            train_count += xb.size(0)

        train_loss = train_loss_accum / train_count
        scheduler.step(train_loss)

        # --- Evaluation ---
        model.eval()
        all_test_preds = []
        all_test_labels = []
        with torch.no_grad():
            for xb, yb in test_loader:
                xb = xb.to(device)
                preds = model(xb).cpu().numpy()
                all_test_preds.extend(preds)
                all_test_labels.extend(yb.numpy())
        test_rmse = np.sqrt(mean_squared_error(all_test_labels, all_test_preds))

        pbar.set_postfix({'Train RMSE': f'{train_loss:.4f}', 'Test RMSE': f'{test_rmse:.4f}'})

        if optimizer.param_groups[0]['lr'] < 1e-07:
            break

    # --- Final detailed report on test set ---
    model.eval()
    all_preds = []
    all_labels = []
    with torch.no_grad():
        for xb, yb in test_loader:
            xb = xb.to(device)
            preds = model(xb).cpu().numpy()
            all_preds.append(preds)
            all_labels.append(yb.numpy())

    all_preds = np.concatenate(all_preds)
    all_labels = np.concatenate(all_labels)

    mse = mean_squared_error(all_labels, all_preds)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(all_labels, all_preds)

    print(f"RMSE: {rmse:.4f}")
    print(f"MAE:  {mae:.4f}")
    return model

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=False)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/ohlc_12.pt")

Epoch:  51%|█████     | 51/100 [00:22<00:21,  2.31epoch/s, Train RMSE=0.0513, Test RMSE=0.0996]

RMSE: 0.0996
MAE:  0.0587





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=False)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/ohlc_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.46epoch/s, Train RMSE=0.0510, Test RMSE=0.1001]

RMSE: 0.1001
MAE:  0.0589





# ICL Weekly

## ICL Multilabel

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multilabel_fin-r1_12.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  42%|████▏     | 42/100 [00:18<00:25,  2.30epoch/s, Train RMSE=0.0522, Test RMSE=0.1000]

RMSE: 0.1000
MAE:  0.0594





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multilabel_fin-r1_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.42epoch/s, Train RMSE=0.0515, Test RMSE=0.1006]

RMSE: 0.1006
MAE:  0.0598





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multilabel_qwen_12.pt")

Epoch:  42%|████▏     | 42/100 [00:18<00:25,  2.27epoch/s, Train RMSE=0.0523, Test RMSE=0.1003]

RMSE: 0.1003
MAE:  0.0595





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multilabel_qwen_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.41epoch/s, Train RMSE=0.0516, Test RMSE=0.1009]

RMSE: 0.1009
MAE:  0.0600





## ICL Multiclass-2

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-2_fin-r1_12.pt")

Epoch:  42%|████▏     | 42/100 [00:18<00:25,  2.26epoch/s, Train RMSE=0.0523, Test RMSE=0.1002]

RMSE: 0.1002
MAE:  0.0592





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-2_fin-r1_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.42epoch/s, Train RMSE=0.0516, Test RMSE=0.1014]

RMSE: 0.1014
MAE:  0.0601





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-2_qwen_12.pt")

Epoch:  52%|█████▏    | 52/100 [00:22<00:20,  2.31epoch/s, Train RMSE=0.0507, Test RMSE=0.1020]

RMSE: 0.1020
MAE:  0.0603





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-2_qwen_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.44epoch/s, Train RMSE=0.0515, Test RMSE=0.1011]

RMSE: 0.1011
MAE:  0.0597





## ICL Multiclass-1

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-1_fin-r1_12.pt")

Epoch:  42%|████▏     | 42/100 [00:18<00:25,  2.30epoch/s, Train RMSE=0.0522, Test RMSE=0.1009]

RMSE: 0.1009
MAE:  0.0597





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-1_fin-r1_24.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.45epoch/s, Train RMSE=0.0515, Test RMSE=0.1015]

RMSE: 0.1015
MAE:  0.0601





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-1_qwen_12.pt")

Epoch:  42%|████▏     | 42/100 [00:18<00:25,  2.27epoch/s, Train RMSE=0.0522, Test RMSE=0.1014]

RMSE: 0.1014
MAE:  0.0598





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/icl_multiclass-1_qwen_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.45epoch/s, Train RMSE=0.0515, Test RMSE=0.1024]

RMSE: 0.1024
MAE:  0.0607





# Zero-Shot Weekly

## ZS Multilabel

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multilabel_fin-r1_12.pt")

Epoch:  52%|█████▏    | 52/100 [00:22<00:21,  2.28epoch/s, Train RMSE=0.0507, Test RMSE=0.1018]

RMSE: 0.1018
MAE:  0.0604





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multilabel_fin-r1_24.pt")

Epoch:  49%|████▉     | 49/100 [00:20<00:21,  2.39epoch/s, Train RMSE=0.0526, Test RMSE=0.1007]

RMSE: 0.1007
MAE:  0.0597





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multilabel_qwen_12.pt")

Epoch:  52%|█████▏    | 52/100 [00:22<00:20,  2.31epoch/s, Train RMSE=0.0506, Test RMSE=0.1012]

RMSE: 0.1012
MAE:  0.0599





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multilabel_qwen_24.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  55%|█████▌    | 55/100 [00:22<00:18,  2.44epoch/s, Train RMSE=0.0520, Test RMSE=0.1007]

RMSE: 0.1007
MAE:  0.0597





## ZS Multiclass-2

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-2_fin-r1_12.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  52%|█████▏    | 52/100 [00:22<00:20,  2.31epoch/s, Train RMSE=0.0506, Test RMSE=0.1016]

RMSE: 0.1016
MAE:  0.0602





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-2_fin-r1_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.44epoch/s, Train RMSE=0.0515, Test RMSE=0.1009]

RMSE: 0.1009
MAE:  0.0599





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-2_qwen_12.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  56%|█████▌    | 56/100 [00:24<00:19,  2.30epoch/s, Train RMSE=0.0510, Test RMSE=0.1015]

RMSE: 0.1015
MAE:  0.0600





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-2_qwen_24.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.43epoch/s, Train RMSE=0.0516, Test RMSE=0.1014]

RMSE: 0.1014
MAE:  0.0600





## ZS Multiclass-1

In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-1_fin-r1_12.pt")

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  42%|████▏     | 42/100 [00:18<00:25,  2.30epoch/s, Train RMSE=0.0525, Test RMSE=0.1000]

RMSE: 0.1000
MAE:  0.0594





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-1_fin-r1_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.44epoch/s, Train RMSE=0.0517, Test RMSE=0.1005]

RMSE: 0.1005
MAE:  0.0596





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-1_qwen_12.pt")

Epoch:  52%|█████▏    | 52/100 [00:22<00:20,  2.31epoch/s, Train RMSE=0.0507, Test RMSE=0.1017]

RMSE: 0.1017
MAE:  0.0604





In [None]:
timeseries_df = create_dataset('Weekly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=24, tone=True)
model = train_model(train_loader, test_loader)
torch.save(model.state_dict(), "Model/zs_multiclass-1_qwen_24.pt")

Epoch:  60%|██████    | 60/100 [00:24<00:16,  2.42epoch/s, Train RMSE=0.0517, Test RMSE=0.1012]

RMSE: 0.1012
MAE:  0.0601





# ICL Monthly

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=False)
model = train_model(train_loader, test_loader)

Epoch:  70%|███████   | 70/100 [00:06<00:02, 10.28epoch/s, Train RMSE=0.1068, Test RMSE=0.1562]


RMSE: 0.1562
MAE:  0.1132


In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=False)
model = train_model(train_loader, test_loader)

Epoch:  42%|████▏     | 42/100 [00:03<00:04, 11.88epoch/s, Train RMSE=0.0998, Test RMSE=0.1554]

RMSE: 0.1554
MAE:  0.1097





## ICL Multilabel

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   1%|          | 1/100 [00:00<00:09,  9.96epoch/s, Train RMSE=0.4371, Test RMSE=0.3875]

Epoch:  49%|████▉     | 49/100 [00:04<00:04, 10.27epoch/s, Train RMSE=0.1103, Test RMSE=0.1555]

RMSE: 0.1555
MAE:  0.1116





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.97epoch/s, Train RMSE=0.1000, Test RMSE=0.1574]

RMSE: 0.1574
MAE:  0.1117





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  49%|████▉     | 49/100 [00:04<00:04, 10.25epoch/s, Train RMSE=0.1104, Test RMSE=0.1560]

RMSE: 0.1560
MAE:  0.1115





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)



Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.99epoch/s, Train RMSE=0.1001, Test RMSE=0.1568]

RMSE: 0.1568
MAE:  0.1113





## ICL Multiclass-2

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  49%|████▉     | 49/100 [00:04<00:04, 10.27epoch/s, Train RMSE=0.1101, Test RMSE=0.1547]

RMSE: 0.1547
MAE:  0.1111





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.97epoch/s, Train RMSE=0.1004, Test RMSE=0.1566]

RMSE: 0.1566
MAE:  0.1109





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  51%|█████     | 51/100 [00:05<00:04, 10.18epoch/s, Train RMSE=0.1099, Test RMSE=0.1545]

RMSE: 0.1545
MAE:  0.1111





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s, Train RMSE=0.4563, Test RMSE=0.3789]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.94epoch/s, Train RMSE=0.0998, Test RMSE=0.1569]

RMSE: 0.1569
MAE:  0.1118





## ICL Multiclass-1

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  41%|████      | 41/100 [00:04<00:05, 10.22epoch/s, Train RMSE=0.1111, Test RMSE=0.1587]

RMSE: 0.1587
MAE:  0.1139





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.87epoch/s, Train RMSE=0.0998, Test RMSE=0.1576]

RMSE: 0.1576
MAE:  0.1115





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  51%|█████     | 51/100 [00:04<00:04, 10.27epoch/s, Train RMSE=0.1107, Test RMSE=0.1559]

RMSE: 0.1559
MAE:  0.1117





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_icl_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 12.00epoch/s, Train RMSE=0.0998, Test RMSE=0.1580]

RMSE: 0.1580
MAE:  0.1121





# ZS Monthly

## zs Multilabel

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  49%|████▉     | 49/100 [00:04<00:05, 10.15epoch/s, Train RMSE=0.1102, Test RMSE=0.1555]

RMSE: 0.1555
MAE:  0.1115





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.91epoch/s, Train RMSE=0.1002, Test RMSE=0.1571]

RMSE: 0.1571
MAE:  0.1115





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  49%|████▉     | 49/100 [00:04<00:04, 10.22epoch/s, Train RMSE=0.1100, Test RMSE=0.1555]

RMSE: 0.1555
MAE:  0.1115





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multilabel_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.99epoch/s, Train RMSE=0.1005, Test RMSE=0.1567]

RMSE: 0.1567
MAE:  0.1109





## ZS Multiclass-2

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  45%|████▌     | 45/100 [00:04<00:05, 10.25epoch/s, Train RMSE=0.1112, Test RMSE=0.1577]

RMSE: 0.1577
MAE:  0.1135





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  55%|█████▌    | 55/100 [00:04<00:03, 11.97epoch/s, Train RMSE=0.1013, Test RMSE=0.1574]

RMSE: 0.1574
MAE:  0.1116





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  49%|████▉     | 49/100 [00:04<00:04, 10.24epoch/s, Train RMSE=0.1103, Test RMSE=0.1549]


RMSE: 0.1549
MAE:  0.1104


In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-2_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.99epoch/s, Train RMSE=0.1008, Test RMSE=0.1568]

RMSE: 0.1568
MAE:  0.1106





## ZS Multiclass-1

In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  49%|████▉     | 49/100 [00:04<00:04, 10.27epoch/s, Train RMSE=0.1103, Test RMSE=0.1550]

RMSE: 0.1550
MAE:  0.1108





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_fin-r1.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.99epoch/s, Train RMSE=0.1006, Test RMSE=0.1573]

RMSE: 0.1573
MAE:  0.1110





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=6, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s]

Epoch:  45%|████▌     | 45/100 [00:04<00:05, 10.23epoch/s, Train RMSE=0.1112, Test RMSE=0.1578]

RMSE: 0.1578
MAE:  0.1132





In [None]:
timeseries_df = create_dataset('Monthly_Stock_Price.csv', 'DisclosureTone/tone_zs_multiclass-1_qwen.csv')
input_dim, train_loader, test_loader = create_dataloader(timeseries_df, n_lags=12, tone=True)
model = train_model(train_loader, test_loader)

Epoch:   0%|          | 0/100 [00:00<?, ?epoch/s, Train RMSE=0.4570, Test RMSE=0.3797]

Epoch:  59%|█████▉    | 59/100 [00:04<00:03, 11.99epoch/s, Train RMSE=0.1003, Test RMSE=0.1571]

RMSE: 0.1571
MAE:  0.1111



