In [None]:
import pandas as pd
import numpy as np
from darts import TimeSeries
from darts.models import TiDEModel
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas_ta as ta

In [188]:
# Load and preprocess data for multiple stocks
file_paths = [
    "wavelet_reconstructed/Chunghwa_wavelet_reconstructed_only.csv",
    "wavelet_reconstructed/FET_wavelet_reconstructed_only.csv",
    "wavelet_reconstructed/Syscom_wavelet_reconstructed_only.csv",
]

# Labels used as column prefixes once the frames are aligned; the order must
# match file_paths.
# NOTE(review): "Chunghua" differs from the "Chunghwa" spelling in the file
# name. Left unchanged because it ends up in output column names - confirm.
stock_names = ["Chunghua", "FET", "Syscom"]
assert len(stock_names) == len(file_paths), "each input file needs exactly one stock label"


def load_stock_features(file_path):
    """Read one price CSV and return it with engineered features as float32.

    Parameters
    ----------
    file_path : str
        CSV with a ``Date`` column plus six value columns, which are renamed
        positionally to Adj Close, Close, High, Low, Open, Volume.

    Returns
    -------
    pandas.DataFrame
        Date-indexed numeric frame holding the six price/volume columns and
        the derived features, without rows containing NaN or +/-inf.
    """
    prices = pd.read_csv(file_path, parse_dates=["Date"], index_col="Date")
    # Positional rename: raises if the file does not have exactly six columns.
    prices.columns = ["Adj Close", "Close", "High", "Low", "Open", "Volume"]

    # Feature engineering
    prices['MA_5'] = prices['Close'].rolling(window=5).mean()
    prices['MA_10'] = prices['Close'].rolling(window=10).mean()
    prices['RSI'] = ta.rsi(prices['Close'], length=14)
    prices['Volume_MA_5'] = prices['Volume'].rolling(window=5).mean()
    prices['Price_Range'] = prices['High'] - prices['Low']
    prices['Daily_Return'] = ((prices['Close'] - prices['Open']) / prices['Open']) * 100
    prices['Volume_Change'] = prices['Volume'].pct_change() * 100

    # A zero Open or a zero previous Volume makes the two ratio features
    # +/-inf. dropna() alone keeps those rows, so turn them into NaN first and
    # let them be dropped together with the rolling-window warm-up rows.
    cleaned = prices.replace([np.inf, -np.inf], np.nan).dropna()
    return cleaned.select_dtypes(include=[np.number]).astype(np.float32)


stock_data = [load_stock_features(file_path) for file_path in file_paths]

In [189]:
# Align data by date and ensure the same length
# Column-wise concat joins the per-stock frames on Date under a
# (stock, field) column MultiIndex; dropna() then keeps only the dates for
# which every stock has a complete row.
aligned_data = pd.concat(stock_data, axis=1, keys=stock_names[:len(stock_data)]).dropna()

# Flatten multi-index columns for simplicity, e.g. ("FET", "Close") -> "FET_Close"
aligned_data.columns = ['_'.join(col) for col in aligned_data.columns]

# Define features and target (for all stocks)
# "<stock>_Adj Close" ends in " Close", not "_Close", so it counts as a feature.
features = [col for col in aligned_data.columns if not col.endswith('_Close')]
targets = [col for col in aligned_data.columns if col.endswith('_Close')]

# Scale data
# NOTE(review): the scaler is fitted on the full history, before the
# train/test split done later in the notebook, so the test range influences
# the scaling - consider fitting on the training portion only.
scaler = MinMaxScaler()
scaled_data = pd.DataFrame(scaler.fit_transform(aligned_data), columns=aligned_data.columns, index=aligned_data.index)

# Convert data to TimeSeries (multi-variate)
# The series needs a regular business-day index. Business days missing from
# the data (presumably exchange holidays) are forward-filled with the last
# observed row. Filling them with 0 would put every column at its scaled
# minimum on those days, i.e. a fake crash in each price series.
# scaled_data itself is left on the original dates.
business_day_frame = scaled_data.asfreq('B').ffill()
multi_series = TimeSeries.from_dataframe(business_day_frame, freq='B')


In [None]:
# Chronological hold-out split: the first 90% of the time steps are used for
# training and the remaining 10% for testing. No separate validation set is
# carved out here.
train_size = int(0.9 * len(multi_series))
train_series, test_series = multi_series[:train_size], multi_series[train_size:]

In [None]:
# TiDE model setup and training
# The model reads 30 past steps and emits 5 steps per forward pass; longer
# horizons requested from predict() are produced autoregressively.
# random_state fixes the weight initialisation so that Restart & Run All
# reproduces the same fitted model and metrics.
model = TiDEModel(input_chunk_length=30, output_chunk_length=5, random_state=42)
# Every column of the multivariate series is treated as a target component,
# so the forecasts contain all columns, not only the Close prices.
model.fit(train_series)

In [None]:
# Define separate scalers for each target
# One single-column scaler per Close series, so a forecast component can be
# mapped back to price units on its own. MinMaxScaler scales every column
# independently, so these reproduce the scaling already applied to the same
# columns of scaled_data; the scaled frame and the series the model was
# trained on therefore do not need to be rebuilt here.
target_scalers = {target: MinMaxScaler().fit(aligned_data[[target]]) for target in targets}

# Test predictions
# Without a `series` argument the forecast starts right after the end of the
# training series, i.e. it covers the test window.
y_test_pred_scaled = model.predict(n=len(test_series))

# Calculate test MSE for each stock
test_mse = {}
for target in targets:
    # Select the component by NAME. The series holds every column (features
    # and targets), so the position of a target inside `targets` is not its
    # position inside the series; indexing by that position would compare the
    # wrong columns and inverse-scale them with the wrong scaler.
    actual_values = target_scalers[target].inverse_transform(
        test_series.univariate_component(target).values()
    )
    predicted_values = target_scalers[target].inverse_transform(
        y_test_pred_scaled.univariate_component(target).values()
    )
    test_mse[target] = mean_squared_error(actual_values.flatten(), predicted_values.flatten())

print("Test MSE for each stock:")
for stock, mse in test_mse.items():
    print(f"{stock}: {mse:.4f}")

In [None]:
# Plot test predictions and actual values for each stock
# One row per target; squeeze=False keeps `axes` two-dimensional even when
# there is a single target.
fig, axes = plt.subplots(len(targets), 1, figsize=(10, 8), squeeze=False)

for ax, target in zip(axes[:, 0], targets):
    # Inverse transform the actual and predicted values for the current stock.
    # Components are selected by name: the series contains all columns, so
    # the target's position in `targets` does not identify its component.
    actual_values = target_scalers[target].inverse_transform(
        test_series.univariate_component(target).values()
    )
    predicted_values = target_scalers[target].inverse_transform(
        y_test_pred_scaled.univariate_component(target).values()
    )

    ax.plot(test_series.time_index, actual_values, label="Actual", color="blue")
    ax.plot(y_test_pred_scaled.time_index, predicted_values, label="Predicted", color="orange")
    ax.set_title(f"{target} - Actual vs Predicted")
    ax.set_xlabel("Time")
    ax.set_ylabel("Close Price")
    ax.legend()

# Lay out once, after all subplots exist.
fig.tight_layout()
plt.show()

In [None]:
# Predict the next 5 days
# Forecast from the end of the FULL series. Without `series`, predict()
# continues from the end of the training series, which would yield the first
# 5 steps of the test window rather than the days after the last observation.
future_predictions_scaled = model.predict(n=5, series=multi_series)

# Map each Close forecast back to price units. Components are looked up by
# name because the forecast contains every column of the series, so a
# target's position in `targets` is not its component index.
future_predictions = {
    target: target_scalers[target]
    .inverse_transform(future_predictions_scaled.univariate_component(target).values())
    .flatten()
    for target in targets
}

# Save predictions to CSV
# Use the forecast's own time index so the dates always match the values.
future_dates = future_predictions_scaled.time_index
future_df = pd.DataFrame(future_predictions, index=future_dates)

# Ensure DataFrame columns are meaningful stock names
future_df.columns = [f"Predicted_{col}" for col in targets]

future_df.to_csv("future_predictions_multi_stock.csv", index_label="Date")

# Print predictions
print("Predicted Close Prices for the Next 5 Days:")
print(future_df)