# Model and dataset Folder

# External datasets created: merged, train, and test

In [None]:
# Join the stock details with the company info on the shared 'Symbol' key.
# how='inner' keeps only rows whose symbol appears in both tables.
merged_data = pd.merge(
    left=company_stock_details,
    right=company_info,
    how='inner',
    on='Symbol',
)

# Write the joined table to a new CSV file, leaving out the index column.
merged_data.to_csv('merged_company_data.csv', index=False)

In [None]:
# Order rows by company symbol, then by date within each symbol, so that each
# company's rows are in date order. sort_values returns a new frame here
# rather than mutating merged_data in place.
merged_data = merged_data.sort_values(by=['Symbol', 'Date'])

# Single definition of the train/test boundary: rows dated before it go to the
# training set, rows dated on or after it go to the test set.
# NOTE(review): if 'Date' holds strings, these comparisons are lexicographic and
# only correct for ISO-8601 (YYYY-MM-DD) values — confirm the column's dtype.
SPLIT_DATE = '2022-01-01'
train_data = merged_data[merged_data['Date'] < SPLIT_DATE]
test_data = merged_data[merged_data['Date'] >= SPLIT_DATE]

# Persist both partitions without the index column.
train_data.to_csv('train_data.csv', index=False)
test_data.to_csv('test_data.csv', index=False)

# Load libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.holtwinters import ExponentialSmoothing
from sklearn.metrics import root_mean_squared_error
import torch
import torch.nn as nn
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from prophet import Prophet
from statsmodels.tsa.arima.model import ARIMA

# ARIMA model

In [None]:
# Fit an ARIMA model with order (1, 1, 3) on the training data.
# NOTE(review): confirm train_data is the single series intended for ARIMA at
# this point, not the full merged DataFrame written to train_data.csv.
model = ARIMA(endog=train_data, order=(1, 1, 3)).fit()

# Forecast one step for every row of the test set.
forecast_horizon = len(test_data)
forecast = model.forecast(steps=forecast_horizon)

# Holt-Winters method model

In [None]:
# Holt-Winters configuration: additive trend, additive seasonality, and a
# seasonal cycle of 7 periods.
holt_winters_options = dict(trend='add', seasonal='add', seasonal_periods=7)
model = ExponentialSmoothing(train_data, **holt_winters_options).fit()

# Forecast one step for every row of the test set.
forecast = model.forecast(steps=len(test_data))

# LSTM model

In [None]:
# Build the attention LSTM with a single output value per input sequence.
model = AttentionLSTM(
    input_size=input_size,
    hidden_layer_size=hidden_layer_size,
    output_size=1,
    num_layers=num_layers
)

# Train on the training tensors, passing the validation tensors and the
# early-stopping / scheduler callbacks through to fit().
model.fit(
    X_train_tensor,
    y_train_tensor,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_val_tensor, y_val_tensor),
    callbacks=[early_stopping, scheduler]
)

# torch.no_grad() only turns off gradient tracking; it does not switch layers
# such as dropout or batch norm to inference behaviour. Put the model in eval
# mode so the forecast below is deterministic.
# NOTE(review): assumes AttentionLSTM is a torch.nn.Module — confirm.
model.eval()

# Recursive forecast: start from the first validation window, predict one
# value, slide the window forward by one row, and feed the prediction back in.
forecast = []
current_sequence = X_val[0]
with torch.no_grad():
    for i in range(len(X_val)):
        # unsqueeze(0) adds a leading dimension (presumably the batch axis).
        current_sequence_tensor = torch.from_numpy(current_sequence).float().unsqueeze(0)
        prediction = model(current_sequence_tensor)
        predicted_value = prediction.item()
        forecast.append(predicted_value)
        # New row: the prediction goes in column 0 and every other column is
        # filled with 0.
        # NOTE(review): assumes column 0 is the target feature — confirm.
        new_prediction = np.array([[predicted_value] + [0] * (current_sequence.shape[1] - 1)])
        # Drop the oldest row and append the new one to keep the window length fixed.
        current_sequence = np.vstack((current_sequence[1:], new_prediction))

# Prophet model

In [None]:
def create_forecast(df, company, use_regressors=False):
    """Fit a Prophet model on closing prices and return its predictions.

    Relies on the notebook-level names ``holidays``, ``custom_seasonalities``
    and ``future_periods`` being defined before the call.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Date' and 'Close' columns. When ``use_regressors`` is
        true it must also contain 'News - Positive Sentiment' and
        'News - Negative Sentiment'.
    company
        Not used by the function body; kept so existing callers keep working.
        NOTE(review): ``df`` is not filtered by company here — presumably the
        caller passes a single company's rows; confirm.
    use_regressors : bool, default False
        When true, the two news-sentiment columns are added as extra
        regressors, with missing values replaced by 0.

    Returns
    -------
    The result of ``model.predict`` on the frame built by
    ``model.make_future_dataframe(periods=future_periods)``.
    """
    regressor_columns = ['News - Positive Sentiment', 'News - Negative Sentiment']

    df_prophet = df[['Date', 'Close']].rename(columns={'Date': 'ds', 'Close': 'y'})
    # Normalize 'ds' to datetimes up front. The regressor merge below joins
    # this frame with Prophet's future frame on 'ds'; if the dates were left
    # as strings (e.g. loaded from CSV) that join would fail or not match.
    df_prophet['ds'] = pd.to_datetime(df_prophet['ds'])

    model = Prophet(
        yearly_seasonality=False,
        weekly_seasonality=False,
        holidays=holidays,
        seasonality_mode='multiplicative',
        changepoint_prior_scale=0.05
    )

    # Built-in yearly/weekly seasonalities are disabled above; register the
    # custom ones configured at notebook level instead.
    for name, params in custom_seasonalities.items():
        model.add_seasonality(name=name, period=params['period'], fourier_order=params['fourier_order'])

    if use_regressors:
        for column in regressor_columns:
            # Missing sentiment values are treated as 0.
            df_prophet[column] = df[column].fillna(0)
            model.add_regressor(column)

    model.fit(df_prophet)
    future = model.make_future_dataframe(periods=future_periods)

    if use_regressors:
        # Bring the known regressor values onto the future frame by date.
        # Dates absent from the training data have no sentiment values and
        # are filled with 0.
        # NOTE(review): duplicate 'ds' values in df would duplicate rows in
        # this left merge — confirm df holds one row per date.
        future = future.merge(df_prophet[['ds'] + regressor_columns], on='ds', how='left')
        future[regressor_columns] = future[regressor_columns].fillna(0)

    forecast = model.predict(future)

    return forecast
