### dependencies

In [9]:

import pandas as pd
import numpy as np
from pandas import read_csv, Series
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error



In [10]:
# Define global variables
target_variable = 'Adj Close'
lag_steps = 3


In [11]:

df = read_csv('../../data/clean/djia_stock_data.csv',index_col=0)
df.dropna(inplace=True)
print(df.shape)
df = df.iloc[:,:31]


(2516, 180)


In [12]:
newDfForApple = df[['Adj Close']].copy()
newDfForApple.index.name = 'Date'
newDfForApple.drop(index= newDfForApple.index[0], inplace=True)
newDfForApple.index = pd.to_datetime(newDfForApple.index)
type(newDfForApple.index)

pandas.core.indexes.datetimes.DatetimeIndex

In [13]:
for i in range(1, lag_steps + 1):
    newDfForApple[f'lag_{i}'] = newDfForApple['Adj Close'].shift(i)
newDfForApple.dropna(inplace=True)
newDfForApple

Unnamed: 0_level_0,Adj Close,lag_1,lag_2,lag_3
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2015-01-07,23.96895980834961,23.637510299682617,23.635284423828125,24.3204345703125
2015-01-08,24.889904022216797,23.96895980834961,23.637510299682617,23.635284423828125
2015-01-09,24.916597366333008,24.889904022216797,23.96895980834961,23.637510299682617
2015-01-12,24.30263328552246,24.916597366333008,24.889904022216797,23.96895980834961
2015-01-13,24.518413543701172,24.30263328552246,24.916597366333008,24.889904022216797
...,...,...,...,...
2024-12-23,254.98965454101562,254.21051025390625,249.51565551757812,247.77757263183594
2024-12-24,257.91644287109375,254.98965454101562,254.21051025390625,249.51565551757812
2024-12-26,258.7355041503906,257.91644287109375,254.98965454101562,254.21051025390625
2024-12-27,255.30929565429688,258.7355041503906,257.91644287109375,254.98965454101562


## test split

In [14]:
train_data, test_data = train_test_split(newDfForApple, test_size=0.2, random_state=42) # Adjust test size as needed
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42) # 0.25 of train is 0.2 of total.

## Separating x and y

In [15]:
X_train = train_data.drop('Adj Close', axis=1)
y_train = train_data['Adj Close']
X_val = val_data.drop('Adj Close', axis=1)
y_val = val_data['Adj Close']
X_test = test_data.drop('Adj Close', axis=1)
y_test = test_data['Adj Close']

## Scaling

In [16]:

scaler_X = MinMaxScaler()
X_train = scaler_X.fit_transform(X_train)
X_val = scaler_X.transform(X_val)
X_test = scaler_X.transform(X_test)
scaler_y = MinMaxScaler()
y_train = scaler_y.fit_transform(y_train.values.reshape(-1, 1))
y_val = scaler_y.transform(y_val.values.reshape(-1, 1))
y_test = scaler_y.transform(y_test.values.reshape(-1, 1))

In [17]:
# 2. Model Building
model = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', random_state=42, max_iter=500)  

# 3. Model Training
model.fit(X_train, y_train.ravel())

## Model Eval

In [18]:

y_pred = model.predict(X_test)
y_pred = scaler_y.inverse_transform(y_pred.reshape(-1, 1)) 
y_test = scaler_y.inverse_transform(y_test) 

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Squared Error: {mse}")
print(f"Mean Absolute Error: {mae}")

Mean Squared Error: 8.165484613301286
Mean Absolute Error: 2.044259344146165


### Prediction

In [25]:
new_data = pd.DataFrame({'lag_1': [newDfForApple['Adj Close'].iloc[-1]], 'lag_2': [newDfForApple['Adj Close'].iloc[-2]], 'lag_3': [newDfForApple['Adj Close'].iloc[-3]]}) # Example: using the last 3 values
new_data_scaled = scaler_X.transform(new_data)
predictions = model.predict(new_data_scaled)
predictions = scaler_y.inverse_transform(predictions.reshape(-1, 1))
print(f"Prediction: {predictions}")

Prediction: [[250.96302192]]
