# CPE 695 -  Applied Machine Learning: Final Project
## Stock Prediction using MLP

### Name: Johnny Guamanquispe

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

In [2]:
#get the stock data for S&P500 from Yahoo Finance
sp500_data = yf.download(tickers = '^GSPC', start = '2013-08-10',end = '2023-08-10')
sp500_data.head(10)

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2013-08-12,1688.369995,1691.48999,1683.349976,1689.469971,1689.469971,2789160000
2013-08-13,1690.650024,1696.810059,1682.619995,1694.160034,1694.160034,3035560000
2013-08-14,1693.880005,1695.52002,1684.829956,1685.390015,1685.390015,2871430000
2013-08-15,1679.609985,1679.609985,1658.589966,1661.319946,1661.319946,3426690000
2013-08-16,1661.219971,1663.599976,1652.609985,1655.829956,1655.829956,3211450000
2013-08-19,1655.25,1659.180054,1645.839966,1646.060059,1646.060059,2904530000
2013-08-20,1646.810059,1658.920044,1646.079956,1652.349976,1652.349976,2994090000
2013-08-21,1650.660034,1656.98999,1639.430054,1642.800049,1642.800049,2932180000
2013-08-22,1645.030029,1659.550049,1645.030029,1656.959961,1656.959961,2537460000
2013-08-23,1659.920044,1664.849976,1654.810059,1663.5,1663.5,2582670000


In [3]:
#calculate the difference between the closing prices of consecutive days
sp500_data["Diff"] = sp500_data.Close.diff()

#calculate the simple moving average of the closing prices over a window of 2 days
sp500_data["SMA_2"] = sp500_data.Close.rolling(2).mean()

#calculate the force index (product of closing price and volume)
sp500_data["Force_Index"] = sp500_data["Close"] * sp500_data["Volume"]

In [4]:
#create a new column 'y' which is 1 if the difference between consecutive days' 
#closing prices is positive, else 0. Shift it by one day.
sp500_data["y"] = sp500_data["Diff"].apply(lambda x: 1 if x > 0 else 0).shift(-1)

In [5]:
#display the dataset with all columns
sp500_data.head(10)

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,Diff,SMA_2,Force_Index,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2013-08-12,1688.369995,1691.48999,1683.349976,1689.469971,1689.469971,2789160000,,,4712202000000.0,1.0
2013-08-13,1690.650024,1696.810059,1682.619995,1694.160034,1694.160034,3035560000,4.690063,1691.815002,5142724000000.0,0.0
2013-08-14,1693.880005,1695.52002,1684.829956,1685.390015,1685.390015,2871430000,-8.77002,1689.775024,4839479000000.0,0.0
2013-08-15,1679.609985,1679.609985,1658.589966,1661.319946,1661.319946,3426690000,-24.070068,1673.35498,5692828000000.0,0.0
2013-08-16,1661.219971,1663.599976,1652.609985,1655.829956,1655.829956,3211450000,-5.48999,1658.574951,5317615000000.0,0.0
2013-08-19,1655.25,1659.180054,1645.839966,1646.060059,1646.060059,2904530000,-9.769897,1650.945007,4781031000000.0,1.0
2013-08-20,1646.810059,1658.920044,1646.079956,1652.349976,1652.349976,2994090000,6.289917,1649.205017,4947285000000.0,0.0
2013-08-21,1650.660034,1656.98999,1639.430054,1642.800049,1642.800049,2932180000,-9.549927,1647.575012,4816985000000.0,1.0
2013-08-22,1645.030029,1659.550049,1645.030029,1656.959961,1656.959961,2537460000,14.159912,1649.880005,4204470000000.0,1.0
2013-08-23,1659.920044,1664.849976,1654.810059,1663.5,1663.5,2582670000,6.540039,1660.22998,4296272000000.0,0.0


In [6]:
#drop columns that are not needed for training and testing the model and drop rows with missing values
sp500_data = sp500_data.drop(
   ["Open", "High", "Low", "Close", "Volume", "Diff", "Adj Close"],
   axis=1,
).dropna()

In [7]:
#display the dataset without dropped columns
sp500_data.head(10)

Unnamed: 0_level_0,SMA_2,Force_Index,y
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2013-08-13,1691.815002,5142724000000.0,0.0
2013-08-14,1689.775024,4839479000000.0,0.0
2013-08-15,1673.35498,5692828000000.0,0.0
2013-08-16,1658.574951,5317615000000.0,0.0
2013-08-19,1650.945007,4781031000000.0,1.0
2013-08-20,1649.205017,4947285000000.0,0.0
2013-08-21,1647.575012,4816985000000.0,1.0
2013-08-22,1649.880005,4204470000000.0,1.0
2013-08-23,1660.22998,4296272000000.0,0.0
2013-08-26,1660.140015,4027086000000.0,0.0


In [8]:
# drop the target variable
X = sp500_data.drop(["y"], axis=1).values
y = sp500_data["y"].values

#split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False,
)

In [9]:
#create a pipeline that scales the data and trains an MLPClassifier model with custom parameters
model = make_pipeline(
    StandardScaler(),
    MLPClassifier(
        random_state=0,
        hidden_layer_sizes=(128, 64, 32),
        activation='relu',
        solver='adam',
        alpha=0.0001,
        batch_size='auto',
        learning_rate='constant',
        learning_rate_init=0.001,
        power_t=0.5,
        max_iter=200,
        shuffle=False,
        tol=0.0001,
        verbose=False,
        warm_start=False,
        momentum=0.9,
        nesterovs_momentum=True,
        early_stopping=False,
        validation_fraction=0.1,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
    )
)

In [10]:
#fit the model
model.fit(X_train,y_train,)

In [11]:
#show the score
y_pred = model.predict(X_test)
accuracy = (accuracy_score(y_test, y_pred))
precision = precision_score(y_test, y_pred)

print("accuracy: ", accuracy)
print("precision: ", precision)


accuracy:  0.4831013916500994
precision:  0.4703196347031963
