# COMPARING THE PERFORMANCE BETWEEN MODELS

In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns  # for nicer plots

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow import keras

from catboost import CatBoostClassifier
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

from sklearn.metrics import accuracy_score

In [2]:
def preprocess_data(data):
    lag = 7
    result = data.copy()
    result['ema'] = result['close'].ewm(span=lag).mean()
    for i in range(1, lag+1):
        result['lag_' + str(i)] = result['close'].shift(i)

    result = result.iloc[lag:]
    # Assuming df is your DataFrame and it has 'day' and 'month' columns
    result['month_sin'] = np.sin(2 * np.pi * result['month']/12)
    result['month_cos'] = np.cos(2 * np.pi * result['month']/12)
    result['day_sin'] = np.sin(2 * np.pi * result['day']/31)
    result['day_cos'] = np.cos(2 * np.pi * result['day']/31)
    target = result['is_up']
    result.drop(['is_up', 'Date', 'news', 'day', 'month', 'total_vol', 'mean_vol', 'std_vol', 'open', 'lowest', 'highest'], inplace=True, axis=1)
    return result, target

def train_test_split(data, target, split = [0.8, 0.2]):
    train_idx, test_idx = split
    n = data.shape[0]
    train_end = int(n * train_idx)
    x_train, y_train = data[:train_end], target[:train_end]
    x_test, y_test = data[train_end:], target[train_end:]
    return x_train, y_train, x_test, y_test

In [3]:
apple_results = pd.DataFrame({'ML Algo' : [], 'Train Accuracy' : [], 'Test Accuracy' : []})
tesla_results = pd.DataFrame({'ML Algo' : [], 'Train Accuracy' : [], 'Test Accuracy' : []})
apple_data = pd.read_csv("apple_processed_data.csv", index_col=0)
tesla_data = pd.read_csv("tesla_processed_data.csv", index_col=0)

processed_apple, target_apple = preprocess_data(apple_data)
processed_tesla, target_tesla = preprocess_data(tesla_data)

apple_sc = MinMaxScaler()
tesla_sc = MinMaxScaler()

x_train_apple, y_train_apple, x_test_apple, y_test_apple = train_test_split(processed_apple, target_apple)
x_train_tesla, y_train_tesla, x_test_tesla, y_test_tesla = train_test_split(processed_tesla, target_tesla)

x_train_apple = apple_sc.fit_transform(x_train_apple)
x_test_apple = apple_sc.transform(x_test_apple)

x_train_apple = tesla_sc.fit_transform(x_train_tesla)
x_test_tesla = apple_sc.transform(x_test_tesla)

random_state = 7

## Baseline model

In [288]:
def add_result(y_train_true, y_test_true, y_train_pred, y_test_pred, result: pd.DataFrame, model_name):
    train_acc = accuracy_score(y_train_true, y_train_pred)
    test_acc = accuracy_score(y_test_true, y_test_pred)

    return pd.concat([result, pd.DataFrame({
                'ML Algo': [model_name], 
                'Train Accuracy': [train_acc], 
                'Test Accuracy': [test_acc]})], ignore_index=True)
    

In [289]:
bl_y_train_pred = [1] * y_train_apple.shape[0]
bl_y_test_pred = [1] * y_test_apple.shape[0]

apple_results = add_result(y_train_apple, y_test_apple, bl_y_train_pred, bl_y_test_pred, apple_results, "Baseline")
apple_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018


In [290]:
bl_y_train_pred = [1] * y_train_tesla.shape[0]
bl_y_test_pred = [1] * y_test_tesla.shape[0]

tesla_results = add_result(y_train_tesla, y_test_tesla, bl_y_train_pred, bl_y_test_pred, tesla_results, "Baseline")
tesla_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514


## Logistic Regression

In [291]:
# Apple
lr_apple = LogisticRegression(penalty='l2', max_iter=1000, random_state=random_state)
lr_apple.fit(x_train_apple, y_train_apple)
y_train_pred = lr_apple.predict(x_train_apple)
y_test_pred = lr_apple.predict(x_test_apple)

apple_results = add_result(y_train_apple, y_test_apple, y_train_pred, y_test_pred, apple_results, "Logistic Regression")
apple_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477


In [292]:
# Tesla
lr_tesla = LogisticRegression(penalty='l2', max_iter=1000, random_state=random_state)
lr_tesla.fit(x_train_tesla, y_train_tesla)
y_train_pred = lr_apple.predict(x_train_tesla)
y_test_pred = lr_apple.predict(x_test_tesla)

tesla_results = add_result(y_train_tesla, y_test_tesla, y_train_pred, y_test_pred, tesla_results, "Logstic Regression")
tesla_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991


## Random Forest

In [293]:
rf_apple = RandomForestClassifier(n_estimators=40, criterion='gini', bootstrap=False, random_state=random_state)
rf_apple.fit(x_train_apple, y_train_apple)
y_train_pred = rf_apple.predict(x_train_apple)
y_test_pred = rf_apple.predict(x_test_apple)

apple_results = add_result(y_train_apple, y_test_apple, y_train_pred, y_test_pred, apple_results, "Random Forest")
apple_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477
2,Random Forest,1.0,0.495495


In [294]:
rf_tesla = RandomForestClassifier(n_estimators=40, criterion='gini', bootstrap=False, random_state=random_state)
rf_tesla.fit(x_train_tesla, y_train_tesla)
y_train_pred = rf_apple.predict(x_train_tesla)
y_test_pred = rf_apple.predict(x_test_tesla)

tesla_results = add_result(y_train_tesla, y_test_tesla, y_train_pred, y_test_pred, tesla_results, "Random Forest")
tesla_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991
2,Random Forest,0.48307,0.493243


## CatBoost

In [295]:
catboost_apple = CatBoostClassifier(iterations=802, depth=8, learning_rate=0.211, random_strength=2, bagging_temperature=0.5197456418335619, 
                                    l2_leaf_reg = 0.6986463160504206, border_count = 7, random_state=random_state)
catboost_apple.fit(x_train_apple, y_train_apple, early_stopping_rounds=100, verbose=0)
y_train_pred = catboost_apple.predict(x_train_apple)
y_test_pred = catboost_apple.predict(x_test_apple)

apple_results = add_result(y_train_apple, y_test_apple, y_train_pred, y_test_pred, apple_results, "CatBoost")
apple_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477
2,Random Forest,1.0,0.495495
3,CatBoost,0.93623,0.47973


In [296]:
catboost_tesla = CatBoostClassifier(iterations=802, depth=8, learning_rate=0.211, random_strength=2, bagging_temperature=0.5197456418335619, 
                                    l2_leaf_reg = 0.6986463160504206, border_count = 7, random_state=random_state)
catboost_tesla.fit(x_train_tesla, y_train_tesla, early_stopping_rounds=100, verbose=0)
y_train_pred = catboost_tesla.predict(x_train_tesla)
y_test_pred = catboost_tesla.predict(x_test_tesla)

tesla_results = add_result(y_train_tesla, y_test_tesla, y_train_pred, y_test_pred, tesla_results, "Catboost")
tesla_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991
2,Random Forest,0.48307,0.493243
3,Catboost,0.931151,0.522523


## LSTM

In [297]:
# Apple
keras.utils.set_random_seed(random_state)
x_train_lstm_apple = np.reshape(x_train_apple, (x_train_apple.shape[0], x_train_apple.shape[1], 1))

def create_lstm(units, x_train_lstm):
    model = tf.keras.Sequential()
    model.add(LSTM(units, return_sequences=True, input_shape=(x_train_lstm.shape[1], 1)))
    model.add(LSTM(units))
    model.add(Dropout(0.2))
    model.add(Dense(units=units//2, activation = 'tanh'))
    model.add(Dense(1, activation = 'sigmoid'))
    return model

model = create_lstm(43, x_train_lstm_apple)
optimizer = Adagrad(learning_rate=0.006957392490504836)
model.compile(optimizer='adagrad', loss="binary_crossentropy", metrics=['accuracy'])
model.fit(x_train_lstm_apple, y_train_apple, batch_size=13, epochs=1, verbose=0)
threshold = 0.5
y_test_pred = (model.predict(x_test_apple) >= threshold).astype(int)
y_train_pred = (model.predict(x_train_apple) >= threshold).astype(int)

apple_results = add_result(y_train_apple, y_test_apple, y_train_pred, y_test_pred, apple_results, "LSTM")
apple_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477
2,Random Forest,1.0,0.495495
3,CatBoost,0.93623,0.47973
4,LSTM,0.528217,0.518018


In [298]:
x_train_lstm_tesla = np.reshape(x_train_tesla, (x_train_tesla.shape[0], x_train_tesla.shape[1], 1))
model = create_lstm(43, x_train_lstm_tesla)
optimizer = Adagrad(learning_rate=0.006957392490504836)
model.compile(optimizer='adagrad', loss="binary_crossentropy", metrics=['accuracy'])
model.fit(x_train_lstm_tesla, y_train_tesla, batch_size=13, epochs=1, verbose=0)
threshold = 0.5
y_test_pred = (model.predict(x_test_tesla) >= threshold).astype(int)
y_train_pred = (model.predict(x_train_tesla) >= threshold).astype(int)

tesla_results = add_result(y_train_tesla, y_test_tesla, y_train_pred, y_test_pred, tesla_results, "LSTM")
tesla_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991
2,Random Forest,0.48307,0.493243
3,Catboost,0.931151,0.522523
4,LSTM,0.522009,0.497748


## Neural Networks

In [299]:
def create_neural_network_model(x_train_scaled):
    nn = tf.keras.Sequential()
    nn.add(Dense(32, activation='relu', input_shape=(len(x_train_scaled[0]),)))
    nn.add(Dense(1, activation = "sigmoid"))
    return nn

model = create_neural_network_model(x_train_lstm_apple)
optimizer = SGD(learning_rate=0.010489360088417304)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])
model.fit(x_train_lstm_apple, y_train_apple, batch_size=12, epochs=3, verbose=0)
threshold = 0.5
y_test_pred = (model.predict(x_test_apple) >= threshold).astype(int)
y_train_pred = (model.predict(x_train_apple) >= threshold).astype(int)

apple_results = add_result(y_train_apple, y_test_apple, y_train_pred, y_test_pred, apple_results, "Neural Network")
apple_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477
2,Random Forest,1.0,0.495495
3,CatBoost,0.93623,0.47973
4,LSTM,0.528217,0.518018
5,Neural Network,0.527088,0.486486


In [300]:
model = create_neural_network_model(x_train_lstm_tesla)
optimizer = SGD(learning_rate=0.010489360088417304)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])
model.fit(x_train_lstm_tesla, y_train_tesla, batch_size=13, epochs=1, verbose=0)
threshold = 0.5
y_test_pred = (model.predict(x_test_tesla) >= threshold).astype(int)
y_train_pred = (model.predict(x_train_tesla) >= threshold).astype(int)

tesla_results = add_result(y_train_tesla, y_test_tesla, y_train_pred, y_test_pred, tesla_results, 'Neural Network')
tesla_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991
2,Random Forest,0.48307,0.493243
3,Catboost,0.931151,0.522523
4,LSTM,0.522009,0.497748
5,Neural Network,0.520316,0.529279


## LSTM - GRU

In [301]:
def create_lstm_gru(units, x_train_lstm):
    model = tf.keras.Sequential()
    model.add(LSTM(units, return_sequences=True, input_shape=(x_train_lstm.shape[1], 1)))
    model.add(Dropout(0.2))
    model.add(GRU(units))
    model.add(Dropout(0.2))
    model.add(Dense(units//2, activation = "tanh"))
    model.add(Dense(1, activation = 'sigmoid'))
    return model

model = create_lstm_gru(41, x_train_lstm_apple)
optimizer = SGD(learning_rate=0.011238488176878645)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])
model.fit(x_train_lstm_apple, y_train_apple, batch_size=2, epochs=1, verbose=0)
threshold = 0.5
y_test_pred = (model.predict(x_test_apple) >= threshold).astype(int)
y_train_pred = (model.predict(x_train_apple) >= threshold).astype(int)

apple_results = add_result(y_train_apple, y_test_apple, y_train_pred, y_test_pred, apple_results, "LSTM - GRU")
apple_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477
2,Random Forest,1.0,0.495495
3,CatBoost,0.93623,0.47973
4,LSTM,0.528217,0.518018
5,Neural Network,0.527088,0.486486
6,LSTM - GRU,0.528217,0.518018


In [302]:
model = create_neural_network_model(x_train_lstm_tesla)
optimizer = SGD(learning_rate=0.011238488176878645)
model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=['accuracy'])
model.fit(x_train_lstm_tesla, y_train_tesla, batch_size=2, epochs=1, verbose=0)
threshold = 0.5
y_test_pred = (model.predict(x_test_tesla) >= threshold).astype(int)
y_train_pred = (model.predict(x_train_tesla) >= threshold).astype(int)

tesla_results = add_result(y_train_tesla, y_test_tesla, y_train_pred, y_test_pred, tesla_results, 'LSTM - GRU')
tesla_results



Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991
2,Random Forest,0.48307,0.493243
3,Catboost,0.931151,0.522523
4,LSTM,0.522009,0.497748
5,Neural Network,0.520316,0.529279
6,LSTM - GRU,0.522009,0.488739


## Result

### Apple

In [304]:
apple_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.528217,0.518018
1,Logistic Regression,0.539503,0.477477
2,Random Forest,1.0,0.495495
3,CatBoost,0.93623,0.47973
4,LSTM,0.528217,0.518018
5,Neural Network,0.527088,0.486486
6,LSTM - GRU,0.528217,0.518018


### Tesla 

In [305]:
tesla_results

Unnamed: 0,ML Algo,Train Accuracy,Test Accuracy
0,Baseline,0.522009,0.513514
1,Logstic Regression,0.477991,0.490991
2,Random Forest,0.48307,0.493243
3,Catboost,0.931151,0.522523
4,LSTM,0.522009,0.497748
5,Neural Network,0.520316,0.529279
6,LSTM - GRU,0.522009,0.488739
