In [19]:
import xgboost 
from xgboost import XGBClassifier
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

In [20]:
data = pd.read_csv('filtered_tickers.csv')
features = [
 'Date',
 'Ticker',
 'close_lag', 
 'close_mean_10_days',
 'close_std_10_days',
 'close_max_10_days',
 'close_mean_30_days',
 'close_std_30_days',
 'close_max_30_days',
 'EMA_close_10_days', 
 'EMA_close_30_days',
 'bb_upper_20',
 'bb_lower_20',
 'bb_middle_20',
 'bb_upper_50',
 'bb_lower_50',
 'bb_middle_50',
 'timestamp',
 'Target',
 'EPS_Estimate',
 'EPS_Actual', 
 'EPS_Surprise', 
 'Surprise(%)'
 ]
data = data[features]

In [21]:
data['Ticker'].unique()

array(['AAPL', 'AMD', 'QCOM'], dtype=object)

In [22]:
data.fillna(value=0, inplace=True)

In [23]:
data['Date'] = pd.to_datetime(data['Date'])
data.set_index('Date', inplace=True)

In [24]:
aapl_data = data[data['Ticker'] == 'AAPL']
amd_data = data[data['Ticker'] == 'AMD']
qualcomm_data = data[data['Ticker'] == 'QCOM']
aapl_data['direction'] = (aapl_data['Target'].shift(-1) > aapl_data['Target']).astype(int)
amd_data['direction'] = (amd_data['Target'].shift(-1) > amd_data['Target']).astype(int)
qualcomm_data['direction'] = (qualcomm_data['Target'].shift(-1) > qualcomm_data['Target']).astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  aapl_data['direction'] = (aapl_data['Target'].shift(-1) > aapl_data['Target']).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  amd_data['direction'] = (amd_data['Target'].shift(-1) > amd_data['Target']).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  qualcomm_data['direction']

In [30]:
X_aapl = aapl_data.drop(columns=['Target', 'Ticker', 'timestamp', 'direction'])
y_aapl = aapl_data['direction']
X_train_aapl, X_test_aapl, y_train_aapl, y_test_aapl = train_test_split(X_aapl, y_aapl, test_size=0.2, shuffle=False)

X_amd = amd_data.drop(columns= ['Target','Ticker', 'timestamp', 'direction'])
y_amd = amd_data['direction']
X_train_amd, X_test_amd, y_train_amd, y_test_amd = train_test_split(X_amd, y_amd, test_size=0.2, shuffle=False)

X_qualcomm = qualcomm_data.drop(columns= ['Target','Ticker', 'timestamp', 'direction'])
y_qualcomm = qualcomm_data['direction']
X_train_qualcomm, X_test_qualcomm, y_train_qualcomm, y_test_qualcomm = train_test_split(X_qualcomm, y_qualcomm, test_size=0.2, shuffle=False)

In [31]:
scaler_qualcomm = MinMaxScaler()
X_train_qualcomm_scaled = scaler_qualcomm.fit_transform(X_train_qualcomm)
X_test_qualcomm_scaled = scaler_qualcomm.transform(X_test_qualcomm)

scaler_amd = MinMaxScaler()
X_train_amd_scaled = scaler_amd.fit_transform(X_train_amd)
X_test_amd_scaled = scaler_amd.transform(X_test_amd)

scaler_aapl = MinMaxScaler()
X_train_aapl_scaled = scaler_aapl.fit_transform(X_train_aapl)
X_test_aapl_scaled = scaler_aapl.transform(X_test_aapl)

Model training

In [34]:
aapl_xgb_model = XGBClassifier(
    n_estimators=1600, 
    max_depth=9, 
    learning_rate=0.05, 
    scale_pos_weight = 3, 
    random_state=42, 
    reg_alpha=0.7,
    reg_lambda=0.8,
    gamma=0.02)
aapl_xgb_model.fit(X_train_aapl_scaled, y_train_aapl)
y_pred_aapl = aapl_xgb_model.predict(X_test_aapl_scaled)
print(classification_report(y_test_aapl, y_pred_aapl))
print(confusion_matrix(y_test_aapl, y_pred_aapl))

              precision    recall  f1-score   support

           0       0.50      0.65      0.57       228
           1       0.62      0.46      0.53       275

    accuracy                           0.55       503
   macro avg       0.56      0.56      0.55       503
weighted avg       0.56      0.55      0.55       503

[[149  79]
 [148 127]]


In [36]:
amd_xgb_model = XGBClassifier(
    n_estimators=1200,
    learning_rate=0.025,
    max_depth=8,
    min_child_weight=8,
    gamma=0.6,
    subsample=0.65,
    colsample_bytree=0.65,
    scale_pos_weight=0.83,
    reg_alpha=1.4,
    reg_lambda=8,
    random_state=42
)
amd_xgb_model.fit(X_train_amd_scaled, y_train_amd, verbose=True)
y_pred_googl = amd_xgb_model.predict(X_test_amd_scaled)
print("AMD Stock Prediction:")
print(classification_report(y_test_amd, y_pred_googl))
print(confusion_matrix(y_test_amd, y_pred_googl))

AMD Stock Prediction:
              precision    recall  f1-score   support

           0       0.52      0.80      0.63       255
           1       0.55      0.25      0.35       248

    accuracy                           0.53       503
   macro avg       0.54      0.53      0.49       503
weighted avg       0.54      0.53      0.49       503

[[204  51]
 [185  63]]


In [37]:
qualcomm_xgb_model = XGBClassifier(
    n_estimators=1600,
    learning_rate=0.02,
    max_depth=6,
    min_child_weight=3,
    gamma=0.15,
    subsample=0.85,
    colsample_bytree=0.85,
    scale_pos_weight=1.2,
    reg_alpha=0.7,
    reg_lambda=6,
    random_state=42
)
qualcomm_xgb_model.fit(X_train_qualcomm_scaled, y_train_qualcomm)
y_pred_qualcomm = qualcomm_xgb_model.predict(X_test_qualcomm_scaled)
print("Qualcomm Stock Prediction:")
print(classification_report(y_test_qualcomm, y_pred_qualcomm))
print(confusion_matrix(y_test_qualcomm, y_pred_qualcomm))

Qualcomm Stock Prediction:
              precision    recall  f1-score   support

           0       0.48      0.49      0.48       233
           1       0.55      0.54      0.55       270

    accuracy                           0.52       503
   macro avg       0.51      0.52      0.51       503
weighted avg       0.52      0.52      0.52       503

[[114 119]
 [124 146]]
