In [11]:
import yfinance as yf
import numpy as np
import pandas as pd

In [35]:
SYMBOL = "TSLA"
HISTORY = "10y"

all_day_k = yf.Ticker(SYMBOL).history(period=HISTORY, interval="1d")

# REmove meaningless columns
all_day_k = all_day_k.drop(columns=["Dividends", "Stock Splits"])

#Remove latest row
all_day_k = all_day_k[:-1]

PAST_WIN_LEN = 100
CLASSES = ['Buy', 'Bid']
LABEL_BUY = CLASSES[0]
LABEL_BID = CLASSES[1]

x, y = [], []
for today_i in range(len(all_day_k)):
  # Get day_k in the pass 100-day window and the forward 1-day window
  day_k_past = all_day_k[:today_i + 1]
  day_k_forward = all_day_k[today_i+1:]
  if len(day_k_past) < PAST_WIN_LEN or len(day_k_forward) < 1:
    continue
  day_k_past_win = day_k_past[-PAST_WIN_LEN:]
  day_k_forward_win = day_k_forward[:1]
  
  # Find label
  today_price = day_k_past_win.iloc[-1]["Close"]
  tomorrow_price = day_k_forward_win.iloc[0]["Close"]
  label = LABEL_BUY if tomorrow_price > today_price else LABEL_BID

  # Store
  x.append(day_k_past_win.values)
  y.append(label)

x, y = np.array(x), np.array(y)
all_day_k = yf.Ticker(SYMBOL).history(period=HISTORY, interval="1d")

In [36]:
TRAIN_SPLIT, VAL_SPLIT, TEST_SPLIT = 0.7, 0.2, 0.1

# Take the last portion to be the test dataset
test_split_index = -round(len(x) * TEST_SPLIT)
x_other, x_test = np.split(x, [test_split_index])
y_other, y_test = np.split(y, [test_split_index])

# Shuffle the remaining portion and split into training and validation datasets
train_split_index = round(len(x) * TRAIN_SPLIT)
indexes = np.arange(len(x_other))
np.random.shuffle(indexes)
train_indexes, val_indexes = np.split(indexes, [train_split_index])
x_train, x_val = x_other[train_indexes], x_other[val_indexes]
y_train, y_val = y_other[train_indexes], y_other[val_indexes]

In [37]:
# Show label distribution
label_distribution = pd.DataFrame([{"Dataset": "train",
                                    "Buy": np.count_nonzero(y_train == LABEL_BUY),"Bid": np.count_nonzero(y_train == LABEL_BID)},
                                   {"Dataset": "valid",
                                    "Buy": np.count_nonzero(y_val == LABEL_BUY),"Bid": np.count_nonzero(y_val == LABEL_BID)},
                                   {"Dataset": "test",
                                    "Buy": np.count_nonzero(y_test == LABEL_BUY),"Bid": np.count_nonzero(y_test == LABEL_BID)}])
label_distribution

Unnamed: 0,Dataset,Buy,Bid
0,train,874,819
1,valid,248,235
2,test,130,112


In [38]:
# Balance labewls of test dataset ( Make it fair with equal amount of buy/bid label)
x_test_buy = x_test[y_test == LABEL_BUY]
x_test_bid = x_test[y_test == LABEL_BID]

min_n_label = min(len(x_test_buy), len(x_test_bid))

x_test_buy_even = x_test_buy[np.random.choice(len(x_test_buy), min_n_label, replace=False), :]
x_test_bid_even = x_test_bid[np.random.choice(len(x_test_bid), min_n_label, replace=False), :]
x_test_even = np.vstack([x_test_buy_even, x_test_bid_even])

y_test_even = np.array([LABEL_BUY] * min_n_label + [LABEL_BID] * min_n_label)

pd.DataFrame([{"Dataset": "test",
               "Buy": np.count_nonzero(y_test_even == LABEL_BUY),
               "Bid": np.count_nonzero(y_test_even == LABEL_BID)}])

Unnamed: 0,Dataset,Buy,Bid
0,test,112,112


In [39]:
np.savez("datasets.npz", x_train=x_train, y_train=y_train, x_val = x_val, y_val = y_val, x_test = x_test_even, y_test = y_test_even)


# Construct Model

In [None]:
from tensorflow.keras.layers import Input, Flatten, Dense
from tensorflow.keras.models import Model
# Build model
n_time_steps = x_train.shape[1]
n_features = x_train.shape[2]

# print(n_time_steps)
# print(n_features)

input_layer = Input(shape=(n_time_steps, n_features))
x = Flatten()(input_layer)
x = Dense(256, activation="relu")(x)
x = Dense(256, activation="relu")(x)
output_layer = Dense(len(CLASSES), activation="softmax")(x)

model = Model(inputs= input_layer, outputs = output_layer)

model.summary()

# Train Model

In [56]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical

model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

model_checkpoint = ModelCheckpoint(filepath="best_model.hdf5", monitor="val_loss", save_best_only=True)

early_stopping = EarlyStopping(monitor="val_loss", patience=100, restore_best_weights=True)
callbacks = [model_checkpoint, early_stopping]

train_history = model.fit(x_train, to_categorical(y_train, 2),
                          validation_data=(x_val, to_categorical(y_val)),
                          batch_size=2048, epochs=1000, callbacks=[callbacks])

ValueError: invalid literal for int() with base 10: 'Buy'

In [None]:
import matplotlib.pyplot as plt
plt.style.use("seaborm-v0_8")
fig, axes = plt.subplot(2, 1, figsize=(16,12))

axes[0].set_title("Loss")
axes[0].set_yscale("Log")
axes[0].plot(train_history, history["loss"], label="Training")
axes[0].plot(train_history, history["val_loss"], label="Validation")
axes[0].legend()

axes[1].set_title("Accuracy")
axes[1].plot(train_history, history["accuracy"], label="Training")
axes[1].plot(train_history, history["val_accuracy"], label="Validation")
axes[1].set_xlabel("Epoch")
axes[1].legend()
