In [19]:
import yfinance as yf
import pandas as pd
import seaborn as sns
import mplfinance as mpf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import torch
import numpy as np
pd.options.mode.chained_assignment = None  # default='warn'
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


# NEW THINGS

In [20]:
stock_name = "AAPL"

In [28]:
def prepare_daily_data(stock_name, period="5d", interval="1m"):
    recent_data = yf.download(stock_name, period=period, interval=interval)
    recent_data = recent_data.drop(columns=["Adj Close", "Volume"])

    segment = 390 if interval == "1m" else 7

    for i in range(1, segment):
        recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
        recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
        recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
        recent_data[f"Low t - {i}"] = recent_data["Low"].shift(i)

    recent_data.dropna(inplace=True)
    first_timestamp = recent_data.index[0]

    if first_timestamp.time() != pd.Timestamp("09:30:00").time():
        index = recent_data.index.get_loc(
            recent_data[
                recent_data.index.time == pd.Timestamp("09:30:00").time()
            ].index[0]
        )
        recent_data = recent_data.iloc[index:]

    # Split data into daily segments
    recent_data["day_index"] = recent_data.index.date
    recent_data["day_index"], _ = pd.factorize(recent_data["day_index"])
    daily_data = [group for _, group in recent_data.groupby("day_index")]

    # Add movement column to each daily DataFrame
    for day_data in daily_data:
        day_data["movement"] = (
            day_data["Close"].iloc[-1] > day_data["Open"].iloc[0]
        ).astype(int)

    day_data["movement"] = day_data["movement"].shift(-1)

    combined_data = pd.concat(daily_data)

    last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])
    last_hour_data.dropna(inplace=True)

    return last_hour_data

daily_data = prepare_daily_data(stock_name, period="1y", interval="1h")

print(daily_data.shape)

[*********************100%%**********************]  1 of 1 completed


(249, 30)


In [29]:
from sklearn.model_selection import train_test_split
import torch
from torch import nn

#device = "cuda" if torch.cuda.is_available() else "cpu"
device = "cpu"

X = daily_data.drop(columns=["movement"])
y = daily_data["movement"]

X_tensor = torch.from_numpy(X.values)
# X_tensor.requires_grad=True
y_tensor = torch.from_numpy(y.values)
# y_tensor.requires_grad = True

# print(X)

# print(split_data(X_tensor, interval="1h").shape)

#print(split_data(X_tensor, interval="1h").shape)

X_train,X_test,y_train,y_test = train_test_split(X_tensor,y_tensor,test_size=0.2)
X_train = X_train.to(torch.float32).to(device)
# X_time_step = [i for i in range(len(X_train))]
X_test= X_test.to(torch.float32).to(device)
y_train = y_train.to(torch.float32).to(device)
y_test =y_test.to(torch.float32).to(device)

print(X_test.shape)
print(X_test.shape)
print(X_train)

torch.Size([50, 29])
torch.Size([50, 29])
tensor([[188.9600, 188.9900, 188.5100,  ..., 187.6500, 185.8300, 107.0000],
        [171.8550, 172.5200, 171.7800,  ..., 173.6300, 170.8200,  34.0000],
        [226.8100, 227.8400, 226.7601,  ..., 227.3500, 223.2500, 224.0000],
        ...,
        [171.5800, 171.6400, 170.4400,  ..., 170.0100, 167.6200,  31.0000],
        [224.6800, 225.0400, 223.7800,  ..., 227.7800, 225.5800, 234.0000],
        [218.6200, 218.6200, 217.1300,  ..., 219.1499, 216.0100, 238.0000]])


In [31]:
# class LSTMPredictor(nn.Module):
#     def __init__(self, input_size, hidden_size, n_layers=2):
#         super(LSTMPredictor, self).__init__()

#         self.ltsm = nn.LSTM(
#             input_size=input_size,
#             hidden_size=hidden_size,
#             num_layers=n_layers,
#             batch_first=True,
#         )
#     def forward(self, sequences):
#         lstm_out, (hn, cn) = self.ltsm(sequences)
#         return lstm_out

model = nn.Sequential(
    nn.Linear(29, 128),
    nn.ReLU(),
    nn.Linear(128, 256),
    nn.ReLU(),
    nn.Linear(256, 1),
    # nn.Sigmoid(),
    
).to(device)

model(X_train)

tensor([[-2.8957],
        [-2.5601],
        [-3.5439],
        [-2.5493],
        [-2.6961],
        [-3.3406],
        [-3.4617],
        [-2.5509],
        [-2.9211],
        [-2.6819],
        [-2.5985],
        [-2.5631],
        [-3.0135],
        [-2.8569],
        [-2.7235],
        [-2.6451],
        [-2.8470],
        [-3.0090],
        [-2.6483],
        [-3.4969],
        [-2.9705],
        [-2.5311],
        [-2.5234],
        [-2.6296],
        [-2.5152],
        [-2.6522],
        [-2.6092],
        [-2.8203],
        [-2.8187],
        [-2.5085],
        [-3.0385],
        [-2.5927],
        [-2.9402],
        [-3.0706],
        [-3.5742],
        [-2.5162],
        [-2.6793],
        [-2.4932],
        [-2.8577],
        [-3.3101],
        [-2.7650],
        [-3.3834],
        [-2.8281],
        [-2.8764],
        [-2.7050],
        [-2.6361],
        [-3.3310],
        [-2.4996],
        [-2.7468],
        [-2.5675],
        [-2.6221],
        [-3.4675],
        [-2.

In [32]:
model

Sequential(
  (0): Linear(in_features=29, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=1, bias=True)
)

In [35]:
epochs = 1000

loss_fn = nn.BCEWithLogitsLoss()
print(loss_fn)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(epochs):
    model.train()
    y_logits = model(X_train).squeeze()
    y_preds = torch.round(torch.sigmoid(y_logits)).float()
    loss = loss_fn(y_logits, y_train)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    model.eval()
    with torch.inference_mode():
        y_test_logits = model(X_test).squeeze()
        y_test_preds = torch.round(torch.sigmoid(y_test_logits)).float()

        # print(y_test_preds.shape)
        # print(y_test.shape)
        test_loss = loss_fn(y_test_logits, y_test)
        if epoch % 100 == 0:
            print(f"epoch: {epoch} loss = {loss}, test loss = {test_loss}")

BCEWithLogitsLoss()
epoch: 0 loss = 0.27131471037864685, test loss = 6.1112236976623535
epoch: 100 loss = 0.29311394691467285, test loss = 0.3599291145801544
epoch: 200 loss = 0.28533080220222473, test loss = 0.3751828670501709
epoch: 300 loss = 0.3075217604637146, test loss = 0.32357585430145264
epoch: 400 loss = 0.2144707888364792, test loss = 0.28057345747947693
epoch: 500 loss = 0.2338893711566925, test loss = 0.351945161819458
epoch: 600 loss = 0.2870742082595825, test loss = 0.4330994784832001
epoch: 700 loss = 0.2800848186016083, test loss = 0.39690959453582764
epoch: 800 loss = 0.2982621192932129, test loss = 0.43733519315719604
epoch: 900 loss = 0.19521553814411163, test loss = 0.2449607402086258


In [37]:
print(torch.round(torch.sigmoid(model(X_train).squeeze())).float())
print(y_train)

tensor([1., 1., 1., 1., 1., 1., 1., 1., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0.,
        0., 1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 1., 0.,
        0., 1., 0., 0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 0., 0., 1., 1.,
        0., 0., 0., 0., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0.,
        1., 1., 1., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1., 1., 0., 1.,
        1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 1., 0., 0., 1., 1., 1., 0.,
        1., 1., 1., 1., 0., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 0.,
        1., 0., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1.,
        0., 1., 0., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 0., 1., 1., 0., 1.,
        1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 0., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 0., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1., 1., 0., 1., 0.,
        1.], grad_fn=<RoundBackward0>)
tensor([1., 1., 1., 1., 1., 1., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0.,
 

In [94]:
def prepare_forcast_data(stock_name, period="5d", interval="1m"):
    recent_data = yf.download(stock_name, period=period, interval=interval)
    recent_data = recent_data.drop(columns=["Adj Close", "Volume"])

    segment = 390 if interval == "1m" else 7

    for i in range(1, segment):
        recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
        recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
        recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
        recent_data[f"Low t - {i}"] = recent_data["Low"].shift(i)

    recent_data.dropna(inplace=True)
    first_timestamp = recent_data.index[0]

    if first_timestamp.time() != pd.Timestamp("09:30:00").time():
        index = recent_data.index.get_loc(
            recent_data[
                recent_data.index.time == pd.Timestamp("09:30:00").time()
            ].index[0]
        )
        recent_data = recent_data.iloc[index:]

    # Split data into daily segments
    recent_data["day_index"] = recent_data.index.date
    recent_data["day_index"], _ = pd.factorize(recent_data["day_index"])
    daily_data = [group for _, group in recent_data.groupby("day_index")]

    # Add movement column to each daily DataFrame


    combined_data = pd.concat(daily_data)

    last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])
    last_hour_data.dropna(inplace=True)

    return last_hour_data.iloc[-1]




In [96]:
pred_data = prepare_forcast_data(stock_name, period="1y", interval="1h")
input_tensor = torch.from_numpy(pred_data.values)
input_tensor =input_tensor.to(torch.float32)
raw_answer = model(input_tensor).squeeze()
answer = torch.round(torch.sigmoid(raw_answer)).float()
print(answer)


[*********************100%%**********************]  1 of 1 completed

tensor(1., grad_fn=<RoundBackward0>)





In [None]:
torch.save(model, "model1.pt")

In [26]:
def prepare_test_loop_data(stock_name, period="5d", interval="1m",back_time=5, answers =[]):

    recent_data = yf.download(stock_name, period=period, interval=interval)

    data_200 = yf.download(stock_name, period=period, interval=interval).tail(len(recent_data) + 201)
    data_200 = data_200.drop(columns=["Adj Close"])

    data_200.fillna(method="bfill", inplace=True)

    #recent_data = data_200.tail(len(recent_data))


    recent_data = recent_data.dropna()
    for i in range(back_time):
        recent_data[f"Open t - {i+1}"] = recent_data["Open"].shift((i+1))
        recent_data[f"Close t - {i+1}"] = recent_data["Close"].shift((i+1))
        recent_data[f"High t - {i+1}"] = recent_data["High"].shift((i+1))
        recent_data[f"Low t - {i+1}"] = recent_data["Low"].shift((i+1))
    recent_data["movement"] = (recent_data["Close"] > recent_data["Open"]).astype(int)

    # recent_data = recent_data.tail(1)
    # add_plot = mpf.make_addplot(recent_data["200_day"], color="blue", linestyle="--")

    return recent_data

#recent_data = prepare_test_loop_data(stock_name,back_time=1,answers=[answer])
#recent_data.head()


In [27]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

answers = []
forcast = 10
check_data = prepare_test_loop_data(
    interval="1m", back_time=10, period="1d", stock_name=stock_name
)
date_range = pd.date_range(
    start=check_data.index[-1], periods=forcast, freq="1T"
)  # Generate a date range



for i in range(forcast):
    # for i in answers:

    check_data_last = check_data.tail(1)
    check_X = check_data_last.drop(["Close", "Volume", "High", "Low","movement"], axis=1)
    check_y = check_data_last[["movement"]]

    # print(check_data.tail(6).to_markdown())
    # print(X.shape)

    check_X_tensor = torch.from_numpy(check_X.values)
    check_y_tensor = torch.from_numpy(check_y.values)

    check_X_tensor = check_X_tensor.to(torch.float32).to(device)
    check_y_tensor = check_y_tensor.to(torch.float32).to(device)

    # print(check_data_last.to_markdown(), "\n\n\n")

    answer = model(check_X_tensor)
    a = torch.softmax(answer, dim=1).argmax(dim=1)
    print(a)

    answer = answer.detach().cpu().numpy()

    answer = np.append(answer[0],check_X["Close t - 1"])
    answers.append(answer)
    df = pd.DataFrame(
        {
            "Close": [answers[-1][0]],
            "High": [answers[-1][1]],
            "Low": [answers[-1][2]],
            "Open": [answers[-1][3]],
            "Adj Close": [0],
            "Volume": [0],
        },
        index=[date_range[i]],
    )

    check_data = check_data._append(df)
    for i in range(10):
        check_data[f"Open t - {i+1}"] = check_data["Open"].shift((i + 1))
        check_data[f"Close t - {i+1}"] = check_data["Close"].shift((i + 1))
        check_data[f"High t - {i+1}"] = check_data["High"].shift((i + 1))
        check_data[f"Low t - {i+1}"] = check_data["Low"].shift((i + 1))
    recent_data["movement"] = (recent_data["Close"] > recent_data["Open"]).astype(int)

    # check_data.dropna(inplace=True)
# print(check_data.to_markdown())

# print(check_data)
# print(answers)

# sns.catplot(answer[0][:],label="Predicted")
last_elements = [arr[0] for arr in answers]

recent_data = prepare_data(stock_name, period="1d", interval="1m")
# print("Converted index to datetime")

answers_df = pd.concat(
    [
        pd.DataFrame(
            recent_data,
            columns=["Close", "High", "Low", "Open","movement"],
        ),
        pd.DataFrame(
            answers, columns=["movement"], index=date_range
        ),
    ]
).iloc[200:]

# print(answers_df.tail(100))

mpf.plot(
    answers_df,
    type="candle",
    style="charles",
    title=f"{stock_name} Candlestick Chart",
    ylabel="Price",
)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


RuntimeError: mat1 and mat2 shapes cannot be multiplied (1x42 and 29x128)

In [62]:
HIDDEN_SIZE = 64
EPOCHS = 20
BATCH_SIZE = 24
LEARNING_RATE = 0.001

In [64]:
from torch import nn

# Build model
class stonks(nn.Module):
    def __init__(self, input_features, output_features, hidden_units=8):

        super().__init__()
        self.linear_layer_stack = nn.Sequential(
            nn.Linear(in_features=input_features, out_features=hidden_units),
             nn.ReLU(), 
            nn.Linear(in_features=hidden_units, out_features=hidden_units),
            nn.ReLU(),
            nn.Linear(in_features=hidden_units, out_features=output_features), 
            nn.Sigmoid(),
     
        )
    
    def forward(self, x):
        return self.linear_layer_stack(x)

# Create an instance of BlobModel and send it to the target device
model = stonks(input_features=X_train.size()[1], 
                    output_features=1, 
                    hidden_units=8).to(device)


In [65]:
y_train[0]

tensor([1.])

In [82]:
# Create loss and optimizer
loss_fn = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters(), 
                            lr=0.1) # exercise: try changing the learning rate here and seeing what happens to the model's performance

In [86]:
# Make prediction logits with model
y_logits = model(X_test.to(device))

# Perform softmax calculation on logits across dimension 1 to get prediction probabilities
y_pred_probs = torch.softmax(y_logits, dim=1)
print(y_logits[:5])
print(y_pred_probs[:5])

tensor([[2.1976e-05],
        [6.7180e-05],
        [2.2683e-05],
        [2.2327e-05]], grad_fn=<SliceBackward0>)
tensor([[1.],
        [1.],
        [1.],
        [1.]], grad_fn=<SliceBackward0>)


In [85]:
# Fit the model
#torch.manual_seed(42)

# Set number of epochs
epochs = 1000

# Put data to target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

for epoch in range(epochs):
    ### Training
    model.train()

    # 1. Forward pass
    y_logits = model(X_train) # model outputs raw logits 
    y_pred = torch.softmax(y_logits, dim=1).argmax(dim=1) # go from logits -> prediction probabilities -> prediction labels
    # print(y_logits)
    # 2. Calculate loss and accuracy
  
    loss = loss_fn(y_pred.to(torch.float32), y_train.squeeze()) 
  


    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backwards
    loss.requires_grad = True
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model.eval()
    with torch.inference_mode():
      # 1. Forward pass
      test_logits = model(X_test)
      test_pred = torch.softmax(test_logits, dim=1).argmax(dim=1)
      # 2. Calculate test loss and accuracy
      test_loss = loss_fn(test_logits, y_test)


    # Print out what's happening
      if epoch % 100 == 0:
          print(f"Epoch: {epoch} | Loss: {loss:.5f},| Test Loss: {test_loss:.5f}") 

Epoch: 0 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 100 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 200 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 300 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 400 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 500 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 600 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 700 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 800 | Loss: 75.00000,| Test Loss: 2.40205
Epoch: 900 | Loss: 75.00000,| Test Loss: 2.40205
