In [2]:
import yfinance as yf
import pandas as pd
import seaborn as sns
import mplfinance as mpf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import torch
import numpy as np
pd.options.mode.chained_assignment = None  # default='warn'
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


# NEW THINGS

In [3]:
stock_name = "AAPL"

In [37]:
def prepare_daily_data(stock_name, period="5d", interval="1m"):
    recent_data = yf.download(stock_name, period=period, interval=interval)
    recent_data = recent_data.drop(columns=["Adj Close", "Volume"])
    SNP = yf.download("^GSPC", period=period, interval=interval)

    match interval:
        case "1m":
            segment = 390
        case "5m":
            segment = 78
        case "15m":
            segment = 26
        case "30m":
            segment = 13
        case "1h":
            segment = 7
        case "1d":
            segment = 1

    for i in range(1, segment):
        recent_data[f"Day"] = recent_data.index.dayofweek
        recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
        recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
        recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
        recent_data[f"SNP Low t - {i}"] = recent_data["Low"].shift(i)
        recent_data[f"SNP Open t - {i}"] = SNP["Open"].shift(i)
        recent_data[f"SNP Close t - {i}"] = SNP["Close"].shift(i)
        recent_data[f"SNP High t - {i}"] = SNP["High"].shift(i)
        recent_data[f"SNP Low t - {i}"] = SNP["Low"].shift(i)

    recent_data.dropna(inplace=True)
    first_timestamp = recent_data.index[0]

    if first_timestamp.time() != pd.Timestamp("09:30:00").time():
        index = recent_data.index.get_loc(
            recent_data[
                recent_data.index.time == pd.Timestamp("09:30:00").time()
            ].index[0]
        )
        recent_data = recent_data.iloc[index:]

    # Split data into daily segments
    recent_data["day_index"] = recent_data.index.date
    recent_data["day_index"], _ = pd.factorize(recent_data["day_index"])
    daily_data = [group for _, group in recent_data.groupby("day_index")]

    # Add movement column to each daily DataFrame
    for day_data in daily_data:
        day_data["movement"] = (
            day_data["Close"].iloc[-1] > day_data["Open"].iloc[0]
        ).astype(int)

    day_data["movement"] = day_data["movement"].shift(-1)

    combined_data = pd.concat(daily_data)

    last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])
    last_hour_data.dropna(inplace=True)

    return last_hour_data

daily_data = prepare_daily_data(stock_name, period="1mo", interval="15m")

print(daily_data.to_markdown())

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
  recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
  recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
  recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
  recent_data[f"SNP Low t - {i}"] = recent_data["Low"].shift(i)
  recent_data[f"SNP Open t - {i}"] = SNP["Open"].shift(i)
  recent_data[f"SNP Close t - {i}"] = SNP["Close"].shift(i)
  recent_data[f"SNP High t - {i}"] = SNP["High"].shift(i)
  recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
  recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
  recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
  recent_data[f"SNP Low t - {i}"] = recent_data["Low"].shift(i)
  recent_data[f"SNP Open t - {i}"] = SNP["Open"].shift(i)
  recent_data[f"SNP Close t - {i}"] = SNP["Close"].shift(i)
  recent_data[f"SNP High t - {i}"] = SNP["High"].shift(i)
  rece

|   day_index |    Open |    High |     Low |   Close |   Day |   Open t - 1 |   Close t - 1 |   High t - 1 |   SNP Low t - 1 |   SNP Open t - 1 |   SNP Close t - 1 |   SNP High t - 1 |   Open t - 2 |   Close t - 2 |   High t - 2 |   SNP Low t - 2 |   SNP Open t - 2 |   SNP Close t - 2 |   SNP High t - 2 |   Open t - 3 |   Close t - 3 |   High t - 3 |   SNP Low t - 3 |   SNP Open t - 3 |   SNP Close t - 3 |   SNP High t - 3 |   Open t - 4 |   Close t - 4 |   High t - 4 |   SNP Low t - 4 |   SNP Open t - 4 |   SNP Close t - 4 |   SNP High t - 4 |   Open t - 5 |   Close t - 5 |   High t - 5 |   SNP Low t - 5 |   SNP Open t - 5 |   SNP Close t - 5 |   SNP High t - 5 |   Open t - 6 |   Close t - 6 |   High t - 6 |   SNP Low t - 6 |   SNP Open t - 6 |   SNP Close t - 6 |   SNP High t - 6 |   Open t - 7 |   Close t - 7 |   High t - 7 |   SNP Low t - 7 |   SNP Open t - 7 |   SNP Close t - 7 |   SNP High t - 7 |   Open t - 8 |   Close t - 8 |   High t - 8 |   SNP Low t - 8 |   SNP Open t - 8 |

  recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
  recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
  recent_data[f"SNP Low t - {i}"] = recent_data["Low"].shift(i)
  recent_data[f"SNP Open t - {i}"] = SNP["Open"].shift(i)
  recent_data[f"SNP Close t - {i}"] = SNP["Close"].shift(i)
  recent_data[f"SNP High t - {i}"] = SNP["High"].shift(i)
  recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
  recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
  recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
  recent_data[f"SNP Low t - {i}"] = recent_data["Low"].shift(i)
  recent_data[f"SNP Open t - {i}"] = SNP["Open"].shift(i)
  recent_data[f"SNP Close t - {i}"] = SNP["Close"].shift(i)
  recent_data[f"SNP High t - {i}"] = SNP["High"].shift(i)
  recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
  recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
  recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
  recent_data[f"SN

In [12]:
from sklearn.model_selection import train_test_split
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"
# device = "cpu"

X = daily_data.drop(columns=["movement"])
y = daily_data["movement"]

X_tensor = torch.from_numpy(X.values)
# X_tensor.requires_grad=True
y_tensor = torch.from_numpy(y.values)
# y_tensor.requires_grad = True

# print(X)

# print(split_data(X_tensor, interval="1h").shape)

#print(split_data(X_tensor, interval="1h").shape)

X_train,X_test,y_train,y_test = train_test_split(X_tensor,y_tensor,test_size=0.2)
X_train = X_train.to(torch.float32).to(device)
# X_time_step = [i for i in range(len(X_train))]
X_test= X_test.to(torch.float32).to(device)
y_train = y_train.to(torch.float32).to(device)
y_test =y_test.to(torch.float32).to(device)

print(X_test.shape)
print(X_test.shape)
print(X_train)

torch.Size([100, 48])
torch.Size([100, 48])
tensor([[178.2100, 178.6700, 177.9450,  ..., 516.3500, 516.4000, 400.0000],
        [143.5957, 143.7300, 142.0700,  ..., 395.8000, 396.7500,  81.0000],
        [205.7300, 207.1600, 205.4900,  ..., 533.7200, 533.8250, 457.0000],
        ...,
        [224.6800, 225.0400, 223.7800,  ..., 553.2000, 554.4101, 484.0000],
        [157.0000, 157.5900, 156.6200,  ..., 384.0000, 385.3000,  24.0000],
        [145.7300, 146.2200, 145.5200,  ..., 376.0800, 377.2400,  34.0000]],
       device='cuda:0')


In [14]:
# class LSTMPredictor(nn.Module):
#     def __init__(self, input_size, hidden_size, n_layers=2):
#         super(LSTMPredictor, self).__init__()

#         self.ltsm = nn.LSTM(
#             input_size=input_size,
#             hidden_size=hidden_size,
#             num_layers=n_layers,
#             batch_first=True,
#         )
#     def forward(self, sequences):
#         lstm_out, (hn, cn) = self.ltsm(sequences)
#         return lstm_out

model = nn.Sequential(
    nn.Linear(X_test.shape[1], 256),
    nn.ReLU(),
    nn.Linear(256, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 1),
    # nn.Sigmoid(),
).to(device)

model(X_train)

tensor([[ 0.7198],
        [ 2.8445],
        [ 0.5470],
        [ 2.6208],
        [ 2.8796],
        [ 1.4084],
        [ 2.6080],
        [ 1.6942],
        [ 0.5099],
        [ 2.6280],
        [ 2.6147],
        [ 3.0300],
        [ 0.8652],
        [ 0.6225],
        [ 1.5721],
        [ 3.0900],
        [ 2.8004],
        [ 1.6875],
        [ 2.7429],
        [ 1.7636],
        [ 3.0886],
        [ 2.6331],
        [ 1.5785],
        [ 2.4275],
        [ 2.6943],
        [ 2.6939],
        [ 0.9849],
        [ 1.0740],
        [ 0.2002],
        [ 2.6803],
        [ 1.9571],
        [ 0.5421],
        [ 0.5946],
        [ 0.7155],
        [ 2.8354],
        [ 2.6996],
        [ 2.2254],
        [ 3.0838],
        [ 2.6617],
        [ 2.6259],
        [ 1.1713],
        [ 1.3045],
        [ 1.0470],
        [ 2.9489],
        [ 2.5509],
        [ 1.2556],
        [ 2.6651],
        [ 1.6524],
        [ 0.9654],
        [ 2.6834],
        [ 1.6910],
        [ 1.6697],
        [ 2.

In [15]:
model

Sequential(
  (0): Linear(in_features=48, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=512, bias=True)
  (3): ReLU()
  (4): Linear(in_features=512, out_features=256, bias=True)
  (5): ReLU()
  (6): Linear(in_features=256, out_features=1, bias=True)
)

In [16]:
epochs = 10000

loss_fn = nn.BCEWithLogitsLoss()
print(loss_fn)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(epochs):
    model.train()
    y_logits = model(X_train).squeeze()
    y_preds = torch.round(torch.sigmoid(y_logits)).float()
    loss = loss_fn(y_logits, y_train)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    model.eval()
    with torch.inference_mode():
        y_test_logits = model(X_test).squeeze()
        y_test_preds = torch.round(torch.sigmoid(y_test_logits)).float()

        # print(y_test_preds.shape)
        # print(y_test.shape)
        test_loss = loss_fn(y_test_logits, y_test)
        if epoch % 100 == 0:
            print(f"epoch: {epoch} loss = {loss}, test loss = {test_loss}")

BCEWithLogitsLoss()
epoch: 0 loss = 1.1322321891784668, test loss = 55.53317642211914
epoch: 100 loss = 0.6827588677406311, test loss = 0.682798445224762
epoch: 200 loss = 0.6779730319976807, test loss = 0.6779671907424927
epoch: 300 loss = 0.6620371341705322, test loss = 0.6641864776611328
epoch: 400 loss = 0.6544424891471863, test loss = 0.6687939167022705
epoch: 500 loss = 0.6363928914070129, test loss = 0.6458770632743835
epoch: 600 loss = 0.6205778121948242, test loss = 0.6351369023323059
epoch: 700 loss = 0.6162852048873901, test loss = 0.6562323570251465
epoch: 800 loss = 0.7265897393226624, test loss = 0.8298308253288269
epoch: 900 loss = 0.6027101278305054, test loss = 0.6169350147247314
epoch: 1000 loss = 0.6074324250221252, test loss = 0.5961612462997437
epoch: 1100 loss = 0.7378715872764587, test loss = 0.9058393836021423
epoch: 1200 loss = 0.6012803316116333, test loss = 0.5935922861099243
epoch: 1300 loss = 0.5188423991203308, test loss = 0.586777925491333
epoch: 1400 los

In [17]:
print(torch.round(torch.sigmoid(model(X_train).squeeze())).float())
print(y_train)

tensor([1., 0., 1., 1., 0., 1., 0., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 0.,
        0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1.,
        0., 1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
        0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 1.,
        1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 1.,
        0., 0., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
        0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0.,
        0., 1., 0., 0., 0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 1., 0.,
        1., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 0., 0., 0., 0.,
        0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0.,
        0., 0., 1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 0.,
        0., 1., 0., 1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0.,
        0., 1., 0., 0., 1., 0., 1., 1., 

In [20]:
def prepare_daily_data(stock_name, period="5d", interval="1m"):
    recent_data = yf.download(stock_name, period=period, interval=interval)
    recent_data = recent_data.drop(columns=["Adj Close", "Volume"])
    SNP = yf.download("SPY", period=period, interval=interval)

    segment = 390 if interval == "1m" else 7

    for i in range(1, segment):
        recent_data[f"Day"] = recent_data.index.dayofweek
        recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
        recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
        recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
        recent_data[f"SNP Low t - {i}"] = recent_data["Low"].shift(i)
        recent_data[f"SNP Open t - {i}"] = SNP["Open"].shift(i)
        recent_data[f"SNP Close t - {i}"] = SNP["Close"].shift(i)
        recent_data[f"SNP High t - {i}"] = SNP["High"].shift(i)
        recent_data[f"SNP Low t - {i}"] = SNP["Low"].shift(i)

    recent_data.dropna(inplace=True)
    first_timestamp = recent_data.index[0]

    if first_timestamp.time() != pd.Timestamp("09:30:00").time():
        index = recent_data.index.get_loc(
            recent_data[
                recent_data.index.time == pd.Timestamp("09:30:00").time()
            ].index[0]
        )
        recent_data = recent_data.iloc[index:]

    # Split data into daily segments
    recent_data["day_index"] = recent_data.index.date
    recent_data["day_index"], _ = pd.factorize(recent_data["day_index"])
    daily_data = [group for _, group in recent_data.groupby("day_index")]

    # Add movement column to each daily DataFrame
    for day_data in daily_data:
        day_data["movement"] = (
            day_data["Close"].iloc[-1] > day_data["Open"].iloc[0]
        ).astype(int)

    day_data["movement"] = day_data["movement"].shift(-1)

    combined_data = pd.concat(daily_data)

    last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])
    last_hour_data.dropna(inplace=True)

    return last_hour_data

daily_data = prepare_daily_data(stock_name, period="2y", interval="1h")

print(daily_data.to_markdown())

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
  last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])


|   day_index |    Open |    High |     Low |   Close |   Day |   Open t - 1 |   Close t - 1 |   High t - 1 |   SNP Low t - 1 |   SNP Open t - 1 |   SNP Close t - 1 |   SNP High t - 1 |   Open t - 2 |   Close t - 2 |   High t - 2 |   SNP Low t - 2 |   SNP Open t - 2 |   SNP Close t - 2 |   SNP High t - 2 |   Open t - 3 |   Close t - 3 |   High t - 3 |   SNP Low t - 3 |   SNP Open t - 3 |   SNP Close t - 3 |   SNP High t - 3 |   Open t - 4 |   Close t - 4 |   High t - 4 |   SNP Low t - 4 |   SNP Open t - 4 |   SNP Close t - 4 |   SNP High t - 4 |   Open t - 5 |   Close t - 5 |   High t - 5 |   SNP Low t - 5 |   SNP Open t - 5 |   SNP Close t - 5 |   SNP High t - 5 |   Open t - 6 |   Close t - 6 |   High t - 6 |   SNP Low t - 6 |   SNP Open t - 6 |   SNP Close t - 6 |   SNP High t - 6 |   day_index |   movement |
|------------:|--------:|--------:|--------:|--------:|------:|-------------:|--------------:|-------------:|----------------:|-----------------:|------------------:|-----------

In [31]:
pred_data = prepare_daily_data(stock_name, period="2y", interval="1h").drop(columns=["movement"]).iloc[-1]
input_tensor = torch.from_numpy(pred_data.values)
input_tensor =input_tensor.to(torch.float32).to(device)
raw_answer = model(input_tensor).squeeze()
answer = torch.round(torch.sigmoid(raw_answer)).float()
print(answer)

[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


tensor(1., device='cuda:0', grad_fn=<RoundBackward0>)


  last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])


In [24]:
torch.save(model, "model.pt")