In [1]:
import yfinance as yf
import pandas as pd
import seaborn as sns
import mplfinance as mpf
from tensorflow import keras
from sklearn.model_selection import train_test_split
import torch
import numpy as np
pd.options.mode.chained_assignment = None  # default='warn'
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)


# NEW THINGS

In [2]:
stock_name = "AAPL"

In [15]:
def prepare_daily_data(stock_name, period="5d", interval="1m"):
    recent_data = yf.download(stock_name, period=period, interval=interval)
    recent_data = recent_data.drop(columns=["Adj Close", "Volume"])

    segment = 390 if interval == "1m" else 7

    for i in range(1, segment):
        recent_data[f"Open t - {i}"] = recent_data["Open"].shift(i)
        recent_data[f"Close t - {i}"] = recent_data["Close"].shift(i)
        recent_data[f"High t - {i}"] = recent_data["High"].shift(i)
        recent_data[f"Low t - {i}"] = recent_data["Low"].shift(i)

    recent_data.dropna(inplace=True)
    first_timestamp = recent_data.index[0]

    if first_timestamp.time() != pd.Timestamp("09:30:00").time():
        index = recent_data.index.get_loc(
            recent_data[
                recent_data.index.time == pd.Timestamp("09:30:00").time()
            ].index[0]
        )
        recent_data = recent_data.iloc[index:]

    # Split data into daily segments
    recent_data["day_index"] = recent_data.index.date
    recent_data["day_index"], _ = pd.factorize(recent_data["day_index"])
    daily_data = [group for _, group in recent_data.groupby("day_index")]

    # Add movement column to each daily DataFrame
    for day_data in daily_data:
        day_data["movement"] = (
            day_data["Close"].iloc[-1] > day_data["Open"].iloc[0]
        ).astype(int)

    day_data["movement"] = day_data["movement"].shift(-1)

    combined_data = pd.concat(daily_data)

    last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])
    last_hour_data.dropna(inplace=True)

    return last_hour_data

daily_data = prepare_daily_data(stock_name, period="1y", interval="1h")

print(daily_data.shape)

[*********************100%%**********************]  1 of 1 completed

(249, 30)



  last_hour_data = combined_data.groupby("day_index").apply(lambda x: x.iloc[-1])


In [16]:
from sklearn.model_selection import train_test_split
import torch
from torch import nn

device = "cuda" if torch.cuda.is_available() else "cpu"

X = daily_data.drop(columns=["movement"])
y = daily_data["movement"]

X_tensor = torch.from_numpy(X.values)
# X_tensor.requires_grad=True
y_tensor = torch.from_numpy(y.values)
# y_tensor.requires_grad = True

# print(X)

# print(split_data(X_tensor, interval="1h").shape)


X_train,X_test,y_train,y_test = train_test_split(X_tensor,y_tensor,test_size=0.2)
X_train = X_train.to(torch.float32).to(device)
# X_time_step = [i for i in range(len(X_train))]
X_test= X_test.to(torch.float32).to(device)
y_train = y_train.to(torch.float32).to(device)
y_test =y_test.to(torch.float32).to(device)

print(X_test.shape)
print(X_test.shape)
print(X_train)

torch.Size([50, 29])
torch.Size([50, 29])
tensor([[192.8700, 193.3100, 192.8300,  ..., 193.4100, 192.0200,  82.0000],
        [205.7300, 207.1600, 205.4900,  ..., 201.0450, 193.6400, 207.0000],
        [192.4250, 192.5900, 191.9400,  ..., 194.7600, 193.7500, 113.0000],
        ...,
        [173.1450, 173.4150, 172.5400,  ..., 172.6600, 170.8900, 180.0000],
        [195.2100, 195.7500, 194.8400,  ..., 197.6800, 196.6900,  89.0000],
        [167.3100, 167.4400, 166.3750,  ..., 171.3775, 168.1300,  51.0000]],
       device='cuda:0')


In [25]:
# class LSTMPredictor(nn.Module):
#     def __init__(self, input_size, hidden_size, n_layers=2):
#         super(LSTMPredictor, self).__init__()

#         self.ltsm = nn.LSTM(
#             input_size=input_size,
#             hidden_size=hidden_size,
#             num_layers=n_layers,
#             batch_first=True,
#         )
#     def forward(self, sequences):
#         lstm_out, (hn, cn) = self.ltsm(sequences)
#         return lstm_out

model = nn.Sequential(
    nn.Linear(29, 128),
    nn.ReLU(),
    nn.Linear(128, 256),
    nn.ReLU(),
    nn.Linear(256, 1),
    # nn.Sigmoid(),
    
).to(device)

model(X_train)

tensor([[-4.5783],
        [-3.3510],
        [-4.2638],
        [-2.7605],
        [-2.7527],
        [-4.2682],
        [-3.1452],
        [-4.3598],
        [-3.6725],
        [-3.0311],
        [-4.2120],
        [-4.4507],
        [-2.8915],
        [-4.6359],
        [-4.7025],
        [-4.0488],
        [-4.4520],
        [-3.3679],
        [-4.3329],
        [-3.5709],
        [-3.9205],
        [-3.4709],
        [-3.4633],
        [-3.8081],
        [-3.1182],
        [-4.0531],
        [-3.5946],
        [-4.6021],
        [-4.8463],
        [-3.0891],
        [-5.1932],
        [-3.3674],
        [-2.7235],
        [-4.4966],
        [-4.3392],
        [-4.6183],
        [-3.1384],
        [-3.0966],
        [-4.6371],
        [-3.3482],
        [-4.1990],
        [-4.6225],
        [-3.0835],
        [-4.5433],
        [-4.4929],
        [-4.6839],
        [-4.0468],
        [-4.3515],
        [-4.0655],
        [-4.4910],
        [-4.5398],
        [-2.8423],
        [-4.

In [6]:
model

Sequential(
  (0): Linear(in_features=29, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=256, bias=True)
  (3): ReLU()
  (4): Linear(in_features=256, out_features=1, bias=True)
)

In [27]:
epochs = 10000

loss_fn = nn.BCEWithLogitsLoss()
print(loss_fn)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

for epoch in range(epochs):
    model.train()
    y_logits = model(X_train).squeeze()
    y_preds = torch.round(torch.sigmoid(y_logits)).float()
    print(y_preds)
    loss = loss_fn(y_logits, y_train)

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

    model.eval()
    with torch.inference_mode():
        y_test_logits = model(X_test).squeeze()
        y_test_preds = torch.round(torch.sigmoid(y_test_logits)).float()

        # print(y_test_preds.shape)
        # print(y_test.shape)
        test_loss = loss_fn(y_test_logits, y_test)
        if epoch % 100 == 0:
            print(f"epoch: {epoch} loss = {loss}, test loss = {test_loss}")

BCEWithLogitsLoss()
tensor([0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0.,
        1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1.,
        1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.,
        0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1.,
        1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
        1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1.,
        0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 0.,
        0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 1.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1., 0., 0.,
        1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0.,
        0.], device='cuda:0', grad_fn=<RoundBackward0>)
epoch: 0 loss = 0.3102302849292755, test los

KeyboardInterrupt: 

In [30]:
print(torch.round(torch.sigmoid(model(X_train).squeeze())).float())
print(y_train)

tensor([0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0.,
        0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 0.,
        1., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 0., 0., 0., 0., 0., 1.,
        1., 1., 1., 1., 0., 0., 1., 0., 1., 0., 0., 1., 0., 1., 0., 0., 0., 0.,
        0., 1., 1., 0., 0., 1., 1., 0., 0., 0., 0., 1., 0., 1., 1., 0., 0., 1.,
        1., 1., 0., 1., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 1., 0., 1., 0.,
        1., 1., 1., 0., 1., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0., 0., 1., 1.,
        0., 0., 0., 1., 1., 1., 0., 0., 0., 1., 0., 0., 1., 1., 1., 0., 1., 0.,
        0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 1., 0., 1.,
        0., 1., 0., 0., 0., 0., 0., 1., 0., 1., 1., 1., 0., 1., 1., 1., 0., 0.,
        1., 0., 1., 0., 1., 0., 1., 1., 1., 0., 0., 0., 0., 0., 1., 0., 1., 0.,
        0.], device='cuda:0', grad_fn=<RoundBackward0>)
tensor([0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 0., 1., 0., 1., 

In [None]:
torch.save(model, "model1.pt")

In [None]:
def prepare_test_loop_data(stock_name, period="5d", interval="1m",back_time=5, answers =[]):

    recent_data = yf.download(stock_name, period=period, interval=interval)

    data_200 = yf.download(stock_name, period=period, interval=interval).tail(len(recent_data) + 201)
    data_200 = data_200.drop(columns=["Adj Close"])

    data_200.fillna(method="bfill", inplace=True)

    #recent_data = data_200.tail(len(recent_data))


    recent_data = recent_data.dropna()
    for i in range(back_time):
        recent_data[f"Open t - {i+1}"] = recent_data["Open"].shift((i+1))
        recent_data[f"Close t - {i+1}"] = recent_data["Close"].shift((i+1))
        recent_data[f"High t - {i+1}"] = recent_data["High"].shift((i+1))
        recent_data[f"Low t - {i+1}"] = recent_data["Low"].shift((i+1))
    recent_data["movement"] = (recent_data["Close"] > recent_data["Open"]).astype(int)

    # recent_data = recent_data.tail(1)
    # add_plot = mpf.make_addplot(recent_data["200_day"], color="blue", linestyle="--")

    return recent_data

#recent_data = prepare_test_loop_data(stock_name,back_time=1,answers=[answer])
#recent_data.head()


In [14]:
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime

answers = []
forcast = 10
check_data = prepare_test_loop_data(
    interval="1m", back_time=10, period="1d", stock_name=stock_name
)
date_range = pd.date_range(
    start=check_data.index[-1], periods=forcast, freq="1T"
)  # Generate a date range



for i in range(forcast):
    # for i in answers:

    check_data_last = check_data.tail(1)
    check_X = check_data_last.drop(["Close", "Volume", "High", "Low","movement"], axis=1)
    check_y = check_data_last[["movement"]]

    # print(check_data.tail(6).to_markdown())
    # print(X.shape)

    check_X_tensor = torch.from_numpy(check_X.values)
    check_y_tensor = torch.from_numpy(check_y.values)

    check_X_tensor = check_X_tensor.to(torch.float32).to(device)
    check_y_tensor = check_y_tensor.to(torch.float32).to(device)

    # print(check_data_last.to_markdown(), "\n\n\n")

    answer = model(check_X_tensor)

    print(answer)

    answer = answer.detach().cpu().numpy()

    answer = np.append(answer[0],check_X["Close t - 1"])
    answers.append(answer)
    df = pd.DataFrame(
        {
            "Close": [answers[-1][0]],
            "High": [answers[-1][1]],
            "Low": [answers[-1][2]],
            "Open": [answers[-1][3]],
            "Adj Close": [0],
            "Volume": [0],
        },
        index=[date_range[i]],
    )

    check_data = check_data._append(df)
    for i in range(10):
        check_data[f"Open t - {i+1}"] = check_data["Open"].shift((i + 1))
        check_data[f"Close t - {i+1}"] = check_data["Close"].shift((i + 1))
        check_data[f"High t - {i+1}"] = check_data["High"].shift((i + 1))
        check_data[f"Low t - {i+1}"] = check_data["Low"].shift((i + 1))
    recent_data["movement"] = (recent_data["Close"] > recent_data["Open"]).astype(int)

    # check_data.dropna(inplace=True)
# print(check_data.to_markdown())

# print(check_data)
# print(answers)

# sns.catplot(answer[0][:],label="Predicted")
last_elements = [arr[0] for arr in answers]

recent_data = prepare_data(stock_name, period="1d", interval="1m")
# print("Converted index to datetime")

answers_df = pd.concat(
    [
        pd.DataFrame(
            recent_data,
            columns=["Close", "High", "Low", "Open","movement"],
        ),
        pd.DataFrame(
            answers, columns=["movement"], index=date_range
        ),
    ]
).iloc[200:]

# print(answers_df.tail(100))

mpf.plot(
    answers_df,
    type="candle",
    style="charles",
    title=f"{stock_name} Candlestick Chart",
    ylabel="Price",
)

NameError: name 'prepare_test_loop_data' is not defined