In [2]:
import warnings
warnings.filterwarnings("ignore")

!pip install pandas -q

import pandas as pd
# Хак чтобы работало на Anaconda под MacOs 10.15.7 Catalina
pd.DataFrame.iteritems = pd.DataFrame.items

In [3]:
!pip install plotly -q

import plotly.graph_objects as go

def linear_plot(df, title):
    fig = go.Figure([go.Scatter(x=df['date'], y=df['close'], mode='lines')])
    fig.update_layout(plot_bgcolor='white', 
                      xaxis_title='Date',
                      yaxis_title='Price',
                      title=title)
    fig.show()

def candlestick_plot(df, title):
    fig = go.Figure([go.Candlestick(x=df['date'], 
                                open=df['open'],
                                high=df['high'],
                                low=df['low'],
                                close=df['close'])])
    fig.update_layout(xaxis_rangeslider_visible=False,
                      plot_bgcolor='white',
                      xaxis_title='Date',
                      yaxis_title='Price',
                      title=title)
    fig.update_yaxes(fixedrange=False)
    fig.show()

In [2]:
!pip install yfinance -q

import yfinance as yf

ticker = "AAPL"
data = yf.download(ticker, start="2020-01-01", end="2023-01-01", interval="1d")
data

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-02,72.796013,72.856606,71.545380,71.799866,135480400
2020-01-03,72.088272,72.851738,71.862869,72.020409,146322800
2020-01-06,72.662720,72.701500,70.954010,71.206077,118387200
2020-01-07,72.320976,72.929322,72.100418,72.672409,108872000
2020-01-08,73.484344,73.787308,72.022850,72.022850,132079200
...,...,...,...,...,...
2022-12-23,130.487793,131.041963,128.290894,129.557573,63814900
2022-12-27,128.676834,130.042478,127.380469,130.012791,69007800
2022-12-28,124.728363,129.666432,124.560134,128.320585,85438400
2022-12-29,128.261215,129.122157,126.400782,126.658071,75703700


In [3]:
data.dtypes

Price   Ticker
Close   AAPL      float64
High    AAPL      float64
Low     AAPL      float64
Open    AAPL      float64
Volume  AAPL        int64
dtype: object

In [4]:
data = data[['Open', 'High', 'Low', 'Close', 'Volume']]
data

Price,Open,High,Low,Close,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-02,71.799866,72.856606,71.545380,72.796013,135480400
2020-01-03,72.020409,72.851738,71.862869,72.088272,146322800
2020-01-06,71.206077,72.701500,70.954010,72.662720,118387200
2020-01-07,72.672409,72.929322,72.100418,72.320976,108872000
2020-01-08,72.022850,73.787308,72.022850,73.484344,132079200
...,...,...,...,...,...
2022-12-23,129.557573,131.041963,128.290894,130.487793,63814900
2022-12-27,130.012791,130.042478,127.380469,128.676834,69007800
2022-12-28,128.320585,129.666432,124.560134,124.728363,85438400
2022-12-29,126.658071,129.122157,126.400782,128.261215,75703700


In [5]:
data = data.pct_change().dropna()  # Use percent change as features, then drop NaNs
data

Price,Open,High,Low,Close,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2020-01-03,0.003072,-0.000067,0.004438,-0.009722,0.080029
2020-01-06,-0.011307,-0.002062,-0.012647,0.007969,-0.190918
2020-01-07,0.020593,0.003134,0.016157,-0.004703,-0.080374
2020-01-08,-0.008938,0.011765,-0.001076,0.016086,0.213160
2020-01-09,0.033921,0.019676,0.030422,0.021241,0.287927
...,...,...,...,...,...
2022-12-23,-0.025530,-0.015904,-0.005065,-0.002798,-0.180306
2022-12-27,0.003514,-0.007627,-0.007097,-0.013878,0.081374
2022-12-28,-0.013016,-0.002892,-0.022141,-0.030685,0.238098
2022-12-29,-0.012956,-0.004198,0.014777,0.028324,-0.113938


In [6]:
data.dtypes

Price   Ticker
Open    AAPL      float64
High    AAPL      float64
Low     AAPL      float64
Close   AAPL      float64
Volume  AAPL      float64
dtype: object

In [7]:
!pip install torch -q

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [11]:
# Convert to PyTorch Tensors
window_size = 30
X, y = [], []
for i in range(len(data) - window_size - 1):
    X.append(data.iloc[i:i + window_size].values)

    # Define labels based on Close price comparison for the window
    if data['Close'].iloc[i + window_size].values > data['Close'].iloc[i + window_size - 1].values:
        y.append(2)  # Buy
    elif data['Close'].iloc[i + window_size].values < data['Close'].iloc[i + window_size - 1].values:
        y.append(0)  # Sell
    else:
        y.append(1)  # Hold

# Now convert lists to tensors
X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

# DataLoader
dataset = TensorDataset(X, y)
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)

In [12]:
X.shape

torch.Size([724, 30, 5])

In [13]:
y.shape

torch.Size([724])