In [None]:
import pandas as pd
import plotly.express as px
from copy import copy
from scipy import stats
import matplotlib.pyplot as plt
import numpy as np
import plotly.figure_factory as ff
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from tensorflow import keras

In [None]:
# Read the price and volume tables and order each one chronologically by 'Date'.
stock_price_df = pd.read_csv(
    '/content/drive/MyDrive/stock data/stock (4).csv'
).sort_values(by=['Date'])
stock_vol_df = pd.read_csv(
    "/content/drive/MyDrive/stock data/stock_volume (1).csv"
).sort_values(by=['Date'])

In [None]:
def individual_stock(price_df, vol_df, name):
    """Assemble a Date / Close / Volume frame for a single ticker.

    Args:
        price_df: DataFrame with a 'Date' column and one price column per ticker.
        vol_df: DataFrame with one volume column per ticker.
        name: Ticker column to extract from both frames.

    Returns:
        A new DataFrame with columns 'Date', 'Close' (from ``price_df[name]``)
        and 'Volume' (from ``vol_df[name]``), in that order.
    """
    columns = {
        'Date': price_df['Date'],
        'Close': price_df[name],
        'Volume': vol_df[name],
    }
    return pd.DataFrame(columns)

In [None]:
# Build the Date / Close / Volume frame for the AAPL ticker and display it.
price_volume_df = individual_stock(stock_price_df, stock_vol_df, 'AAPL')
price_volume_df


Unnamed: 0,Date,Close,Volume
0,2012-01-12,60.198570,53146800
1,2012-01-13,59.972858,56505400
2,2012-01-17,60.671429,60724300
3,2012-01-18,61.301430,69197800
4,2012-01-19,61.107143,65434600
...,...,...,...
2154,2020-08-05,440.250000,30498000
2155,2020-08-06,455.609985,50607200
2156,2020-08-07,444.450012,49453300
2157,2020-08-10,450.910004,53100900


In [None]:
def trading_window(data, n=1):
    """Return a copy of ``data`` with a 'Target' column holding the future close.

    'Target' on each row is the 'Close' value ``n`` rows later. The last ``n``
    rows have no future value and therefore get NaN; callers are expected to
    drop them.

    Args:
        data: DataFrame containing a 'Close' column, ordered chronologically.
        n: Prediction horizon in rows (default 1, i.e. the next row's close).

    Returns:
        A new DataFrame; the input frame is left unmodified.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive number of rows, got {}".format(n))

    # Work on a copy so the caller's frame does not silently gain a column.
    result = data.copy()
    result['Target'] = result['Close'].shift(-n)
    return result

In [None]:
# Attach the next-day target, then discard the final row: it has no following
# day, so its Target is NaN.
price_volume_target_df = trading_window(price_volume_df).iloc[:-1]

In [None]:
# Inspect the frame after the 'Target' column was added and the last row dropped.
price_volume_target_df

Unnamed: 0,Date,Close,Volume,Target
0,2012-01-12,60.198570,53146800,59.972858
1,2012-01-13,59.972858,56505400,60.671429
2,2012-01-17,60.671429,60724300,61.301430
3,2012-01-18,61.301430,69197800,61.107143
4,2012-01-19,61.107143,65434600,60.042858
...,...,...,...,...
2153,2020-08-04,438.660004,43267900,440.250000
2154,2020-08-05,440.250000,30498000,455.609985
2155,2020-08-06,455.609985,50607200,444.450012
2156,2020-08-07,444.450012,49453300,450.910004


In [None]:
# Features are the first three columns (Date, Close, Volume as produced by
# individual_stock); the label is the shifted close stored in 'Target'.
X = price_volume_target_df.iloc[:, 0:3]
y = price_volume_target_df['Target']


In [None]:
# Chronological split: the first 80% of rows train the model, the rest is held
# out. No shuffling, so the test period always follows the training period.
split = int(0.80 * len(X))

X_train_df, X_test_df = X[:split], X[split:]
# Targets are kept as plain Python lists for the windowing step.
y_train_df, y_test_df = y[:split].tolist(), y[split:].tolist()

X_train_df.shape


(1726, 3)

In [None]:
# Held-out feature rows: everything from position `split` onward.
X_test_df

Unnamed: 0,Date,Close,Volume
1726,2018-11-20,176.979996,67825200
1727,2018-11-21,176.779999,31124200
1728,2018-11-23,172.289993,23624000
1729,2018-11-26,174.619995,44998500
1730,2018-11-27,174.240005,41387400
...,...,...,...
2153,2020-08-04,438.660004,43267900
2154,2020-08-05,440.250000,30498000
2155,2020-08-06,455.609985,50607200
2156,2020-08-07,444.450012,49453300


In [None]:
def to_sequences(seq_size, obs):
    """Turn a 1-D series into overlapping input windows and next-step labels.

    Window ``i`` contains ``obs[i : i + seq_size]`` and its label is
    ``obs[i + seq_size]``.

    Args:
        seq_size: Number of consecutive observations in each window.
        obs: 1-D sequence of numeric observations (list, array, Series, ...).

    Returns:
        A tuple ``(x, y)`` where ``x`` has shape
        ``(len(obs) - seq_size, seq_size, 1)`` and ``y`` has shape
        ``(len(obs) - seq_size,)``. If ``obs`` has ``seq_size`` or fewer
        elements, both arrays are empty but keep those ranks.
    """
    values = np.asarray(obs)
    # Use the seq_size argument, not a module-level constant, so the function
    # works for any window length and without hidden global state.
    n_windows = max(len(values) - seq_size, 0)

    # Row i of `positions` is [i, i + 1, ..., i + seq_size - 1].
    positions = np.arange(n_windows)[:, None] + np.arange(seq_size)[None, :]

    # Trailing axis of length 1 = one feature per time step.
    x = values[positions][..., None]
    y = values[seq_size:seq_size + n_windows]
    return x, y
    
    
# Number of consecutive observations in each input window.
SEQUENCE_SIZE = 8
# Windows are built from the target series only (y_train_df / y_test_df), so
# each sample is a run of past target values and its label is the next one.
x_train,y_train = to_sequences(SEQUENCE_SIZE,y_train_df)
x_test,y_test = to_sequences(SEQUENCE_SIZE,y_test_df)

print("Shape of training set: {}".format(x_train.shape))
print("Shape of test set: {}".format(x_test.shape))
# Echo the test windows as the cell output.
x_test

Shape of training set: (1718, 8, 1)
Shape of test set: (424, 8, 1)


array([[[176.779999],
        [172.289993],
        [174.619995],
        ...,
        [179.550003],
        [178.580002],
        [184.820007]],

       [[172.289993],
        [174.619995],
        [174.240005],
        ...,
        [178.580002],
        [184.820007],
        [176.690002]],

       [[174.619995],
        [174.240005],
        [180.940002],
        ...,
        [184.820007],
        [176.690002],
        [174.720001]],

       ...,

       [[373.01001 ],
        [380.160004],
        [384.76001 ],
        ...,
        [438.660004],
        [440.25    ],
        [455.609985]],

       [[380.160004],
        [384.76001 ],
        [425.040009],
        ...,
        [440.25    ],
        [455.609985],
        [444.450012]],

       [[384.76001 ],
        [425.040009],
        [435.75    ],
        ...,
        [455.609985],
        [444.450012],
        [450.910004]]])

In [None]:
# Labels for the test windows (the value following each window).
y_test

array([176.690002, 174.720001, 168.490005, 169.600006, 168.630005,
       169.100006, 170.949997, 165.479996, 163.940002, 166.070007,
       160.889999, 156.830002, 150.729996, 146.830002, 157.169998,
       156.149994, 156.229996, 157.740005, 157.919998, 142.190002,
       148.259995, 147.929993, 150.75    , 153.309998, 153.800003,
       152.289993, 150.      , 153.070007, 154.940002, 155.860001,
       156.820007, 153.300003, 153.919998, 152.699997, 157.759995,
       156.300003, 154.679993, 165.25    , 166.440002, 166.520004,
       171.25    , 174.179993, 174.240005, 170.940002, 170.410004,
       169.429993, 170.889999, 170.179993, 170.800003, 170.419998,
       170.929993, 172.029999, 171.059998, 172.970001, 174.229996,
       174.330002, 174.869995, 173.149994, 174.970001, 175.850006,
       175.529999, 174.520004, 172.5     , 172.910004, 178.899994,
       180.910004, 181.710007, 183.729996, 186.119995, 188.020004,
       186.529999, 188.160004, 195.089996, 191.050003, 188.740

In [None]:
from tensorflow import keras
from tensorflow.keras import layers

def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    """Apply one encoder block: self-attention then a feed-forward stage.

    Each stage is preceded by layer normalization and followed by a residual
    addition, so the output has the same last-dimension size as ``inputs``.

    Args:
        inputs: Input tensor for the block.
        head_size: ``key_dim`` passed to ``MultiHeadAttention``.
        num_heads: Number of attention heads.
        ff_dim: Number of filters in the first (ReLU) 1x1 convolution.
        dropout: Dropout rate used inside attention and after each stage.

    Returns:
        The tensor produced by the second residual addition.
    """
    # --- Self-attention stage ---
    normed = layers.LayerNormalization(epsilon=1e-6)(inputs)
    attention = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )
    attended = attention(normed, normed)  # query and value are the same tensor
    attended = layers.Dropout(dropout)(attended)
    residual = attended + inputs

    # --- Position-wise feed-forward stage (kernel_size=1 convolutions) ---
    hidden = layers.LayerNormalization(epsilon=1e-6)(residual)
    hidden = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(hidden)
    hidden = layers.Dropout(dropout)(hidden)
    # Project back to the input's channel count so the residual add is valid.
    projected = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(hidden)
    return projected + residual

In [None]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Build a Keras model: stacked encoder blocks, pooling, MLP, scalar output.

    Args:
        input_shape: Shape of one sample (without the batch dimension).
        head_size: ``key_dim`` for each block's attention layer.
        num_heads: Attention heads per block.
        ff_dim: Feed-forward filter count per block.
        num_transformer_blocks: How many encoder blocks to stack.
        mlp_units: Iterable of hidden-layer widths for the dense head.
        dropout: Dropout rate inside the encoder blocks.
        mlp_dropout: Dropout rate after each dense hidden layer.

    Returns:
        An uncompiled ``keras.Model`` ending in a single-unit ``Dense`` layer.
    """
    inputs = keras.Input(shape=input_shape)

    encoded = inputs
    for _ in range(num_transformer_blocks):
        encoded = transformer_encoder(encoded, head_size, num_heads, ff_dim, dropout)

    head = layers.GlobalAveragePooling1D(data_format="channels_first")(encoded)
    for units in mlp_units:
        head = layers.Dense(units, activation="relu")(head)
        head = layers.Dropout(mlp_dropout)(head)

    outputs = layers.Dense(1)(head)
    return keras.Model(inputs, outputs)

In [None]:
# Per-sample shape: everything after the batch axis of x_train.
input_shape = x_train.shape[1:]

model = build_model(
    input_shape,
    head_size=256,
    num_heads=4,
    ff_dim=4,
    num_transformer_blocks=4,
    mlp_units=[128],
    mlp_dropout=0.4,
    dropout=0.25,
)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    loss="mean_squared_error",
)
# model.summary()  # uncomment to inspect the architecture

# Stop after 10 epochs without improvement and roll back to the best weights.
callbacks = [
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True),
]

model.fit(
    x_train,
    y_train,
    validation_split=0.2,
    epochs=200,
    batch_size=64,
    callbacks=callbacks,
)

# The cell output is the loss (MSE) on the held-out windows.
model.evaluate(x_test, y_test, verbose=1)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200


113.86941528320312

In [None]:
from sklearn import metrics

# Root-mean-squared error of the model on the held-out windows.
pred = model.predict(x_test)
# scikit-learn metrics are defined as metric(y_true, y_pred). MSE is symmetric,
# so the value is unchanged, but keeping the documented order avoids a silent
# error if this is ever swapped for an asymmetric metric such as r2_score.
score = np.sqrt(metrics.mean_squared_error(y_test, pred))
print("Score (RMSE): {}".format(score))

Score (RMSE): 10.670962290152126


In [None]:
# NOTE(review): this repeats the `model.predict(x_test)` call already made in
# the RMSE cell above; `pred` is reused below to build the prediction table.
pred = model.predict(x_test)




In [None]:
# Each row of `pred` holds a single value (the model ends in Dense(1));
# pull that scalar out of every row to get a flat list.
test_predicted = [row[0] for row in pred]
len(test_predicted)

424

In [None]:
# Sanity check: number of target values that fall in the training portion.
len(y[:split])

1726

In [None]:
# Dates that line up with the test predictions:
#   * the first SEQUENCE_SIZE rows after `split` only serve as the first input
#     window, so labelled samples start at split + SEQUENCE_SIZE;
#   * the final row of price_volume_df has no next-day target (it was dropped
#     when building price_volume_target_df), so it is excluded here as well.
# NOTE(review): 'Target' is the *next* row's close, so each Date here is the
# day before the close being predicted — confirm this is the intended labelling.
# .copy() makes df_pred an independent frame, so the column assignments in the
# following cells do not operate on a slice of price_volume_df.
df_pred = price_volume_df.iloc[split + SEQUENCE_SIZE:-1][['Date']].copy()


In [None]:
# Attach the model output next to the dates and show the table.
df_pred = df_pred.assign(prediction=test_predicted)
df_pred

Unnamed: 0,Date,prediction
1734,2018-12-03,176.087189
1735,2018-12-04,176.939331
1736,2018-12-06,179.337219
1737,2018-12-07,178.951660
1738,2018-12-10,176.709320
...,...,...
2153,2020-08-04,392.870605
2154,2020-08-05,397.236084
2155,2020-08-06,410.386810
2156,2020-08-07,429.138092


In [None]:
# Attach the true labels so prediction and target can be compared row by row.
df_pred = df_pred.assign(target=y_test)
df_pred

Unnamed: 0,Date,prediction,target
1734,2018-12-03,176.087189,176.690002
1735,2018-12-04,176.939331,174.720001
1736,2018-12-06,179.337219,168.490005
1737,2018-12-07,178.951660,169.600006
1738,2018-12-10,176.709320,168.630005
...,...,...,...
2153,2020-08-04,392.870605,440.250000
2154,2020-08-05,397.236084,455.609985
2155,2020-08-06,410.386810,444.450012
2156,2020-08-07,429.138092,450.910004


In [None]:
def interactive_plot(df, title):
    """Show an interactive Plotly figure with one trace per data column.

    Every column after the first is drawn against ``df['Date']`` and named
    after its column label.

    Args:
        df: DataFrame with a 'Date' column; columns from position 1 onward
            are plotted.
        title: Figure title.
    """
    fig = px.line(title=title)
    dates = df['Date']
    for column in df.columns[1:]:
        fig.add_scatter(x=dates, y=df[column], name=column)
    fig.show()

In [None]:
# Plot prediction and target against the test dates; "yash" is the figure title.
interactive_plot(df_pred, "yash")