## LSTM Model 

In [None]:
import tensorflow as tf
import sklearn 
import keras
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense,Dropout, LSTM
from sklearn.model_selection import TimeSeriesSplit

In [None]:
# Load the MotherDuck token from .env / the process environment.
load_dotenv('.env')
token = os.getenv('motherduck_token')
if not token:
    # Fail early with a clear message rather than interpolating the literal
    # string "None" into the connection string below.
    raise RuntimeError(
        "motherduck_token is not set (looked in .env and the environment)"
    )

# connect to the database
con = duckdb.connect(f"md:?motherduck_token={token}")

# define your SQL query
# NOTE(review): there is no ORDER BY; the windowing code later in this
# notebook treats row order as time order -- confirm the table is stored
# chronologically.
sql_query = "SELECT * FROM stocks_clouddb.msft_data"

# Execute the query defined above (single source of truth for the SQL text)
# and materialise the result as a DataFrame.
df = con.sql(sql_query).fetchdf().copy()

# display the first few rows of the dataset
print("First few rows of the dataset:")
print(df.head())

In [None]:
# Keep only the closing-price column; everything below works on this frame.
data = df.filter(items=['close_price'])

# Raw values of that frame as a 2-D numpy array (one column).
dataset = data.to_numpy()

# Number of leading rows used for training: 80% of the rows, rounded up.
training_data_len = math.ceil(0.8 * len(dataset))
training_data_len

In [None]:
# scale the data
# Fit the scaler on the training rows only, so the min/max of the held-out
# rows cannot leak into training. The same transform is then applied to every
# row; held-out values may therefore fall slightly outside the (0, 1) range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)
scaled_data

In [None]:
# Scaled rows reserved for training.
train_data = scaled_data[:training_data_len, :]

# Slide a 60-step window over the training rows: each sample is the 60 values
# that precede position `end`, and its target is the value at `end`.
X_train = [train_data[end - 60:end, 0] for end in range(60, len(train_data))]
y_train = [train_data[end, 0] for end in range(60, len(train_data))]

# Print the first window/target pair as a quick sanity check.
if X_train:
    print(X_train[:1])
    print(y_train[:1])
    print()

In [None]:
# Turn the Python lists of windows and targets into numpy arrays.
X_train = np.array(X_train)
y_train = np.array(y_train)

# Append a trailing axis of length 1:
# (samples, time steps) -> (samples, time steps, 1).
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)

In [None]:
# Network: two stacked 50-unit LSTM layers, then a 25-unit dense layer and a
# single-unit dense output.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Adam optimizer with mean-squared-error loss.
model.compile(optimizer='adam', loss='mse')

# Train without shuffling, so batches are drawn in the order the windows
# were built.
history = model.fit(
    X_train,
    y_train,
    epochs=25,
    batch_size=8,
    verbose=1,
    shuffle=False,
)

In [None]:
# Training loss recorded by fit() for each epoch.
loss_per_epoch = history.history['loss']
epoch_index = range(len(loss_per_epoch))

# Line plot of loss against the zero-based epoch index.
plt.plot(epoch_index, loss_per_epoch)
plt.title('Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

In [None]:
# Scaled rows for testing. The slice starts 60 rows before the split so the
# first test window has a full 60-step history.
test_data = scaled_data[training_data_len - 60:, :]

# Targets are the unscaled values after the split point.
y_test = dataset[training_data_len:, :]

# One 60-step window per test target, stacked into a 2-D array.
x_test = np.array(
    [test_data[end - 60:end, 0] for end in range(60, len(test_data))]
)

# Append a trailing axis of length 1:
# (samples, time steps) -> (samples, time steps, 1).
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
# Run the model on the test windows; its output is in the scaler's range.
scaled_predictions = model.predict(x_test)

# Map the outputs back to the original price scale.
predictions = scaler.inverse_transform(scaled_predictions)

In [None]:
# get the root mean squared error (RMSE)
# Square the residuals *before* averaging, so positive and negative errors
# cannot cancel each other out; then take the square root of that mean.
residuals = predictions - y_test
rmse = np.sqrt(np.mean(residuals ** 2))
rmse

In [None]:
# plot the data
# Split the close-price frame at the same point used for training.
train = data[:training_data_len]
# Take an explicit copy: adding a column to a slice of `data` would otherwise
# trigger pandas' SettingWithCopyWarning.
valid = data[training_data_len:].copy()
valid['Predictions'] = predictions

# visualize the data
plt.figure(figsize=(16, 8))
plt.title('Model ')
plt.xlabel('Data', fontsize=18)
plt.ylabel('Close Price USD $', fontsize=18)

# Three lines: training prices, held-out prices, and model predictions.
plt.plot(train['close_price'])
plt.plot(valid[['close_price', 'Predictions']])
plt.legend(['Train', 'Valid', 'Predictions'], loc='lower right')
plt.show()

In [None]:
# Show the held-out close prices next to the model's predictions.
# (Bare expression: displayed as the cell's output when run in a notebook.)
valid