In [None]:
import pandas as pd
from datetime import timedelta
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot as plt
from typing import List
import datetime
import os
from tensorflow import keras
from tensorflow.keras import callbacks

In [None]:
# Notebook-wide stopwatch: `start_time` is captured once, when this cell runs.
start_time = datetime.datetime.now()
def execution_time_ml():
  """Return the time elapsed since `start_time` as a `datetime.timedelta`."""
  return datetime.datetime.now() - start_time

In [None]:
# pd.options.display.max_columns = None # show all columns when they are not visible
# pd.options.display.max_rows = None # show all rows when they are not visible

In [None]:
# Mount Google Drive (Google Colab only). If the data file is stored locally
# on your computer instead, remove this cell.
from google.colab import drive
drive.mount('/content/drive')

# make the Colab Notebooks folder the working directory, so that relative
# paths (such as the CSV read below) resolve inside it
import sys
os.chdir('/content/drive/MyDrive/Colab Notebooks/')

In [None]:
# Load the data set indexed by 'Date' and discard the 'Close_W' column
# (per the original note, it is used in the prediction, so it must be removed).
df = (
    pd.read_csv('data_set.csv', index_col='Date')
    .drop(columns=['Close_W'])
)

In [None]:
def split_data(batch_size):
  """Split the module-level `df` by row position into training and validation frames.

  The first 80% of the rows (rounded down) form the training frame. The
  validation frame starts `batch_size` rows after the end of the training
  frame, which leaves a gap of `batch_size` rows between the two.

  Returns:
    A `(train_df, val_df)` tuple of DataFrames.
  """
  n_train = int(len(df) * 0.8)
  val_start = n_train + batch_size
  return df.iloc[:n_train], df.iloc[val_start:]

In [None]:
# Model / training hyper-parameters shared by the functions in this notebook
patience = 75  # passed to EarlyStopping(patience=...) in early_stopping_ml
# min_delta = 0.001
shuffle = True  # default value of the `shuffle` argument of make_dataset
use_scaler = True  # default value of the `use_scaler` argument of make_dataset
optimizer = 'adam'  # passed to model.compile in compile_and_fit
loss = 'MeanAbsolutePercentageError'  # passed to model.compile in compile_and_fit
metrics='mae'  # passed to model.compile; also used as the history column name in predict_calculate
# dropout = 0.3
# lstm = 200
return_sequences = False  # passed to the LSTM layer in lstm_model
activation = 'relu'  # activation of the hidden Dense layers in lstm_model
dense_output = 1  # number of units of the final Dense layer in lstm_model

In [None]:
def creation_scaler(train_df):
  """Create a `StandardScaler` fitted on the "Close" column of `train_df`.

  Only the training frame is used for fitting; the fitted scaler is returned
  so the caller can apply it to other frames.
  """
  close_column = train_df[["Close"]]
  close_scaler = StandardScaler()
  close_scaler.fit(close_column)
  return close_scaler

def make_dataset(
    scaler,
    df,
    window_size,
    batch_size,
    use_scaler=use_scaler,
    shuffle=shuffle
    ):
  """Build a windowed Keras time-series dataset from the "Close" column of `df`.

  Inputs are windows of `window_size` consecutive "Close" values, taken from
  all rows except the last `window_size` and optionally transformed with
  `scaler`. Targets are the raw (unscaled) "Close" values starting at row
  `window_size`, so the window that begins at row i is paired with the
  "Close" value at row i + window_size.

  Args:
    scaler: fitted scaler exposing `transform`; only used when `use_scaler` is truthy.
    df: DataFrame containing a "Close" column.
    window_size: length of each input sequence.
    batch_size: batch size of the resulting dataset.
    use_scaler: whether to scale the inputs.
    shuffle: forwarded to `timeseries_dataset_from_array`.

  Returns:
    The dataset produced by `tf.keras.preprocessing.timeseries_dataset_from_array`.
  """
  close_inputs = df[["Close"]].iloc[:-window_size]
  if use_scaler:
    close_inputs = scaler.transform(close_inputs)
  next_close = df["Close"].iloc[window_size:]
  return tf.keras.preprocessing.timeseries_dataset_from_array(
      data=np.array(close_inputs, dtype=np.float32),
      targets=next_close,
      sequence_length=window_size,
      sequence_stride=1,
      shuffle=shuffle,
      batch_size=batch_size)

In [None]:
# compile and fit the ML model
def compile_and_fit(model, train_ds, val_ds, num_epochs, callbacks):
  """Compile `model` with the notebook-level settings and train it.

  The module-level `optimizer`, `loss` and `metrics` values are passed to
  `model.compile`; the model is then fitted on `train_ds` and validated on
  `val_ds`.

  Args:
    model: object exposing Keras-style `compile` and `fit` methods.
    train_ds: training dataset passed to `fit`.
    val_ds: validation dataset passed to `fit` as `validation_data`.
    num_epochs: number of epochs to train.
    callbacks: a single callback, a list/tuple of callbacks, or None.

  Returns:
    Whatever `model.fit` returns (the training history for Keras models).
  """
  # Keras documents `metrics` and `callbacks` as lists. A bare value is only
  # tolerated by some versions, so wrap single values before passing them on.
  if metrics is None or isinstance(metrics, (list, tuple, dict)):
    metrics_arg = metrics
  else:
    metrics_arg = [metrics]

  if callbacks is None or isinstance(callbacks, (list, tuple)):
    callbacks_arg = callbacks
  else:
    callbacks_arg = [callbacks]

  model.compile(
        optimizer=optimizer,
        loss=loss,
        metrics=metrics_arg,
      )
  history = model.fit(
      train_ds,
      epochs=num_epochs,
      validation_data=val_ds,
      callbacks=callbacks_arg,
      verbose=1
      )

  return history

In [None]:
# build (or load) the ML model
def lstm_model(lstm, dropout, dense, use_load_model, directory_model, len_train_df):
  """Return the network to train: either loaded from disk or freshly built.

  Args:
    lstm: number of units of the LSTM layer.
    dropout: rate used by every Dropout layer.
    dense: number of units of each hidden Dense layer.
    use_load_model: when True, load the model stored at `directory_model`
      (a saved model or a checkpoint path such as training_step_1/cp-0024.ckpt)
      instead of building a new one.
    directory_model: path passed to `tf.keras.models.load_model`.
    len_train_df: unused; kept so existing callers keep working. It used to
      feed an `input_shape` argument on a hidden Dense layer, which Keras
      ignores for any layer that is not the first one in a Sequential model.

  Returns:
    An uncompiled Sequential model, or the loaded model.
  """
  if use_load_model == True:
    return tf.keras.models.load_model(directory_model)

  layers = [
    tf.keras.layers.LSTM(lstm, return_sequences=return_sequences),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(dropout),
  ]
  # three identical hidden groups: Dense -> Dropout -> BatchNormalization
  for _ in range(3):
    layers.append(tf.keras.layers.Dense(dense, activation=activation))
    layers.append(tf.keras.layers.Dropout(dropout))
    layers.append(tf.keras.layers.BatchNormalization())
  layers.append(tf.keras.layers.Dense(dense_output))
  return tf.keras.models.Sequential(layers)

In [None]:
# build the early-stopping callback used to guard against overfitting
def early_stopping_ml(min_delta):
  """Return an `EarlyStopping` callback configured for this notebook.

  `patience` comes from the module-level setting, `min_delta` from the caller,
  and the best weights are restored when training stops.
  """
  stopper_options = dict(
    patience=patience,
    min_delta=min_delta,
    restore_best_weights=True,
  )
  return callbacks.EarlyStopping(**stopper_options)

In [None]:
# function saving ML results to a file
from csv import writer
import os
def append_list_as_row(file_name, list_of_elem, header):
    """Append the rows of a DataFrame to a CSV file, creating the file if needed.

    Args:
        file_name: path of the CSV file to append to.
        list_of_elem: despite the name, an object with a pandas-style
            `to_csv` method (the notebook passes a one-row DataFrame).
        header: forwarded to `to_csv`; True also writes the column names.
    """
    # 'a' appends and creates a missing file; newline='' leaves line endings
    # to the CSV writer. The `with` block closes the file, so no explicit
    # close() is needed.
    with open(file_name, 'a', newline='') as write_obj:
        list_of_elem.to_csv(write_obj, header=header)

def saving_results_ml(result, parametr_save_result, directory_result):
  """Append `result` to the CSV file at `directory_result` when saving is enabled.

  Nothing happens unless `parametr_save_result` equals True. The column header
  is written only when the target file does not exist yet.
  """
  if parametr_save_result == True:
    # a header is needed only for a file that is about to be created
    write_header = not os.path.isfile(directory_result)
    append_list_as_row(directory_result, result, header=write_header)

In [None]:
# persist the complete ML model to disk (optional)
def saving_full_model(parametr_save_model, model_ml, directory_model):
  """Call `model_ml.save(directory_model)` when `parametr_save_model` equals True."""
  should_save = parametr_save_model == True
  if not should_save:
    return
  model_ml.save(directory_model)

In [None]:
def predict_calculate(lstm,
                      dropout,
                      dense,
                      batch_size,
                      min_delta,
                      parametr_save_model,
                      parametr_save_result,
                      epochs,
                      checkpoint_use,
                      use_load_model,
                      directory_model,
                      directory_result,
                      ):
  """Run one training experiment and report / persist its outcome.

  Steps: split the data, build or load the model, create the windowed
  datasets, train, plot the loss and metric curves, print their minima and
  the elapsed time, then optionally append a result row to a CSV file and
  save the model.

  Args:
    lstm: number of units of the LSTM layer.
    dropout: rate used by the Dropout layers.
    dense: number of units of each hidden Dense layer.
    batch_size: dataset batch size; the window size is `batch_size + 2`.
    min_delta: `min_delta` for early stopping (only used when
      `checkpoint_use` is not True; always recorded in the result row).
    parametr_save_model: when True, save the trained model to `directory_model`.
    parametr_save_result: when True, append the result row to `directory_result`.
    epochs: number of training epochs.
    checkpoint_use: when True, write a checkpoint every epoch instead of
      using early stopping.
    use_load_model: when True, load the model from `directory_model`
      instead of building a new one.
    directory_model: path used for loading and/or saving the model.
    directory_result: path of the CSV file collecting the results.
  """
  train_df, val_df = split_data(batch_size) # split data on train_df and val_df
  len_train_df = len(train_df.columns) # number of columns in the training data set

  model_ml = lstm_model(lstm, dropout, dense, use_load_model, directory_model, len_train_df)
  window_size = batch_size + 2

  if checkpoint_use == True: # save a checkpoint of the model after every epoch
    checkpoint_path = "training_step_2/cp-{epoch:04d}.ckpt"
    callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    verbose=1,
    )
  else:
    callback = early_stopping_ml(min_delta) # stop early to limit overfitting

  # the scaler is fitted on the training frame only and reused for validation
  scaler = creation_scaler(train_df)
  train_ds = make_dataset(scaler,
                          df=train_df,
                          window_size=window_size,
                          batch_size=batch_size,
                          use_scaler=use_scaler,
                          shuffle=shuffle,
                          )
  val_ds = make_dataset(scaler,
                        df=val_df,
                        window_size=window_size,
                        batch_size=batch_size,
                        use_scaler=use_scaler,
                        shuffle=shuffle,
                        )
  # Keras documents `callbacks` as a list, so wrap the single callback
  history = compile_and_fit(model_ml, train_ds, val_ds, num_epochs=epochs, callbacks=[callback])

  history_df = pd.DataFrame(history.history)
  min_loss = history_df['loss'].min()
  min_val_loss = history_df['val_loss'].min()
  history_df.loc[:, ['loss', 'val_loss']].plot()
  print()
  print("Minimum Loss: {:0.4f}".format(min_loss))
  print("Minimum Validation Loss: {:0.4f}".format(min_val_loss))

  # history columns of the compiled metric (relies on `metrics` being a string)
  metrics_names = metrics
  metrics_names_val = 'val_' + metrics
  history_df.loc[:, [metrics_names, metrics_names_val]].plot()
  print()
  print("Minimum metrics: {:0.4f}".format(history_df[metrics_names].min()))
  print("Minimum Validation metrics: {:0.4f}".format(history_df[metrics_names_val].min()))

  # elapsed time since the notebook-level `start_time`, as reported by execution_time_ml()
  temp = execution_time_ml()
  print('Time to complete (h:m):', ':'.join(str(temp).split(':')[:2])) # show time in format h:m

  # one-row DataFrame summarising the run and its settings
  result = pd.DataFrame({'Minimum_Loss': [min_loss],
    'Minimum_Validation_Loss': [min_val_loss],
    'Batch_size': [batch_size],
    'Window_size': [window_size],
    'Epochs': [epochs],
    'Patience': [patience],
    'Min_delta': [min_delta],
    'Shuffle': [shuffle],
    'Use_scaler': [use_scaler],
    'Optimizer': [optimizer],
    'Loss': [loss],
    'Dropout': [dropout],
    'LSTM': [lstm],
    'Return_sequences': [return_sequences],
    'Activation': [activation],
    'Dense': [dense],
    'Dense_output': [dense_output],
    'Time_to_complete': [temp],
    })

  saving_results_ml(result, parametr_save_result, directory_result) # append the result row to the CSV file

  saving_full_model(parametr_save_model, model_ml, directory_model) # save the full model to disk
  