In [4]:
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
import datetime as dt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard
import seaborn as sns

### 訓練集&測試集

In [5]:
def train_test_split(df, test_ratio=0.2):
    """Split a DataFrame into train/test arrays by row order (no shuffling).

    The last ``int(len(df) * test_ratio)`` rows form the test set and all
    preceding rows form the training set.

    Note: this definition shadows ``train_test_split`` imported from
    ``sklearn.model_selection`` earlier in the notebook.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; rows are kept in their existing order.
    test_ratio : float, default 0.2
        Fraction of rows (rounded down) placed in the test set.

    Returns
    -------
    train_set : numpy.ndarray
        Values of the leading rows.
    test_set : numpy.ndarray
        Values of the trailing rows.

    Raises
    ------
    ValueError
        If ``test_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1")

    n_rows = df.shape[0]
    test_samples = int(n_rows * test_ratio)

    # Slice with an explicit positive index. With negative slicing, a test
    # size of 0 turns into `[:-0]` (empty) and `[-0:]` (all rows), which
    # would swap the roles of the two sets for very small frames.
    split_at = n_rows - test_samples

    # Training set (as array)
    train_set = df.iloc[:split_at, :].values

    # Test set (as array)
    test_set = df.iloc[split_at:, :].values

    return train_set, test_set

### data normalization

In [6]:
def data_normalization(input_set):
    """Standardize all columns and also fit a scaler on column 0 alone.

    Parameters
    ----------
    input_set : numpy.ndarray
        2-D array; it is indexed as ``input_set[:, 0:1]``, so it must support
        NumPy-style 2-D slicing.

    Returns
    -------
    input_set_sc : numpy.ndarray
        ``input_set`` with every column standardized by a ``StandardScaler``
        fitted on this same data.
    sc_target : StandardScaler
        A second scaler fitted only on column 0 (kept 2-D via ``0:1``).
        Presumably used later to inverse-transform predictions of that
        column; confirm against the caller.
    """
    # Scaler over every column of the input.
    sc = StandardScaler()
    input_set_sc = sc.fit_transform(input_set)

    # Only the fitted statistics of column 0 are needed here, so call `fit`
    # instead of `fit_transform`, whose output was being discarded.
    sc_target = StandardScaler()
    sc_target.fit(input_set[:, 0:1])

    return input_set_sc, sc_target

#### 創造X、Y資料

In [7]:
def split_Xy(input_set_sc, n_future, n_past):
    """Build sliding-window inputs and multi-step targets from a scaled array.

    For each window end ``i`` the input is rows ``[i - n_past, i)`` of all
    columns except column 0, and the target is column 0 over rows
    ``[i, i + n_future)``.

    Parameters
    ----------
    input_set_sc : numpy.ndarray
        2-D array whose column 0 is the value to predict.
    n_future : int
        Number of rows in each target window.
    n_past : int
        Number of rows in each input window.

    Returns
    -------
    X : numpy.ndarray
        Stacked input windows.
    y : numpy.ndarray
        Stacked target windows.
    """
    # Stop early enough that every target window has a full n_future rows.
    window_ends = range(n_past, len(input_set_sc) - n_future + 1)

    # Inputs: the n_past rows before each end point, excluding column 0.
    feature_windows = [input_set_sc[end - n_past:end, 1:] for end in window_ends]
    # Targets: column 0 for the n_future rows starting at each end point.
    target_windows = [input_set_sc[end:end + n_future, 0] for end in window_ends]

    X = np.array(feature_windows)
    y = np.array(target_windows)

    print("X_test's shape: {}".format(X.shape))
    print("y_test's shape: {}".format(y.shape))

    return X, y

### model builder

In [8]:
def model_creator(n_steps, n_feature, n_outputs=None):
    """Build (but do not compile) a two-layer stacked LSTM regressor.

    Architecture: LSTM(64, relu, return_sequences=True) -> LSTM(32, relu)
    -> Dropout(0.2) -> Dense(n_outputs). The model summary is printed.

    Parameters
    ----------
    n_steps : int
        Number of time steps in each input window.
    n_feature : int
        Number of features per time step.
    n_outputs : int, optional
        Width of the final Dense layer. When omitted, falls back to
        ``y_train.shape[1]`` read from the notebook's global scope, which is
        what the original implementation always did.

    Returns
    -------
    keras.models.Sequential
        The uncompiled model.
    """
    if n_outputs is None:
        # Backward-compatible fallback: relies on a notebook-level `y_train`.
        n_outputs = y_train.shape[1]

    model = Sequential()

    # 1st layer; input_shape => (n_steps, n_feature).
    # Uses the `n_feature` parameter: the body previously referenced the
    # undefined name `n_features`.
    model.add(LSTM(units=64, activation="relu", input_shape=(n_steps, n_feature), return_sequences=True))

    # 2nd recurrent layer collapses the sequence to a single vector.
    model.add(LSTM(units=32, activation="relu", return_sequences=False))

    model.add(Dropout(0.2))

    # One output unit per predicted step.
    model.add(Dense(n_outputs))

    model.summary()

    return model

In [9]:
def model_trained(model, X_train, y_train, epoch, batch_size):
    """Compile and fit ``model``, then plot training and validation loss.

    The model is compiled with the "adam" optimizer and mean-squared-error
    loss, and fitted with 10% of the training data held out for validation.
    Three callbacks monitor ``val_loss``: early stopping, learning-rate
    reduction on plateau, and a checkpoint that writes the best weights to
    ``weights.h5``.

    Parameters
    ----------
    model : Keras model to train (compiled in place).
    X_train, y_train : training inputs and targets passed to ``model.fit``.
    epoch : int
        Maximum number of epochs.
    batch_size : int
        Mini-batch size.

    Returns
    -------
    The same ``model`` object after fitting.
    """
    model.compile(loss="mean_squared_error", optimizer="adam")

    # All callbacks watch the validation loss.
    monitored = 'val_loss'
    training_callbacks = [
        EarlyStopping(monitor=monitored, min_delta=1e-10, patience=10, verbose=1),
        ReduceLROnPlateau(monitor=monitored, factor=0.5, patience=10, verbose=1),
        ModelCheckpoint(filepath='weights.h5', monitor=monitored, verbose=1,
                        save_best_only=True, save_weights_only=True),
    ]

    history = model.fit(
        X_train,
        y_train,
        epochs=epoch,
        batch_size=batch_size,
        callbacks=training_callbacks,
        validation_split=0.1,
        verbose=1,
    )

    # Visualize the loss curves recorded during fitting.
    curves = history.history
    for key, label in (('loss', 'Training loss'), ('val_loss', 'Validation loss')):
        plt.plot(curves[key], label=label)
    plt.legend()

    return model