In [None]:
import os
import urllib.request
from datetime import datetime, timezone, timedelta
import pandas as pd 
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM, GRU, TimeDistributed, RepeatVector, Lambda, Bidirectional
from keras.layers.normalization import BatchNormalization
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential, load_model, clone_model
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from keras.backend import clear_session
import time
import csv
from sklearn.externals import joblib

%matplotlib inline

In [None]:
# Mapping of every city/county name (Chinese) to its English code.
# The keys are matched against the weather table's 'city' column and the
# codes become the per-city column suffix when the weather table is flattened.
city = {
    '基隆市':'KLU',
    '臺北市':'TPE',
    '新北市':'TPH',
    '桃園市':'TYC',
    '新竹市':'HSC',
    '新竹縣':'HSH',
    '苗栗縣':'MAL',
    '臺中市':'TXG',
    '彰化縣':'CWH',
    '南投縣':'NTO',
    '雲林縣':'YLH',
    '嘉義市':'CYI',
    '嘉義縣':'CHY',
    '臺南市':'TNN',
    '高雄市':'KHH',
    '屏東縣':'IUH',
    '宜蘭縣':'ILN',
    '花蓮縣':'HWA',
    '臺東縣':'TTT'
}

# 資料與模型參數確認

In [None]:
# Crops: crop number -> [English name, Chinese name, product label].
# Index 0 is used in saved file names and plot titles, index 1 selects the dataset CSV.
# NOTE(review): the third entry looks like the market product code and variety — confirm.
crop_dict = {
    1 : ['cabbage', '高麗菜', '(LA1 甘藍 初秋)'],
    2 : ['carrot', '胡蘿蔔', '(SB2 胡蘿蔔 清洗)'],
    3 : ['beeftomato', '牛番茄', '(FJ3 番茄 牛蕃茄)'],
    4 : ['cucumber', '胡瓜', '(FD1 花胡瓜)'],
    5 : ['loofah', '絲瓜', '(FF1 絲瓜)'],
    6 : ['cabbage2', '包心白菜', '(LC1 包心白 包白)'],
    7 : ['shallots', '青蔥', '(SE6 青蔥 粉蔥)'],
    8 : ['bittergourd', '苦瓜', '(FG1 苦瓜 白大米)'],
    9 : ['onion', '洋蔥', '(SD1 洋蔥 本產)'],
    10 : ['waterspinach', '空心菜', '(LF2 蕹菜 小葉)'],

    11 : ['guava', '番石榴', '(P1 番石榴 珍珠芭)'],
    12 : ['pineapple', '鳳梨', '(B2 鳳梨 金鑽鳳梨)'],
    13 : ['papaya', '木瓜', '(I1 木瓜 網室紅肉)'],
    14 : ['watermelon', '西瓜', '(T1 西瓜 大西瓜)'],
    15 : ['banana', '香蕉', '(A1 香蕉)'],
    16 : ['apple', '蘋果', '(X69 蘋果 富士進口)'],
    17 : ['pear', '梨子', '(O4 梨 新興梨)'],
    18 : ['grape', '葡萄', '(S1 葡萄 巨峰)'],
    19 : ['dragonfruit', '火龍果', '(812 火龍果 紅肉)'],
    20 : ['mango', '芒果', '(R1 芒果 愛文)'],
    21 : ['pakchoy', '青江菜', '(LD1 青江白菜 小梗)'], # changed relative to the Colab version
    22 : ['cauliflower', '花椰菜', '(FB11 花椰菜 青梗 留梗炳)'],
    23 : ['lemon', '檸檬', '(F1 雜柑 檸檬)'],
    24 : ['tomato', '小番茄', '(74 小番茄 玉女)'], # changed relative to the Colab version
}


In [None]:
# Market selection: market number -> market name as it appears in the
# 'Market' column of the price dataset. Only one market is defined so far.
market_dict = {
    1 : '台北一'
}

1. 時間設定
2. 資料區間
3. 模型相關選擇
4. 作圖
5. 儲存

In [None]:
# 1. Time
# Current time in the UTC+8 zone, formatted as 'MMDD-HH:MM'; used as a prefix/tag in saved file names.
time_now = datetime.now(timezone(timedelta(hours=+8))).isoformat(timespec="seconds")[5:16].replace('-', '', 1).replace('T', '-')

# 2. Data range
# Crop to model (key of crop_dict).
# NOTE(review): the original note said "13-24 are fruit", but crop_dict lists fruit from 11 and
# vegetables at 21-22 — check the number against crop_dict rather than this range.
crop_no = 17
dataset_path = '/content/drive/MyDrive/Ai團專_農時_共享資料夾/Dataset_資料集/模型訓練之資料集/fruit model vseion/model_dataset_'
print(dataset_path + crop_dict[crop_no][1] + '.csv')
# Market to model (key of market_dict); only 1 exists at the moment.
market_no = 1
# Whether rows with a missing price are dropped (True) or forward-filled first (False).
price_na_del = True
# NOTE(review): in the visible cells `ohe` is only written to the result log; one-hot encoding is always applied.
ohe = True

# Whether to merge the weather data.
# add_weather_data = True
add_weather_data = False
# Whether to merge the typhoon data.
add_typhoon_data = True
# add_typhoon_data = False
# Date ranges (inclusive) of the training set and the test set.
train_start_date = '2013-01-02'
train_end_date = '2020-05-31'
test_start_date = '2020-06-01'
test_end_date = '2021-06-18'
# Number of days to look back and to look ahead. For a many-to-many model,
# pastDay and futureDay have to be set to the same value.
pastDay = 10
futureDay = 7

# 3. Model
# Which model to use; 1-3 are available.
model_no = 3
repeat_train = True
# Number of units per layer.
# NOTE(review): LSTM_unit_2 = '' is not a usable unit count; models 1 and 2 pass it to LSTM(units=...).
LSTM_unit_1 = 10
LSTM_unit_2 = ''
# LSTM_unit_1 = 256
# LSTM_unit_2 = 16
# LSTM_unit_3 = 64
# Other training parameters.
batch_size = 30
# epochs = 1000
epochs = 500
validation_split = 0.1
patience = 50

# 4. Plotting
# How many predicted days to plot; must be less than or equal to the number above.
plotDay = 1
# How many of the most recent days to show in the prediction plot.
pic_days = 300

# 5. Saving
# Whether to save results to Google Drive.
save_google = True
# save_route = '/content/drive/MyDrive/Ai團專_農時_共享資料夾/模型與成果/'
save_google_dir = '/content/drive/MyDrive/AI_project/result/'
dev_notes = ''

/content/drive/MyDrive/Ai團專_農時_共享資料夾/Dataset_資料集/模型訓練之資料集/fruit model vseion/model_dataset_梨子.csv


# 版本介紹與環境

價格預測版本s6，
此版本為 最終版
以市場為核心，選擇性與颱風氣候資料合併
訓練過程自動抓取五次訓練中的最佳結果進行儲存

重要資訊簡介:
1. 氣候資料為?日 vs 1天價格
2. 進行shift
3. 2000.01.02 or 2013.01.02開始
4. 所有參數數據及重要模型參數將自動儲存


In [None]:
# Mount Google Drive inside Colab, but only when save_google is enabled.
# NOTE(review): dataset_path also points under /content/drive, so reading the
# dataset appears to need this mount even when save_google is False — confirm.
from google.colab import drive
if save_google: drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
def download_data():
    """Download the weather, typhoon and banana-price CSV files that are not on disk yet.

    Each file is fetched into the current working directory under a fixed
    local name, and only when no file of that name already exists there.
    """
    # (local file name, download URL), processed in this order
    sources = (
        ('weather.csv', 'https://github.com/Yi-Wei-Lin/Tibame_AI_Project/raw/main/userdata/amoswu/dataset/reportdaily_mean_fillna.csv'),
        ('typhoon.csv', 'https://github.com/Yi-Wei-Lin/Tibame_AI_Project/raw/main/userdata/amoswu/dataset/TyphoonDatabase.csv'),
        ('banana.csv', 'https://github.com/Yi-Wei-Lin/Tibame_AI_Project/raw/main/userdata/lynnbai/dataset/Banana.csv'),
    )
    for local_name, url in sources:
        if os.path.exists(local_name):
            continue
        urllib.request.urlretrieve(url, local_name)
download_data()

In [None]:
# for i in city_drop_list:
#     city_drop_columns.append([s for s in list(df_date.columns) if s.__contains__(i)])
# city_drop_columns = list(np.array(city_drop_columns).reshape(-1))

# dataset_path = '/content/drive/MyDrive/Ai團專_農時_共享資料夾/Dataset_資料集/模型訓練之資料集/fruit model vseion/model_dataset_木瓜.csv'
# dataset_path = '/content/drive/MyDrive/Ai團專_農時_共享資料夾/Dataset_資料集/模型訓練之資料集/fruit model vseion/model_dataset_'
# asdf = pd.read_csv(dataset_path + crop_dict[crop_no+1][1] + '.csv', encoding='utf-8')
# asdf
# crop_dict[13][1]


# 資料預處理 - 天氣

In [None]:
# Weather columns to remove before the table is used.
# Full list of weather columns: 'date', 'city', 'StnPres', 'SeaPres', 'StnPresMax', 'StnPresMaxTime', 'StnPresMin', 'StnPresMinTime', 'Temperature', 'TMax', 'TMaxTime', 'TMin', 'TMinTime', 'TdDewPoint', 'RH', 'RHMin', 'RHMinTime', 'WS', 'WD', 'WSGust', 'WDGust', 'WGustTime', 'Precp', 'PrecpHour', 'PrecpMax10', 'PrecpMax10Time', 'PrecpMax60', 'PrecpMax60Time', 'SunShine', 'SunShineRate', 'GloblRad', 'VisbMean', 'EvapA', 'UVIMax', 'UVIMaxTime', 'CloudAmount'
weather_drop_columns = [
              'StnPres', 'SeaPres', 'StnPresMax', 'StnPresMaxTime', 
              'StnPresMin', 'StnPresMinTime', 'RHMin',  'WSGust', 
              'GloblRad', 'VisbMean', 'UVIMax', 'UVIMaxTime', 'CloudAmount'
]

In [None]:
# Load the daily weather table and discard the columns listed in weather_drop_columns.
df = pd.read_csv('weather.csv', encoding='utf-8').drop(columns=weather_drop_columns)
df.head(3)

Unnamed: 0,date,city,Temperature,TMax,TMaxTime,TMin,TMinTime,TdDewPoint,RH,RHMinTime,WS,WD,WDGust,WGustTime,Precp,PrecpHour,PrecpMax10,PrecpMax10Time,PrecpMax60,PrecpMax60Time,SunShine,SunShineRate,EvapA
0,2000-01-01,南投縣,9.0,14.3,2000-01-01 13:53:00,6.5,2000-01-01 19:22:00,1.1,60.0,2000-01-01 12:46:00,7.5,301.0,262.5,2000-01-01 17:41:00,0.0,0.0,0.0,,0.0,,9.8,91.5,2.8
1,2000-01-01,嘉義市,18.9,26.0,2000-01-01 13:37:00,13.4,2000-01-01 23:44:00,14.3,77.0,2000-01-01 15:53:00,2.0,20.0,30.0,2000-01-01 11:57:00,0.0,0.0,0.0,,0.0,,7.6,71.2,2.8
2,2000-01-01,嘉義縣,14.0,14.9,2000-01-01 11:45:00,3.7,2000-01-01 21:50:00,-3.6,44.0,2000-01-01 01:25:00,1.8,300.0,90.0,2000-01-01 06:18:00,0.0,0.0,0.0,,0.0,,9.2,96.5,2.1


In [None]:
# Flatten the weather table to one row per date: start from the unique dates
# and left-merge, on the index, one block of suffixed columns per city.
df_date = df[['date']].drop_duplicates().set_index('date')

for cityname, citycode in city.items():
    suffix = '_' + citycode
    # rows of this city, every column tagged with the city code, indexed by date
    df_city = df.loc[df['city'] == cityname].add_suffix(suffix).set_index('date' + suffix)
    df_date = df_date.merge(df_city, how='left', left_index=True, right_index=True)

# Remove the per-city copies of the city-name column.
city_name_columns = list(df_date.filter(regex='city').columns)
df_date = df_date.drop(columns=city_name_columns)
df_weather = df_date
df_weather.head(3)

Unnamed: 0_level_0,Temperature_KLU,TMax_KLU,TMaxTime_KLU,TMin_KLU,TMinTime_KLU,TdDewPoint_KLU,RH_KLU,RHMinTime_KLU,WS_KLU,WD_KLU,WDGust_KLU,WGustTime_KLU,Precp_KLU,PrecpHour_KLU,PrecpMax10_KLU,PrecpMax10Time_KLU,PrecpMax60_KLU,PrecpMax60Time_KLU,SunShine_KLU,SunShineRate_KLU,EvapA_KLU,Temperature_TPE,TMax_TPE,TMaxTime_TPE,TMin_TPE,TMinTime_TPE,TdDewPoint_TPE,RH_TPE,RHMinTime_TPE,WS_TPE,WD_TPE,WDGust_TPE,WGustTime_TPE,Precp_TPE,PrecpHour_TPE,PrecpMax10_TPE,PrecpMax10Time_TPE,PrecpMax60_TPE,PrecpMax60Time_TPE,SunShine_TPE,...,TMaxTime_HWA,TMin_HWA,TMinTime_HWA,TdDewPoint_HWA,RH_HWA,RHMinTime_HWA,WS_HWA,WD_HWA,WDGust_HWA,WGustTime_HWA,Precp_HWA,PrecpHour_HWA,PrecpMax10_HWA,PrecpMax10Time_HWA,PrecpMax60_HWA,PrecpMax60Time_HWA,SunShine_HWA,SunShineRate_HWA,EvapA_HWA,Temperature_TTT,TMax_TTT,TMaxTime_TTT,TMin_TTT,TMinTime_TTT,TdDewPoint_TTT,RH_TTT,RHMinTime_TTT,WS_TTT,WD_TTT,WDGust_TTT,WGustTime_TTT,Precp_TTT,PrecpHour_TTT,PrecpMax10_TTT,PrecpMax10Time_TTT,PrecpMax60_TTT,PrecpMax60Time_TTT,SunShine_TTT,SunShineRate_TTT,EvapA_TTT
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
2000-01-01,19.3,22.6,2000-01-01 12:28:30,16.9,2000-01-01 22:33:30,14.3,73.5,2000-01-01 19:56:00,3.8,280.0,180.0,2000-01-01 12:36:30,2.2,0.4,1.9,2000-01-01 03:00:00,2.2,2000-01-01 02:57:00,3.8,36.6,2.0,17.5,21.1,2000-01-01 14:05:44,14.3,2000-01-01 22:33:55,11.9,78.7,2000-01-01 19:43:00,1.2,230.9,197.3,2000-01-01 11:02:27,0.0,0.0,0.0,,0.0,,2.7,...,2000-01-01 12:10:40,13.5,2000-01-01 02:05:50,16.4,74.0,2000-01-01 10:56:00,0.8,221.7,162.7,2000-01-01 10:54:40,0.5,0.0,0.0,,0.0,,4.1,37.0,1.2,19.2,25.0,2000-01-01 11:34:18,17.7,2000-01-01 05:33:12,16.9,74.8,2000-01-01 08:28:30,1.8,119.2,75.2,2000-01-01 13:10:42,0.0,0.0,0.1,2000-01-01 01:45:00,0.1,2000-01-01 01:45:00,3.1,28.9,2.9
2000-01-02,18.6,21.7,2000-01-02 11:51:30,15.2,2000-01-02 15:26:00,11.4,64.5,2000-01-02 12:48:30,4.7,130.0,85.0,2000-01-02 20:07:00,0.0,0.0,0.0,,0.0,,8.5,79.8,2.6,16.8,22.6,2000-01-02 13:02:11,12.1,2000-01-02 08:10:22,9.0,68.7,2000-01-02 04:59:40,1.9,98.3,93.7,2000-01-02 20:01:05,0.0,0.0,0.0,,0.0,,7.6,...,2000-01-02 11:30:51,14.6,2000-01-02 11:12:43,13.7,62.0,2000-01-02 11:45:00,1.1,215.7,139.9,2000-01-02 11:21:43,0.3,0.0,0.0,,0.0,,2.6,23.0,1.5,17.9,22.1,2000-01-02 11:34:11,16.5,2000-01-02 11:06:44,14.1,64.0,2000-01-02 13:42:15,2.8,119.8,89.7,2000-01-02 14:44:27,0.0,0.0,0.0,,0.0,,4.2,38.9,3.6
2000-01-03,18.7,20.8,2000-01-03 13:12:00,16.3,2000-01-03 13:59:30,10.0,58.0,2000-01-03 12:57:00,3.9,60.0,85.0,2000-01-03 03:55:30,0.0,0.0,0.0,,0.0,,5.0,47.4,3.0,17.6,20.8,2000-01-03 12:44:49,15.3,2000-01-03 05:40:11,10.7,76.7,2000-01-03 13:20:40,2.3,79.2,89.0,2000-01-03 10:31:44,0.0,0.0,0.0,,0.0,,4.6,...,2000-01-03 11:25:43,12.9,2000-01-03 09:30:09,13.5,67.0,2000-01-03 13:55:00,0.8,210.9,172.4,2000-01-03 13:15:09,0.1,0.0,0.0,,0.0,,1.8,16.7,2.5,16.9,22.2,2000-01-03 12:24:00,16.2,2000-01-03 09:16:12,13.5,64.2,2000-01-03 11:48:45,2.2,101.1,66.7,2000-01-03 05:31:42,0.0,0.0,0.0,,0.0,,5.2,47.8,3.2


In [None]:
# Cities whose weather columns are to be excluded, given as city codes.
# All codes, for reference:
# city_drop = ['KLU', 'TPE', 'TPH', 'TYC', 'HSC', 'HSH', 'MAL', 'TXG', 'CWH', 
#         'NTO', 'YLH', 'CYI', 'CHY', 'TNN', 'KHH', 'IUH', 'ILN', 'HWA', 'TTT']
city_drop_list = ['KLU', 'TPH', 'TYC', 'HSC', 'HSH', 'MAL', 'TXG', 'CYI']

# Flat list of every df_date column whose name contains one of the codes above,
# grouped city by city in the order of city_drop_list.
# Built with a nested comprehension instead of np.array(...).reshape(-1): the
# array route only works when every city contributes the same number of
# columns and it turns the names into numpy strings.
city_drop_columns = [
    column
    for code in city_drop_list
    for column in df_date.columns
    if code in column
]

# 資料預處理 - 颱風

In [None]:
# 計算兩個日期間隔多少天
def daysBetweenDate(startdate: str, enddate: str) -> int:
    """Return the number of calendar days from startdate to enddate, both ends included.

    Both arguments are 'YYYY-MM-DD' strings. The same date twice gives 1;
    an enddate earlier than startdate gives zero or a negative number.
    """
    date_format = "%Y-%m-%d"
    first_day = datetime.strptime(startdate, date_format)
    last_day = datetime.strptime(enddate, date_format)
    # +1 because the end date itself counts as a day
    return (last_day - first_day).days + 1

# 日期調整
def dateShift(startdate: str, shiftday: int) -> str:
    """Return the 'YYYY-MM-DD' date that lies shiftday days after startdate.

    A negative shiftday moves backwards in time; 0 returns the same date.
    """
    date_format = "%Y-%m-%d"
    base_day = datetime.strptime(startdate, date_format)
    return (base_day + timedelta(days=shiftday)).strftime(date_format)

# Load the typhoon database.
df_typhoon = pd.read_csv('typhoon.csv', encoding='utf-8')

# Split the 'Warning' text into the four columns 'startdate', 'starttime', 'enddate', 'endtime'.
df_typhoon[['startdate','starttime','enddate','endtime']] = df_typhoon['Warning'].str.split().tolist()

# Expand every typhoon into one record per calendar day of its warning period,
# with the day stored in a leading 'date' column.
# The records are collected in a list and turned into a DataFrame once:
# DataFrame.append / Series.append no longer exist in pandas 2.x, and appending
# row by row copies the whole frame on every iteration.
start_time = time.time()
typhoon_day_records = []
for _, row in df_typhoon.iterrows():
    days = daysBetweenDate(row['startdate'], row['enddate'])
    typhoon_fields = row.to_dict()
    for day in range(days):
        record = {'date': dateShift(row['startdate'], day)}
        record.update(typhoon_fields)
        typhoon_day_records.append(record)
df_typhoon_new = pd.DataFrame(typhoon_day_records, columns=df_typhoon.columns.insert(0, 'date'))

# Keep one row per date and, for now, only a WarnMark flag (1 = a warning was in force).
df_typhoon_wm = df_typhoon_new[['date']].drop_duplicates().reset_index(drop=True)
df_typhoon_wm['WarnMark'] = 1

# From here on df_typhoon is the per-date flag table indexed by date.
df_typhoon = df_typhoon_wm.set_index('date')

print(df_typhoon.head(3))


# 資料預處理 - 市場
1. 選定 台北一 市場
2. 補空值

In [None]:
# Market columns to remove from the price dataset.
# Full list of market columns: 'Date', 'Market', 'Product', 'Up_price', 'Mid_price', 'Low_price', 'Avg_price', 'Volume', 'Month', 'Week_day', 'Year', 'Rest_day'
# 'Month' and 'Week_day' are kept because they are one-hot encoded later.
market_drop_columns = [
              'Product',
              # 'Month', 
              # 'Week_day', 
              'Year', 
              'Rest_day'
]

In [None]:
# Load the price dataset of the selected crop (file name = dataset_path + Chinese crop name + '.csv')
# and remove the columns listed in market_drop_columns.
# if not os.path.exists('banana.csv'): urllib.request.urlretrieve(price_data_url, 'banana.csv')
# csv_name = crop_dict[crop_no][0] + '.csv'
# df = pd.read_csv(csv_name, encoding='utf-8')
crop_csv_path = dataset_path + crop_dict[crop_no][1] + '.csv'
df = pd.read_csv(crop_csv_path, encoding='utf-8').drop(columns=market_drop_columns)

# Quick look at both ends of the table.
for preview in (df.head(3), df.tail(3)):
    print(preview)

In [None]:
# 將休市價格填入前後日之平均價格(暫不使用)
def fillna_fb_mean(self):
    """Fill gaps with the mean of the previous and the next known value (currently unused).

    Args:
        self: a pandas Series or DataFrame (the argument is named ``self`` but
            this is a plain function, not a method).

    Returns:
        A new object of the same shape: (forward-filled + backward-filled) / 2.
        Values that were present are unchanged. A gap at the very start or end
        has no value on one side and therefore stays NaN.
    """
    # .ffill()/.bfill() replace fillna(method=...), which pandas has deprecated.
    df_f = self.ffill()
    df_b = self.bfill()
    return (df_f + df_b) / 2

# price_na_del = False
# Keep only the rows of the selected market. This is done first so that the
# optional forward-fill below can only copy values from earlier rows of the
# same market, never from a different market.
df = df[df.Market == market_dict[market_no]]

# When price gaps are not simply dropped, fill a closed-market day with the
# previous row's values.
if not price_na_del:
    df = df.ffill()

# Drop whatever rows still contain missing values and renumber the rows from 0.
df = df.dropna()
df_crop = df.reset_index(drop=True)
print(df_crop)

In [None]:
# One-hot encode 'Month' and 'Week_day' and replace the two columns with the encoded ones.
# data_dum = pd.get_dummies(data)
# pd.DataFrame(data_dum)
from sklearn.preprocessing import OneHotEncoder


def _category_label(value):
    """Return a category value as a column-name suffix, writing whole floats such as 3.0 as '3'."""
    if isinstance(value, (float, np.floating)) and float(value).is_integer():
        return str(int(value))
    return str(value)


# Columns are named after the categories the encoder actually found
# (encoder.categories_) rather than "position + 1", so a month or weekday
# that never occurs in the data cannot shift the labels of the others.
onehotencoder = OneHotEncoder()
df_crop_month_ohe = onehotencoder.fit_transform(df_crop[["Month"]]).toarray()
month = pd.DataFrame(
    df_crop_month_ohe,
    columns=["Month_" + _category_label(c) for c in onehotencoder.categories_[0]],
)

onehotencoder = OneHotEncoder()
df_crop_week_ohe = onehotencoder.fit_transform(df_crop[["Week_day"]]).toarray()
week = pd.DataFrame(
    df_crop_week_ohe,
    columns=["Week_day_" + _category_label(c) for c in onehotencoder.categories_[0]],
)

# The encoded frames are aligned with df_crop by row position (default integer index on both sides).
df_crop = df_crop.join(month, how="left")
df_crop = df_crop.join(week, how="left")
df_crop = df_crop.drop(['Month', 'Week_day'], axis=1).rename(columns={'Date': 'date'}).set_index('date')

In [None]:
# Display the market table after one-hot encoding (indexed by date).
df_crop

# 資料預處理 - 氣象(天氣與颱風)
1. 決定 市場 是否將合併 天氣與颱風
2. 選出 與模型無關者排除
3. 確認無空值


df_all 資料合併之參數

In [None]:
# Start from a copy: 'Avg_price' is popped below, and without the copy that
# would also remove the column from df_crop whenever no merge is performed.
df_all = df_crop.copy()
# Optionally merge the weather data (only dates present in both tables are kept).
if add_weather_data:
    df_all = pd.merge(df_all, df_weather, how='inner', left_index=True, right_index=True)
# Optionally merge the typhoon flag. Every missing value of the merged frame is
# then set to 0, so dates without a typhoon warning get WarnMark = 0.
if add_typhoon_data:
    df_all = pd.merge(df_all, df_typhoon, how='left', left_index=True, right_index=True).fillna(0)

# Move the average price to the last column; buildY and the y-scaler read column -1.
col_Avg_price = df_all.pop('Avg_price')
df_all = pd.concat([df_all, col_Avg_price], axis=1)
print(df_all.head(3))
print(df_all.tail(3))

Dataset 空值數量確認

In [None]:
# 確認無空值
def dataset_na_check():
    """Print the per-column missing-value counts of df_crop and df_weather.

    Output, in order: the df_crop counts, a separator, the df_weather counts,
    a separator, and the number of weather columns that contain at least one
    missing value.
    """
    market_tp1_na_count = df_crop.isna().sum()
    weather_na_count = df_weather.isna().sum()

    # [column name, count] for every weather column that has missing values
    total_na_count = [
        [column, missing]
        for column, missing in zip(weather_na_count.index, weather_na_count.values)
        if missing != 0
    ]

    separator = '--------------------'
    print(market_tp1_na_count)
    print(separator)
    print(weather_na_count)
    print(separator)
    print(len(total_na_count))
dataset_na_check()

資料Batch化函式 \
1. buildX
2. buildY

# 模型輸入值建立

In [None]:
# 將資料整理為x
def buildX(train, pastDay=30, futureDay=5):
    """Cut the data into overlapping input windows of pastDay consecutive rows.

    Window k holds rows k .. k+pastDay-1. There are
    ``train.shape[0] - futureDay - pastDay`` windows, so that each one has a
    matching target in buildY. Returns the windows stacked into one array
    (an empty array when the data is too short for a single window).
    """
    window_count = train.shape[0] - futureDay - pastDay
    return np.array([train[start : start + pastDay] for start in range(window_count)])

# 將資料整理為y
def buildY(test, pastDay=30, futureDay=5):
    """Collect the target value that belongs to every buildX window.

    The target of the window starting at row k is the last column of row
    ``k + pastDay + futureDay``, kept as a length-1 slice, so a 2-D input
    gives an array with one single-element row per window.
    """
    first_target = pastDay + futureDay
    # targets run from row pastDay+futureDay up to the last row of the data
    return np.array([test[row : row + 1, -1] for row in range(first_target, test.shape[0])])

In [None]:
# pdy = 30
# fdy = 5
# x = []
# y = []
# for i in range(40 - fdy - pdy):
#     x.append(i+pdy-1)
# for i in range(40 - fdy - pdy):
#     y.append(i + pdy + fdy)
# x, y

依照訓練、測試的期間來切分資料 \
df_train 與 df_test

In [None]:
# Work on a copy of df_all, with the index turned back into a regular column
# (named 'date'; an unnamed index would come out as 'index' and is renamed).
df = df_all.copy().reset_index().rename(columns={'index': 'date'})
df

In [None]:

# Split the data by the configured training and test periods (both ends inclusive).
in_train_period = (df.date >= train_start_date) & (df.date <= train_end_date)
in_test_period = (df.date >= test_start_date) & (df.date <= test_end_date)

# Index each part by date and keep only the non-object (numeric) columns.
df_train = df.loc[in_train_period].set_index('date').select_dtypes(exclude=['object'])
df_test = df.loc[in_test_period].set_index('date').select_dtypes(exclude=['object'])

print(df_train.shape)
print(df_test.shape)
print(df_train.dtypes)

# print(df_train.isna().sum().sum())
# print(df_test.isna().sum().sum())

x_train, x_test 為  MinMaxScaler後之資料 \
y_train, y_test 為  MinMaxScaler後之價格資料


In [None]:
# Scratch check: rebuilding the test rows one by one gives the same array as df_test.values.
gg = df_test.values
X = np.array([gg[row] for row in range(len(df_test))])
X, df_test.values

In [None]:
# Training data: fit the feature scaler on the training period and cut it into windows.
df_train_scaled = df_train.values
xx_scale = MinMaxScaler()
x_train = buildX(xx_scale.fit_transform(df_train_scaled), pastDay, futureDay)
print('x_train.shape', x_train.shape)

# Separate scaler for the price (last column), so predictions can be converted back to prices.
yy_scale = MinMaxScaler()
y_train_fitted_data = yy_scale.fit_transform(df_train_scaled[:, -1].reshape(-1, 1))
y_train = buildY(y_train_fitted_data, pastDay, futureDay)
print('y_train.shape: ', y_train.shape)

# Test data: reuse the scalers fitted on the training period (transform, not
# fit_transform). Re-fitting here would scale the test set differently from
# what the model was trained on, and would leave xx_scale / yy_scale — the
# objects that get saved for later use — fitted on the test period.
df_test_scaled = df_test.values
x_test = buildX(xx_scale.transform(df_test_scaled), pastDay, futureDay)
print('x_test.shape: ', x_test.shape)

y_test = buildY(yy_scale.transform(df_test_scaled[:, -1].reshape(-1, 1)), pastDay, futureDay)
print('y_test.shape: ', y_test.shape)

# 模型選擇

In [None]:
# 模型1 (純雙層LSTM)
def buildManyToOneModel01(shape):
    """Build, compile and summarise model 1: two stacked LSTM layers with dropout.

    Args:
        shape: shape of the training input; shape[1] and shape[2] are used as
            the LSTM input shape.

    Returns:
        The compiled Sequential model (MSE loss, adam optimizer) ending in a
        single sigmoid unit. Layer sizes come from LSTM_unit_1 and LSTM_unit_2.
    """
    input_shape = (shape[1], shape[2])
    model = Sequential([
        LSTM(units=LSTM_unit_1, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        LSTM(units=LSTM_unit_2, return_sequences=False),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='mse', optimizer='adam')
    model.summary()
    return model

In [None]:
# 模型2 (LSTM 搭配Biderectional)
def buildManyToOneModel02(shape):
    """Build, compile and summarise model 2: an LSTM followed by a Bidirectional LSTM.

    Args:
        shape: shape of the training input; shape[1] and shape[2] are used as
            the LSTM input shape and for the explicit model.build call.

    Returns:
        The compiled Sequential model (MSE loss, adam optimizer) ending in a
        single sigmoid unit. Layer sizes come from LSTM_unit_1 and LSTM_unit_2.
    """
    input_shape = (shape[1], shape[2])
    # second recurrent layer: LSTM configured with go_backwards=True, wrapped in Bidirectional
    wrapped_lstm = LSTM(units=LSTM_unit_2,
                        return_sequences=False,
                        input_shape=input_shape,
                        go_backwards=True)
    model = Sequential([
        LSTM(units=LSTM_unit_1, return_sequences=True, input_shape=input_shape),
        Dropout(0.2),
        Bidirectional(wrapped_lstm),
        Dropout(0.2),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss='mse', optimizer='adam')
    model.build((None,) + input_shape)
    model.summary()
    return model

In [None]:
# 模型3
def buildManyToOneModel03(shape):
    """Build, compile and summarise model 3: one LSTM layer feeding one sigmoid unit.

    Args:
        shape: shape of the training input; shape[1] and shape[2] are used as
            the LSTM input shape.

    Returns:
        The compiled Sequential model (MSE loss, adam optimizer, MSE metric).
        The LSTM size comes from LSTM_unit_1.
    """
    input_shape = (shape[1], shape[2])
    model = Sequential([
        LSTM(LSTM_unit_1, return_sequences=False, input_shape=input_shape),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(loss="mse", optimizer="adam", metrics=["mse"])
    model.summary()
    return model



# 模型訓練

In [None]:
# Model dictionary: model number -> [builder function, description, whether days are shifted, whether only one day is output]
# NOTE(review): entry 5 is an empty placeholder with only two items and no builder function.
model_dict = {
    1 : [buildManyToOneModel01, 'LSTM many to one', True, True],
    2 : [buildManyToOneModel02, 'LSTM & Biderectional many to one', True, True],
    3 : [buildManyToOneModel03, 'One LSTM to one', True, True],
    # 4 : [buildManyToOneModel4, 'LSTM many to one', True, True],
    5 : ['', ''],
}

In [None]:
# Build the model selected by model_no.
_model_builders = {
    1: buildManyToOneModel01,
    2: buildManyToOneModel02,
    3: buildManyToOneModel03,
}
# Stop right here on an invalid selection; otherwise `model` and `model_name`
# would be left undefined and a later cell would fail with a NameError.
if model_no not in _model_builders:
    raise ValueError(f'選錯model了: model_no={model_no!r}, expected one of {sorted(_model_builders)}')

model = _model_builders[model_no](x_train.shape)
model_name = _model_builders[model_no].__name__
if model_no == 3 and repeat_train:
    # Five copies of the architecture for the repeated-training run.
    # clone_model returns uncompiled models; they are compiled where they are trained.
    model_repeat1, model_repeat2, model_repeat3, model_repeat4, model_repeat5 = (
        clone_model(model) for _ in range(5)
    )
print(f'model{model_no}')

# Stop training once val_loss has not improved for `patience` epochs.
callback = EarlyStopping(monitor="val_loss", patience=patience, verbose=1, mode="auto")


# 模型相關函式建立

In [None]:
def model_load(h5_name):
    """Load and return the Keras model stored as ``<save_google_dir>h5/<h5_name>``."""
    h5_path = f'{save_google_dir}h5/{h5_name}'
    loaded_model = load_model(h5_path)
    print("MODEL-LOADED")
    return loaded_model
# def model_fit():
#     model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=validation_split, callbacks=[callback])
#     return model


In [None]:
# scaler 存檔
def scaler_save():
    """Dump the feature scaler (xx_scale) and the price scaler (yy_scale) with joblib.

    Both files go to ``<save_google_dir>scaler/`` and share the stem
    ``<time_now>_<crop_no>-<crop name>_D<futureDay>_M23_``, followed by
    ``X_scaler.model`` or ``Y_scaler.model``.
    """
    file_stem = save_google_dir + 'scaler/' + f'{time_now}_{crop_no}-{crop_dict[crop_no][0]}_D{futureDay}_M23_'
    for scaler, file_tail in ((xx_scale, 'X_scaler.model'), (yy_scale, 'Y_scaler.model')):
        joblib.dump(scaler, file_stem + file_tail)
# scaler模型獲取
def scaler_load(scaler_time):
    """Load the feature and price scalers written by scaler_save.

    Args:
        scaler_time: the time tag (the ``time_now`` value) under which the
            scalers were saved. scaler_save puts this tag at the front of the
            file name, so it is required to find those files. A falsy value
            selects the un-tagged file name.

    Returns:
        (xx_scale, yy_scale) as loaded by joblib.

    For backward compatibility, when the tagged file does not exist but an
    un-tagged one does, the un-tagged file is loaded instead.
    """
    scaler_dir = save_google_dir + 'scaler/'
    base_name = f'{crop_no}-{crop_dict[crop_no][0]}_D{futureDay}_M23_'

    def _scaler_path(file_tail):
        """Return the path of one scaler file, preferring the time-tagged name."""
        untagged = scaler_dir + base_name + file_tail
        if not scaler_time:
            return untagged
        tagged = scaler_dir + f'{scaler_time}_' + base_name + file_tail
        if os.path.exists(tagged) or not os.path.exists(untagged):
            return tagged
        return untagged

    xx_scale = joblib.load(_scaler_path('X_scaler.model'))
    yy_scale = joblib.load(_scaler_path('Y_scaler.model'))
    return xx_scale, yy_scale


In [None]:
def model_save(model):
    """Save the model as an .h5 file.

    A copy named ``<time_now>.h5`` is always written to the working directory.
    When save_google is set, a second copy named
    ``<time_now>_<crop_no>-<crop name>_D<futureDay>_M23.h5`` is written to
    ``<save_google_dir>h5/``.
    """
    local_path = (time_now) + '.h5'
    model.save(local_path)
    if save_google:
        drive_path = save_google_dir + 'h5/' + f'{time_now}_{crop_no}-{crop_dict[crop_no][0]}_D{futureDay}_M23.h5'
        model.save(drive_path)
    print('MODEL-SAVED')


fit()用於訓練具有給定輸入的模型。

predict()用於實際預測。它爲輸入樣本生成輸出預測。

evaluate()用於評估已經過訓練的模型。返回模型的損失值&指標值。

In [None]:
def model_cal(model):
    """Evaluate the model on (x_test, y_test), print the score, and return its predictions for x_test."""
    print('Score: {}'.format(model.evaluate(x_test, y_test)))
    predictions = model.predict(x_test)
    print('y_pre.shape:', predictions.shape, 'y_test.shape:', y_test.shape)
    return predictions

# 模型產出、處理、作圖與儲存

預測結果與實際結果的 數值迴轉 \
y_pre  --> pre_price \
y_test --> test_price

In [None]:
def price_inverse(y_pre):
    """Convert scaled predictions and the scaled test targets back with yy_scale.

    Returns:
        (pre_price, test_price, diff_price): the inverse-transformed
        predictions, the inverse-transformed y_test, and prediction minus target.
    """
    predicted, actual = (yy_scale.inverse_transform(scaled) for scaled in (y_pre, y_test))
    return predicted, actual, predicted - actual

In [None]:
# 畫圖
def DrawingPlot(pre_price, test_price, pic_days):
    """Plot real against predicted prices for the last pic_days points.

    Args:
        pre_price: predicted prices.
        test_price: real prices.
        pic_days: how many points, counted from the end, to draw.

    When save_google is set, the figure is also written to
    ``<save_google_dir>img/``. The file name contains the module-level RMSE,
    so RMSE has to be assigned before this function is called with saving on.
    """
    plt.figure(figsize=(15,5))
    plt.plot(test_price[-pic_days:], label='Real Price')
    plt.plot(pre_price[-pic_days:], label='Predict Price')
    plt.xlabel('day')
    plt.ylabel('price')
    plt.title(crop_dict[crop_no][0] + ' predict D' + str(futureDay) + ' price')
    plt.legend()
    if save_google:
        plt.savefig(save_google_dir + 'img/' + f'{crop_dict[crop_no][0]}_{time_now}_P{pastDay}F{futureDay}_RMSE={int(round(RMSE, 0))}' + '.png')
        # report the save only when a file was actually written
        print('pic_saved')
    plt.show()
# f'{crop_dict[crop_no][0]}_{time_now}_P{pastDay}F{futureDay}_RMSE={int(round(RMSE, 0))}.h5'
# f'{crop_dict[crop_no][0]}_{time_now}_P{pastDay}F{futureDay}_RMSE={int(round(RMSE, 0))}'

In [None]:
# 計分
def score_cal(pre_price):
    """Score predicted prices against the module-level test_price.

    Prints and returns (MSE, RMSE, MAE, R2); both inputs are reshaped to a
    single column before scoring.
    """
    actual = test_price.reshape(-1, 1)
    predicted = pre_price.reshape(-1, 1)
    mse = mean_squared_error(actual, predicted)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(actual, predicted)
    r2 = r2_score(actual, predicted)
    print(f"MSE value : {mse}", f"\nRMSE value : {rmse}", f"\nMAE value : {mae}", f"\nR2 score value : {r2}")
    return mse, rmse, mae, r2

儲存每一次試驗資訊

In [None]:
# Result log: header row of the results CSV. The order has to match the
# values that saveResultOne writes for each run.
result_column_lists = ['time_now', 'crop_name', 'market_name', 'add_weather_data', 'add_typhoon_data', 'train_start_date', 
              'x_train.shape', 'x_test.shape', 
              'model_no', 'model_name', 'pastday', 'futureDay', 'batch_size', 'epochs', 'validation_split', 'patience', 
              'predDay', 'MSE', 'RMSE', 'MAE', 'R2', 'weather_drop_columns', 'city_drop_list', 'market_drop_columns', 'dev_notes', 'LSTM_unit_1', 'LSTM_unit_2', 'repeat_train', 
              'ohe', 'price_na_del']

# def saveResult(path, preDay, real_value, pred_value):
#   with open(path, 'a', newline='', encoding='utf-8') as f:
#     result_writer = csv.writer(f)
#     if f.tell()==0: result_writer.writerow(result_column_lists)
#     for i in range(preDay):
#       # i + 1 = 未來1日的價格
#       Dday = i + 1
#       # real_price = real_value[Dday:]
#       # pred_price = pred_value[:-Dday,i]

#       # MSE = mean_squared_error(real_price, pred_price)
#       # RMSE = np.sqrt(MSE)
#       # R2 = r2_score(real_price, pred_price)
      
#       result_lists = [datetime_now, crop_dict[crop_no][1], market_dict[market_no], add_weather_data, add_typhoon_data, train_start_date, model_no, model_dict[model_no][1], pastDay, futureDay, batch_size, epochs, validation_split, Dday, MSE, RMSE, MAE, R2]
#       result_writer.writerow(result_lists)

def saveResultOne(path, preDay):
    """Append one row describing the current experiment to the CSV at ``path``.

    The header row (``result_column_lists``) is written first when the file
    is empty. Every value except ``preDay`` is read from notebook-level
    globals (settings, data shapes, and the MSE/RMSE/MAE/R2 scores).

    Args:
        path: CSV file to append to; created if it does not exist.
        preDay: value stored in the 'predDay' column.
    """
    with open(path, 'a', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)

        # Position 0 in append mode means the file is new or empty.
        if csv_file.tell() == 0:
            writer.writerow(result_column_lists)

        # Same order as result_column_lists.
        row = [
            time_now,
            crop_dict[crop_no][1],
            market_dict[market_no],
            add_weather_data,
            add_typhoon_data,
            train_start_date,
            x_train.shape,
            x_test.shape,
            model_no,
            model_dict[model_no][1],
            pastDay,
            futureDay,
            batch_size,
            epochs,
            validation_split,
            patience,
            preDay,
            MSE,
            RMSE,
            MAE,
            R2,
            weather_drop_columns,
            city_drop_list,
            market_drop_columns,
            dev_notes,
            LSTM_unit_1,
            LSTM_unit_2,
            f'repeat={repeat_train}',
            ohe,
            price_na_del,
        ]
        writer.writerow(row)


# 執行

In [None]:
# Train the model(s).
# With model 3 and repeat_train enabled, each of the five model_repeat* models
# (built elsewhere in the notebook) is compiled and fitted in turn on the same
# data with the same settings. Otherwise the single `model` is fitted.
if model_no == 3 and repeat_train == True:
    repeat_histories = []
    for repeat_model in (model_repeat1, model_repeat2, model_repeat3,
                         model_repeat4, model_repeat5):
        repeat_model.compile(loss="mse", optimizer="adam", metrics=["mse"])
        repeat_histories.append(
            repeat_model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                             validation_split=validation_split, callbacks=[callback])
        )
    # Keep the numbered names; the loss-plot cell reads train_history1.
    (train_history1, train_history2, train_history3,
     train_history4, train_history5) = repeat_histories
else:
    train_history1 = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
                               validation_split=validation_split, callbacks=[callback])


In [None]:
# # 模型3-2
# model_repeat2 = Sequential()
# model_repeat2.add(LSTM(LSTM_unit_1, return_sequences=False, input_shape=(x_train.shape[1], x_train.shape[2])))
# model_repeat2.add(Dense(1, activation='sigmoid'))
# model_repeat2.compile(loss="mse", optimizer="adam")
# model_repeat2.summary()
# train_history2 =  model_repeat2.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=validation_split, callbacks=[callback])


In [None]:
# # 模型3-3
# model_repeat3 = Sequential()
# model_repeat3.add(LSTM(LSTM_unit_1, return_sequences=False, input_shape=(x_train.shape[1], x_train.shape[2])))
# model_repeat3.add(Dense(1, activation='sigmoid'))
# model_repeat3.compile(loss="mse", optimizer="adam")
# model_repeat3.summary()
# train_history3 =  model_repeat3.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=validation_split, callbacks=[callback])


In [None]:
# # 模型3-4
# model_repeat4 = Sequential()
# model_repeat4.add(LSTM(LSTM_unit_1, return_sequences=False, input_shape=(x_train.shape[1], x_train.shape[2])))
# model_repeat4.add(Dense(1, activation='sigmoid'))
# model_repeat4.compile(loss="mse", optimizer="adam")
# model_repeat4.summary()
# train_history4 =  model_repeat4.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=validation_split, callbacks=[callback])


In [None]:
# # 模型3-5
# model_repeat5 = Sequential()
# model_repeat5.add(LSTM(LSTM_unit_1, return_sequences=False, input_shape=(x_train.shape[1], x_train.shape[2])))
# model_repeat5.add(Dense(1, activation='sigmoid'))
# model_repeat5.compile(loss="mse", optimizer="adam")
# model_repeat5.summary()
# train_history5 =  model_repeat5.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, validation_split=validation_split, callbacks=[callback])


In [None]:
# Run the model(s) to predict prices.
if model_no == 3 and repeat_train == True:
    # One prediction per repeat model, in model order.
    y_pre1, y_pre2, y_pre3, y_pre4, y_pre5 = [
        model_cal(repeat_model)
        for repeat_model in (model_repeat1, model_repeat2, model_repeat3,
                             model_repeat4, model_repeat5)
    ]
else:
    y_pre = model_cal(model)
  
# MSE, RMSE, MAE, R2 = score_cal()

In [None]:
# Compute scores and pick the final model.
if model_no == 3 and repeat_train == True:
    # Convert each repeat model's prediction with price_inverse.
    # test_price is reassigned by every call; score_cal reads it as a global.
    pre_price1, test_price, diff_price1 = price_inverse(y_pre1)
    pre_price2, test_price, diff_price2 = price_inverse(y_pre2)
    pre_price3, test_price, diff_price3 = price_inverse(y_pre3)
    pre_price4, test_price, diff_price4 = price_inverse(y_pre4)
    pre_price5, test_price, diff_price5 = price_inverse(y_pre5)

    # Score each model once; score_cal prints the metrics under each banner.
    print('*'*10 + '1' +'*'*10)
    MSE_1, RMSE_1, MAE_1, R2_1 = score_cal(pre_price1)
    print('*'*10 + '2' +'*'*10)
    MSE_2, RMSE_2, MAE_2, R2_2 = score_cal(pre_price2)
    print('*'*10 + '3' +'*'*10)
    MSE_3, RMSE_3, MAE_3, R2_3 = score_cal(pre_price3)
    print('*'*10 + '4' +'*'*10)
    MSE_4, RMSE_4, MAE_4, R2_4 = score_cal(pre_price4)
    print('*'*10 + '5' +'*'*10)
    MSE_5, RMSE_5, MAE_5, R2_5 = score_cal(pre_price5)
    pre_price_list = [pre_price1, pre_price2, pre_price3, pre_price4, pre_price5]

    # Ensemble (mean of the five predictions) is currently disabled:
    # ensemble_pre_price = (pre_price1 + pre_price2 + pre_price3 + pre_price4 + pre_price5)/5
    # MSE_esb, RMSE_esb, MAE_esb, R2_esb = score_cal(ensemble_pre_price)

    # Map each RMSE to [score tuple, model, 1-based model index] and take the
    # entry with the smallest RMSE as the final model.
    # The score tuples computed above are reused here; calling score_cal again
    # would recompute every metric and print each score block a second time.
    # RMSE_min = min([RMSE_1, RMSE_2, RMSE_3, RMSE_4, RMSE_5, RMSE_esb])
    RMSE_min = min([RMSE_1, RMSE_2, RMSE_3, RMSE_4, RMSE_5])
    RMSE_dict = {
        RMSE_1: [(MSE_1, RMSE_1, MAE_1, R2_1), model_repeat1, 1],
        RMSE_2: [(MSE_2, RMSE_2, MAE_2, R2_2), model_repeat2, 2],
        RMSE_3: [(MSE_3, RMSE_3, MAE_3, R2_3), model_repeat3, 3],
        RMSE_4: [(MSE_4, RMSE_4, MAE_4, R2_4), model_repeat4, 4],
        RMSE_5: [(MSE_5, RMSE_5, MAE_5, R2_5), model_repeat5, 5],
        # RMSE_esb:[score_cal(ensemble_pre_price), [model_repeat1,
        #                        model_repeat2,
        #                        model_repeat3,
        #                        model_repeat4,
        #                        model_repeat5], 6],
    }

    # Final MSE, RMSE, MAE, R2 and pre_price come from the best of models 1-5.
    MSE, RMSE, MAE, R2 = RMSE_dict[RMSE_min][0]
    pre_price = pre_price_list[RMSE_dict[RMSE_min][2]-1]
else:
    # Single model: convert the prediction and score it.
    pre_price, test_price, diff_price = price_inverse(y_pre)
    MSE, RMSE, MAE, R2 = score_cal(pre_price)

print(MSE, RMSE, MAE, R2)

# 新增區段

In [None]:
# h5_list = ['banana_prediction_model_v2_s2_0728-13:15.h5']
# for h5_name in h5_list: 
#     model_load(h5_name) 

# y_pre = model_cal()
# pre_price, test_price, diff_price = price_inverse()
# MSE, RMSE, MAE, R2 = score_cal()
# clear_session()

In [None]:
# Summary statistics of the average price, and the MAE relative to them.
asd = list(df['Avg_price'].values)
quartiles = np.percentile(asd, [25, 50, 75])
print(np.mean(asd))
print(np.median(asd))
print(quartiles)
print(MAE/np.mean(asd))
# This ratio used to be a bare mid-cell expression; Jupyter only echoes the
# last expression of a cell, so its value was computed and never shown.
print(MAE/quartiles)
# Sample standard deviation (ddof=1), shown as the cell output.
np.std(asd, ddof=1)

In [None]:
# Plot the training and validation loss of the first model.
plt.title('Loss Graph')
for history_key, line_fmt, legend_label in (
    ('loss', 'blue', 'loss'),
    ('val_loss', 'red', 'Validation loss'),
):
    plt.plot(train_history1.history[history_key], line_fmt, label=legend_label)
plt.legend(loc="upper left")

In [None]:
# Save the result row, the chosen model, and the scaler.
# model_dict[model_no][3] is read as a flag; presumably it marks single-output
# models (TODO confirm against model_dict).
is_output_one = model_dict[model_no][3]
if is_output_one:
    saveResultOne('result.csv', futureDay)
    if save_google: saveResultOne(save_google_dir + 'result.csv', futureDay)
# else:
#     saveResult('result.csv', futureDay, rp, pp)
#     if save_google: saveResult(save_google_dir + 'result.csv', futureDay, rp, pp)

# RMSE_dict / RMSE_min are only built when model 3 runs with repeat_train
# enabled, so use the same condition as the training and scoring cells.
# Testing model_no alone raised NameError (or reused stale values from an
# earlier run) when repeat_train was off.
if model_no == 3 and repeat_train == True:
    model_save(RMSE_dict[RMSE_min][1])
else:
    model_save(model)
scaler_save()

In [None]:
# Draw the final predicted prices against the test prices.
# NOTE(review): DrawingPlot is defined earlier in the notebook; pic_days
# presumably limits how many days are drawn — confirm against its definition.
DrawingPlot(pre_price, test_price, pic_days)


Reference: <https://machinelearningmastery.com/reshape-input-data-long-short-term-memory-networks-keras/>

The input to every LSTM layer must be three-dimensional.

The three dimensions of this input are:

1. Samples. One sequence is one sample. A batch is comprised of one or more samples.
2. Time Steps. One time step is one point of observation in the sample.
3. Features. One feature is one observation at a time step.