In [27]:
import pandas as pd
import numpy as np
import os
import calendar
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
import optuna
import torch
from optuna.trial import TrialState
from torch.utils.data import Dataset, DataLoader, TensorDataset,Subset
from captum.attr import IntegratedGradients
from captum.attr import visualization as viz
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Make "plotly_white" the default template for Plotly figures created from here on.
pio.templates.default = "plotly_white"
from sklearn.linear_model import LinearRegression
import seaborn as sns
# Reusable Plotly template fragment: global font size 18 and axis-title font
# size 24 for both axes. Spelled out as nested properties rather than
# magic-underscore keys.
_axis_title_style = {"title": {"font": {"size": 24}}}
plot_template = {
    "layout": go.Layout(
        font={"size": 18},
        xaxis=_axis_title_style,
        yaxis=_axis_title_style,
    )
}

from Dataset import SimulatedSequenceDataset
from LSTMRegressor import LSTMRegressor,save_model,load_model
from LSTMRegressor import DEVICE
from LSTMRegressor import train_model
from LSTMRegressor import test_model
from LSTMRegressor import predict
from LSTMRegressor import plot_train_val_loss_cv
from LSTMRegressor import save_best_trial
from LSTMRegressor import load_best_trial
from LSTMRegressor import Objective_CV,Objective
from metrics import NSE,NNSE,RMSE,NRMSE,ZNRMSE,MAE,PPTS,LPTS,MAPE,R2
from sklearn.metrics import r2_score

# Hydrological stations handled by this notebook; the experiment cells loop
# over this list in order.
hydro_stations = ['Tangnaihai', 'Guide', 'Xunhua']

# Per-station lookup tables keyed by the station names above.
hydrostation_abbrs = dict(zip(hydro_stations, ('TNH', 'GD', 'XH')))
# NOTE(review): channel ids are stored as strings; their exact meaning is not
# visible in this notebook section -- confirm against the code that reads them.
hydrostation_channel = dict(zip(hydro_stations, ('3', '14', '9')))

# Meteorological stations associated with each hydrological station.
# Each station's list extends the previous one, so the lists are built
# incrementally (`+` creates a new list each time; nothing is shared).
hydrostation_metestations = {
    'Tangnaihai': ['玛多', '达日', '久治', '红原', '若尔盖', '玛曲', '玛沁', '河南', '兴海'],
}
hydrostation_metestations['Guide'] = (
    hydrostation_metestations['Tangnaihai'] + ['贵南', '共和', '贵德']
)
hydrostation_metestations['Xunhua'] = (
    hydrostation_metestations['Guide'] + ['同仁']
)

In [28]:
# ---- Experiment configuration ------------------------------------------------
# Columns kept from each sample CSV. `target` is the column to predict; every
# other entry is used as a model input.
selected_features = ['snomlt(mm)', 'eplant(mm)', 'surq_cha(mm)',
                     'snofall(mm)', 'wet_oflo(mm)', 'wet_evap(mm)',
                     'snopack(mm)', 'sw_change(mm)', 'MIN-TEM(C)', 'SWATPlusSimFlow', 'flow(m^3/s)']
target = 'flow(m^3/s)'
features = [col for col in selected_features if col != target]
start_year = 1972                    # only used to label output files and log lines
pred_years = np.arange(2015, 2020)   # 2015..2019; one model directory per year
RETRAIN = True                       # NOTE(review): never read below -- an existing model.pickle is always reused
name = 'InputOutputSamples_hydrosimyr_swatpsim'
batch_size = 32
ystar_col = "pred"
# Passed to SimulatedSequenceDataset; the first `sequence_length` rows of each
# split are skipped when aligning predictions with dates.
# (presumably a 12-month look-back on monthly data -- TODO confirm)
sequence_length = 12

# ---- Per-station, per-year workflow -------------------------------------------
# For every station and prediction year: scale the splits, tune/train an LSTM
# with Optuna if no saved model exists, report the calibration+test fit, then
# predict the target year. Yearly predictions are merged per station, scored,
# saved and plotted.
for hydro_station in hydro_stations:
    sample_path = f'../samples/SequenceSamples/{hydro_station}/{name}/'
    result_path = f'../result/LSTMRegressorData/{hydro_station}/{name}/'
    os.makedirs(sample_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)

    yearly_preds = []  # one de-normalised prediction frame per pred_year
    for pred_year in pred_years:
        model_path = f'../model/12MonthlyFlowProcessForecast_{hydro_station}/LSTMRegressorModel/{name}/pred{pred_year}/'
        os.makedirs(model_path, exist_ok=True)
        model_file = model_path + 'model.pickle'
        sim_file = result_path + f'cal_test_sim_{start_year}_{pred_year-1}.csv'

        df = pd.read_csv(
            f'../samples/{name}/{hydro_station}_MeteAVGCalvalFeatureDataForML_PRED{pred_year}.csv',
            index_col=['date'], parse_dates=['date']
        )
        df = df[selected_features]

        # Date-based splits. `test` and `pre` each start one year before the
        # period they are scored on; those leading rows are skipped when the
        # predictions are aligned below (index[sequence_length:]).
        raw_splits = {
            'cal': df.loc[:f'{pred_year-6}-12-31', :],
            'test': df.loc[f'{pred_year-6}-01-01':f'{pred_year-1}-12-31', :],
            'pre': df.loc[f'{pred_year-1}-01-01':f'{pred_year}-12-31', :],
        }

        # Scalers are fitted on the calibration split only and then applied to
        # all three splits.
        X_scaler = MinMaxScaler(feature_range=(0,1))
        Y_scaler = MinMaxScaler(feature_range=(0,1))
        X_scaler.fit(raw_splits['cal'][features])
        Y_scaler.fit(raw_splits['cal'][[target]])

        scaled_splits = {}
        for split_name, split in raw_splits.items():
            scaled = pd.DataFrame(
                X_scaler.transform(split[features]), columns=features, index=split.index
            )
            scaled[target] = Y_scaler.transform(split[[target]]).ravel()
            scaled_splits[split_name] = scaled
        cal, test, pre = scaled_splits['cal'], scaled_splits['test'], scaled_splits['pre']

        cal.to_csv(sample_path+f'cal_{start_year}_{pred_year-6}.csv')
        test.to_csv(sample_path+f'test_{pred_year-5}_{pred_year-1}.csv')
        pre.to_csv(sample_path+f'pre_{pred_year}.csv')

        cal_dataset = SimulatedSequenceDataset(
            dataframe=cal.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        test_dataset = SimulatedSequenceDataset(
            dataframe=test.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )

        # ---- Hyper-parameter search / training (only when no model is saved) ----
        model_exists = os.path.exists(model_file)
        if not model_exists:
            objective = Objective(
                train_dataset=cal_dataset,
                val_dataset=test_dataset,
                num_epoch=1000,
                batch_size=batch_size,
                shuffle=True,
                model_path='./scheme/LSTMRegressor/',
            )
            study = optuna.create_study(
                study_name='example-study',
                direction='minimize',
            )
            study.optimize(objective, n_trials=100)

            pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
            complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

            print("Study statistics: ")
            print("  Number of finished trials: ", len(study.trials))
            print("  Number of pruned trials: ", len(pruned_trials))
            print("  Number of complete trials: ", len(complete_trials))

            print("Best trial:")
            trial = study.best_trial
            save_best_trial(trial, model_path=model_path)
            save_model(trial, model_path=model_path)

            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))

        # Loaded once and reused for both the simulation and the prediction.
        model = load_model(model_file=model_file).to(DEVICE)

        # ---- Calibration + test simulation ----
        # Reuse the cached simulation only when the model was already on disk
        # and the CSV exists; otherwise (fresh training, or CSV missing after an
        # interrupted run) recompute it from the model.
        if model_exists and os.path.exists(sim_file):
            df_out = pd.read_csv(sim_file, index_col=['date'], parse_dates=['date'])
        else:
            cal_loader = DataLoader(cal_dataset, batch_size=64, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

            cal_ = cal.loc[cal.index[sequence_length:], [target]].copy()
            test_ = test.loc[test.index[sequence_length:], [target]].copy()
            cal_[ystar_col] = predict(cal_loader, model).cpu().numpy()
            test_[ystar_col] = predict(test_loader, model).cpu().numpy()

            df_out = pd.concat((cal_, test_))[[target, ystar_col]]
            # Map both the scaled observations and the predictions back to the
            # original target scale.
            for c in df_out.columns:
                df_out[c] = Y_scaler.inverse_transform(df_out[[c]]).ravel()
            df_out = df_out.rename(columns={target: 'obs'})
            df_out.to_csv(sim_file)

        print(f'R2 for sim at {hydro_station}({start_year}-{pred_year-1})',r2_score(df_out['obs'],df_out[ystar_col]))
        if model_exists:
            print(f'model already exists in {model_path}')

        # ---- Prediction for pred_year ----
        # Uses the in-memory scaled frame (already written to pre_{pred_year}.csv above).
        pre_dataset = SimulatedSequenceDataset(
            dataframe=pre.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        pre_loader = DataLoader(pre_dataset, batch_size=batch_size, shuffle=False)
        pre_ = pre.loc[pre.index[sequence_length:], [target]].copy()
        pre_[ystar_col] = predict(pre_loader, model).cpu().numpy()
        for c in pre_.columns:
            pre_[c] = Y_scaler.inverse_transform(pre_[[c]]).ravel()
        yearly_preds.append(pre_)

    # ---- Station-level summary ----
    # Concatenate once instead of growing a DataFrame inside the loop.
    pred_df = pd.concat(yearly_preds, axis=0).rename(columns={target: 'obs'})
    print(f'R2 for pred at {hydro_station}({pred_years[0]}-{pred_years[-1]})',r2_score(pred_df['obs'],pred_df[ystar_col]))
    pred_df.to_csv(result_path+f'vif_lstm_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Observed vs. predicted flow over the prediction years for this station.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df['obs'], mode='lines', name='obs'))
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df[ystar_col], mode='lines', name='pred'))
    fig.update_layout(
        title=f'{hydro_station}: observed vs. predicted flow ({pred_years[0]}-{pred_years[-1]})',
        xaxis_title='date',
        yaxis_title='flow (m^3/s)',
    )
    fig.show()


R2 for sim at Tangnaihai(1972-2014) 0.9742207343031564
model already exists in ../model/12MonthlyFlowProcessForecast_Tangnaihai/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2015/
R2 for sim at Tangnaihai(1972-2015) 0.8422679737404372
model already exists in ../model/12MonthlyFlowProcessForecast_Tangnaihai/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2016/
R2 for sim at Tangnaihai(1972-2016) 0.905596291249159
model already exists in ../model/12MonthlyFlowProcessForecast_Tangnaihai/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2017/
R2 for sim at Tangnaihai(1972-2017) 0.8815012237392648
model already exists in ../model/12MonthlyFlowProcessForecast_Tangnaihai/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2018/
R2 for sim at Tangnaihai(1972-2018) 0.7722030022279798
model already exists in ../model/12MonthlyFlowProcessForecast_Tangnaihai/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2019/
R2 for pred at Tangna

R2 for sim at Guide(1972-2014) 0.9698826027677683
model already exists in ../model/12MonthlyFlowProcessForecast_Guide/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2015/
R2 for sim at Guide(1972-2015) 0.9615940315245716
model already exists in ../model/12MonthlyFlowProcessForecast_Guide/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2016/
R2 for sim at Guide(1972-2016) 0.9727552161495243
model already exists in ../model/12MonthlyFlowProcessForecast_Guide/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2017/
R2 for sim at Guide(1972-2017) 0.920570431053021
model already exists in ../model/12MonthlyFlowProcessForecast_Guide/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2018/
R2 for sim at Guide(1972-2018) 0.9503388434880898
model already exists in ../model/12MonthlyFlowProcessForecast_Guide/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2019/
R2 for pred at Guide(2015-2019) 0.2774600346584769


R2 for sim at Xunhua(1972-2014) 0.8624612712345638
model already exists in ../model/12MonthlyFlowProcessForecast_Xunhua/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2015/
R2 for sim at Xunhua(1972-2015) 0.9744088740046443
model already exists in ../model/12MonthlyFlowProcessForecast_Xunhua/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2016/
R2 for sim at Xunhua(1972-2016) 0.949707611282172
model already exists in ../model/12MonthlyFlowProcessForecast_Xunhua/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2017/
R2 for sim at Xunhua(1972-2017) 0.7581770111156885
model already exists in ../model/12MonthlyFlowProcessForecast_Xunhua/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2018/
R2 for sim at Xunhua(1972-2018) 0.940988273148866
model already exists in ../model/12MonthlyFlowProcessForecast_Xunhua/LSTMRegressorModel/InputOutputSamples_hydrosimyr_swatpsim/pred2019/
R2 for pred at Xunhua(2015-2019) 0.1673283651923776


In [29]:
# ---- Experiment configuration ------------------------------------------------
# Columns kept from each sample CSV. `target` is the column to predict; every
# other entry is used as a model input. This variant has no SWAT+ simulated
# flow among its inputs.
selected_features = ['P2020(mm)','MAX-TEM(C)','MIN-TEM(C)','flow(m^3/s)']
target = 'flow(m^3/s)'
features = [col for col in selected_features if col != target]
start_year = 1972                    # only used to label output files and log lines
pred_years = np.arange(2015, 2020)   # 2015..2019; one model directory per year
RETRAIN = True                       # NOTE(review): never read below -- an existing model.pickle is always reused
name = 'InputOutputSamples_hydrosimyr'
batch_size = 32
ystar_col = "pred"
# Passed to SimulatedSequenceDataset; the first `sequence_length` rows of each
# split are skipped when aligning predictions with dates.
# (presumably a 12-month look-back on monthly data -- TODO confirm)
sequence_length = 12

# ---- Per-station, per-year workflow -------------------------------------------
# For every station and prediction year: scale the splits, tune/train an LSTM
# with Optuna if no saved model exists, report the calibration+test fit, then
# predict the target year. Yearly predictions are merged per station, scored,
# saved and plotted.
for hydro_station in hydro_stations:
    sample_path = f'../samples/SequenceSamples/{hydro_station}/{name}/'
    result_path = f'../result/LSTMRegressorData/{hydro_station}/{name}/'
    os.makedirs(sample_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)

    yearly_preds = []  # one de-normalised prediction frame per pred_year
    for pred_year in pred_years:
        model_path = f'../model/12MonthlyFlowProcessForecast_{hydro_station}/LSTMRegressorModel/{name}/pred{pred_year}/'
        os.makedirs(model_path, exist_ok=True)
        model_file = model_path + 'model.pickle'
        sim_file = result_path + f'cal_test_sim_{start_year}_{pred_year-1}.csv'

        df = pd.read_csv(
            f'../samples/{name}/{hydro_station}_MeteAVGCalvalFeatureDataForML_PRED{pred_year}.csv',
            index_col=['date'], parse_dates=['date']
        )
        df = df[selected_features]

        # Date-based splits. `test` and `pre` each start one year before the
        # period they are scored on; those leading rows are skipped when the
        # predictions are aligned below (index[sequence_length:]).
        raw_splits = {
            'cal': df.loc[:f'{pred_year-6}-12-31', :],
            'test': df.loc[f'{pred_year-6}-01-01':f'{pred_year-1}-12-31', :],
            'pre': df.loc[f'{pred_year-1}-01-01':f'{pred_year}-12-31', :],
        }

        # Scalers are fitted on the calibration split only and then applied to
        # all three splits.
        X_scaler = MinMaxScaler(feature_range=(0,1))
        Y_scaler = MinMaxScaler(feature_range=(0,1))
        X_scaler.fit(raw_splits['cal'][features])
        Y_scaler.fit(raw_splits['cal'][[target]])

        scaled_splits = {}
        for split_name, split in raw_splits.items():
            scaled = pd.DataFrame(
                X_scaler.transform(split[features]), columns=features, index=split.index
            )
            scaled[target] = Y_scaler.transform(split[[target]]).ravel()
            scaled_splits[split_name] = scaled
        cal, test, pre = scaled_splits['cal'], scaled_splits['test'], scaled_splits['pre']

        cal.to_csv(sample_path+f'cal_{start_year}_{pred_year-6}.csv')
        test.to_csv(sample_path+f'test_{pred_year-5}_{pred_year-1}.csv')
        pre.to_csv(sample_path+f'pre_{pred_year}.csv')

        cal_dataset = SimulatedSequenceDataset(
            dataframe=cal.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        test_dataset = SimulatedSequenceDataset(
            dataframe=test.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )

        # ---- Hyper-parameter search / training (only when no model is saved) ----
        model_exists = os.path.exists(model_file)
        if not model_exists:
            objective = Objective(
                train_dataset=cal_dataset,
                val_dataset=test_dataset,
                num_epoch=1000,
                batch_size=batch_size,
                shuffle=True,
                model_path='./scheme/LSTMRegressor/',
            )
            study = optuna.create_study(
                study_name='example-study',
                direction='minimize',
            )
            study.optimize(objective, n_trials=100)

            pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
            complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

            print("Study statistics: ")
            print("  Number of finished trials: ", len(study.trials))
            print("  Number of pruned trials: ", len(pruned_trials))
            print("  Number of complete trials: ", len(complete_trials))

            print("Best trial:")
            trial = study.best_trial
            save_best_trial(trial, model_path=model_path)
            save_model(trial, model_path=model_path)

            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))

        # Loaded once and reused for both the simulation and the prediction.
        model = load_model(model_file=model_file).to(DEVICE)

        # ---- Calibration + test simulation ----
        # Reuse the cached simulation only when the model was already on disk
        # and the CSV exists; otherwise (fresh training, or CSV missing after an
        # interrupted run) recompute it from the model.
        if model_exists and os.path.exists(sim_file):
            df_out = pd.read_csv(sim_file, index_col=['date'], parse_dates=['date'])
        else:
            cal_loader = DataLoader(cal_dataset, batch_size=64, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

            cal_ = cal.loc[cal.index[sequence_length:], [target]].copy()
            test_ = test.loc[test.index[sequence_length:], [target]].copy()
            cal_[ystar_col] = predict(cal_loader, model).cpu().numpy()
            test_[ystar_col] = predict(test_loader, model).cpu().numpy()

            df_out = pd.concat((cal_, test_))[[target, ystar_col]]
            # Map both the scaled observations and the predictions back to the
            # original target scale.
            for c in df_out.columns:
                df_out[c] = Y_scaler.inverse_transform(df_out[[c]]).ravel()
            df_out = df_out.rename(columns={target: 'obs'})
            df_out.to_csv(sim_file)

        print(f'R2 for sim at {hydro_station}({start_year}-{pred_year-1})',r2_score(df_out['obs'],df_out[ystar_col]))
        if model_exists:
            print(f'model already exists in {model_path}')

        # ---- Prediction for pred_year ----
        # Uses the in-memory scaled frame (already written to pre_{pred_year}.csv above).
        pre_dataset = SimulatedSequenceDataset(
            dataframe=pre.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        pre_loader = DataLoader(pre_dataset, batch_size=batch_size, shuffle=False)
        pre_ = pre.loc[pre.index[sequence_length:], [target]].copy()
        pre_[ystar_col] = predict(pre_loader, model).cpu().numpy()
        for c in pre_.columns:
            pre_[c] = Y_scaler.inverse_transform(pre_[[c]]).ravel()
        yearly_preds.append(pre_)

    # ---- Station-level summary ----
    # Concatenate once instead of growing a DataFrame inside the loop.
    pred_df = pd.concat(yearly_preds, axis=0).rename(columns={target: 'obs'})
    print(f'R2 for pred at {hydro_station}({pred_years[0]}-{pred_years[-1]})',r2_score(pred_df['obs'],pred_df[ystar_col]))
    pred_df.to_csv(result_path+f'vif_lstm_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Observed vs. predicted flow over the prediction years for this station.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df['obs'], mode='lines', name='obs'))
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df[ystar_col], mode='lines', name='pred'))
    fig.update_layout(
        title=f'{hydro_station}: observed vs. predicted flow ({pred_years[0]}-{pred_years[-1]})',
        xaxis_title='date',
        yaxis_title='flow (m^3/s)',
    )
    fig.show()


[I 2024-10-29 12:14:41,249] A new study created in memory with name: example-study




[I 2024-10-29 12:15:35,343] Trial 0 finished with value: 0.002868501760531217 and parameters: {'num_layers': 3, 'hidden_size': 104, 'lstm_dropout': 0.24686982687638526, 'dense_dropout': 0.09843667407880913, 'learning_rate': 0.0010948524590535516}. Best is trial 0 with value: 0.002868501760531217.
[I 2024-10-29 12:16:24,158] Trial 1 finished with value: 0.007857411168515682 and parameters: {'num_layers': 2, 'hidden_size': 64, 'lstm_dropout': 0.14712357590201003, 'dense_dropout': 0.3181419695115645, 'learning_rate': 9.269539776984173e-05}. Best is trial 0 with value: 0.002868501760531217.
[I 2024-10-29 12:17:17,536] Trial 2 finished with value: 0.008487807121127844 and parameters: {'num_layers': 3, 'hidden_size': 88, 'lstm_dropout': 0.11508695043578998, 'dense_dropout': 0.29250456799692076, 'learning_rate': 1.607126384945447e-05}. Best is trial 0 with value: 0.002868501760531217.
[I 2024-10-29 12:18:09,844] Trial 3 finished with value: 0.0062603752594441175 and parameters: {'num_layers':

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  67
  Number of complete trials:  33
Best trial:
  Value:  0.001781786122592166
  Params: 
    num_layers: 1
    hidden_size: 96
    lstm_dropout: 0.034200067707317046
    dense_dropout: 0.06060369010853425
    learning_rate: 0.01610251074565405
R2 for sim at Tangnaihai(1972-2014) 0.9765327917124771


[I 2024-10-29 12:42:35,563] Trial 0 finished with value: 0.005818087141960859 and parameters: {'num_layers': 1, 'hidden_size': 104, 'lstm_dropout': 0.3929800216188265, 'dense_dropout': 0.2251619400195643, 'learning_rate': 1.4352688140048598e-05}. Best is trial 0 with value: 0.005818087141960859.
[I 2024-10-29 12:43:29,960] Trial 1 finished with value: 0.009717746637761593 and parameters: {'num_layers': 3, 'hidden_size': 56, 'lstm_dropout': 0.07227708039040975, 'dense_dropout': 0.4036788718346034, 'learning_rate': 0.029721163083579628}. Best is trial 0 with value: 0.005818087141960859.
[I 2024-10-29 12:44:16,631] Trial 2 finished with value: 0.010389550821855664 and parameters: {'num_layers': 1, 'hidden_size': 104, 'lstm_dropout': 0.1751789667861962, 'dense_dropout': 0.3677628987151054, 'learning_rate': 0.021420983611543182}. Best is trial 0 with value: 0.005818087141960859.
[I 2024-10-29 12:45:01,883] Trial 3 finished with value: 0.0017340044723823667 and parameters: {'num_layers': 1, 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  69
  Number of complete trials:  31
Best trial:
  Value:  0.0015509831137023866
  Params: 
    num_layers: 2
    hidden_size: 64
    lstm_dropout: 0.2637510235281985
    dense_dropout: 0.039797036820399076
    learning_rate: 0.005842236882270005
R2 for sim at Tangnaihai(1972-2015) 0.9499199982410991


[I 2024-10-29 13:09:27,290] Trial 0 finished with value: 0.003276884206570685 and parameters: {'num_layers': 3, 'hidden_size': 48, 'lstm_dropout': 0.38954605853527186, 'dense_dropout': 0.21395604533701423, 'learning_rate': 0.0003598017087604249}. Best is trial 0 with value: 0.003276884206570685.
[I 2024-10-29 13:10:23,358] Trial 1 finished with value: 0.007523299194872379 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.13358120723624933, 'dense_dropout': 0.292073502951336, 'learning_rate': 0.004655187222634587}. Best is trial 0 with value: 0.003276884206570685.
[I 2024-10-29 13:11:10,626] Trial 2 finished with value: 0.007377174217253923 and parameters: {'num_layers': 1, 'hidden_size': 24, 'lstm_dropout': 0.32701540057603945, 'dense_dropout': 0.42697555142808347, 'learning_rate': 0.036262863531733275}. Best is trial 0 with value: 0.003276884206570685.
[I 2024-10-29 13:12:06,045] Trial 3 finished with value: 0.0065423434134572744 and parameters: {'num_layers': 3, 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  69
  Number of complete trials:  31
Best trial:
  Value:  0.001960186054930091
  Params: 
    num_layers: 1
    hidden_size: 56
    lstm_dropout: 0.38445872042379775
    dense_dropout: 0.0034122841683422722
    learning_rate: 0.0008353773076921347
R2 for sim at Tangnaihai(1972-2016) 0.9567461810482403


[I 2024-10-29 13:35:10,474] Trial 0 finished with value: 0.0029147498426027596 and parameters: {'num_layers': 2, 'hidden_size': 72, 'lstm_dropout': 0.3939070285596958, 'dense_dropout': 0.26450444484284574, 'learning_rate': 0.0001640261457534649}. Best is trial 0 with value: 0.0029147498426027596.
[I 2024-10-29 13:36:05,091] Trial 1 finished with value: 0.0021809545578435063 and parameters: {'num_layers': 3, 'hidden_size': 48, 'lstm_dropout': 0.41353692225554767, 'dense_dropout': 0.10594023868340618, 'learning_rate': 0.0011998152421985292}. Best is trial 1 with value: 0.0021809545578435063.
[I 2024-10-29 13:37:02,342] Trial 2 finished with value: 0.0017673852271400392 and parameters: {'num_layers': 3, 'hidden_size': 96, 'lstm_dropout': 0.19574717367512112, 'dense_dropout': 0.08330026478866026, 'learning_rate': 7.300381281249551e-05}. Best is trial 2 with value: 0.0017673852271400392.
[I 2024-10-29 13:37:51,477] Trial 3 finished with value: 0.004749504732899368 and parameters: {'num_laye

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  85
  Number of complete trials:  15
Best trial:
  Value:  0.00149074150249362
  Params: 
    num_layers: 3
    hidden_size: 128
    lstm_dropout: 0.19713154681244296
    dense_dropout: 0.10493194470159768
    learning_rate: 0.005105661824414191
R2 for sim at Tangnaihai(1972-2017) 0.8862909260943161


[I 2024-10-29 13:48:55,552] Trial 0 finished with value: 0.007357157301157713 and parameters: {'num_layers': 3, 'hidden_size': 48, 'lstm_dropout': 0.22430783207980853, 'dense_dropout': 0.4352767663287427, 'learning_rate': 0.0001559935045379112}. Best is trial 0 with value: 0.007357157301157713.
[I 2024-10-29 13:49:53,603] Trial 1 finished with value: 0.01229653088375926 and parameters: {'num_layers': 2, 'hidden_size': 128, 'lstm_dropout': 0.07101872295470402, 'dense_dropout': 0.0784611072069506, 'learning_rate': 0.02685127235614315}. Best is trial 0 with value: 0.007357157301157713.
[I 2024-10-29 13:50:42,994] Trial 2 finished with value: 0.004967261338606477 and parameters: {'num_layers': 1, 'hidden_size': 64, 'lstm_dropout': 0.11568472212051345, 'dense_dropout': 0.1725563891416902, 'learning_rate': 0.06988905237433796}. Best is trial 2 with value: 0.004967261338606477.
[I 2024-10-29 13:51:39,071] Trial 3 finished with value: 0.002910076407715678 and parameters: {'num_layers': 2, 'hid

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  85
  Number of complete trials:  15
Best trial:
  Value:  0.0018893461092375219
  Params: 
    num_layers: 1
    hidden_size: 88
    lstm_dropout: 0.04885289984663164
    dense_dropout: 0.04303928193938521
    learning_rate: 0.006887056648661242
R2 for sim at Tangnaihai(1972-2018) 0.9715981619988173
R2 for pred at Tangnaihai(2015-2019) 0.1703737718239443


[I 2024-10-29 14:01:36,816] A new study created in memory with name: example-study




[I 2024-10-29 14:02:20,692] Trial 0 finished with value: 0.005266785155981779 and parameters: {'num_layers': 1, 'hidden_size': 8, 'lstm_dropout': 0.36396321490913225, 'dense_dropout': 0.20093743637995404, 'learning_rate': 7.020866094976458e-05}. Best is trial 0 with value: 0.005266785155981779.
[I 2024-10-29 14:03:10,035] Trial 1 finished with value: 0.007949394173920155 and parameters: {'num_layers': 1, 'hidden_size': 80, 'lstm_dropout': 0.3815861444187482, 'dense_dropout': 0.26429244672340707, 'learning_rate': 0.04665051692586674}. Best is trial 0 with value: 0.005266785155981779.
[I 2024-10-29 14:04:04,839] Trial 2 finished with value: 0.03890648111701012 and parameters: {'num_layers': 3, 'hidden_size': 96, 'lstm_dropout': 0.13754060607262264, 'dense_dropout': 0.44380321866236744, 'learning_rate': 0.0772615891338594}. Best is trial 0 with value: 0.005266785155981779.
[I 2024-10-29 14:04:52,828] Trial 3 finished with value: 0.012931969482451677 and parameters: {'num_layers': 2, 'hidd

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  73
  Number of complete trials:  27
Best trial:
  Value:  0.002150496526155621
  Params: 
    num_layers: 1
    hidden_size: 96
    lstm_dropout: 0.0714837968455423
    dense_dropout: 0.007222531587844109
    learning_rate: 0.011785608779093168
R2 for sim at Guide(1972-2014) 0.9540315749756086


[I 2024-10-29 14:24:05,555] Trial 0 finished with value: 0.013597266748547554 and parameters: {'num_layers': 2, 'hidden_size': 120, 'lstm_dropout': 0.2015837450114895, 'dense_dropout': 0.4778017486978153, 'learning_rate': 0.00011293681125849496}. Best is trial 0 with value: 0.013597266748547554.
[I 2024-10-29 14:24:55,695] Trial 1 finished with value: 0.0033507838379591703 and parameters: {'num_layers': 2, 'hidden_size': 80, 'lstm_dropout': 0.3424186619560133, 'dense_dropout': 0.13416660613913417, 'learning_rate': 0.00013258470681462968}. Best is trial 1 with value: 0.0033507838379591703.
[I 2024-10-29 14:25:51,379] Trial 2 finished with value: 0.012529273284599185 and parameters: {'num_layers': 3, 'hidden_size': 96, 'lstm_dropout': 0.3111269962303139, 'dense_dropout': 0.4594638632335935, 'learning_rate': 3.211381909989367e-05}. Best is trial 1 with value: 0.0033507838379591703.
[I 2024-10-29 14:26:41,888] Trial 3 finished with value: 0.006029557669535279 and parameters: {'num_layers':

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  61
  Number of complete trials:  39
Best trial:
  Value:  0.0012747833388857543
  Params: 
    num_layers: 1
    hidden_size: 64
    lstm_dropout: 0.4323931922253557
    dense_dropout: 0.08996484402668149
    learning_rate: 0.007273997994916858
R2 for sim at Guide(1972-2015) 0.9734413568014075


[I 2024-10-29 14:54:46,855] Trial 0 finished with value: 0.005770710064098239 and parameters: {'num_layers': 2, 'hidden_size': 80, 'lstm_dropout': 0.4907416401432515, 'dense_dropout': 0.28526553548091177, 'learning_rate': 0.00016460385303922545}. Best is trial 0 with value: 0.005770710064098239.
[I 2024-10-29 14:55:24,370] Trial 1 finished with value: 0.002227436867542565 and parameters: {'num_layers': 2, 'hidden_size': 24, 'lstm_dropout': 0.34752256459914826, 'dense_dropout': 0.07206225019460427, 'learning_rate': 0.00043665578087738054}. Best is trial 1 with value: 0.002227436867542565.
[I 2024-10-29 14:55:56,339] Trial 2 finished with value: 0.0036543625174090266 and parameters: {'num_layers': 1, 'hidden_size': 128, 'lstm_dropout': 0.19547834066178554, 'dense_dropout': 0.2421419107879016, 'learning_rate': 0.012140219434220353}. Best is trial 1 with value: 0.002227436867542565.
[I 2024-10-29 14:56:29,407] Trial 3 finished with value: 0.009946288075298071 and parameters: {'num_layers':

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  73
  Number of complete trials:  27
Best trial:
  Value:  0.0021268746349960566
  Params: 
    num_layers: 1
    hidden_size: 24
    lstm_dropout: 0.4760293081426447
    dense_dropout: 0.11797202892673563
    learning_rate: 0.027769498326339334
R2 for sim at Guide(1972-2016) 0.9469694181946073


[I 2024-10-29 15:10:31,041] Trial 0 finished with value: 0.002843742026016116 and parameters: {'num_layers': 1, 'hidden_size': 96, 'lstm_dropout': 0.4172727452251453, 'dense_dropout': 0.08171934771287515, 'learning_rate': 8.63495335301633e-05}. Best is trial 0 with value: 0.002843742026016116.
[I 2024-10-29 15:11:04,366] Trial 1 finished with value: 0.003792222240008414 and parameters: {'num_layers': 1, 'hidden_size': 56, 'lstm_dropout': 0.3535677668222893, 'dense_dropout': 0.15971343909701174, 'learning_rate': 1.5441014424967348e-05}. Best is trial 0 with value: 0.002843742026016116.
[I 2024-10-29 15:11:45,979] Trial 2 finished with value: 0.002987931133247912 and parameters: {'num_layers': 3, 'hidden_size': 72, 'lstm_dropout': 0.4462024288622968, 'dense_dropout': 0.2852332555500555, 'learning_rate': 0.0019052828722774368}. Best is trial 0 with value: 0.002843742026016116.
[I 2024-10-29 15:12:19,667] Trial 3 finished with value: 0.003586297738365829 and parameters: {'num_layers': 1, '

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  76
  Number of complete trials:  24
Best trial:
  Value:  0.0015878991689532995
  Params: 
    num_layers: 2
    hidden_size: 56
    lstm_dropout: 0.10450057700716445
    dense_dropout: 0.1805796582964022
    learning_rate: 0.009886691057741377
R2 for sim at Guide(1972-2017) 0.9233731559823551


[I 2024-10-29 15:26:28,090] Trial 0 finished with value: 0.00332701881416142 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.4632437145624613, 'dense_dropout': 0.09018751601665853, 'learning_rate': 0.0021483219742416382}. Best is trial 0 with value: 0.00332701881416142.
[I 2024-10-29 15:27:04,199] Trial 1 finished with value: 0.0018656241591088474 and parameters: {'num_layers': 1, 'hidden_size': 24, 'lstm_dropout': 0.18050866396023368, 'dense_dropout': 0.13792180275926863, 'learning_rate': 0.04424705751289487}. Best is trial 1 with value: 0.0018656241591088474.
[I 2024-10-29 15:27:43,142] Trial 2 finished with value: 0.0026142276474274695 and parameters: {'num_layers': 2, 'hidden_size': 72, 'lstm_dropout': 0.4198230848875198, 'dense_dropout': 0.22844115660027525, 'learning_rate': 0.0002853513091048859}. Best is trial 1 with value: 0.0018656241591088474.
[I 2024-10-29 15:28:27,459] Trial 3 finished with value: 0.004605644615367055 and parameters: {'num_layers': 3,

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  86
  Number of complete trials:  14
Best trial:
  Value:  0.0018656241591088474
  Params: 
    num_layers: 1
    hidden_size: 24
    lstm_dropout: 0.18050866396023368
    dense_dropout: 0.13792180275926863
    learning_rate: 0.04424705751289487
R2 for sim at Guide(1972-2018) 0.949414335154362
R2 for pred at Guide(2015-2019) 0.15173738162611217


[I 2024-10-29 15:35:07,184] A new study created in memory with name: example-study




[I 2024-10-29 15:35:45,481] Trial 0 finished with value: 0.016323301009833813 and parameters: {'num_layers': 3, 'hidden_size': 16, 'lstm_dropout': 0.283259121231428, 'dense_dropout': 0.3659266257180318, 'learning_rate': 1.2686641299201279e-05}. Best is trial 0 with value: 0.016323301009833813.
[I 2024-10-29 15:36:20,209] Trial 1 finished with value: 0.003434899146668613 and parameters: {'num_layers': 2, 'hidden_size': 16, 'lstm_dropout': 0.11007339301764363, 'dense_dropout': 0.11480187741057529, 'learning_rate': 0.00011205009574560522}. Best is trial 1 with value: 0.003434899146668613.
[I 2024-10-29 15:36:50,475] Trial 2 finished with value: 0.009108143392950296 and parameters: {'num_layers': 1, 'hidden_size': 104, 'lstm_dropout': 0.10716479291354541, 'dense_dropout': 0.3206678583488347, 'learning_rate': 3.954432491181072e-05}. Best is trial 1 with value: 0.003434899146668613.
[I 2024-10-29 15:37:22,577] Trial 3 finished with value: 0.013334531802684069 and parameters: {'num_layers': 1

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  53
  Number of complete trials:  47
Best trial:
  Value:  0.0018088806536979973
  Params: 
    num_layers: 1
    hidden_size: 96
    lstm_dropout: 0.042043672376161745
    dense_dropout: 0.05977629829449365
    learning_rate: 0.010835715692289251
R2 for sim at Xunhua(1972-2014) 0.9516060777260171


[I 2024-10-29 16:03:22,979] A new study created in memory with name: example-study




[I 2024-10-29 16:04:14,329] Trial 0 finished with value: 0.004490263003390282 and parameters: {'num_layers': 3, 'hidden_size': 104, 'lstm_dropout': 0.11449761408572073, 'dense_dropout': 0.208566465851187, 'learning_rate': 0.010059334711611774}. Best is trial 0 with value: 0.004490263003390282.
[I 2024-10-29 16:05:03,368] Trial 1 finished with value: 0.0028154636966064572 and parameters: {'num_layers': 3, 'hidden_size': 80, 'lstm_dropout': 0.3979117133842245, 'dense_dropout': 0.046959881623966415, 'learning_rate': 0.0005238926714974306}. Best is trial 1 with value: 0.0028154636966064572.
[I 2024-10-29 16:05:55,389] Trial 2 finished with value: 0.011720804497599602 and parameters: {'num_layers': 3, 'hidden_size': 40, 'lstm_dropout': 0.13878297685645619, 'dense_dropout': 0.4817538839303561, 'learning_rate': 0.00016477831377701481}. Best is trial 1 with value: 0.0028154636966064572.
[I 2024-10-29 16:06:32,800] Trial 3 finished with value: 0.013074504677206278 and parameters: {'num_layers':

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  67
  Number of complete trials:  33
Best trial:
  Value:  0.001051256462233141
  Params: 
    num_layers: 1
    hidden_size: 96
    lstm_dropout: 0.33871465152957664
    dense_dropout: 0.03359366670811949
    learning_rate: 0.008336044667732264
R2 for sim at Xunhua(1972-2015) 0.9914602492018506


[I 2024-10-29 16:27:45,446] Trial 0 finished with value: 0.00426440266892314 and parameters: {'num_layers': 1, 'hidden_size': 16, 'lstm_dropout': 0.3927079267761597, 'dense_dropout': 0.04283270681005463, 'learning_rate': 0.0001001714630368836}. Best is trial 0 with value: 0.00426440266892314.
[I 2024-10-29 16:28:32,389] Trial 1 finished with value: 0.007515834178775549 and parameters: {'num_layers': 2, 'hidden_size': 40, 'lstm_dropout': 0.16089977234280428, 'dense_dropout': 0.3992980844147778, 'learning_rate': 0.00014921821390825676}. Best is trial 0 with value: 0.00426440266892314.
[I 2024-10-29 16:29:15,267] Trial 2 finished with value: 0.004418775090016425 and parameters: {'num_layers': 1, 'hidden_size': 96, 'lstm_dropout': 0.2059868874584349, 'dense_dropout': 0.08206461890573896, 'learning_rate': 0.00012051549436194005}. Best is trial 0 with value: 0.00426440266892314.
[I 2024-10-29 16:29:57,988] Trial 3 finished with value: 0.005658412119373679 and parameters: {'num_layers': 1, 'h

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  65
  Number of complete trials:  35
Best trial:
  Value:  0.002300985681358725
  Params: 
    num_layers: 1
    hidden_size: 24
    lstm_dropout: 0.13120629161417807
    dense_dropout: 0.09060595907531592
    learning_rate: 0.015132900391851245
R2 for sim at Xunhua(1972-2016) 0.9688751676698253


[I 2024-10-29 16:55:09,339] Trial 0 finished with value: 0.004443456535227597 and parameters: {'num_layers': 2, 'hidden_size': 104, 'lstm_dropout': 0.4304528192995433, 'dense_dropout': 0.2145202641673672, 'learning_rate': 0.04548005556683225}. Best is trial 0 with value: 0.004443456535227597.
[I 2024-10-29 16:56:05,028] Trial 1 finished with value: 0.014211467001587152 and parameters: {'num_layers': 3, 'hidden_size': 16, 'lstm_dropout': 0.26601314767425127, 'dense_dropout': 0.31776717578043806, 'learning_rate': 0.08182706243987062}. Best is trial 0 with value: 0.004443456535227597.
[I 2024-10-29 16:56:55,541] Trial 2 finished with value: 0.006931096315383911 and parameters: {'num_layers': 3, 'hidden_size': 120, 'lstm_dropout': 0.2650865405634053, 'dense_dropout': 0.4603634295140256, 'learning_rate': 0.000300929729989736}. Best is trial 0 with value: 0.004443456535227597.
[I 2024-10-29 16:57:49,220] Trial 3 finished with value: 0.008127463981509209 and parameters: {'num_layers': 3, 'hid

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  69
  Number of complete trials:  31
Best trial:
  Value:  0.0017146918107755482
  Params: 
    num_layers: 1
    hidden_size: 112
    lstm_dropout: 0.44909897977233487
    dense_dropout: 0.14687893693953827
    learning_rate: 0.012572113657889612
R2 for sim at Xunhua(1972-2017) 0.9538845882290177


[I 2024-10-29 17:18:14,247] Trial 0 finished with value: 0.004815324558876455 and parameters: {'num_layers': 2, 'hidden_size': 88, 'lstm_dropout': 0.2278988286119426, 'dense_dropout': 0.3450567889412119, 'learning_rate': 0.0476743657117066}. Best is trial 0 with value: 0.004815324558876455.
[I 2024-10-29 17:18:58,069] Trial 1 finished with value: 0.005404074210673571 and parameters: {'num_layers': 2, 'hidden_size': 80, 'lstm_dropout': 0.20267760106223393, 'dense_dropout': 0.013904744177716089, 'learning_rate': 5.173767723929824e-05}. Best is trial 0 with value: 0.004815324558876455.
[I 2024-10-29 17:19:48,609] Trial 2 finished with value: 0.005569370114244521 and parameters: {'num_layers': 3, 'hidden_size': 104, 'lstm_dropout': 0.2549482368858819, 'dense_dropout': 0.3144876668370799, 'learning_rate': 1.909282043858444e-05}. Best is trial 0 with value: 0.004815324558876455.
[I 2024-10-29 17:20:35,095] Trial 3 finished with value: 0.005210581934079528 and parameters: {'num_layers': 2, 'h

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  70
  Number of complete trials:  30
Best trial:
  Value:  0.0027120799059048295
  Params: 
    num_layers: 1
    hidden_size: 40
    lstm_dropout: 0.46060518971450587
    dense_dropout: 0.23992478496818537
    learning_rate: 0.0015863830382434499
R2 for sim at Xunhua(1972-2018) 0.8902277837139141
R2 for pred at Xunhua(2015-2019) -0.013836092512255593


In [30]:
# For every hydrological station and every prediction year: build scaled
# calibration/test/prediction samples, tune + train an LSTM regressor with
# Optuna when no saved model exists yet, then forecast the prediction year.
# The per-year forecasts are stitched together, scored with R2, saved and plotted.
selected_features = [
    'snomlt(mm)', 'eplant(mm)', 'surq_cha(mm)',
    'snofall(mm)', 'wet_oflo(mm)', 'wet_evap(mm)',
    'snopack(mm)', 'sw_change(mm)', 'MIN-TEM(C)', 'SWATPlusSimFlow', 'flow(m^3/s)',
]
start_year = 1972
pred_years = np.arange(2015,2020)
# NOTE(review): RETRAIN is not read anywhere in this cell. Training is skipped
# whenever `model.pickle` already exists under `model_path`, regardless of this flag.
RETRAIN = True
name = 'InputOutputSamples_metesimyr_swatpsim'
batch_size = 32
ystar_col = "pred"
target = 'flow(m^3/s)'
sequence_length = 12

for hydro_station in hydro_stations:
    # Per-station output folders (created on demand).
    sample_path = f'../samples/SequenceSamples/{hydro_station}/{name}/'
    os.makedirs(sample_path, exist_ok=True)
    result_path = f'../result/LSTMRegressorData/{hydro_station}/{name}/'
    os.makedirs(result_path, exist_ok=True)

    pred_df = pd.DataFrame()
    for pred_year in pred_years:
        model_path = f'../model/12MonthlyFlowProcessForecast_{hydro_station}/LSTMRegressorModel/{name}/pred{pred_year}/'
        os.makedirs(model_path, exist_ok=True)

        df = pd.read_csv(
            f'../samples/{name}/{hydro_station}_MeteAVGCalvalFeatureDataForML_PRED{pred_year}.csv',
            index_col=['date'], parse_dates=['date']
        )
        df = df[selected_features]

        # Every selected column except the target is a model input,
        # in the order given by `selected_features`.
        features = [col for col in df.columns if col != target]

        # Date-based split. `test` and `pre` each start one year before the
        # period named in their file names; the predictions below are aligned
        # with `index[sequence_length:]`, so that leading year is presumably
        # consumed as the look-back window -- confirm against SimulatedSequenceDataset.
        cal = df.loc[:f'{pred_year-6}-12-31',:]
        test = df.loc[f'{pred_year-6}-01-01':f'{pred_year-1}-12-31',:]
        pre = df.loc[f'{pred_year-1}-01-01':f'{pred_year}-12-31',:]

        # Scalers are fitted on the calibration period only and then applied
        # unchanged to the test and prediction periods.
        X_scaler = MinMaxScaler(feature_range=(0,1))
        Y_scaler = MinMaxScaler(feature_range=(0,1))
        X_scaler.fit(cal[features])
        Y_scaler.fit(cal[[target]])
        cal_X = X_scaler.transform(cal[features])
        test_X = X_scaler.transform(test[features])
        pre_X = X_scaler.transform(pre[features])
        cal_y = Y_scaler.transform(cal[[target]])
        test_y = Y_scaler.transform(test[[target]])
        pre_y = Y_scaler.transform(pre[[target]])
        cal = pd.concat([pd.DataFrame(cal_X,columns=features,index=cal.index),pd.DataFrame(cal_y,columns=[target],index=cal.index)],axis=1)
        test = pd.concat([pd.DataFrame(test_X,columns=features,index=test.index),pd.DataFrame(test_y,columns=[target],index=test.index)],axis=1)
        pre = pd.concat([pd.DataFrame(pre_X,columns=features,index=pre.index),pd.DataFrame(pre_y,columns=[target],index=pre.index)],axis=1)
        cal.to_csv(sample_path+f'cal_{start_year}_{pred_year-6}.csv')
        test.to_csv(sample_path+f'test_{pred_year-5}_{pred_year-1}.csv')
        pre.to_csv(sample_path+f'pre_{pred_year}.csv')

        cal_dataset = SimulatedSequenceDataset(
            dataframe=cal.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        test_dataset = SimulatedSequenceDataset(
            dataframe=test.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )

        # Dates that the model outputs are assigned to (first `sequence_length`
        # rows of each frame are skipped).
        cal_index = cal.index[sequence_length:]
        test_index = test.index[sequence_length:]

        if not os.path.exists(model_path+'model.pickle'):
            # No saved model for this station/year: run the Optuna search,
            # using the test period as the validation set.
            objective = Objective(
                    train_dataset=cal_dataset,
                    val_dataset=test_dataset,
                    num_epoch=1000,# 1000
                    batch_size=batch_size,
                    shuffle=True,
                    model_path='./scheme/LSTMRegressor/',
            )
            study = optuna.create_study(
                    study_name='example-study',
                    direction='minimize',
                )
            study.optimize(objective, n_trials=100) #100

            pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
            complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

            print("Study statistics: ")
            print("  Number of finished trials: ", len(study.trials))
            print("  Number of pruned trials: ", len(pruned_trials))
            print("  Number of complete trials: ", len(complete_trials))

            print("Best trial:")
            trial = study.best_trial

            # Persist the winning trial and its model under `model_path`.
            save_best_trial(trial, model_path=model_path)
            save_model(trial, model_path=model_path)

            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))

            # Reload the saved model and simulate the calibration + test periods.
            model = load_model(model_file=model_path+'model.pickle').to(DEVICE)
            cal_loader = DataLoader(cal_dataset, batch_size=64, shuffle=False)
            test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

            cal_ = cal.loc[cal_index,[target]].copy()
            test_ = test.loc[test_index,[target]].copy()

            cal_[ystar_col] = predict(cal_loader, model).cpu().numpy()
            test_[ystar_col] = predict(test_loader, model).cpu().numpy()

            df_out = pd.concat((cal_, test_))[[target, ystar_col]]
            # Map observed and simulated values back from [0, 1] to the
            # original scale of the target.
            for c in df_out.columns:
                df_out[c] = Y_scaler.inverse_transform(df_out[[c]])
            df_out = df_out.rename(columns={target: 'obs'})
            print(f'R2 for sim at {hydro_station}({start_year}-{pred_year-1})',r2_score(df_out['obs'],df_out[ystar_col]))
            df_out.to_csv(result_path+f'cal_test_sim_{start_year}_{pred_year-1}.csv')
        else:
            # A model already exists: only re-report the stored simulation score.
            df_out = pd.read_csv(result_path+f'cal_test_sim_{start_year}_{pred_year-1}.csv',index_col=['date'], parse_dates=['date'])
            print(f'R2 for sim at {hydro_station}({start_year}-{pred_year-1})',r2_score(df_out['obs'],df_out[ystar_col]))
            print(f'model already exists in {model_path}')

        # !Perform prediction
        # The scaled prediction sample is read back from the CSV written above.
        pre = pd.read_csv(sample_path+f'pre_{pred_year}.csv',index_col=['date'], parse_dates=['date'])
        pre_dataset = SimulatedSequenceDataset(
            dataframe=pre.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        pre_loader = DataLoader(pre_dataset, batch_size=batch_size, shuffle=False)
        pre_index = pre.index[sequence_length:]
        pre_ = pre.loc[pre_index,[target]].copy()
        model = load_model(model_file=model_path+'model.pickle').to(DEVICE)
        pre_[ystar_col] = predict(pre_loader, model).cpu().numpy()
        for c in pre_.columns:
            pre_[c] = Y_scaler.inverse_transform(pre_[[c]])
        pred_df = pd.concat((pred_df, pre_), axis=0)

    # Score, save and plot the stitched forecasts for this station.
    pred_df = pred_df.rename(columns={target: 'obs'})
    print(f'R2 for pred at {hydro_station}({pred_years[0]}-{pred_years[-1]})',r2_score(pred_df['obs'],pred_df[ystar_col]))
    pred_df.to_csv(result_path+f'vif_lstm_pred_{pred_years[0]}_{pred_years[-1]}.csv')
    # Visualization of the observed flow and the LSTM prediction in pred_df
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df['obs'], mode='lines', name='obs'))
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df[ystar_col], mode='lines', name='pred'))
    fig.update_layout(
        title=f'{hydro_station}: observed vs predicted ({pred_years[0]}-{pred_years[-1]})',
        xaxis_title='date',
        yaxis_title=target,
    )
    fig.show()


[I 2024-10-29 17:41:36,975] A new study created in memory with name: example-study




[I 2024-10-29 17:42:17,025] Trial 0 finished with value: 0.008319276617839932 and parameters: {'num_layers': 1, 'hidden_size': 128, 'lstm_dropout': 0.11967174195172825, 'dense_dropout': 0.25247167430523765, 'learning_rate': 0.0001894477963797847}. Best is trial 0 with value: 0.008319276617839932.
[I 2024-10-29 17:43:06,325] Trial 1 finished with value: 0.011516637168824673 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.21537673519452372, 'dense_dropout': 0.3632978498483674, 'learning_rate': 0.016303129898019127}. Best is trial 0 with value: 0.008319276617839932.
[I 2024-10-29 17:43:50,573] Trial 2 finished with value: 0.014770777197554708 and parameters: {'num_layers': 2, 'hidden_size': 40, 'lstm_dropout': 0.26308433057057995, 'dense_dropout': 0.4151789716609489, 'learning_rate': 0.00010304838630040942}. Best is trial 0 with value: 0.008319276617839932.
[I 2024-10-29 17:44:30,490] Trial 3 finished with value: 0.011670557782053947 and parameters: {'num_layers': 1

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  75
  Number of complete trials:  25
Best trial:
  Value:  0.005909296218305826
  Params: 
    num_layers: 1
    hidden_size: 112
    lstm_dropout: 0.29559666891583236
    dense_dropout: 0.055581993446474375
    learning_rate: 0.007514444392600025
R2 for sim at Tangnaihai(1972-2014) 0.9471993783320701


[I 2024-10-29 17:59:08,541] Trial 0 finished with value: 0.01172504760324955 and parameters: {'num_layers': 1, 'hidden_size': 32, 'lstm_dropout': 0.2718539858561464, 'dense_dropout': 0.39414606662564733, 'learning_rate': 0.0010899639874682062}. Best is trial 0 with value: 0.01172504760324955.
[I 2024-10-29 17:59:49,334] Trial 1 finished with value: 0.01397743879351765 and parameters: {'num_layers': 1, 'hidden_size': 112, 'lstm_dropout': 0.4099643186166765, 'dense_dropout': 0.3350324450451822, 'learning_rate': 0.04029465374168289}. Best is trial 0 with value: 0.01172504760324955.
[I 2024-10-29 18:00:30,175] Trial 2 finished with value: 0.008026271825656295 and parameters: {'num_layers': 1, 'hidden_size': 56, 'lstm_dropout': 0.2698297516733577, 'dense_dropout': 0.25011877575027885, 'learning_rate': 0.00011334692669774551}. Best is trial 2 with value: 0.008026271825656295.
[I 2024-10-29 18:01:11,984] Trial 3 finished with value: 0.009168017073534429 and parameters: {'num_layers': 1, 'hidd

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  71
  Number of complete trials:  29
Best trial:
  Value:  0.006433308124542236
  Params: 
    num_layers: 1
    hidden_size: 80
    lstm_dropout: 0.4399119955876998
    dense_dropout: 0.0665443239953629
    learning_rate: 0.01336747758565618
R2 for sim at Tangnaihai(1972-2015) 0.9102051021248346


[I 2024-10-29 18:18:39,769] Trial 0 finished with value: 0.00966782053001225 and parameters: {'num_layers': 1, 'hidden_size': 64, 'lstm_dropout': 0.48163586401139846, 'dense_dropout': 0.27359257525019803, 'learning_rate': 5.923949531296079e-05}. Best is trial 0 with value: 0.00966782053001225.
[I 2024-10-29 18:19:28,735] Trial 1 finished with value: 0.015951622277498245 and parameters: {'num_layers': 3, 'hidden_size': 40, 'lstm_dropout': 0.4534092484131659, 'dense_dropout': 0.4811857150020829, 'learning_rate': 1.9135424223508805e-05}. Best is trial 0 with value: 0.00966782053001225.
[I 2024-10-29 18:20:15,118] Trial 2 finished with value: 0.02321216417476535 and parameters: {'num_layers': 3, 'hidden_size': 112, 'lstm_dropout': 0.4145439928425078, 'dense_dropout': 0.4994501816276719, 'learning_rate': 0.0644430798598273}. Best is trial 0 with value: 0.00966782053001225.
[I 2024-10-29 18:20:52,846] Trial 3 finished with value: 0.014903034549206495 and parameters: {'num_layers': 1, 'hidden

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  64
  Number of complete trials:  36
Best trial:
  Value:  0.004062593798153102
  Params: 
    num_layers: 2
    hidden_size: 48
    lstm_dropout: 0.4109479510916762
    dense_dropout: 0.023698828979748218
    learning_rate: 0.007695017492496161
R2 for sim at Tangnaihai(1972-2016) 0.9750145435192072


[I 2024-10-29 18:43:33,196] Trial 0 finished with value: 0.012362377718091011 and parameters: {'num_layers': 3, 'hidden_size': 56, 'lstm_dropout': 0.2134985835005636, 'dense_dropout': 0.06054365248068777, 'learning_rate': 0.00047324820150459324}. Best is trial 0 with value: 0.012362377718091011.
[I 2024-10-29 18:44:14,558] Trial 1 finished with value: 0.004411802161484957 and parameters: {'num_layers': 3, 'hidden_size': 24, 'lstm_dropout': 0.012863860968478968, 'dense_dropout': 0.2979134701348625, 'learning_rate': 1.5132600802605149e-05}. Best is trial 1 with value: 0.004411802161484957.
[I 2024-10-29 18:44:50,938] Trial 2 finished with value: 0.005172033794224262 and parameters: {'num_layers': 2, 'hidden_size': 80, 'lstm_dropout': 0.012203432893925836, 'dense_dropout': 0.20051598327694325, 'learning_rate': 0.0029695771174927782}. Best is trial 1 with value: 0.004411802161484957.
[I 2024-10-29 18:45:28,818] Trial 3 finished with value: 0.006319723092019558 and parameters: {'num_layers'

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  77
  Number of complete trials:  23
Best trial:
  Value:  0.003532502567395568
  Params: 
    num_layers: 2
    hidden_size: 72
    lstm_dropout: 0.02742815975121216
    dense_dropout: 0.12883730544379382
    learning_rate: 0.003559584050477517
R2 for sim at Tangnaihai(1972-2017) 0.6871466732376064


[I 2024-10-29 18:57:27,886] Trial 0 finished with value: 0.007125865085981786 and parameters: {'num_layers': 1, 'hidden_size': 112, 'lstm_dropout': 0.09267909686024206, 'dense_dropout': 0.3621318536353469, 'learning_rate': 0.004234642601467342}. Best is trial 0 with value: 0.007125865085981786.
[I 2024-10-29 18:58:00,393] Trial 1 finished with value: 0.010490757413208485 and parameters: {'num_layers': 1, 'hidden_size': 120, 'lstm_dropout': 0.41490386799613116, 'dense_dropout': 0.043683973433970624, 'learning_rate': 0.026391979396592047}. Best is trial 0 with value: 0.007125865085981786.
[I 2024-10-29 18:58:37,563] Trial 2 finished with value: 0.009305234183557332 and parameters: {'num_layers': 2, 'hidden_size': 88, 'lstm_dropout': 0.426743130283418, 'dense_dropout': 0.19411298694564733, 'learning_rate': 0.00015304878825724792}. Best is trial 0 with value: 0.007125865085981786.
[I 2024-10-29 18:59:10,647] Trial 3 finished with value: 0.004778687842190266 and parameters: {'num_layers': 1

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  75
  Number of complete trials:  25
Best trial:
  Value:  0.004457558738067746
  Params: 
    num_layers: 1
    hidden_size: 72
    lstm_dropout: 0.063938762340349
    dense_dropout: 0.07876145491304885
    learning_rate: 0.007124937750585759
R2 for sim at Tangnaihai(1972-2018) 0.9570400420225073
R2 for pred at Tangnaihai(2015-2019) 0.25253729944545955


[I 2024-10-29 19:11:25,729] A new study created in memory with name: example-study




[I 2024-10-29 19:12:04,003] Trial 0 finished with value: 0.02015328500419855 and parameters: {'num_layers': 3, 'hidden_size': 8, 'lstm_dropout': 0.3250220449045316, 'dense_dropout': 0.48927806965617116, 'learning_rate': 0.004503434838588385}. Best is trial 0 with value: 0.02015328500419855.
[I 2024-10-29 19:12:35,067] Trial 1 finished with value: 0.009575798874720931 and parameters: {'num_layers': 1, 'hidden_size': 24, 'lstm_dropout': 0.13392790698387175, 'dense_dropout': 0.11879345223055454, 'learning_rate': 0.000593515481805685}. Best is trial 1 with value: 0.009575798874720931.
[I 2024-10-29 19:13:08,040] Trial 2 finished with value: 0.013350400608032942 and parameters: {'num_layers': 2, 'hidden_size': 88, 'lstm_dropout': 0.12905564353763188, 'dense_dropout': 0.3840635478755741, 'learning_rate': 0.0009509367003866835}. Best is trial 1 with value: 0.009575798874720931.
[I 2024-10-29 19:13:38,083] Trial 3 finished with value: 0.009759658598341048 and parameters: {'num_layers': 1, 'hid

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  68
  Number of complete trials:  32
Best trial:
  Value:  0.005186898517422378
  Params: 
    num_layers: 2
    hidden_size: 112
    lstm_dropout: 0.30097349533271295
    dense_dropout: 0.011778184957823998
    learning_rate: 0.0024850594330522516
R2 for sim at Guide(1972-2014) 0.9393826540598131


[I 2024-10-29 19:28:33,199] Trial 0 finished with value: 0.014818212483078241 and parameters: {'num_layers': 1, 'hidden_size': 32, 'lstm_dropout': 0.06025321819862306, 'dense_dropout': 0.3345799208319085, 'learning_rate': 0.08064935284912757}. Best is trial 0 with value: 0.014818212483078241.
[I 2024-10-29 19:29:09,946] Trial 1 finished with value: 0.00841040292289108 and parameters: {'num_layers': 2, 'hidden_size': 120, 'lstm_dropout': 0.46236089570653255, 'dense_dropout': 0.15613311553241815, 'learning_rate': 0.014902963902843416}. Best is trial 1 with value: 0.00841040292289108.
[I 2024-10-29 19:29:41,870] Trial 2 finished with value: 0.008869201759807765 and parameters: {'num_layers': 1, 'hidden_size': 72, 'lstm_dropout': 0.35751743179423495, 'dense_dropout': 0.21131781301781832, 'learning_rate': 0.003936438711558509}. Best is trial 1 with value: 0.00841040292289108.
[I 2024-10-29 19:30:15,226] Trial 3 finished with value: 0.010049399919807911 and parameters: {'num_layers': 1, 'hid

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  67
  Number of complete trials:  33
Best trial:
  Value:  0.005183910019695759
  Params: 
    num_layers: 1
    hidden_size: 88
    lstm_dropout: 0.081564960322689
    dense_dropout: 0.07674000615423207
    learning_rate: 0.0025425793140244654
R2 for sim at Guide(1972-2015) 0.9492175905992939


[I 2024-10-29 19:46:51,324] Trial 0 finished with value: 0.013523741625249386 and parameters: {'num_layers': 1, 'hidden_size': 72, 'lstm_dropout': 0.32276648805124486, 'dense_dropout': 0.06528413431540919, 'learning_rate': 0.07718456050638867}. Best is trial 0 with value: 0.013523741625249386.
[I 2024-10-29 19:47:30,945] Trial 1 finished with value: 0.01576598407700658 and parameters: {'num_layers': 3, 'hidden_size': 8, 'lstm_dropout': 0.2767263896366522, 'dense_dropout': 0.48903775441842445, 'learning_rate': 5.7541963679007134e-05}. Best is trial 0 with value: 0.013523741625249386.
[I 2024-10-29 19:48:07,851] Trial 2 finished with value: 0.007021342869848013 and parameters: {'num_layers': 2, 'hidden_size': 80, 'lstm_dropout': 0.3587352382582901, 'dense_dropout': 0.23623129343058386, 'learning_rate': 0.013011973834316397}. Best is trial 2 with value: 0.007021342869848013.
[I 2024-10-29 19:48:39,949] Trial 3 finished with value: 0.009834247408434749 and parameters: {'num_layers': 1, 'hi

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  77
  Number of complete trials:  23
Best trial:
  Value:  0.004578735679388046
  Params: 
    num_layers: 1
    hidden_size: 48
    lstm_dropout: 0.06801258879063271
    dense_dropout: 0.018784534064863467
    learning_rate: 0.0194143737618681
R2 for sim at Guide(1972-2016) 0.9665120645905123


[I 2024-10-29 19:59:49,705] Trial 0 finished with value: 0.003865099512040615 and parameters: {'num_layers': 3, 'hidden_size': 112, 'lstm_dropout': 0.22075461890596987, 'dense_dropout': 0.34353996856158736, 'learning_rate': 2.5972612172100397e-05}. Best is trial 0 with value: 0.003865099512040615.
[I 2024-10-29 20:00:31,332] Trial 1 finished with value: 0.005699346074834466 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.37376976719280186, 'dense_dropout': 0.3438524659172179, 'learning_rate': 0.0690749191227034}. Best is trial 0 with value: 0.003865099512040615.
[I 2024-10-29 20:01:04,071] Trial 2 finished with value: 0.003213450196199119 and parameters: {'num_layers': 1, 'hidden_size': 80, 'lstm_dropout': 0.17898490179668608, 'dense_dropout': 0.11906462145667679, 'learning_rate': 0.025250658204893396}. Best is trial 2 with value: 0.003213450196199119.
[I 2024-10-29 20:01:42,845] Trial 3 finished with value: 0.003779198625124991 and parameters: {'num_layers': 3, 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  84
  Number of complete trials:  16
Best trial:
  Value:  0.0019730424392037094
  Params: 
    num_layers: 1
    hidden_size: 88
    lstm_dropout: 0.2768925333309526
    dense_dropout: 0.2548486608569231
    learning_rate: 8.640649566502755e-05
R2 for sim at Guide(1972-2017) 0.752770991920718


[I 2024-10-29 20:09:01,659] Trial 0 finished with value: 0.0036519444547593594 and parameters: {'num_layers': 2, 'hidden_size': 64, 'lstm_dropout': 0.4683494634861216, 'dense_dropout': 0.2651769212664818, 'learning_rate': 0.000894504171904176}. Best is trial 0 with value: 0.0036519444547593594.
[I 2024-10-29 20:09:38,387] Trial 1 finished with value: 0.004750798922032118 and parameters: {'num_layers': 2, 'hidden_size': 24, 'lstm_dropout': 0.0263252179874382, 'dense_dropout': 0.09594137547489529, 'learning_rate': 0.087567529952641}. Best is trial 0 with value: 0.0036519444547593594.
[I 2024-10-29 20:10:16,519] Trial 2 finished with value: 0.0019089483539573848 and parameters: {'num_layers': 2, 'hidden_size': 32, 'lstm_dropout': 0.3967072631017324, 'dense_dropout': 0.2537595696134348, 'learning_rate': 0.00010219166383528481}. Best is trial 2 with value: 0.0019089483539573848.
[I 2024-10-29 20:10:53,127] Trial 3 finished with value: 0.009333065245300531 and parameters: {'num_layers': 2, '

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  83
  Number of complete trials:  17
Best trial:
  Value:  0.0019089483539573848
  Params: 
    num_layers: 2
    hidden_size: 32
    lstm_dropout: 0.3967072631017324
    dense_dropout: 0.2537595696134348
    learning_rate: 0.00010219166383528481
R2 for sim at Guide(1972-2018) 0.7630142256517007
R2 for pred at Guide(2015-2019) 0.41939445718859225


[I 2024-10-29 20:19:19,427] A new study created in memory with name: example-study




[I 2024-10-29 20:19:56,022] Trial 0 finished with value: 0.008281149668619037 and parameters: {'num_layers': 3, 'hidden_size': 88, 'lstm_dropout': 0.287897450430685, 'dense_dropout': 0.04072150752089004, 'learning_rate': 0.002210283192881989}. Best is trial 0 with value: 0.008281149668619037.
[I 2024-10-29 20:20:28,273] Trial 1 finished with value: 0.009938859730027616 and parameters: {'num_layers': 2, 'hidden_size': 120, 'lstm_dropout': 0.14606489158689295, 'dense_dropout': 0.3346764254669178, 'learning_rate': 0.00012688281299477863}. Best is trial 0 with value: 0.008281149668619037.
[I 2024-10-29 20:21:04,665] Trial 2 finished with value: 0.015672922134399414 and parameters: {'num_layers': 3, 'hidden_size': 120, 'lstm_dropout': 0.3372685767019198, 'dense_dropout': 0.4036397923943207, 'learning_rate': 0.0034940017773848787}. Best is trial 0 with value: 0.008281149668619037.
[I 2024-10-29 20:21:37,510] Trial 3 finished with value: 0.019683576188981533 and parameters: {'num_layers': 2, 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  76
  Number of complete trials:  24
Best trial:
  Value:  0.006552049890160561
  Params: 
    num_layers: 1
    hidden_size: 24
    lstm_dropout: 0.3563719533495064
    dense_dropout: 0.05590564959601277
    learning_rate: 0.007213038197207743
R2 for sim at Xunhua(1972-2014) 0.9565204652949623


[I 2024-10-29 20:32:37,561] Trial 0 finished with value: 0.010067923460155725 and parameters: {'num_layers': 1, 'hidden_size': 88, 'lstm_dropout': 0.4578245414973425, 'dense_dropout': 0.30178159786302877, 'learning_rate': 0.00415695174273368}. Best is trial 0 with value: 0.010067923460155725.
[I 2024-10-29 20:33:10,102] Trial 1 finished with value: 0.007465496892109513 and parameters: {'num_layers': 1, 'hidden_size': 96, 'lstm_dropout': 0.2228832541682017, 'dense_dropout': 0.05754514034714364, 'learning_rate': 0.009733708233554468}. Best is trial 1 with value: 0.007465496892109513.
[I 2024-10-29 20:33:41,684] Trial 2 finished with value: 0.010253493208438158 and parameters: {'num_layers': 1, 'hidden_size': 40, 'lstm_dropout': 0.011435505126757284, 'dense_dropout': 0.33830699327083086, 'learning_rate': 0.011855026314669828}. Best is trial 1 with value: 0.007465496892109513.
[I 2024-10-29 20:34:14,307] Trial 3 finished with value: 0.009472113568335772 and parameters: {'num_layers': 1, 'h

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  82
  Number of complete trials:  18
Best trial:
  Value:  0.0056471709394827485
  Params: 
    num_layers: 1
    hidden_size: 112
    lstm_dropout: 0.2731578510091379
    dense_dropout: 0.05580159920959155
    learning_rate: 0.004342665202582419
R2 for sim at Xunhua(1972-2015) 0.9627569603902972


[I 2024-10-29 20:43:33,495] Trial 0 finished with value: 0.008376629673875868 and parameters: {'num_layers': 1, 'hidden_size': 32, 'lstm_dropout': 0.08684787649522036, 'dense_dropout': 0.14290521755562985, 'learning_rate': 2.4080995569192075e-05}. Best is trial 0 with value: 0.008376629673875868.
[I 2024-10-29 20:44:17,776] Trial 1 finished with value: 0.017455364111810923 and parameters: {'num_layers': 2, 'hidden_size': 56, 'lstm_dropout': 0.3928915592059647, 'dense_dropout': 0.4515099668265093, 'learning_rate': 0.04193156638219602}. Best is trial 0 with value: 0.008376629673875868.
[I 2024-10-29 20:45:08,179] Trial 2 finished with value: 0.010132984258234501 and parameters: {'num_layers': 3, 'hidden_size': 56, 'lstm_dropout': 0.40876334982129386, 'dense_dropout': 0.0479537973311579, 'learning_rate': 3.0065571054028023e-05}. Best is trial 0 with value: 0.008376629673875868.
[I 2024-10-29 20:45:48,457] Trial 3 finished with value: 0.0060938033275306225 and parameters: {'num_layers': 1,

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  68
  Number of complete trials:  32
Best trial:
  Value:  0.004116126918233931
  Params: 
    num_layers: 1
    hidden_size: 88
    lstm_dropout: 0.024531865595348923
    dense_dropout: 0.092607411484683
    learning_rate: 0.008993789439421949
R2 for sim at Xunhua(1972-2016) 0.9369043230114699


[I 2024-10-29 21:05:50,201] Trial 0 finished with value: 0.002831778721883893 and parameters: {'num_layers': 3, 'hidden_size': 72, 'lstm_dropout': 0.27872715766925366, 'dense_dropout': 0.2952660428929327, 'learning_rate': 0.0003903213731982926}. Best is trial 0 with value: 0.002831778721883893.
[I 2024-10-29 21:06:38,530] Trial 1 finished with value: 0.0038843381917104125 and parameters: {'num_layers': 3, 'hidden_size': 80, 'lstm_dropout': 0.005771098782018647, 'dense_dropout': 0.050653639721585486, 'learning_rate': 1.0663979759307548e-05}. Best is trial 0 with value: 0.002831778721883893.
[I 2024-10-29 21:07:15,688] Trial 2 finished with value: 0.003982004011049867 and parameters: {'num_layers': 1, 'hidden_size': 104, 'lstm_dropout': 0.21290721949656072, 'dense_dropout': 0.21278188674969833, 'learning_rate': 0.02243074788710415}. Best is trial 0 with value: 0.002831778721883893.
[I 2024-10-29 21:07:53,865] Trial 3 finished with value: 0.007467061979696155 and parameters: {'num_layers'

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  73
  Number of complete trials:  27
Best trial:
  Value:  0.0016445262299384922
  Params: 
    num_layers: 1
    hidden_size: 32
    lstm_dropout: 0.297534118686633
    dense_dropout: 0.18698590305117418
    learning_rate: 0.009872952383076632
R2 for sim at Xunhua(1972-2017) 0.9308207286864563


[I 2024-10-29 21:25:05,920] Trial 0 finished with value: 0.009329840540885925 and parameters: {'num_layers': 1, 'hidden_size': 72, 'lstm_dropout': 0.39376371122983533, 'dense_dropout': 0.3524886869391458, 'learning_rate': 0.067457598380799}. Best is trial 0 with value: 0.009329840540885925.
[I 2024-10-29 21:25:51,230] Trial 1 finished with value: 0.006798525573685765 and parameters: {'num_layers': 1, 'hidden_size': 128, 'lstm_dropout': 0.37694207734439705, 'dense_dropout': 0.3697906936854813, 'learning_rate': 0.0011835119620884798}. Best is trial 1 with value: 0.006798525573685765.
[I 2024-10-29 21:26:46,210] Trial 2 finished with value: 0.002808980760164559 and parameters: {'num_layers': 2, 'hidden_size': 32, 'lstm_dropout': 0.05905645943743132, 'dense_dropout': 0.028150117063040325, 'learning_rate': 0.011895083716361925}. Best is trial 2 with value: 0.002808980760164559.
[I 2024-10-29 21:27:43,243] Trial 3 finished with value: 0.010359805542975664 and parameters: {'num_layers': 3, 'h

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  79
  Number of complete trials:  21
Best trial:
  Value:  0.0023393032024614513
  Params: 
    num_layers: 2
    hidden_size: 128
    lstm_dropout: 0.004797865588706263
    dense_dropout: 0.11829040400091506
    learning_rate: 0.00500105164235348
R2 for sim at Xunhua(1972-2018) 0.9560914987364005
R2 for pred at Xunhua(2015-2019) 0.3679475235466343


In [31]:
# Rolling LSTM flow forecast driven by the columns in `selected_features`.
# For every station and every prediction year the cell:
#   1. reads that year's feature CSV and splits it into cal / test / pre,
#   2. min-max scales all splits with scalers fitted on `cal` only,
#   3. tunes an LSTM with Optuna unless a saved model.pickle already exists,
#   4. writes the cal+test simulation and the prediction-year forecast to CSV.


def _scale_frame(frame, features, target, x_scaler, y_scaler):
    """Return a scaled copy of ``frame`` with columns ``features + [target]``.

    Both scalers must already be fitted. The index of ``frame`` is preserved.
    """
    scaled = pd.DataFrame(
        x_scaler.transform(frame[features]), columns=features, index=frame.index
    )
    scaled[target] = y_scaler.transform(frame[[target]]).ravel()
    return scaled


def _simulate(model, dataset, scaled, target, pred_col, sequence_length, loader_batch_size):
    """Run ``model`` over ``dataset`` and pair its output with the scaled target.

    The first ``sequence_length`` index labels of ``scaled`` are skipped, so the
    returned frame has columns ``[target, pred_col]`` and one row per remaining
    label. Assigning the prediction raises if ``predict`` returns a different
    number of values.
    """
    loader = DataLoader(dataset, batch_size=loader_batch_size, shuffle=False)
    paired = scaled.loc[scaled.index[sequence_length:], [target]].copy()
    paired[pred_col] = predict(loader, model).cpu().numpy()
    return paired


def _inverse_scale(frame, y_scaler):
    """Return a copy of ``frame`` with every column mapped back through ``y_scaler``."""
    restored = frame.copy()
    for col in restored.columns:
        # Pass a plain (n, 1) array: the scaler was fitted on the target column
        # only, and the prediction column carries a different name.
        restored[col] = y_scaler.inverse_transform(restored[[col]].to_numpy()).ravel()
    return restored


selected_features = ['P2020(mm)','MAX-TEM(C)','MIN-TEM(C)','flow(m^3/s)']
target = 'flow(m^3/s)'
start_year = 1972
pred_years = np.arange(2015,2020)
# NOTE(review): RETRAIN is not consulted anywhere in this cell; tuning is skipped
# whenever model.pickle already exists. Confirm whether it should force a re-tune.
RETRAIN = True
name = 'InputOutputSamples_metesimyr'
batch_size = 32
sequence_length = 12
ystar_col = "pred"

for hydro_station in hydro_stations:
    sample_path = f'../samples/SequenceSamples/{hydro_station}/{name}/'
    result_path = f'../result/LSTMRegressorData/{hydro_station}/{name}/'
    os.makedirs(sample_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)

    pred_frames = []  # one inverse-scaled forecast frame per prediction year
    for pred_year in pred_years:
        model_path = f'../model/12MonthlyFlowProcessForecast_{hydro_station}/LSTMRegressorModel/{name}/pred{pred_year}/'
        os.makedirs(model_path, exist_ok=True)
        model_file = model_path+'model.pickle'
        sim_file = result_path+f'cal_test_sim_{start_year}_{pred_year-1}.csv'

        df = pd.read_csv(
            f'../samples/{name}/{hydro_station}_MeteAVGCalvalFeatureDataForML_PRED{pred_year}.csv',
            index_col=['date'], parse_dates=['date']
        )
        df = df[selected_features]
        # Keep the column order of `selected_features`; the scalers and the
        # dataset both receive features in this order.
        features = [col for col in df.columns if col != target]

        # `test` and `pre` each start one calendar year before the period they are
        # named after, because the first `sequence_length` rows of every split are
        # skipped when predictions are labelled (see _simulate).
        # assumes 12 rows per year (monthly data) — TODO confirm
        cal_raw = df.loc[:f'{pred_year-6}-12-31',:]
        test_raw = df.loc[f'{pred_year-6}-01-01':f'{pred_year-1}-12-31',:]
        pre_raw = df.loc[f'{pred_year-1}-01-01':f'{pred_year}-12-31',:]

        # Fit on the calibration split only, then apply to all three splits.
        X_scaler = MinMaxScaler(feature_range=(0,1))
        Y_scaler = MinMaxScaler(feature_range=(0,1))
        X_scaler.fit(cal_raw[features])
        Y_scaler.fit(cal_raw[[target]])
        cal = _scale_frame(cal_raw, features, target, X_scaler, Y_scaler)
        test = _scale_frame(test_raw, features, target, X_scaler, Y_scaler)
        pre = _scale_frame(pre_raw, features, target, X_scaler, Y_scaler)
        cal.to_csv(sample_path+f'cal_{start_year}_{pred_year-6}.csv')
        test.to_csv(sample_path+f'test_{pred_year-5}_{pred_year-1}.csv')
        pre.to_csv(sample_path+f'pre_{pred_year}.csv')

        cal_dataset = SimulatedSequenceDataset(
            dataframe=cal.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        test_dataset = SimulatedSequenceDataset(
            dataframe=test.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )

        model_cached = os.path.exists(model_file)
        if not model_cached:
            # NOTE(review): the objective receives `test` as its validation set, so
            # the R2 printed for cal+test below is presumably not out-of-sample.
            # NOTE(review): Objective gets its own path, distinct from model_path —
            # confirm this is intended.
            objective = Objective(
                    train_dataset=cal_dataset,
                    val_dataset=test_dataset,
                    num_epoch=1000,
                    batch_size=batch_size,
                    shuffle=True,
                    model_path='./scheme/LSTMRegressor/',
            )
            study = optuna.create_study(
                    study_name='example-study',
                    direction='minimize',
                )
            study.optimize(objective, n_trials=100)

            pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])
            complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])

            print("Study statistics: ")
            print("  Number of finished trials: ", len(study.trials))
            print("  Number of pruned trials: ", len(pruned_trials))
            print("  Number of complete trials: ", len(complete_trials))

            print("Best trial:")
            trial = study.best_trial
            save_best_trial(trial, model_path=model_path)
            save_model(trial, model_path=model_path)
            print("  Value: ", trial.value)
            print("  Params: ")
            for key, value in trial.params.items():
                print("    {}: {}".format(key, value))

        # Load once; the same model serves the simulation and the forecast.
        model = load_model(model_file=model_file).to(DEVICE)

        if model_cached and os.path.exists(sim_file):
            df_out = pd.read_csv(sim_file, index_col=['date'], parse_dates=['date'])
        else:
            # Freshly tuned model, or a saved model whose simulation CSV is missing:
            # (re)build the simulation instead of failing on the missing file.
            cal_ = _simulate(model, cal_dataset, cal, target, ystar_col, sequence_length, 64)
            test_ = _simulate(model, test_dataset, test, target, ystar_col, sequence_length, 64)
            df_out = _inverse_scale(pd.concat((cal_, test_))[[target, ystar_col]], Y_scaler)
            df_out = df_out.rename(columns={target: 'obs'})
            df_out.to_csv(sim_file)
        print(f'R2 for sim at {hydro_station}({start_year}-{pred_year-1})',r2_score(df_out['obs'],df_out[ystar_col]))
        if model_cached:
            print(f'model already exists in {model_path}')

        # Forecast the prediction year from the persisted, scaled `pre` sample.
        pre = pd.read_csv(sample_path+f'pre_{pred_year}.csv',index_col=['date'], parse_dates=['date'])
        pre_dataset = SimulatedSequenceDataset(
            dataframe=pre.copy(),
            target=target,
            features=features,
            sequence_length=sequence_length
        )
        pre_ = _simulate(model, pre_dataset, pre, target, ystar_col, sequence_length, batch_size)
        pred_frames.append(_inverse_scale(pre_, Y_scaler))

    # Concatenate once per station instead of growing a frame inside the loop.
    pred_df = pd.concat(pred_frames, axis=0).rename(columns={target: 'obs'})
    print(f'R2 for pred at {hydro_station}({pred_years[0]}-{pred_years[-1]})',r2_score(pred_df['obs'],pred_df[ystar_col]))
    pred_df.to_csv(result_path+f'vif_lstm_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Observed vs. predicted flow over all prediction years for this station.
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df['obs'], mode='lines', name='obs'))
    fig.add_trace(go.Scatter(x=pred_df.index, y=pred_df[ystar_col], mode='lines', name='pred'))
    fig.update_layout(
        title=f'{hydro_station}: observed vs. predicted flow ({pred_years[0]}-{pred_years[-1]})',
        xaxis_title='date',
        yaxis_title=target,
    )
    fig.show()

[I 2024-10-29 21:40:53,090] A new study created in memory with name: example-study




[I 2024-10-29 21:41:34,438] Trial 0 finished with value: 0.0029427498811855912 and parameters: {'num_layers': 3, 'hidden_size': 104, 'lstm_dropout': 0.1282230949305253, 'dense_dropout': 0.010771193831729309, 'learning_rate': 0.01365923970744934}. Best is trial 0 with value: 0.0029427498811855912.
[I 2024-10-29 21:42:16,798] Trial 1 finished with value: 0.013283176813274622 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.479898610523583, 'dense_dropout': 0.266965389228919, 'learning_rate': 1.839001818641712e-05}. Best is trial 0 with value: 0.0029427498811855912.
[I 2024-10-29 21:43:00,860] Trial 2 finished with value: 0.00395623967051506 and parameters: {'num_layers': 3, 'hidden_size': 88, 'lstm_dropout': 0.1548382453141704, 'dense_dropout': 0.21955736376697027, 'learning_rate': 0.013415285606111023}. Best is trial 0 with value: 0.0029427498811855912.
[I 2024-10-29 21:43:34,926] Trial 3 finished with value: 0.007723681163042784 and parameters: {'num_layers': 1, '

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  75
  Number of complete trials:  25
Best trial:
  Value:  0.0019671354675665498
  Params: 
    num_layers: 2
    hidden_size: 72
    lstm_dropout: 0.14044621251482972
    dense_dropout: 0.05275544516766682
    learning_rate: 0.015476881437324523
R2 for sim at Tangnaihai(1972-2014) 0.9605971176455003


[I 2024-10-29 21:56:27,634] Trial 0 finished with value: 0.0036265439121052623 and parameters: {'num_layers': 1, 'hidden_size': 112, 'lstm_dropout': 0.3869414340554768, 'dense_dropout': 0.11454429311534758, 'learning_rate': 0.001028949843289967}. Best is trial 0 with value: 0.0036265439121052623.
[I 2024-10-29 21:57:19,564] Trial 1 finished with value: 0.11066295951604843 and parameters: {'num_layers': 3, 'hidden_size': 64, 'lstm_dropout': 0.16611703613439688, 'dense_dropout': 0.3685412746689556, 'learning_rate': 0.07926088725229255}. Best is trial 0 with value: 0.0036265439121052623.
[I 2024-10-29 21:58:09,886] Trial 2 finished with value: 0.005450853379443288 and parameters: {'num_layers': 3, 'hidden_size': 128, 'lstm_dropout': 0.25661614947998873, 'dense_dropout': 0.3229598600892425, 'learning_rate': 0.01078214106169636}. Best is trial 0 with value: 0.0036265439121052623.
[I 2024-10-29 21:58:50,886] Trial 3 finished with value: 0.0036571924574673176 and parameters: {'num_layers': 1,

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  76
  Number of complete trials:  24
Best trial:
  Value:  0.0016205926658585668
  Params: 
    num_layers: 2
    hidden_size: 64
    lstm_dropout: 0.0756546407129671
    dense_dropout: 0.028002001212297664
    learning_rate: 0.007327891968750374
R2 for sim at Tangnaihai(1972-2015) 0.9042078095657861


[I 2024-10-29 22:14:23,686] Trial 0 finished with value: 0.0028185234405100346 and parameters: {'num_layers': 1, 'hidden_size': 8, 'lstm_dropout': 0.20847853078418477, 'dense_dropout': 0.16900300944862812, 'learning_rate': 0.0004476053649821325}. Best is trial 0 with value: 0.0028185234405100346.
[I 2024-10-29 22:14:59,469] Trial 1 finished with value: 0.0027716950280591846 and parameters: {'num_layers': 2, 'hidden_size': 8, 'lstm_dropout': 0.42216177074252625, 'dense_dropout': 0.1119274214114605, 'learning_rate': 0.00039500535925289195}. Best is trial 1 with value: 0.0027716950280591846.
[I 2024-10-29 22:15:34,824] Trial 2 finished with value: 0.002937315497547388 and parameters: {'num_layers': 2, 'hidden_size': 80, 'lstm_dropout': 0.15616603277717278, 'dense_dropout': 0.1714778307725331, 'learning_rate': 0.0019928188830166164}. Best is trial 1 with value: 0.0027716950280591846.
[I 2024-10-29 22:16:13,600] Trial 3 finished with value: 0.008314520586282015 and parameters: {'num_layers'

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  83
  Number of complete trials:  17
Best trial:
  Value:  0.0016952227160800248
  Params: 
    num_layers: 2
    hidden_size: 104
    lstm_dropout: 0.09978079404220064
    dense_dropout: 0.02373738599762578
    learning_rate: 0.006288416036230191
R2 for sim at Tangnaihai(1972-2016) 0.9842467425794008


[I 2024-10-29 22:24:59,534] Trial 0 finished with value: 0.001744485110975802 and parameters: {'num_layers': 3, 'hidden_size': 56, 'lstm_dropout': 0.0034666153819515877, 'dense_dropout': 0.16520164520414982, 'learning_rate': 0.00010637374272516447}. Best is trial 0 with value: 0.001744485110975802.
[I 2024-10-29 22:25:31,146] Trial 1 finished with value: 0.0018343495321460068 and parameters: {'num_layers': 1, 'hidden_size': 8, 'lstm_dropout': 0.3746520699602286, 'dense_dropout': 0.1503651120224196, 'learning_rate': 0.001933460014574254}. Best is trial 0 with value: 0.001744485110975802.
[I 2024-10-29 22:26:11,105] Trial 2 finished with value: 0.0023453013855032623 and parameters: {'num_layers': 3, 'hidden_size': 64, 'lstm_dropout': 0.3812464937256739, 'dense_dropout': 0.07353151368094596, 'learning_rate': 0.0010994537483149328}. Best is trial 0 with value: 0.001744485110975802.
[I 2024-10-29 22:26:46,842] Trial 3 finished with value: 0.005678476067259908 and parameters: {'num_layers': 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  76
  Number of complete trials:  24
Best trial:
  Value:  0.0015684496611356735
  Params: 
    num_layers: 2
    hidden_size: 128
    lstm_dropout: 0.34960983077512137
    dense_dropout: 0.1317653975436949
    learning_rate: 0.007555972346863215
R2 for sim at Tangnaihai(1972-2017) 0.9684844163418892


[I 2024-10-29 22:40:09,226] Trial 0 finished with value: 0.007481476292014122 and parameters: {'num_layers': 3, 'hidden_size': 48, 'lstm_dropout': 0.08549792219455593, 'dense_dropout': 0.45725757047398524, 'learning_rate': 0.00010767151268776313}. Best is trial 0 with value: 0.007481476292014122.
[I 2024-10-29 22:40:45,252] Trial 1 finished with value: 0.004236119915731251 and parameters: {'num_layers': 2, 'hidden_size': 120, 'lstm_dropout': 0.13004127506809948, 'dense_dropout': 0.31399656733803455, 'learning_rate': 2.7077730388116194e-05}. Best is trial 1 with value: 0.004236119915731251.
[I 2024-10-29 22:41:19,125] Trial 2 finished with value: 0.001972231490071863 and parameters: {'num_layers': 1, 'hidden_size': 80, 'lstm_dropout': 0.34530878336518434, 'dense_dropout': 0.1498964807481422, 'learning_rate': 0.00019657545260686287}. Best is trial 2 with value: 0.001972231490071863.
[I 2024-10-29 22:41:57,217] Trial 3 finished with value: 0.003349585342220962 and parameters: {'num_layers

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  70
  Number of complete trials:  30
Best trial:
  Value:  0.001972231490071863
  Params: 
    num_layers: 1
    hidden_size: 80
    lstm_dropout: 0.34530878336518434
    dense_dropout: 0.1498964807481422
    learning_rate: 0.00019657545260686287
R2 for sim at Tangnaihai(1972-2018) 0.8853748199047432
R2 for pred at Tangnaihai(2015-2019) 0.4602824174000758


[I 2024-10-29 22:57:58,041] A new study created in memory with name: example-study




[I 2024-10-29 22:58:30,224] Trial 0 finished with value: 0.005039786221459508 and parameters: {'num_layers': 2, 'hidden_size': 128, 'lstm_dropout': 0.3837979405429701, 'dense_dropout': 0.2102781384593898, 'learning_rate': 0.006051763688131922}. Best is trial 0 with value: 0.005039786221459508.
[I 2024-10-29 22:59:07,513] Trial 1 finished with value: 0.011875613126903772 and parameters: {'num_layers': 3, 'hidden_size': 72, 'lstm_dropout': 0.23994410395064242, 'dense_dropout': 0.3192583117320796, 'learning_rate': 0.010256104125578718}. Best is trial 0 with value: 0.005039786221459508.
[I 2024-10-29 22:59:37,164] Trial 2 finished with value: 0.009165474213659763 and parameters: {'num_layers': 1, 'hidden_size': 8, 'lstm_dropout': 0.2444265249505156, 'dense_dropout': 0.23050712083876002, 'learning_rate': 0.018044505160684808}. Best is trial 0 with value: 0.005039786221459508.
[I 2024-10-29 23:00:08,739] Trial 3 finished with value: 0.011563865002244711 and parameters: {'num_layers': 1, 'hid

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  82
  Number of complete trials:  18
Best trial:
  Value:  0.0020101724367123097
  Params: 
    num_layers: 1
    hidden_size: 96
    lstm_dropout: 0.14905744324514042
    dense_dropout: 0.09600460440345805
    learning_rate: 0.009122659926681924
R2 for sim at Guide(1972-2014) 0.9804427577302067


[I 2024-10-29 23:08:48,460] Trial 0 finished with value: 0.010911772958934307 and parameters: {'num_layers': 3, 'hidden_size': 16, 'lstm_dropout': 0.3158252983335952, 'dense_dropout': 0.4600483984920236, 'learning_rate': 0.00929749453641401}. Best is trial 0 with value: 0.010911772958934307.
[I 2024-10-29 23:09:20,306] Trial 1 finished with value: 0.004645742708817124 and parameters: {'num_layers': 1, 'hidden_size': 48, 'lstm_dropout': 0.4089430401832159, 'dense_dropout': 0.25760608415093156, 'learning_rate': 0.0025734682664049716}. Best is trial 1 with value: 0.004645742708817124.
[I 2024-10-29 23:09:55,425] Trial 2 finished with value: 0.006540243048220873 and parameters: {'num_layers': 2, 'hidden_size': 56, 'lstm_dropout': 0.1028886262837847, 'dense_dropout': 0.32496447124635097, 'learning_rate': 0.008686885012980473}. Best is trial 1 with value: 0.004645742708817124.
[I 2024-10-29 23:10:30,131] Trial 3 finished with value: 0.027486877515912056 and parameters: {'num_layers': 2, 'hid

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  77
  Number of complete trials:  23
Best trial:
  Value:  0.0014744261570740491
  Params: 
    num_layers: 2
    hidden_size: 32
    lstm_dropout: 0.08144821935978117
    dense_dropout: 0.03393562912047049
    learning_rate: 0.030602135756906013
R2 for sim at Guide(1972-2015) 0.9767882251352312


[I 2024-10-29 23:22:29,036] Trial 0 finished with value: 0.006637463346123695 and parameters: {'num_layers': 3, 'hidden_size': 128, 'lstm_dropout': 0.10165517545806402, 'dense_dropout': 0.374142434838248, 'learning_rate': 0.018162015568805177}. Best is trial 0 with value: 0.006637463346123695.
[I 2024-10-29 23:22:59,991] Trial 1 finished with value: 0.003372400242369622 and parameters: {'num_layers': 1, 'hidden_size': 72, 'lstm_dropout': 0.4165166601231408, 'dense_dropout': 0.08775005871299874, 'learning_rate': 0.0002788730285381155}. Best is trial 1 with value: 0.003372400242369622.
[I 2024-10-29 23:23:32,488] Trial 2 finished with value: 0.0030020788544788957 and parameters: {'num_layers': 1, 'hidden_size': 128, 'lstm_dropout': 0.4328812359548484, 'dense_dropout': 0.1320389636069224, 'learning_rate': 0.006937298929620722}. Best is trial 2 with value: 0.0030020788544788957.
[I 2024-10-29 23:24:05,242] Trial 3 finished with value: 0.012115628458559513 and parameters: {'num_layers': 1, 

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  70
  Number of complete trials:  30
Best trial:
  Value:  0.0020737952436320484
  Params: 
    num_layers: 1
    hidden_size: 104
    lstm_dropout: 0.4690840003458511
    dense_dropout: 0.06607592760975245
    learning_rate: 0.013754710732721812
R2 for sim at Guide(1972-2016) 0.9758245290898127


[I 2024-10-29 23:39:21,037] Trial 0 finished with value: 0.0028683216660283506 and parameters: {'num_layers': 3, 'hidden_size': 120, 'lstm_dropout': 0.052143696705099774, 'dense_dropout': 0.18635058056136122, 'learning_rate': 0.011061575973005958}. Best is trial 0 with value: 0.0028683216660283506.
[I 2024-10-29 23:39:53,657] Trial 1 finished with value: 0.0026840224163606763 and parameters: {'num_layers': 1, 'hidden_size': 112, 'lstm_dropout': 0.4823409708377489, 'dense_dropout': 0.12381678811718277, 'learning_rate': 0.0004311272152294906}. Best is trial 1 with value: 0.0026840224163606763.
[I 2024-10-29 23:40:34,699] Trial 2 finished with value: 0.005596227943897247 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.17560382663259966, 'dense_dropout': 0.3801632842189898, 'learning_rate': 0.00327128733949433}. Best is trial 1 with value: 0.0026840224163606763.
[I 2024-10-29 23:41:11,157] Trial 3 finished with value: 0.003964953823015094 and parameters: {'num_layers

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  85
  Number of complete trials:  15
Best trial:
  Value:  0.001929771387949586
  Params: 
    num_layers: 2
    hidden_size: 96
    lstm_dropout: 0.3194572908991903
    dense_dropout: 0.08926836786981819
    learning_rate: 0.002343118608045688
R2 for sim at Guide(1972-2017) 0.9734193713790539


[I 2024-10-29 23:48:39,760] Trial 0 finished with value: 0.0033306835684925318 and parameters: {'num_layers': 1, 'hidden_size': 32, 'lstm_dropout': 0.07383545608798159, 'dense_dropout': 0.21901484843751406, 'learning_rate': 0.023814304130457285}. Best is trial 0 with value: 0.0033306835684925318.
[I 2024-10-29 23:49:23,038] Trial 1 finished with value: 0.004919687868095934 and parameters: {'num_layers': 3, 'hidden_size': 56, 'lstm_dropout': 0.29076502072609844, 'dense_dropout': 0.11730759794372464, 'learning_rate': 0.0009007151578207241}. Best is trial 0 with value: 0.0033306835684925318.
[I 2024-10-29 23:50:04,484] Trial 2 finished with value: 0.003772932104766369 and parameters: {'num_layers': 3, 'hidden_size': 128, 'lstm_dropout': 0.32224901002784473, 'dense_dropout': 0.31489014631229306, 'learning_rate': 0.00012565449369278915}. Best is trial 0 with value: 0.0033306835684925318.
[I 2024-10-29 23:50:44,588] Trial 3 finished with value: 0.004823604365810752 and parameters: {'num_laye

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  88
  Number of complete trials:  12
Best trial:
  Value:  0.0030223423382267356
  Params: 
    num_layers: 1
    hidden_size: 56
    lstm_dropout: 0.4030007858714732
    dense_dropout: 0.19463959737927894
    learning_rate: 0.006538097448561421
R2 for sim at Guide(1972-2018) 0.9243946187096396
R2 for pred at Guide(2015-2019) 0.3937915192762437


[I 2024-10-29 23:55:45,154] A new study created in memory with name: example-study




[I 2024-10-29 23:56:22,413] Trial 0 finished with value: 0.004424882587045431 and parameters: {'num_layers': 3, 'hidden_size': 96, 'lstm_dropout': 0.05329230183178052, 'dense_dropout': 0.14149782950135886, 'learning_rate': 0.00012615349659429858}. Best is trial 0 with value: 0.004424882587045431.
[I 2024-10-29 23:56:59,931] Trial 1 finished with value: 0.011758073698729277 and parameters: {'num_layers': 3, 'hidden_size': 64, 'lstm_dropout': 0.29565202490854914, 'dense_dropout': 0.37128040707822285, 'learning_rate': 2.3888502644261756e-05}. Best is trial 0 with value: 0.004424882587045431.
[I 2024-10-29 23:57:30,463] Trial 2 finished with value: 0.002664370695129037 and parameters: {'num_layers': 1, 'hidden_size': 120, 'lstm_dropout': 0.3486950393967817, 'dense_dropout': 0.11454501717692644, 'learning_rate': 0.005444201342440168}. Best is trial 2 with value: 0.002664370695129037.
[I 2024-10-29 23:57:59,868] Trial 3 finished with value: 0.005656398367136717 and parameters: {'num_layers':

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  81
  Number of complete trials:  19
Best trial:
  Value:  0.001643418741878122
  Params: 
    num_layers: 1
    hidden_size: 96
    lstm_dropout: 0.2657863837935563
    dense_dropout: 9.224322284129283e-05
    learning_rate: 0.010968059029772662
R2 for sim at Xunhua(1972-2014) 0.9490708799080532


[I 2024-10-30 00:06:32,356] Trial 0 finished with value: 0.007727289106696844 and parameters: {'num_layers': 3, 'hidden_size': 56, 'lstm_dropout': 0.3725652560567853, 'dense_dropout': 0.135779310295093, 'learning_rate': 1.1820988455635846e-05}. Best is trial 0 with value: 0.007727289106696844.
[I 2024-10-30 00:07:12,202] Trial 1 finished with value: 0.012362116482108831 and parameters: {'num_layers': 3, 'hidden_size': 32, 'lstm_dropout': 0.17941260801753345, 'dense_dropout': 0.45859973566565987, 'learning_rate': 0.007829695578722165}. Best is trial 0 with value: 0.007727289106696844.
[I 2024-10-30 00:07:44,868] Trial 2 finished with value: 0.009852851275354624 and parameters: {'num_layers': 1, 'hidden_size': 8, 'lstm_dropout': 0.041971428741438, 'dense_dropout': 0.3702943596272181, 'learning_rate': 4.9293960567212994e-05}. Best is trial 0 with value: 0.007727289106696844.
[I 2024-10-30 00:08:23,662] Trial 3 finished with value: 0.0032376209273934364 and parameters: {'num_layers': 3, 'h

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  61
  Number of complete trials:  39
Best trial:
  Value:  0.001320664130616933
  Params: 
    num_layers: 1
    hidden_size: 112
    lstm_dropout: 0.23350632378447495
    dense_dropout: 0.02989022026852058
    learning_rate: 0.00543688100185827
R2 for sim at Xunhua(1972-2015) 0.9902632504524715


[I 2024-10-30 00:28:50,649] Trial 0 finished with value: 0.004093878553248942 and parameters: {'num_layers': 2, 'hidden_size': 24, 'lstm_dropout': 0.011106488118667712, 'dense_dropout': 0.08948501691503785, 'learning_rate': 0.0018243856574765756}. Best is trial 0 with value: 0.004093878553248942.
[I 2024-10-30 00:29:30,715] Trial 1 finished with value: 0.002630230621434748 and parameters: {'num_layers': 3, 'hidden_size': 64, 'lstm_dropout': 0.14874828066165502, 'dense_dropout': 0.09326025373226349, 'learning_rate': 6.46813879745441e-05}. Best is trial 1 with value: 0.002630230621434748.
[I 2024-10-30 00:30:03,291] Trial 2 finished with value: 0.003490010218229145 and parameters: {'num_layers': 1, 'hidden_size': 96, 'lstm_dropout': 0.01425614744028364, 'dense_dropout': 0.16995651907528808, 'learning_rate': 0.00015903899011093147}. Best is trial 1 with value: 0.002630230621434748.
[I 2024-10-30 00:30:44,712] Trial 3 finished with value: 0.003014675807207823 and parameters: {'num_layers':

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  63
  Number of complete trials:  37
Best trial:
  Value:  0.0016664667637087405
  Params: 
    num_layers: 1
    hidden_size: 128
    lstm_dropout: 0.1751526390038343
    dense_dropout: 0.07014923951027559
    learning_rate: 0.018187861352936367
R2 for sim at Xunhua(1972-2016) 0.9708941843686503


[I 2024-10-30 00:50:11,612] Trial 0 finished with value: 0.00376211223192513 and parameters: {'num_layers': 1, 'hidden_size': 32, 'lstm_dropout': 0.36442384345226525, 'dense_dropout': 0.05713672153974031, 'learning_rate': 6.255224480150893e-05}. Best is trial 0 with value: 0.00376211223192513.
[I 2024-10-30 00:50:44,361] Trial 1 finished with value: 0.003265012288466096 and parameters: {'num_layers': 1, 'hidden_size': 56, 'lstm_dropout': 0.10648842672945968, 'dense_dropout': 0.19677539553916812, 'learning_rate': 8.356618131931402e-05}. Best is trial 1 with value: 0.003265012288466096.
[I 2024-10-30 00:51:22,623] Trial 2 finished with value: 0.0024285426479764283 and parameters: {'num_layers': 3, 'hidden_size': 104, 'lstm_dropout': 0.4086900580756615, 'dense_dropout': 0.1517723088204047, 'learning_rate': 7.617361486447501e-05}. Best is trial 2 with value: 0.0024285426479764283.
[I 2024-10-30 00:51:54,613] Trial 3 finished with value: 0.003245683154091239 and parameters: {'num_layers': 1

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  57
  Number of complete trials:  43
Best trial:
  Value:  0.001650667458306998
  Params: 
    num_layers: 2
    hidden_size: 72
    lstm_dropout: 0.032399367231173176
    dense_dropout: 0.12860833882048817
    learning_rate: 0.007965345776222715
R2 for sim at Xunhua(1972-2017) 0.9526204865089549


[I 2024-10-30 01:16:29,368] Trial 0 finished with value: 0.003747583949007094 and parameters: {'num_layers': 3, 'hidden_size': 80, 'lstm_dropout': 0.37964189057182895, 'dense_dropout': 0.2442765563514594, 'learning_rate': 0.00021843807279510306}. Best is trial 0 with value: 0.003747583949007094.
[I 2024-10-30 01:17:12,465] Trial 1 finished with value: 0.005353954387828708 and parameters: {'num_layers': 3, 'hidden_size': 8, 'lstm_dropout': 0.3809353769489302, 'dense_dropout': 0.04322386628640523, 'learning_rate': 0.0668430382733256}. Best is trial 0 with value: 0.003747583949007094.
[I 2024-10-30 01:17:46,750] Trial 2 finished with value: 0.009128424571827054 and parameters: {'num_layers': 1, 'hidden_size': 72, 'lstm_dropout': 0.22838095608471365, 'dense_dropout': 0.48932519515732886, 'learning_rate': 0.0010045778213617932}. Best is trial 0 with value: 0.003747583949007094.
[I 2024-10-30 01:18:31,458] Trial 3 finished with value: 0.006089309579692781 and parameters: {'num_layers': 3, 'h

Study statistics: 
  Number of finished trials:  100
  Number of pruned trials:  76
  Number of complete trials:  24
Best trial:
  Value:  0.0027137468568980694
  Params: 
    num_layers: 2
    hidden_size: 88
    lstm_dropout: 0.18130728360354695
    dense_dropout: 0.13452296363045613
    learning_rate: 0.004754276830086625
R2 for sim at Xunhua(1972-2018) 0.953830138844613
R2 for pred at Xunhua(2015-2019) 0.3561776205338002
