In [None]:
import numpy as np
import pandas as pd
import tensorflow.keras as keras
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, GRU, Flatten, TimeDistributed
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from pickle import dump
from pickle import load
from numpy import save
from numpy import load as loadBinary
import glob
import os
import json


In [None]:
def createSummaryFile(filename):
    """
    Write an empty results skeleton to a JSON summary file.

    The skeleton is nested as split -> metric -> statistic, i.e.
    ``{"Train"|"Test": {"NSE"|"KGE": {"max"|"min"|"median"|"mean": {}}}}``.
    Every statistic starts out as an empty mapping.

    Parameters:
        filename, str:
            filename(path) of the summary file; an existing file is overwritten

    """

    splits = ("Train", "Test")
    metrics = ("NSE", "KGE")
    statistics = ("max", "min", "median", "mean")

    # Tuple order fixes the key order in the written file.
    skeleton = {
        split: {
            metric: {statistic: {} for statistic in statistics}
            for metric in metrics
        }
        for split in splits
    }

    with open(filename, "w") as handle:
        handle.write(json.dumps(skeleton))

In [None]:
def updateJSON(station_id, NSE_train, NSE_test, KGE_train, KGE_test, jsonFilePath):
    """
    Updates the summary file with results of a gauge

    For each of the four score lists the max, min, median and mean are
    computed and stored under
    ``Results[<"Train"|"Test">][<"NSE"|"KGE">][<statistic>][<station id>]``.
    The file at ``jsonFilePath`` must already contain that nested layout.

    Parameters:
        station_id, int or str:
            id of the gauge which results will be added to summary file.
            It is always stored as a string key, because JSON object keys
            are strings; updating the same gauge twice therefore replaces
            the earlier entry instead of producing a duplicate key.
        NSE_train, list:
            NSE scores of train set
        NSE_test, list:
            NSE scores of test set
        KGE_train, list:
            KGE scores of train set
        KGE_test, list:
            KGE scores of test set
        jsonFilePath, str:
            path of the updated summary file

    """

    scores = {
        "Train": {"NSE": NSE_train, "KGE": KGE_train},
        "Test": {"NSE": NSE_test, "KGE": KGE_test},
    }
    reducers = {"max": np.max, "min": np.min, "median": np.median, "mean": np.mean}

    # Compute everything before touching the file so that bad input (for
    # example an empty score list) fails without side effects.
    # float() turns NumPy scalars such as np.float32 into plain Python floats,
    # which is what the json module can serialize.
    summary = {
        split: {
            metric: {name: float(reduce(values)) for name, reduce in reducers.items()}
            for metric, values in by_metric.items()
        }
        for split, by_metric in scores.items()
    }

    station_key = str(station_id)

    with open(jsonFilePath, "r") as jsonFile:
        Results = json.load(jsonFile)

    for split, by_metric in summary.items():
        for metric, by_statistic in by_metric.items():
            for name, value in by_statistic.items():
                Results[split][metric][name][station_key] = value

    # Serialize first: opening with "w" truncates the file, so a failure
    # during encoding must not happen while the file is already open.
    payload = json.dumps(Results)
    with open(jsonFilePath, "w") as jsonFile:
        jsonFile.write(payload)

In [None]:
def Seg2Seg_main(station_id, resultDir, dataAsBinary, summaryFile="summary_file_path"):
    """
    Main function to get results of Seq2Seq model for given gauge

    Steps:
        1. load ``<station_id>_{train,test}_{x,y}.npy`` from ``dataAsBinary``
        2. min-max scale inputs and targets (scalers are fitted on the
           training arrays only)
        3. hold out the last 20% of the training rows for validation
        4. train a 5-layer GRU encoder / 5-layer GRU decoder network
        5. reload the checkpoint with the lowest validation loss and compute
           NSE and KGE for each of the 120 output steps on the full training
           set and on the test set
        6. write the scores to the summary JSON and to CSV files

    Files written under ``<resultDir><station_id>/``:
        <station_id>_model.h5  best model according to ``val_loss``
        <station_id>_loss.csv  loss per epoch
        <station_id>_eva.csv   NSE/KGE per output step

    Paths are built by plain string concatenation, so ``resultDir`` and
    ``dataAsBinary`` must end with a path separator.

    Parameters:
        station_id, str:
            id of gauge which model will use its training and test data
        resultDir, str:
            directory to store the results
        dataAsBinary, str:
            path to dataset which contains test and training data for each gauge as binary
        summaryFile, str:
            path of the existing JSON summary file that receives the
            aggregated scores of this gauge (see ``updateJSON``)

    """

    def nse(y_true, y_pred):
        """
        Calculate the NSE (Nash-Sutcliffe efficiency) score

        Parameters:
            y_pred, array:
                prediction results
            y_true, array:
                actual results
        """

        return 1-np.sum((y_pred-y_true)**2)/np.sum((y_true-np.mean(y_true))**2)

    def kge(y_true, y_pred):
        """
        Calculate the KGE (Kling-Gupta efficiency) score

        Parameters:
            y_pred, array:
                prediction results
            y_true, array:
                actual results
        """

        kge_r = np.corrcoef(y_true,y_pred)[1][0]      # linear correlation
        kge_a = np.std(y_pred)/np.std(y_true)         # variability ratio
        kge_b = np.mean(y_pred)/np.mean(y_true)       # bias ratio
        return 1-np.sqrt((kge_r-1)**2+(kge_a-1)**2+(kge_b-1)**2)

    # Layout of one flat input row: the first enc_steps*enc_features values
    # feed the encoder, the remaining dec_steps*dec_features values feed the
    # decoder. The targets have one column per decoder step.
    enc_steps, enc_features = 72, 3
    dec_steps, dec_features = 120, 2
    enc_width = enc_steps * enc_features

    def split_inputs(flat_x):
        """Split flat 2-D rows into the (encoder, decoder) 3-D model inputs."""
        encoder_part = flat_x[:, :enc_width].reshape(-1, enc_steps, enc_features)
        decoder_part = flat_x[:, enc_width:].reshape(-1, dec_steps, dec_features)
        return encoder_part, decoder_part

    RESULT_PATH = resultDir
    # makedirs(exist_ok=True) keeps re-runs for the same gauge from failing
    # and also creates resultDir itself when it does not exist yet.
    os.makedirs(RESULT_PATH + station_id, exist_ok=True)
    testname = RESULT_PATH + station_id + '/' + station_id

    DATASET_PATH = dataAsBinary

    train_x = loadBinary(DATASET_PATH + station_id + "_train_x.npy")
    train_y = loadBinary(DATASET_PATH + station_id + "_train_y.npy")
    test_x = loadBinary(DATASET_PATH + station_id + "_test_x.npy")
    test_y = loadBinary(DATASET_PATH + station_id + "_test_y.npy")

    # Fit the scalers on training data only so no test statistics leak in.
    scaler_x = MinMaxScaler()
    scaler_y = MinMaxScaler()
    scaler_x.fit(train_x)
    scaler_y.fit(train_y)

    train_x_scaled = scaler_x.transform(train_x)
    train_y_scaled = scaler_y.transform(train_y)
    test_x_scaled = scaler_x.transform(test_x)

    # shuffle=False: the validation set is the last 20% of the training rows.
    x_train, x_valid, y_train, y_valid = train_test_split(
        train_x_scaled, train_y_scaled, test_size=0.2, shuffle=False)

    x_train_encoder, x_train_decoder = split_inputs(x_train)
    x_valid_encoder, x_valid_decoder = split_inputs(x_valid)

    batch_size = 32
    lr = 0.0001
    epochs = 300
    dim_dense = [128, 64, 64, 32, 32]
    drop = 0.20

    # Encoder: five stacked GRUs; each layer's final state is kept to
    # initialise the decoder layer at the same depth.
    encoder_input = Input(shape=(enc_steps, enc_features))
    encoder_1 = GRU(32, return_state=True, return_sequences=True)
    encoder_output1, encoder_hc1 = encoder_1(encoder_input)
    encoder_2 = GRU(32, return_state=True, return_sequences=True)
    encoder_output2, encoder_hc2 = encoder_2(encoder_output1)
    encoder_3 = GRU(32, return_state=True, return_sequences=True)
    encoder_output3, encoder_hc3 = encoder_3(encoder_output2)
    encoder_4 = GRU(32, return_state=True, return_sequences=True)
    encoder_output4, encoder_hc4 = encoder_4(encoder_output3)
    encoder_5 = GRU(32, return_state=True, return_sequences=True)
    encoder_output5, encoder_hc5 = encoder_5(encoder_output4)

    # Decoder: five stacked GRUs seeded with the matching encoder states.
    decoder_input = Input(shape=(dec_steps, dec_features))
    decoder_1 = GRU(32, return_sequences=True)
    decoder_2 = GRU(32, return_sequences=True)
    decoder_3 = GRU(32, return_sequences=True)
    decoder_4 = GRU(32, return_sequences=True)
    decoder_5 = GRU(32, return_sequences=True)

    x = decoder_1(decoder_input, initial_state=encoder_hc1)
    x = decoder_2(x, initial_state=encoder_hc2)
    x = decoder_3(x, initial_state=encoder_hc3)
    x = decoder_4(x, initial_state=encoder_hc4)
    x = decoder_5(x, initial_state=encoder_hc5)

    # Per-time-step dense head; the final Flatten yields one value per
    # decoder step, matching the layout of the target array.
    for dim in dim_dense:
        x = TimeDistributed(Dense(dim, activation='relu'))(x)
        x = TimeDistributed(Dropout(drop))(x)
    main_out = TimeDistributed(Dense(1, activation='linear'))(x)
    main_out = Flatten()(main_out)
    model = Model(inputs=[encoder_input, decoder_input], outputs=main_out)

    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2,
                      patience=10, cooldown=200, min_lr=1e-8)
    earlystoping = EarlyStopping(monitor='val_loss', min_delta=0,
                            patience=20, verbose=1, mode='auto')
    checkpoint = ModelCheckpoint(testname+'_model.h5', monitor='val_loss', verbose=1,
                    save_best_only=True, mode='min')
    # `learning_rate` is the supported keyword; the old `lr` alias is
    # deprecated and rejected by current Keras releases.
    optimizer = keras.optimizers.RMSprop(learning_rate=lr)

    model.compile(optimizer=optimizer, loss='mse')
    model.summary()

    history = model.fit(
        [x_train_encoder, x_train_decoder], y_train,
        epochs=epochs, batch_size=batch_size,
        validation_data=([x_valid_encoder, x_valid_decoder], y_valid),
        callbacks=[reduce_lr, earlystoping, checkpoint], verbose=1)

    # Note: the 'TestLoss' column holds the validation loss (val_loss);
    # the column name is kept for compatibility with existing result files.
    LossEpoches = pd.DataFrame({
        'TrainLoss': history.history['loss'],
        'TestLoss': history.history['val_loss'],
    })
    LossName = testname + '_loss.csv'
    LossEpoches.to_csv(LossName, index = True)

    # Evaluate the best checkpoint rather than the weights of the last epoch.
    model.load_weights(testname+'_model.h5')

    # Training is finished

    # The arrays loaded and scaled above are unchanged, so they are reused.
    # "Train" scores are computed on the full training set, i.e. including
    # the rows that were held out for validation.
    x_test_encoder, x_test_decoder = split_inputs(test_x_scaled)
    x_all_encoder, x_all_decoder = split_inputs(train_x_scaled)

    y_model_scaled_test = model.predict([x_test_encoder, x_test_decoder])
    y_model_test = scaler_y.inverse_transform(y_model_scaled_test)

    y_model_scaled_train = model.predict([x_all_encoder, x_all_decoder])
    y_model_train = scaler_y.inverse_transform(y_model_scaled_train)

    NSEs_train = []
    KGEs_train = []
    NSEs_test = []
    KGEs_test = []
    # One score per output step, in original (unscaled) units.
    for step in range(dec_steps):
        y_pred_test = y_model_test[:, step]
        y_True_test = test_y[:, step]

        y_pred_train = y_model_train[:, step]
        y_True_train = train_y[:, step]

        NSEs_test.append(nse(y_True_test, y_pred_test))
        KGEs_test.append(kge(y_True_test, y_pred_test))
        NSEs_train.append(nse(y_True_train, y_pred_train))
        KGEs_train.append(kge(y_True_train, y_pred_train))

    updateJSON(station_id, NSEs_train, NSEs_test, KGEs_train, KGEs_test, summaryFile)

    eva = pd.DataFrame({
        'NSE_Test': NSEs_test,
        'KGE_Test': KGEs_test,
        'NSE_Train': NSEs_train,
        'KGE_Train': KGEs_train,
    })
    name_eva = testname + '_eva.csv'
    eva.to_csv(name_eva, index = True)

In [None]:
# Configuration: adjust these two paths before running the notebook.
# Both are used with plain string concatenation further down, so each one
# must end with a path separator.

# Directory holding the unzipped binary dataset
# (<station>_train_x.npy, <station>_train_y.npy, <station>_test_x.npy, <station>_test_y.npy).
dataAsBinary = "binaryData/"

# Directory that receives one sub-folder of results per gauge.
resultDir = "results/"
os.makedirs(resultDir, exist_ok=True)

In [None]:
# Every gauge has a "<station>_train_x.npy" file, so the station ids are
# recovered from those file names. Only the base name is used: glob returns
# paths that include the dataset directory, and the model function prefixes
# that directory again when it loads the arrays.
train_suffix = "_train_x.npy"
train_files = glob.glob(dataAsBinary + "*" + train_suffix)
sensors = sorted({os.path.basename(path)[:-len(train_suffix)] for path in train_files})

In [None]:
# Start from an empty summary file; any file already at this path is overwritten.
createSummaryFile(filename="summary_file_path")

In [None]:
# Train and evaluate one model per gauge; each call creates the gauge's own
# result folder and adds its scores to the summary file.
for station in sensors:
    Seg2Seg_main(station_id=station, resultDir=resultDir, dataAsBinary=dataAsBinary)