In [47]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from MultiOutXGBRegressor import OptunaTuneMultioutXGBRegressorCV,FitFinalMultioutXGBRegressor,predict,save_model,load_model
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
# Use the plain white plotly theme as the default for every figure.
pio.templates.default = 'plotly_white'

# Layout overrides that set the general font and both axis-title fonts to size 8.
plot_template = {
    'layout': go.Layout(
        {
            'font_size': 8,
            'xaxis_title_font_size': 8,
            'yaxis_title_font_size': 8,
        }
    ),
}

# Hydrological stations that every experiment cell iterates over.
hydro_stations = ['Tangnaihai', 'Guide', 'Xunhua']

# Settings forwarded to OptunaTuneMultioutXGBRegressorCV by the experiment cells.
nfolds = 5
n_trials = 100
# Rounds handed to the tuner only; the final fit passes its own value (100).
num_boost_round = 10
n_warmup_steps = 10


1. 基于流域加权平均气象数据、集水区水量平衡数据、集水区流量模拟数据训练XGBoost模型；
2. 对样本数据进行多重共线性检验；
3. 采用在线训练方式，模型参数随着年份更新。


In [48]:
# Experiment on the 'InputOutputSamples_hydrosimyr_swatpsim' sample set.
# For every station, one model per forecast year is trained (or loaded when its
# JSON file already exists); each model then predicts the 12 monthly flows of
# its forecast year, and the stitched forecasts are scored with R^2.
name = 'InputOutputSamples_hydrosimyr_swatpsim'
pred_years = np.arange(2015,2020)  # forecast years 2015..2019
start_year = 1973  # first simulated year
end_year = 2020  # exclusive upper bound of the simulated years
sample_path = f'../samples_mete_wb_vif/{name}/'
# Every CSV read in this cell is indexed by its parsed 'date' column.
csv_kwargs = dict(index_col=['date'], parse_dates=['date'])
for hydro_station in hydro_stations:
    model_path = f'../model_mete_wb_vif/12MonthlyFlowProcessForecast_{hydro_station.lower()}/MultiXGBRegressor/{name}/'
    result_path = f'../result_mete_wb_vif/XGboostPredResult/{hydro_station}/{name}/'
    # exist_ok=True replaces the separate "exists?" check before creating the folders.
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)
    obs_flow = pd.read_csv(f'../data/{hydro_station.lower()}_natural_monthly_flow.csv', **csv_kwargs)
    # Monthly obs/pred table covering start_year .. end_year-1, i.e. exactly the
    # years visited by the full-simulation loop below.
    full_df = pd.DataFrame(
        index=pd.date_range(f'{start_year}-01-01', f'{end_year-1}-12-31', freq='MS'),
        columns=['obs','pred'],
    )
    full_df.index.name = 'date'
    full_feature_samples = pd.read_csv(f'../samples_mete_wb_vif/InputOutputSamples_hismete_swatpsim/{hydro_station}_meteavg_vif_feature_samples.csv', **csv_kwargs)
    # Not used further in this cell; kept so the variable stays available afterwards.
    full_target_samples = pd.read_csv(f'../samples_mete_wb_vif/InputOutputSamples_hismete_swatpsim/{hydro_station}_meteavg_vif_target_samples.csv', **csv_kwargs)
    # January rows only; filtered once instead of once per simulated year.
    full_jan_features = full_feature_samples.loc[full_feature_samples.index.month == 1]

    for pred_year in pred_years:
        # Monthly obs/pred table for the calibration period of this forecast year.
        df = pd.DataFrame(
            index=pd.date_range(f'{start_year}-01-01',f'{pred_year}-12-31',freq='MS'),
            columns=['obs','pred']
        )
        df.index.name='date'

        model_file = model_path+f'model_{pred_year}.json'
        if not os.path.exists(model_file):
            feature_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv', **csv_kwargs)
            target_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv', **csv_kwargs)

            # Train only on samples dated before the forecast year.
            feature_samples = feature_samples.loc[:f'{pred_year-1}-12-31']
            target_samples = target_samples.loc[:f'{pred_year-1}-12-31']

            best_params = OptunaTuneMultioutXGBRegressorCV(
                train_X=feature_samples,
                train_Y=target_samples,
                nfolds=nfolds,
                n_trials=n_trials,
                num_boost_round=num_boost_round,
                n_warmup_steps=n_warmup_steps
            )
            print(best_params)
            # NOTE(review): the trailing 100 is presumably the boosting-round count
            # of the final fit - confirm against MultiOutXGBRegressor.
            model = FitFinalMultioutXGBRegressor(feature_samples,target_samples,best_params,100)
            # Predictions on the training samples; not used further in this cell.
            calval_pred = predict(model,feature_samples)

            jan_features = feature_samples.loc[feature_samples.index.month == 1]
            for sim_year in range(start_year,pred_year):
                year_slice = slice(f'{sim_year}-01-01', f'{sim_year}-12-31')
                # Re-wrap the January row(s) of sim_year under a single date label.
                features = jan_features.loc[f'{sim_year}']
                year_feature_samples = pd.DataFrame(
                    features.values,
                    index=[f'{sim_year}-01-01'],
                    columns=feature_samples.columns
                )
                year_feature_samples.index.name='date'
                year_pred = predict(model,year_feature_samples)
                # Row 0 of the prediction fills the months of sim_year.
                df.loc[year_slice,'pred'] = year_pred[0]
                df.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values

            df.to_csv(result_path+f'XGBoost_sim_{start_year}_{pred_year}.csv')

            save_model(model,model_file)

        else:
            model = load_model(model_file)
            print(f'model_{pred_year}.json loaded')
            # The full-period simulation is produced only with the first forecast
            # year's model, and only when that model was loaded from disk.
            if pred_year == pred_years[0]:
                for sim_year in range(start_year,end_year):
                    year_slice = slice(f'{sim_year}-01-01', f'{sim_year}-12-31')
                    full_features = full_jan_features.loc[f'{sim_year}']
                    # Debug output: the feature row fed to the model for this year.
                    print(full_features)
                    year_full_feature_samples = pd.DataFrame(
                        full_features.values,
                        index=[f'{sim_year}-01-01'],
                        columns=full_feature_samples.columns
                    )
                    year_full_feature_samples.index.name='date'
                    year_pred = predict(model,year_full_feature_samples)
                    full_df.loc[year_slice,'pred'] = year_pred[0]
                    full_df.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values
                full_df.to_csv(result_path+f'{hydro_station}_full_sim_{start_year}_{end_year-1}.csv')

                fig = go.Figure()
                fig.add_scatter(x=full_df.index, y=full_df['obs'], name='ObsFlow', line=dict(dash='solid'))
                fig.add_scatter(x=full_df.index, y=full_df['pred'], name='XGBoost', line=dict(dash='dash'))
                fig.show()


    #! Perform Predictions
    # model_path and obs_flow from the top of this station iteration are reused;
    # neither has changed, so they are not rebuilt / re-read here.
    full_pred = pd.DataFrame(index=pd.date_range(f'{pred_years[0]}-01-01',f'{pred_years[-1]}-12-31',freq='MS'),columns=['obs','pred'])
    full_pred.index.name = 'date'
    for pred_year in pred_years:
        year_slice = slice(f'{pred_year}-01-01', f'{pred_year}-12-31')
        model = load_model(model_path+f'model_{pred_year}.json')
        feature_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv', **csv_kwargs)
        target_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv', **csv_kwargs)

        # Keep only the forecast-year rows, labelled with the last date in the file.
        feature_samples = pd.DataFrame(
            feature_samples.loc[year_slice].values,
            index=[feature_samples.index[-1]],
            columns=feature_samples.columns)
        # Forecast-year targets; not used further in this cell.
        target_samples = pd.DataFrame(
            target_samples.loc[year_slice].values,
            index=[target_samples.index[-1]],
            columns=target_samples.columns)
        test_pred = predict(model,feature_samples)

        full_pred.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values
        full_pred.loc[year_slice,'pred'] = test_pred[0]

    print('XGBoost',r2_score(full_pred['obs'],full_pred['pred']))
    full_pred.to_csv(result_path+f'XGBoost_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Visualize the results using plotly
    fig = go.Figure()
    fig.add_scatter(x=full_pred.index,y=full_pred['obs'],name='ObsFlow',line=dict(dash='solid'))
    fig.add_scatter(x=full_pred.index,y=full_pred['pred'],name='XGBoost',line=dict(dash='dash'))
    fig.show()



model_2015.json loaded
            P2020(mm)_t-12  P2020(mm)_t-11  P2020(mm)_t-10  P2020(mm)_t-9  \
date                                                                        
1973-01-01        0.636387         9.52351        5.880554      21.773792   

            P2020(mm)_t-8  P2020(mm)_t-7  P2020(mm)_t-6  P2020(mm)_t-5  \
date                                                                     
1973-01-01      63.193324      70.178479     145.505265      68.498457   

            P2020(mm)_t-4  P2020(mm)_t-3  ...  SWATPlusSimFlow_t+3  \
date                                      ...                        
1973-01-01      53.863039      15.348229  ...                247.4   

            SWATPlusSimFlow_t+4  SWATPlusSimFlow_t+5  SWATPlusSimFlow_t+6  \
date                                                                        
1973-01-01                261.1                682.2                913.3   

            SWATPlusSimFlow_t+7  SWATPlusSimFlow_t+8  SWATPlusSimFlow_t+9  \
da

model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
XGBoost 0.39388964688040373


model_2015.json loaded
            P2020(mm)_t-12  P2020(mm)_t-11  P2020(mm)_t-10  P2020(mm)_t-9  \
date                                                                        
1973-01-01        0.549118        8.645502        5.254902      22.290889   

            P2020(mm)_t-8  P2020(mm)_t-7  P2020(mm)_t-6  P2020(mm)_t-5  \
date                                                                     
1973-01-01      63.533611      66.124014     136.676488      70.302157   

            P2020(mm)_t-4  P2020(mm)_t-3  ...  SWATPlusSimFlow_t+3  \
date                                      ...                        
1973-01-01      48.600528       13.32386  ...                274.6   

            SWATPlusSimFlow_t+4  SWATPlusSimFlow_t+5  SWATPlusSimFlow_t+6  \
date                                                                        
1973-01-01                285.3                617.1                906.1   

            SWATPlusSimFlow_t+7  SWATPlusSimFlow_t+8  SWATPlusSimFlow_t+9  \
da

model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
XGBoost 0.25557779783207246


model_2015.json loaded
            P2020(mm)_t-12  P2020(mm)_t-11  P2020(mm)_t-10  P2020(mm)_t-9  \
date                                                                        
1973-01-01         0.52228        8.634969        5.209209       22.52349   

            P2020(mm)_t-8  P2020(mm)_t-7  P2020(mm)_t-6  P2020(mm)_t-5  \
date                                                                     
1973-01-01      63.550052      64.797354     132.720728      69.830409   

            P2020(mm)_t-4  P2020(mm)_t-3  ...  SWATPlusSimFlow_t+3  \
date                                      ...                        
1973-01-01      46.654794       12.89455  ...                212.2   

            SWATPlusSimFlow_t+4  SWATPlusSimFlow_t+5  SWATPlusSimFlow_t+6  \
date                                                                        
1973-01-01                249.7                504.4                653.3   

            SWATPlusSimFlow_t+7  SWATPlusSimFlow_t+8  SWATPlusSimFlow_t+9  \
da

model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
XGBoost 0.3169536182515619


In [49]:
# Experiment on the 'InputOutputSamples_hydrosimyr' sample set.
# For every station, one model per forecast year is trained (or loaded when its
# JSON file already exists); each model then predicts the 12 monthly flows of
# its forecast year, and the stitched forecasts are scored with R^2.
name = 'InputOutputSamples_hydrosimyr'
pred_years = np.arange(2015,2020)  # forecast years 2015..2019
start_year = 1973  # first simulated year
# Exclusive upper bound of the simulated years. Defined here so this cell does
# not depend on a value left over from a previously executed cell.
end_year = 2020
sample_path = f'../samples_mete_wb_vif/{name}/'
# Every CSV read in this cell is indexed by its parsed 'date' column.
csv_kwargs = dict(index_col=['date'], parse_dates=['date'])
for hydro_station in hydro_stations:
    model_path = f'../model_mete_wb_vif/12MonthlyFlowProcessForecast_{hydro_station.lower()}/MultiXGBRegressor/{name}/'
    result_path = f'../result_mete_wb_vif/XGboostPredResult/{hydro_station}/{name}/'
    # exist_ok=True replaces the separate "exists?" check before creating the folders.
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)
    obs_flow = pd.read_csv(f'../data/{hydro_station.lower()}_natural_monthly_flow.csv', **csv_kwargs)
    # Monthly obs/pred table covering start_year .. end_year-1, i.e. exactly the
    # years visited by the full-simulation loop below.
    full_df = pd.DataFrame(
        index=pd.date_range(f'{start_year}-01-01', f'{end_year-1}-12-31', freq='MS'),
        columns=['obs','pred'],
    )
    full_df.index.name = 'date'
    full_feature_samples = pd.read_csv(f'../samples_mete_wb_vif/InputOutputSamples_hismete/{hydro_station}_meteavg_vif_feature_samples.csv', **csv_kwargs)
    # Not used further in this cell; kept so the variable stays available afterwards.
    full_target_samples = pd.read_csv(f'../samples_mete_wb_vif/InputOutputSamples_hismete/{hydro_station}_meteavg_vif_target_samples.csv', **csv_kwargs)
    # January rows only; filtered once instead of once per simulated year.
    full_jan_features = full_feature_samples.loc[full_feature_samples.index.month == 1]

    for pred_year in pred_years:
        # Monthly obs/pred table for the calibration period of this forecast year.
        df = pd.DataFrame(
            index=pd.date_range(f'{start_year}-01-01',f'{pred_year}-12-31',freq='MS'),
            columns=['obs','pred']
        )
        df.index.name='date'

        model_file = model_path+f'model_{pred_year}.json'
        if not os.path.exists(model_file):
            feature_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv', **csv_kwargs)
            target_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv', **csv_kwargs)

            # Train only on samples dated before the forecast year.
            feature_samples = feature_samples.loc[:f'{pred_year-1}-12-31']
            target_samples = target_samples.loc[:f'{pred_year-1}-12-31']

            best_params = OptunaTuneMultioutXGBRegressorCV(
                train_X=feature_samples,
                train_Y=target_samples,
                nfolds=nfolds,
                n_trials=n_trials,
                num_boost_round=num_boost_round,
                n_warmup_steps=n_warmup_steps
            )
            print(best_params)
            # NOTE(review): the trailing 100 is presumably the boosting-round count
            # of the final fit - confirm against MultiOutXGBRegressor.
            model = FitFinalMultioutXGBRegressor(feature_samples,target_samples,best_params,100)
            # Predictions on the training samples; not used further in this cell.
            calval_pred = predict(model,feature_samples)

            jan_features = feature_samples.loc[feature_samples.index.month == 1]
            for sim_year in range(start_year,pred_year):
                year_slice = slice(f'{sim_year}-01-01', f'{sim_year}-12-31')
                # Re-wrap the January row(s) of sim_year under a single date label.
                features = jan_features.loc[f'{sim_year}']
                year_feature_samples = pd.DataFrame(
                    features.values,
                    index=[f'{sim_year}-01-01'],
                    columns=feature_samples.columns
                )
                year_feature_samples.index.name='date'
                year_pred = predict(model,year_feature_samples)
                # Row 0 of the prediction fills the months of sim_year.
                df.loc[year_slice,'pred'] = year_pred[0]
                df.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values

            df.to_csv(result_path+f'XGBoost_sim_{start_year}_{pred_year}.csv')

            save_model(model,model_file)

        else:
            model = load_model(model_file)
            print(f'model_{pred_year}.json loaded')
            # The full-period simulation is produced only with the first forecast
            # year's model, and only when that model was loaded from disk.
            if pred_year == pred_years[0]:
                for sim_year in range(start_year,end_year):
                    year_slice = slice(f'{sim_year}-01-01', f'{sim_year}-12-31')
                    full_features = full_jan_features.loc[f'{sim_year}']
                    year_full_feature_samples = pd.DataFrame(
                        full_features.values,
                        index=[f'{sim_year}-01-01'],
                        columns=full_feature_samples.columns
                    )
                    year_full_feature_samples.index.name='date'
                    year_pred = predict(model,year_full_feature_samples)
                    full_df.loc[year_slice,'pred'] = year_pred[0]
                    full_df.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values
                full_df.to_csv(result_path+f'{hydro_station}_full_sim_{start_year}_{end_year-1}.csv')
                fig = go.Figure()
                fig.add_scatter(x=full_df.index, y=full_df['obs'], name='ObsFlow', line=dict(dash='solid'))
                fig.add_scatter(x=full_df.index, y=full_df['pred'], name='XGBoost', line=dict(dash='dash'))
                fig.show()


    #! Perform Prediction
    # model_path and obs_flow from the top of this station iteration are reused;
    # neither has changed, so they are not rebuilt / re-read here.
    full_pred = pd.DataFrame(index=pd.date_range(f'{pred_years[0]}-01-01',f'{pred_years[-1]}-12-31',freq='MS'),columns=['obs','pred'])
    full_pred.index.name = 'date'
    for pred_year in pred_years:
        year_slice = slice(f'{pred_year}-01-01', f'{pred_year}-12-31')
        model = load_model(model_path+f'model_{pred_year}.json')
        feature_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv', **csv_kwargs)
        target_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv', **csv_kwargs)

        # Keep only the forecast-year rows, labelled with the last date in the file.
        feature_samples = pd.DataFrame(
            feature_samples.loc[year_slice].values,
            index=[feature_samples.index[-1]],
            columns=feature_samples.columns)
        # Forecast-year targets; not used further in this cell.
        target_samples = pd.DataFrame(
            target_samples.loc[year_slice].values,
            index=[target_samples.index[-1]],
            columns=target_samples.columns)
        test_pred = predict(model,feature_samples)

        print(test_pred)

        full_pred.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values
        full_pred.loc[year_slice,'pred'] = test_pred[0]

    print('XGBoost',r2_score(full_pred['obs'],full_pred['pred']))
    full_pred.to_csv(result_path+f'XGBoost_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Visualize the results using plotly
    fig = go.Figure()
    fig.add_scatter(x=full_pred.index,y=full_pred['obs'],name='ObsFlow',line=dict(dash='solid'))
    fig.add_scatter(x=full_pred.index,y=full_pred['pred'],name='XGBoost',line=dict(dash='dash'))
    fig.show()



model_2015.json loaded


model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 210.4122   169.48082  228.3522   315.6712   646.9399  1157.1306
  2370.012   1427.841   1510.1278  1323.4537   717.88403  330.47174]]
[[122.20254 163.96979 212.36336 297.331   584.02155 896.1032  609.5335
  826.06036 671.78094 543.9472  368.7396  198.03854]]
[[ 141.84029  148.14644  200.701    257.52957  298.37192  906.3137
  1139.8636   524.8477   591.601    613.03485  227.38762  132.78731]]
[[ 172.25438  177.50282  212.59776  306.35864  530.8306   806.57684
  1530.1337  1258.2025   746.00085  764.87756  415.96567  204.98372]]
[[ 234.73723  235.03223  240.9775   414.7019   690.0063  1123.0197
  1496.595   1494.142   1628.9248   812.47736  534.25415  214.04193]]
XGBoost 0.4490881736495209


model_2015.json loaded


model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 229.74945  200.7356   252.82457  328.6143   632.65045 1334.9249
  2438.6018  1501.2476  1667.0845  1348.5967   849.48914  327.43054]]
[[162.84377 184.86172 215.07463 375.74207 509.7405  763.55536 714.7025
  974.9052  795.6642  729.845   419.097   247.7519 ]]
[[185.94519 229.85172 250.87881 263.281   441.67084 828.31866 816.4129
  478.13895 489.45792 516.43134 274.4702  172.29819]]
[[ 217.46875  214.64844  262.757    275.9929   479.561    826.71466
  2046.5244  1182.71     774.2885   576.1722   470.98926  188.44417]]
[[ 285.26538  256.98044  313.0531   318.44952  456.84186 1078.9602
  2049.4282   867.4869   888.77423  702.31964  520.9778   279.25772]]
XGBoost 0.33062138399777663


model_2015.json loaded


model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 222.71843  224.22478  226.99171  305.37497  582.18756 1257.1687
  2180.9333  1525.0868  1363.2528  1915.1931   892.4534   291.2775 ]]
[[ 182.3803   184.62411  190.81639  367.5469   376.9958   708.44257
  1790.48    1175.2733  1266.1818  1564.4579   553.2769   254.68922]]
[[176.88548 157.76909 185.22763 241.2088  407.72775 759.9854  847.8478
  653.968   604.0336  636.55695 293.9679  194.91626]]
[[ 229.57959  214.35266  227.2816   284.60825  501.36563  931.67035
  1867.4989  1108.9414   686.502    797.79645  496.91635  251.20552]]
[[ 252.93887  254.52107  273.65265  315.91592  434.05426 1058.8219
  1988.3027  1088.1586   879.5733   848.2652   515.1176   254.62039]]
XGBoost 0.2766899821257839


In [50]:
# Experiment on the 'InputOutputSamples_metesimyr_swatpsim' sample set.
# For every station, one model per forecast year is trained (or loaded when its
# JSON file already exists); each model then predicts the 12 monthly flows of
# its forecast year, and the stitched forecasts are scored with R^2.
name = 'InputOutputSamples_metesimyr_swatpsim'
pred_years = np.arange(2015,2020)  # forecast years 2015..2019
start_year = 1973  # first simulated year
# Exclusive upper bound of the simulated years. Defined here so this cell does
# not depend on a value left over from a previously executed cell.
end_year = 2020
sample_path = f'../samples_mete_wb_vif/{name}/'
# Every CSV read in this cell is indexed by its parsed 'date' column.
csv_kwargs = dict(index_col=['date'], parse_dates=['date'])
for hydro_station in hydro_stations:
    model_path = f'../model_mete_wb_vif/12MonthlyFlowProcessForecast_{hydro_station.lower()}/MultiXGBRegressor/{name}/'
    result_path = f'../result_mete_wb_vif/XGboostPredResult/{hydro_station}/{name}/'
    # exist_ok=True replaces the separate "exists?" check before creating the folders.
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)
    obs_flow = pd.read_csv(f'../data/{hydro_station.lower()}_natural_monthly_flow.csv', **csv_kwargs)
    # Monthly obs/pred table covering start_year .. end_year-1, i.e. exactly the
    # years visited by the full-simulation loop below.
    full_df = pd.DataFrame(
        index=pd.date_range(f'{start_year}-01-01', f'{end_year-1}-12-31', freq='MS'),
        columns=['obs','pred'],
    )
    full_df.index.name = 'date'
    full_feature_samples = pd.read_csv(f'../samples_mete_wb_vif/InputOutputSamples_hismete_swatpsim/{hydro_station}_meteavg_vif_feature_samples.csv', **csv_kwargs)
    # Not used further in this cell; kept so the variable stays available afterwards.
    full_target_samples = pd.read_csv(f'../samples_mete_wb_vif/InputOutputSamples_hismete_swatpsim/{hydro_station}_meteavg_vif_target_samples.csv', **csv_kwargs)
    # January rows only; filtered once instead of once per simulated year.
    full_jan_features = full_feature_samples.loc[full_feature_samples.index.month == 1]
    for pred_year in pred_years:
        # Monthly obs/pred table for the calibration period of this forecast year.
        df = pd.DataFrame(
            index=pd.date_range(f'{start_year}-01-01',f'{pred_year}-12-31',freq='MS'),
            columns=['obs','pred']
        )
        df.index.name='date'
        model_file = model_path+f'model_{pred_year}.json'
        if not os.path.exists(model_file):
            feature_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv', **csv_kwargs)
            target_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv', **csv_kwargs)

            # Train only on samples dated before the forecast year.
            feature_samples = feature_samples.loc[:f'{pred_year-1}-12-31']
            target_samples = target_samples.loc[:f'{pred_year-1}-12-31']

            best_params = OptunaTuneMultioutXGBRegressorCV(
                train_X=feature_samples,
                train_Y=target_samples,
                nfolds=nfolds,
                n_trials=n_trials,
                num_boost_round=num_boost_round,
                n_warmup_steps=n_warmup_steps
            )
            print(best_params)
            # NOTE(review): the trailing 100 is presumably the boosting-round count
            # of the final fit - confirm against MultiOutXGBRegressor.
            model = FitFinalMultioutXGBRegressor(feature_samples,target_samples,best_params,100)
            # Predictions on the training samples; not used further in this cell.
            calval_pred = predict(model,feature_samples)

            jan_features = feature_samples.loc[feature_samples.index.month == 1]
            for sim_year in range(start_year,pred_year):
                year_slice = slice(f'{sim_year}-01-01', f'{sim_year}-12-31')
                # Re-wrap the January row(s) of sim_year under a single date label.
                features = jan_features.loc[f'{sim_year}']
                year_feature_samples = pd.DataFrame(
                    features.values,
                    index=[f'{sim_year}-01-01'],
                    columns=feature_samples.columns
                )
                year_feature_samples.index.name='date'
                year_pred = predict(model,year_feature_samples)
                # Row 0 of the prediction fills the months of sim_year.
                df.loc[year_slice,'pred'] = year_pred[0]
                df.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values

            df.to_csv(result_path+f'XGBoost_sim_{start_year}_{pred_year}.csv')

            save_model(model,model_file)

        else:
            model = load_model(model_file)
            print(f'model_{pred_year}.json loaded')
            # The full-period simulation is produced only with the first forecast
            # year's model, and only when that model was loaded from disk.
            if pred_year == pred_years[0]:
                for sim_year in range(start_year,end_year):
                    year_slice = slice(f'{sim_year}-01-01', f'{sim_year}-12-31')
                    full_features = full_jan_features.loc[f'{sim_year}']
                    year_full_feature_samples = pd.DataFrame(
                        full_features.values,
                        index=[f'{sim_year}-01-01'],
                        columns=full_feature_samples.columns
                    )
                    year_full_feature_samples.index.name='date'
                    year_pred = predict(model,year_full_feature_samples)
                    full_df.loc[year_slice,'pred'] = year_pred[0]
                    full_df.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values
                full_df.to_csv(result_path+f'{hydro_station}_full_sim_{start_year}_{end_year-1}.csv')
                fig = go.Figure()
                fig.add_scatter(x=full_df.index, y=full_df['obs'], name='ObsFlow', line=dict(dash='solid'))
                fig.add_scatter(x=full_df.index, y=full_df['pred'], name='XGBoost', line=dict(dash='dash'))
                fig.show()


    #! Perform Prediction
    # model_path and obs_flow from the top of this station iteration are reused;
    # neither has changed, so they are not rebuilt / re-read here.
    full_pred = pd.DataFrame(index=pd.date_range(f'{pred_years[0]}-01-01',f'{pred_years[-1]}-12-31',freq='MS'),columns=['obs','pred'])
    full_pred.index.name = 'date'
    for pred_year in pred_years:
        year_slice = slice(f'{pred_year}-01-01', f'{pred_year}-12-31')
        model = load_model(model_path+f'model_{pred_year}.json')
        feature_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv', **csv_kwargs)
        target_samples = pd.read_csv(sample_path+f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv', **csv_kwargs)

        # Keep only the forecast-year rows, labelled with the last date in the file.
        feature_samples = pd.DataFrame(
            feature_samples.loc[year_slice].values,
            index=[feature_samples.index[-1]],
            columns=feature_samples.columns)
        # Forecast-year targets; not used further in this cell.
        target_samples = pd.DataFrame(
            target_samples.loc[year_slice].values,
            index=[target_samples.index[-1]],
            columns=target_samples.columns)
        test_pred = predict(model,feature_samples)

        print(test_pred)

        full_pred.loc[year_slice,'obs'] = obs_flow.loc[year_slice,'flow(m^3/s)'].values
        full_pred.loc[year_slice,'pred'] = test_pred[0]

    print('XGBoost',r2_score(full_pred['obs'],full_pred['pred']))
    full_pred.to_csv(result_path+f'XGBoost_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Visualize the results using plotly
    fig = go.Figure()
    fig.add_scatter(x=full_pred.index,y=full_pred['obs'],name='ObsFlow',line=dict(dash='solid'))
    fig.add_scatter(x=full_pred.index,y=full_pred['pred'],name='XGBoost',line=dict(dash='dash'))
    fig.show()

model_2015.json loaded


model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 168.89464  159.29681  249.48294  418.85504  415.3231   857.6352
  1566.5348   943.95557 1622.3464   699.5424   752.0703   343.8677 ]]
[[ 199.08087  151.36784  232.33078  320.0658   619.9583   677.4002
  1408.2545  1103.686    903.185    738.57794  428.691    225.81863]]
[[200.1614  197.50993 182.36653 300.46136 526.35724 619.9899  865.74756
  910.9341  728.1502  862.7851  416.2852  239.52307]]
[[ 159.66795  154.98827  218.36865  301.748    569.05023  725.96893
   963.18134 1083.9017  1226.9213   936.048    451.45053  220.61275]]
[[ 144.99402  192.54175  191.14575  260.38763  528.5151   702.9545
  1079.6299  1401.4856  1102.6674  1026.0718   524.6834   210.96257]]
XGBoost 0.4732724361390561


model_2015.json loaded


model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 186.1849   201.094    297.5477   396.14224  430.5124  1003.5264
  1801.2072  1754.6351  1362.2665  1117.2009   639.86163  292.81055]]
[[ 182.4817   188.34793  255.84605  346.0986   689.8925   586.4263
   868.31824 1021.7057   801.4107   927.54474  448.84637  232.88124]]
[[193.3211  200.82712 229.29872 257.2014  312.44916 606.4867  609.27057
  532.62103 885.7499  699.5273  398.3254  199.59679]]
[[ 174.93025  184.49925  244.28214  322.19183  490.435    762.60455
  1068.5421  1109.3931   868.90247 1236.6682   486.4217   279.35095]]
[[ 273.943    243.70653  280.31995  385.3383   535.0653   756.6225
  1330.5607   748.54333  980.73224 1142.7405   471.12463  283.0897 ]]
XGBoost 0.4190400379728568


model_2015.json loaded


model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 196.35335  209.84816  226.18433  362.2157   559.75885  870.2161
  1839.1467  1687.1434  1548.8284  1219.3401   682.52997  235.88535]]
[[ 165.59927  211.17064  206.3732   316.68677  581.5046   764.212
   932.29974 1043.9036   807.7684   952.9027   505.9182   267.68317]]
[[ 121.11505  163.8547   214.27422  341.05948  585.27423  488.13782
  1274.0822   506.59164  952.8722   796.5634   591.9077   167.56483]]
[[ 244.07076  210.36961  195.10117  231.71193  452.51218  582.1325
  1409.0299  1343.9407   791.76105  961.1491   574.65125  303.27478]]
[[ 191.58162  210.97551  229.3028   377.01163  396.73807  913.24536
  1194.1254  1087.4618  1128.286   1024.3907   497.23026  310.25723]]
XGBoost 0.428766615073758


In [52]:
name = 'InputOutputSamples_metesimyr'
pred_years = np.arange(2015,2020)
start_year = 1973
sample_path = f'../samples_mete_wb_vif/{name}/'


def _read_samples(csv_path):
    """Read a CSV whose ``date`` column is parsed and used as the index."""
    return pd.read_csv(csv_path, index_col=['date'], parse_dates=['date'])


def _simulate_years(model, samples, obs, out_df, years, verbose=False):
    """Fill ``out_df`` in place with one model call per calendar year.

    For each year in ``years`` the January row of ``samples`` is passed to
    ``predict``; the first row of the result is written to the ``pred``
    column for that year's months and the observed ``flow(m^3/s)`` values
    from ``obs`` are copied into the ``obs`` column.

    Args:
        model: Fitted model accepted by ``predict``.
        samples: Feature frame with a DatetimeIndex.
        obs: Observed flow frame with a ``flow(m^3/s)`` column.
        out_df: Monthly frame with ``obs``/``pred`` columns, modified in place.
        years: Iterable of calendar years to simulate.
        verbose: When True, print the feature row used for each year.
    """
    january_rows = samples.loc[samples.index.month == 1]
    for year in years:
        year_rows = january_rows.loc[f'{year}']
        if verbose:
            print(year_rows)
        year_input = pd.DataFrame(
            year_rows.values,
            index=[f'{year}-01-01'],
            columns=samples.columns,
        )
        year_input.index.name = 'date'
        # presumably predict returns one row holding a value per month of the
        # year -- TODO confirm against MultiOutXGBRegressor.predict
        year_pred = predict(model, year_input)
        months = slice(f'{year}-01-01', f'{year}-12-31')
        out_df.loc[months, 'pred'] = year_pred[0]
        out_df.loc[months, 'obs'] = obs.loc[months, 'flow(m^3/s)'].values


for hydro_station in hydro_stations:
    station_key = hydro_station.lower()
    model_path = f'../model_mete_wb_vif/12MonthlyFlowProcessForecast_{station_key}/MultiXGBRegressor/{name}/'
    result_path = f'../result_mete_wb_vif/XGboostPredResult/{hydro_station}/{name}/'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(model_path, exist_ok=True)
    os.makedirs(result_path, exist_ok=True)

    obs_flow = _read_samples(f'../data/{station_key}_natural_monthly_flow.csv')
    full_df = pd.DataFrame(
        index=pd.date_range(f'{start_year}-01-01', f'2019-12-31', freq='MS'),
        columns=['obs', 'pred'],
    )
    full_df.index.name = 'date'
    full_feature_samples = _read_samples(
        f'../samples_mete_wb_vif/InputOutputSamples_hismete/{hydro_station}_meteavg_vif_feature_samples.csv'
    )

    #! Train (or load) one model per prediction year
    for pred_year in pred_years:
        model_file = model_path + f'model_{pred_year}.json'
        if os.path.exists(model_file):
            model = load_model(model_file)
            print(f'model_{pred_year}.json loaded')
        else:
            feature_samples = _read_samples(
                sample_path + f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv'
            )
            target_samples = _read_samples(
                sample_path + f'{hydro_station}_meteavg_vif_target_samples_pred{pred_year}.csv'
            )

            # Data of the prediction year itself is held out of training.
            train_end = f'{pred_year-1}-12-31'
            feature_samples = feature_samples.loc[:train_end]
            target_samples = target_samples.loc[:train_end]

            best_params = OptunaTuneMultioutXGBRegressorCV(
                train_X=feature_samples,
                train_Y=target_samples,
                nfolds=nfolds,
                n_trials=n_trials,
                num_boost_round=num_boost_round,
                n_warmup_steps=n_warmup_steps,
            )
            print(best_params)
            # NOTE(review): the literal 100 is presumably the boosting-round
            # count of the final fit and differs from num_boost_round used
            # during tuning -- confirm this is intended.
            model = FitFinalMultioutXGBRegressor(feature_samples, target_samples, best_params, 100)

            # Simulation over the training years; the rows of pred_year stay
            # empty because that year was held out above.
            df = pd.DataFrame(
                index=pd.date_range(f'{start_year}-01-01', f'{pred_year}-12-31', freq='MS'),
                columns=['obs', 'pred'],
            )
            df.index.name = 'date'
            _simulate_years(model, feature_samples, obs_flow, df, range(start_year, pred_year))
            df.to_csv(result_path + f'XGBoost_sim_{start_year}_{pred_year}.csv')

            save_model(model, model_file)

        # Full-period simulation with the 2015 model. Run it whether the model
        # was just trained or loaded, so the outputs do not depend on whether
        # a cached model file already existed.
        if pred_year == 2015:
            _simulate_years(
                model,
                full_feature_samples,
                obs_flow,
                full_df,
                range(start_year, end_year),
                verbose=True,
            )
            full_df.to_csv(result_path + f'{hydro_station}_full_sim_1973_2019.csv')
            fig = go.Figure()
            fig.add_scatter(x=full_df.index, y=full_df['obs'], name='ObsFlow', line=dict(dash='solid'))
            fig.add_scatter(x=full_df.index, y=full_df['pred'], name='XGBoost', line=dict(dash='dash'))
            fig.show()

    #! Perform Prediction
    full_pred = pd.DataFrame(
        index=pd.date_range(f'{pred_years[0]}-01-01', f'{pred_years[-1]}-12-31', freq='MS'),
        columns=['obs', 'pred'],
    )
    full_pred.index.name = 'date'
    for pred_year in pred_years:
        model = load_model(model_path + f'model_{pred_year}.json')
        feature_samples = _read_samples(
            sample_path + f'{hydro_station}_meteavg_vif_feature_samples_pred{pred_year}.csv'
        )

        months = slice(f'{pred_year}-01-01', f'{pred_year}-12-31')
        # Keep only the prediction-year row(s), labelled with the last date of
        # the sample file; this assumes a single row falls inside pred_year.
        feature_samples = pd.DataFrame(
            feature_samples.loc[months].values,
            index=[feature_samples.index[-1]],
            columns=feature_samples.columns,
        )
        test_pred = predict(model, feature_samples)

        print(test_pred)

        full_pred.loc[months, 'obs'] = obs_flow.loc[months, 'flow(m^3/s)'].values
        full_pred.loc[months, 'pred'] = test_pred[0]

    print('XGBoost', r2_score(full_pred['obs'], full_pred['pred']))
    full_pred.to_csv(result_path + f'XGBoost_pred_{pred_years[0]}_{pred_years[-1]}.csv')

    # Visualize the results using plotly
    fig = go.Figure()
    fig.add_scatter(x=full_pred.index, y=full_pred['obs'], name='ObsFlow', line=dict(dash='solid'))
    fig.add_scatter(x=full_pred.index, y=full_pred['pred'], name='XGBoost', line=dict(dash='dash'))
    fig.show()

model_2015.json loaded
            P2020(mm)_t-12  P2020(mm)_t-11  P2020(mm)_t-10  P2020(mm)_t-9  \
date                                                                        
1973-01-01        0.636387         9.52351        5.880554      21.773792   

            P2020(mm)_t-8  P2020(mm)_t-7  P2020(mm)_t-6  P2020(mm)_t-5  \
date                                                                     
1973-01-01      63.193324      70.178479     145.505265      68.498457   

            P2020(mm)_t-4  P2020(mm)_t-3  ...  MIN-TEM(C)_t+3  MIN-TEM(C)_t+4  \
date                                      ...                                   
1973-01-01      53.863039      15.348229  ...       -10.68731       -6.808016   

            MIN-TEM(C)_t+5  MIN-TEM(C)_t+6  MIN-TEM(C)_t+7  MIN-TEM(C)_t+8  \
date                                                                         
1973-01-01       -1.355897         0.83586        2.226753        3.390877   

            MIN-TEM(C)_t+9  MIN-TEM(C)_t+10

model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 190.00575  165.33986  232.64835  414.1761   622.60065 1095.3275
  1744.2863   928.57916 1216.7671   836.55853  532.5023   211.83058]]
[[ 176.25677  171.93622  227.41277  358.75848  514.05237  618.16296
  1005.8085  1073.6312   932.1319   570.4146   514.9583   254.06993]]
[[175.4645  153.75218 192.665   346.4041  393.77136 811.9066  784.99115
  911.5044  970.17755 848.2177  530.3222  243.72403]]
[[ 198.23961  190.88844  212.57304  281.5537   425.24136  853.24744
  1050.477    686.29663  983.225   1089.8884   446.4008   223.69403]]
[[ 235.26988  217.4747   241.09383  424.3968   474.39975  930.3808
  1345.6613   813.70905  947.88074  984.26746  494.5098   290.67603]]
XGBoost 0.5248430764756788


model_2015.json loaded
            P2020(mm)_t-12  P2020(mm)_t-11  P2020(mm)_t-10  P2020(mm)_t-9  \
date                                                                        
1973-01-01        0.549118        8.645502        5.254902      22.290889   

            P2020(mm)_t-8  P2020(mm)_t-7  P2020(mm)_t-6  P2020(mm)_t-5  \
date                                                                     
1973-01-01      63.533611      66.124014     136.676488      70.302157   

            P2020(mm)_t-4  P2020(mm)_t-3  ...  MIN-TEM(C)_t+3  MIN-TEM(C)_t+4  \
date                                      ...                                   
1973-01-01      48.600528       13.32386  ...       -9.935045       -5.924707   

            MIN-TEM(C)_t+5  MIN-TEM(C)_t+6  MIN-TEM(C)_t+7  MIN-TEM(C)_t+8  \
date                                                                         
1973-01-01       -0.569667        1.601684         3.10022         4.22274   

            MIN-TEM(C)_t+9  MIN-TEM(C)_t+10

model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 188.91255  194.56648  259.5791   414.93997  567.1488   935.551
  1717.0708  1312.5004  1399.3259  1155.6165   556.08307  308.76862]]
[[ 154.78639  168.10669  207.71292  336.59018  498.03876  581.2555
   959.33014 1010.93756 1000.5183   721.9428   464.28787  211.06493]]
[[ 192.29205  166.79672  247.40552  251.60909  445.79434  914.8969
   976.4991   908.3761  1111.3762  1142.1398   502.6172   231.28503]]
[[ 181.90828  191.12177  267.17355  346.69382  415.97488  811.9159
  1015.24554  786.4489   957.84686 1069.971    551.0657   288.3548 ]]
[[ 222.3075   242.08876  290.56445  392.104    429.62137  722.59717
  1096.9646   982.42084  867.5451   996.4778   584.8973   270.4955 ]]
XGBoost 0.4621802136797468


model_2015.json loaded
            P2020(mm)_t-12  P2020(mm)_t-11  P2020(mm)_t-10  P2020(mm)_t-9  \
date                                                                        
1973-01-01         0.52228        8.634969        5.209209       22.52349   

            P2020(mm)_t-8  P2020(mm)_t-7  P2020(mm)_t-6  P2020(mm)_t-5  \
date                                                                     
1973-01-01      63.550052      64.797354     132.720728      69.830409   

            P2020(mm)_t-4  P2020(mm)_t-3  ...  MIN-TEM(C)_t+3  MIN-TEM(C)_t+4  \
date                                      ...                                   
1973-01-01      46.654794       12.89455  ...       -9.625105       -5.537062   

            MIN-TEM(C)_t+5  MIN-TEM(C)_t+6  MIN-TEM(C)_t+7  MIN-TEM(C)_t+8  \
date                                                                         
1973-01-01        -0.26576        1.894688        3.393931        4.515666   

            MIN-TEM(C)_t+9  MIN-TEM(C)_t+10

model_2016.json loaded
model_2017.json loaded
model_2018.json loaded
model_2019.json loaded
[[ 216.12747  190.12442  221.6327   349.71616  494.3899   818.8899
  1994.1177  1125.6342  1366.2532  1381.9166   607.73315  279.3529 ]]
[[ 154.88484  156.5547   193.58458  320.58908  343.45792  932.1194
   935.1117  1230.7454  1128.5189  1008.2553   454.27713  222.58669]]
[[ 186.58464  164.92953  203.71652  225.34116  459.5373   716.4656
   952.5571   675.7268  1412.1553  1003.8501   580.0605   274.19568]]
[[ 190.16823  190.6332   222.81725  258.03152  310.77917  672.0651
  1070.2012   998.2085  1116.2499   906.29407  586.6862   274.89725]]
[[ 239.85965  224.79953  262.3608   409.23004  496.44043  724.13776
  1218.5896  1218.9163   976.13947 1163.8066   532.98663  244.8082 ]]
XGBoost 0.48586357013135273
