In [39]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Aim

## Imports
Main imports

In [40]:
import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np
from pathlib import Path
import sys

Package imports

In [130]:
# Make the local rfactor checkout importable before importing from it.
# NOTE(review): machine-specific absolute path -- this cell fails on any other
# machine; consider installing the package or deriving the path relative to
# the notebook.
sys.path.append(r"C:\Users\SachaGobeyn\GitHub\rfactor")
from rfactor.src.flanders.data_processing import (load_input_data,
                                                  reformat_resolution,
                                                  compute_statistics_inputdata,
                                                  get_files_rfactor_script,
                                                  load_cumulative_erosivity,
                                                  load_data_completeness_file) 

Load completeness per station
Compute completeness for every station for every year
1. Remove each set (year, station) if no data are reported for December or January (value: -1)
2. Compute the share of non-NA values in the non-zero data series (expressed as a fraction, value in [0,1])

__NOTE__: These values are computed based on the original rainfall timeseries of KMI and VMM

In [42]:
# Folder (relative to this notebook) that holds the Flanders data files.
path_data_flanders = Path("..") / ".." / "src" / "flanders" / "data"
# File with the completeness per station and year.
txt_completeness = path_data_flanders.joinpath("datafiles_completeness.csv")
df_completeness = load_data_completeness_file(txt_completeness)

## Input data

**Load data**  
Non-zero rainfall data are loaded. Station-years that are incomplete (no data for January or December, i.e. the beginnings and ends of a station's data series) are not loaded, as these are not computed with the scripts of GV. Station-years with a high share of NA values (< 95 % completeness) are omitted.

In [43]:
# Folder with the rainfall input data (electronic appendix B of the report).
# NOTE(review): machine-specific absolute Dropbox path -- not reproducible on
# another machine; consider a configurable data directory.
fmap =  Path(r"C:\Users\SachaGobeyn\Dropbox (Fluves)\FLUVES\PROJECTS\FL_P_2020_DepO_CNWSMod_1\08_reports\2019_rapportering_rfactor\versie_2020\elektronische_appendixB_Inputdata")

In [44]:
# Sanity check: the input-data folder must exist before loading from it.
fmap.exists()

True

In [45]:
# Load the rainfall input data from `fmap`, passing the completeness table.
# NOTE(review): presumably the filtering of incomplete station-years described
# above happens inside load_input_data -- confirm in data_processing.
dict_inputdata = load_input_data(fmap, df_completeness)

**Station geodata information**


In [46]:
# Station geodata of both networks. The two files use different delimiters
# (";" for KMI, "," for VMM).
df_stations_KMI = pd.read_csv(path_data_flanders / "geoinfo_KMI.csv", delimiter=";")
df_stations_VMM = pd.read_csv(path_data_flanders / "geoinfo_VMM.csv", delimiter=",")
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0.
# pd.concat with its default arguments stacks the rows in the same way
# (original row indices are kept, columns are not sorted).
df_stations = pd.concat([df_stations_KMI, df_stations_VMM])

**Compute statistics**  
Compute general statistics with the `.describe()` function, and couple the x and y's of the stations (from the station geodata) to them.

In [47]:
# Folder (relative to this notebook) that receives the result files written below.
resmap = "results"
# NOTE(review): presumably writes the input-data statistics into `resmap`;
# the return value (if any) is not used -- confirm in data_processing.
compute_statistics_inputdata(dict_inputdata, resmap, df_stations)

## Compute R-factor

Run model with Matlab in terminal (navigate to *C:\Users\$USERNAME\GitHub\rfactor\rfactor\src\rfactor*, **see README.md**)
    
    matlab -nodisplay -r "main('C:\Users\\$USERNAME\GitHub\rfactor\rfactor\docs\data\example_inputdata')"

Results are saved in *C:\Users\\$USERNAME\GitHub\rfactor\rfactor\src\rfactor\results*

In [48]:
# Folder with the output of the Matlab R-factor run.
# NOTE(review): this rebinds `fmap`, which previously pointed to the input-data
# folder, and is a machine-specific absolute path.
fmap = Path(r"C:\Users\SachaGobeyn\GitHub\rfactor\rfactor\src\rfactor\results")

In [49]:
# Sanity check: the Matlab results folder must exist before collecting files.
fmap.exists()

True

## Process results

**Get files**

In [126]:
# Collect the R-factor result files from the results folder `fmap`.
# NOTE(review): presumably matched against the loaded input data and the
# completeness table -- confirm in data_processing.
dict_output_files = get_files_rfactor_script(fmap,dict_inputdata,df_completeness)

**Create dataframes**
One dataframe for each station.  
__NOTE__: Results from incomplete datasets (<0.95 coverage) are not loaded as these have not been computed! 

In [127]:
# Load the cumulative erosivity from the collected result files
# (one dataframe per station, see the note above).
dict_df_output = load_cumulative_erosivity(dict_output_files)

**Reformat for half-monthly analysis**

In [134]:
# Reformat the per-station output for the half-monthly analysis.
# The cells below read one column per year and use the last row as yearly value.
dict_df_output_hm = reformat_resolution(dict_df_output)

**Generate table output**

In [120]:
# Build the table of yearly R-values: one row per (station, year) with flags
# telling to which regional average the station contributes.
#
# The rows are collected as records and the frame is created once. The original
# pre-allocated a float frame without an "Ukkel" column and filled it row by
# row through .loc, relying on implicit column enlargement and dtype upcasting.
# The column order of the written CSV is unchanged ("Ukkel" comes last).
columns = ["station", "year", "value", "Flanders", "Belgium", "Ukkel"]

# Ukkel stations: flagged as Ukkel and excluded from both regional averages.
stations_ukkel = {"KMI_6447", "KMI_FS3"}
# Stations that count for Belgium but not for Flanders.
stations_belgium_only = {"KMI_6455", "KMI_6459", "KMI_6472", "KMI_6494"}

records = []
for station, df_station in dict_df_output_hm.items():

    if station == "Ukkel_samen":
        # Derived series that a later cell adds to this dict; skipping it keeps
        # this cell idempotent when it is re-run after that cell.
        continue

    is_ukkel = station in stations_ukkel
    is_belgium = not is_ukkel
    is_flanders = is_belgium and station not in stations_belgium_only

    for year in df_station.columns:
        records.append({
            "station": station,
            "year": year,
            # Last row of the year column is used as the yearly value.
            "value": df_station[year].iloc[-1],
            "Flanders": int(is_flanders),
            "Belgium": int(is_belgium),
            "Ukkel": int(is_ukkel),
        })

df_R_yearly = pd.DataFrame.from_records(records, columns=columns)

df_R_yearly.to_csv(Path(resmap) / "Ryearly_allstations_filtereddata.csv")

## Analysis

**Values for Flanders and Belgium**  
Average value for Flanders (excl. Ukkel)

In [136]:
# Mean yearly R over all station-years flagged as Flanders.
is_flanders_row = df_R_yearly["Flanders"] == 1
df_R_yearly.loc[is_flanders_row, "value"].mean()

1230.5252586206896

Average value for Belgium (excl. Ukkel)

In [137]:
# Mean yearly R over all station-years flagged as Belgium.
is_belgium_row = df_R_yearly["Belgium"] == 1
df_R_yearly.loc[is_belgium_row, "value"].mean()

1256.2069003690037

**Ukkel: long term**   
Two timeseries: 
 - KMI_FS3 from 1898 to 2002  
 - KMI_6447 from 2003 to 2017  

In [138]:
# Put the two Ukkel series side by side (columns of KMI_FS3 first, then those
# of KMI_6447) and store the result under its own key.
ukkel_parts = [dict_df_output_hm[key] for key in ("KMI_FS3", "KMI_6447")]
dict_df_output_hm["Ukkel_samen"] = pd.concat(ukkel_parts, axis=1)

# Write the combined Ukkel table to the results folder.
ukkel_csv = Path(resmap) / "Ryearly_Ukkel_filtereddata.csv"
dict_df_output_hm["Ukkel_samen"].to_csv(ukkel_csv)

Compute different values for Ukkel, for different time periods

In [139]:
# Periods to summarise, as [first year, last year] (both inclusive).
a = [[1898, 2002],
     [1898, 2019],
     [2003, 2019],
     [1996, 2019],
     [1990, 2019],
     [1990, 2000],
     [2000, 2019]]

df_ukkel = dict_df_output_hm["Ukkel_samen"]

# Mean and median of the yearly R-value (last row of the table) per period.
# The original also built a `col_fill_nan` list to add NaN columns for missing
# years, but derived it from the already filtered year list, so it was always
# empty. That dead code is removed; the statistics are unchanged because years
# without data are simply left out.
pres = {}
for first_year, last_year in a:
    years = [year for year in range(first_year, last_year + 1)
             if year in df_ukkel.columns]
    yearly_r = df_ukkel[years].iloc[-1]
    # Key format "[first, last]" is kept, it is the row label of the summary.
    pres[str([first_year, last_year])] = {"mean": np.nanmean(yearly_r),
                                          "median": np.nanmedian(yearly_r)}

df_summary = pd.DataFrame.from_dict(pres).T

In [140]:
# Display the summary table (mean and median per period).
df_summary

Unnamed: 0,mean,median
"[1898, 2002]",958.10581,927.27
"[1898, 2019]",1005.035702,941.02
"[2003, 2019]",1313.013125,1094.68
"[1996, 2019]",1260.633043,1095.0
"[1990, 2019]",1244.897586,1095.0
"[1990, 2000]",1094.643636,1073.07
"[2000, 2019]",1302.779474,1146.81


Figure

In [None]:
# Yearly R of the combined Ukkel series: last row of the table, indexed by year.
ukkel_yearly = dict_df_output_hm["Ukkel_samen"].iloc[-1]

fig, ax = plt.subplots(figsize=[12, 7.5])

# One line per station: (legend label, first year, last year), both inclusive.
# The original drew the second series over np.arange(2002, 2019), i.e.
# 2002..2018: 2002 was plotted in both series and 2019 never, which contradicts
# the legend label. The bounds now match the labels; years without data are
# skipped.
station_periods = [("KMI_FS3 (1898-2002)", 1898, 2002),
                   ("KMI_6447 (2003-2019)", 2003, 2019)]
for label, first_year, last_year in station_periods:
    years = [year for year in range(first_year, last_year + 1)
             if year in ukkel_yearly.index]
    ax.plot(years, ukkel_yearly.loc[years], label=label)

# Rolling means over the whole combined series: (window in years, line style).
for window, linestyle in [(10, ':'), (20, '--'), (30, '-.')]:
    rolling_mean = ukkel_yearly.rolling(window).mean()
    ax.plot(rolling_mean.index, rolling_mean, ls=linestyle, lw=3,
            label=f'voortschrijdend gemiddelde ({window} jaar)')

ax.set_ylim([0, 3500])
ax.set_xlim([1890, 2020])
ax.tick_params(axis='both', which='major', labelsize=14)
ax.set_xlabel(r'Jaar (-)', fontsize=16)
ax.set_ylabel(r'Jaarlijkse R [MJ mm ha$^{-1}$ h$^{-1}$ jaar$^{-1}$]', fontsize=16)
ax.legend(prop={'size': 16});

Compare the results of Ukkel with the averages of the other stations

In [None]:
# Compare the mean yearly R of every non-Ukkel station with the mean of the
# combined Ukkel series and write one row per station.
coldf = ["station","rc","offset","afstand","p","r_value","n","dR","meanR"]

# Keys that belong to Ukkel itself (the reference) and are therefore skipped.
# This replaces the hard-coded "len(...) - 3" row count of the original.
ukkel_keys = ("Ukkel_samen", "KMI_FS3", "KMI_6447")

df_ukkel = dict_df_output_hm["Ukkel_samen"]
col = df_ukkel.columns
ukkel_yearly = df_ukkel.iloc[-1]
mean_ukkel = np.mean(ukkel_yearly)

records = []
for i, df_station in dict_df_output_hm.items():

    if i in ukkel_keys:
        continue

    # Align the station on the Ukkel years; missing years become NaN.
    # reindex returns a new frame, so the stored station frames are no longer
    # mutated (the original added NaN columns to them in place).
    y = df_station.reindex(columns=col).iloc[-1]
    has_value = y.notna().to_numpy()
    x = ukkel_yearly[has_value]
    y = y[has_value]

    # Paired yearly values (station vs. Ukkel), sorted on the Ukkel value.
    # The original assigned the two value arrays to the wrong column labels.
    # NOTE(review): df_i is not used further in this cell; kept in case a later
    # cell reads it.
    df_i = pd.DataFrame({i: y.to_numpy(),
                         "Ukkel (KMI_FS3 & KMI_6447)": x.to_numpy()})
    df_i = df_i.sort_values("Ukkel (KMI_FS3 & KMI_6447)")

    # Mean over all years of the station (NaN skipped), as in the original.
    mean_station = np.mean(df_station.iloc[-1])

    # NOTE(review): rc, offset, afstand, p, r_value and n are never computed in
    # this cell; they keep the 0.0 placeholder the original wrote.
    record = dict.fromkeys(coldf, 0.0)
    # The original never filled "station" and wrote dR/meanR through chained
    # assignment (out["dR"].loc[ind] = ...), which pandas does not guarantee to
    # write back and which is a no-op under Copy-on-Write.
    record.update(station=i, dR=mean_station - mean_ukkel, meanR=mean_station)
    records.append(record)

out = pd.DataFrame(records, columns=coldf)

out.to_csv(Path(resmap) / "stat_allstations_res.csv")