In [1]:
# REQUIREMENTS:
# 1. NetCDF ensemble output data
# 2. GRDC discharge data

In [None]:
#Import Modules
import os
import sys
import xarray as xr
import numpy as np
import pandas as pd
import holoviews as hv

import geoviews as gv
import geoviews.feature as gf

from bokeh.layouts import gridplot
from bokeh.plotting import figure, show, output_file

import hydrostats.data as hydrod
import hydrostats.visual as hydrov
import HydroErr as he
import matplotlib.pyplot as plt
from scipy.stats import norm
from ranky import rankz

import cartopy
from cartopy import crs as ccrs

# Load the HoloViews notebook extension, selecting 'bokeh' as the plotting backend.
hv.notebook_extension('bokeh')

In [None]:
WorkingPath = r'D:/sbranchett/Jerom/' #project root; NOTE(review): machine-specific absolute path, adjust per machine
WorkingFolder = os.path.join(WorkingPath,'netcdf') #sub-folder that is scanned for the netcdf (.nc) files
os.chdir(WorkingFolder) #make that folder the current directory so files can be opened by bare name

In [None]:
#Start from empty containers; the file-loading loop fills them.
# data_var / data_stats : names of the variables added to ds / ds_stats
# ds / ds_stats         : xarray datasets that receive the opened netcdf arrays
data_var, data_stats = [], []
ds, ds_stats = xr.Dataset(), xr.Dataset()

In [None]:
#Loop over the netcdf files in the working folder and merge them into two xarray datasets:
#  - files whose name ends in 'Out' are stored in ds_stats under their full file name
#  - every other file is stored in ds as 'discharge' + the last three characters of its name
#The listing is sorted so the variable order does not depend on the file system, files are
#opened by full path so the loop does not depend on the current directory, and names are only
#appended once so re-running this cell does not duplicate entries in data_var / data_stats.
for f in sorted(os.listdir(WorkingFolder)):
    fName, fExt = os.path.splitext(f) # break up file name and extension
    if fExt != '.nc': #only use netcdf files
        continue
    var_name = 'discharge' + fName[-3:]
    if var_name != 'dischargeOut':
        if var_name not in data_var:
            data_var.append(var_name)
        ds[var_name] = xr.open_dataarray(os.path.join(WorkingFolder, f))
    else:
        var_name = fName
        if var_name not in data_stats:
            data_stats.append(var_name)
        ds_stats[var_name] = xr.open_dataarray(os.path.join(WorkingFolder, f))

In [None]:
#Select stations based on lat and lon, calculate: mean, std, min, max

class Station(object):
    """Ensemble discharge at the grid cell nearest to a station location.

    Parameters
    ----------
    name : str
        Short station name; also used to label the min/max dimensions in ``basin_plot``.
    lat, lon : float
        Station coordinates; the nearest grid cell is selected from both datasets.
    ds : xarray.Dataset
        Dataset holding one variable per entry of ``data_var``.
    ds_stats : xarray.Dataset
        Dataset holding the variables ``dischargeEnsMeanOut`` and ``dischargeEnsStdOut``.
    data_var : list of str
        Names of the variables in ``ds`` for which a curve is created.

    Attributes
    ----------
    ds, ds_stats : the two datasets reduced to the selected grid cell.
    mean, std : the ``dischargeEnsMeanOut`` / ``dischargeEnsStdOut`` variables at that cell.
    array : all variables of ``ds`` combined along a new ``variable`` dimension.
    min, max : minimum / maximum of ``array`` over the ``variable`` dimension.
    std_low, std_high : ``mean - std`` and ``mean + std``.
    curves : one ``hv.Curve`` per name in ``data_var``.
    """

    def __init__(self, name, lat, lon, ds, ds_stats, data_var):
        self.name = name
        self.ds = ds.sel(lat=lat, lon=lon, method='nearest')
        self.ds_stats = ds_stats.sel(lat=lat, lon=lon, method='nearest')
        self.mean = self.ds_stats['dischargeEnsMeanOut']
        self.std = self.ds_stats['dischargeEnsStdOut']

        self.array = self.ds.to_array()

        self.min = self.array.min('variable')
        self.max = self.array.max('variable')
        # One-standard-deviation band around the mean. The previous code stored the
        # bare std as the lower bound and 2*mean - std as the upper bound.
        self.std_low = self.mean - self.std
        self.std_high = self.mean + self.std

        self.curves = [hv.Curve(self.ds[member], label=member) for member in data_var]

    def basin_plot(self, xdim, xlabel, ylabel, color, plotlabel):
        """Return the min-max area of the station overlaid with its individual curves.

        Parameters
        ----------
        xdim : str
            Name of the coordinate of ``self.ds`` used on the x axis.
        xlabel, ylabel : str
            Names passed as key dimensions of the area element.
        color : str
            Fill colour of the min-max area.
        plotlabel : str
            Text passed to the ``title_format`` option of the area element.
        """
        maxname = self.name+'_max'
        minname = self.name+'_min'

        plot = (hv.Area((self.ds[xdim], self.max, self.min), kdims=[xlabel, ylabel],
                         vdims=[maxname, minname]).options(color=color, alpha=0.4, title_format=plotlabel) *\
                         hv.Overlay.from_values(self.curves))
        return plot

In [None]:
#Initialise the Stations of interest

# Every station is built from the same datasets and variable list.
_station_inputs = (ds, ds_stats, data_var)

lobith = Station('lobith', 51.88, 6.01, *_station_inputs)        #Lobith (Rhine)
kiens = Station('kiens', 48.38, 15.62, *_station_inputs)         #Kienstock (Danube)
arctic = Station('arctic', 67.4583, -133.7447, *_station_inputs) #Arctic Red River (Mackenzie)
thebes = Station('thebes', 37.2167, -89.464, *_station_inputs)   #Thebes (Mississippi)
jatura = Station('jatura', -3.0633, -59.6478, *_station_inputs)  #Jatuarana (Amazon)
louth = Station('louth', -30.5318, 145.1144, *_station_inputs)   #Louth (Darling)
koulik = Station('koulik', 12.8667, -7.55, *_station_inputs)     #Koulikoro (Niger)
stolb = Station('stolb', 72.37, 126.8, *_station_inputs)         #Stolb (Lena)

In [None]:
#Create plot for each subbasin showing the ensemble spread.

#Basin Plots
# Axis settings shared by all basin plots
xdim, xlabel, ylabel = 'time', 'Date 2017', 'Discharge [m3*s-1]'
colour = 'blue'


def _spread_plot(station, title):
    """Call ``station.basin_plot`` with the shared axis settings and the given title."""
    return station.basin_plot(xdim, xlabel, ylabel, colour, title)


lobith_plot = _spread_plot(lobith, 'Lobith (Rhine, Europe)')
kiens_plot = _spread_plot(kiens, 'Kienstock (Danube, Europe)')
arctic_plot = _spread_plot(arctic, 'Arctic Red River (Mackenzie, North America)')
thebes_plot = _spread_plot(thebes, 'Thebes (Mississippi, North America)')
jatura_plot = _spread_plot(jatura, 'Jatuarana (Amazon, South America)')
louth_plot = _spread_plot(louth, 'Louth (Darling, Australia)')
koulik_plot = _spread_plot(koulik, 'Koulikoro (Niger, Africa)')
stolb_plot = _spread_plot(stolb, 'Stolb (Lena, Asia)')

In [None]:
%%opts Layout [shared_axes=False] Overlay [width=450, height=400 legend_position='top_right', show_title=True, fontsize={'legend':6, 'title':10, 'xlabel':10, 'ylabel':10, 'ticks':10}] Curve (muted_alpha=0.5 muted_color='black') 

# Combine the eight basin plots into one layout arranged in two columns.
(lobith_plot+kiens_plot+arctic_plot+thebes_plot+jatura_plot+louth_plot+koulik_plot+stolb_plot).cols(2)

In [None]:
#Functions for GRDC data retrieval

#Read grdc_data, drop columns and create 2 columns: date and discharge
def grdc_data(station_id):
    """Read the file ``<station_id>.day`` into a date-indexed discharge table.

    The first 40 lines of the file are skipped and the rest is parsed as a
    ';'-separated table. The 'YYYY-MM-DD' column becomes the DatetimeIndex and
    the ' Original' column is kept under the name 'discharge'; the remaining
    columns are dropped.

    Parameters
    ----------
    station_id : str
        File name (or path) without the '.day' extension.

    Returns
    -------
    pandas.DataFrame
        Single column 'discharge', indexed by date.
    """
    table = pd.read_table(station_id + '.day', skiprows=40, delimiter=';')
    renamed = table.rename(columns={'YYYY-MM-DD': 'date', ' Original': 'discharge'})
    timestamps = pd.DatetimeIndex(renamed['date'])
    # reset_index() materialises the old row index as an 'index' column, dropped below
    indexed = renamed.reset_index().set_index(timestamps)
    unwanted = ['hh:mm', ' Calculated', ' Flag', 'index', 'date']
    return indexed.drop(columns=unwanted)

#Take the mean of multiple years for certain dates (only necessary when up-to-date GRDC data is not available)
def grdc_mean(grdc_data, days=None, years=None):
    """Average the discharge of each calendar day over several years.

    Parameters
    ----------
    grdc_data : pandas.DataFrame or pandas.Series
        Date-indexed discharge values; looked up with ``.loc['<year><day>']``.
    days : sequence of str, optional
        Day suffixes of the form '-MM-DD'. Defaults to 22 to 30 November.
    years : sequence of int, optional
        Years to average over. Defaults to 2000 to 2005.

    Returns
    -------
    numpy.ndarray
        One value per entry of ``days``: the mean over ``years`` for that day.

    Raises
    ------
    KeyError
        If a requested date is not present in the index.
    """
    if days is None:
        days = ['-11-22','-11-23','-11-24','-11-25','-11-26','-11-27','-11-28','-11-29','-11-30']
    if years is None:
        years = [2000,2001,2002,2003,2004,2005]

    data = np.array([])
    for k in days:
        # Start a fresh accumulator for every day. Previously it was created once
        # outside the loop, so each result was a running mean over all earlier days.
        subdata = np.array([])
        for i in years:
            subdata = np.append(subdata, grdc_data.loc[str(i)+str(k)].mean())
        data = np.append(data, subdata.mean())
    return data


#Select the grdc data for validation
def grdc_other(grdc_data, start='2000-11-22', end='2000-11-30'):
    """Return the values between two dates (both inclusive) as a flat array.

    Parameters
    ----------
    grdc_data : pandas.DataFrame or pandas.Series
        Date-indexed discharge values.
    start, end : str, optional
        First and last date of the window. The defaults reproduce the
        previously hard-coded period 2000-11-22 to 2000-11-30.

    Returns
    -------
    numpy.ndarray
        One-dimensional array with the selected values.
    """
    # .loc makes the row-wise label slice explicit
    window = grdc_data.loc[start:end]
    # np.append flattens the selection onto an empty float array
    return np.append(np.array([]), window)

In [None]:
#Add GRDC data to station xarray dataset

os.chdir(os.path.join(WorkingPath,'GRDCdat_day'))
times = pd.date_range('2017-11-22', periods=9)


def _grdc_series(station_id):
    """Read one GRDC file, select the validation window and re-index it on ``times``."""
    selected = grdc_other(grdc_data(station_id))
    return xr.DataArray(selected, coords=[times], dims=['time'])


lobith_grdc = _grdc_series('6435060')
kiens_grdc = _grdc_series('6242401')
arctic_grdc = _grdc_series('4208025')
thebes_grdc = _grdc_series('4127501')
jatura_grdc = _grdc_series('3618000')
louth_grdc = _grdc_series('5204250')
koulik_grdc = _grdc_series('1134100')
stolb_grdc = _grdc_series('2903430')

In [None]:
#Create observation and simulation plot with mean and std

def _obs_sim_plot(station, obs, title):
    """Overlay the ensemble spread, mean, std bounds and observations of one station.

    Parameters
    ----------
    station : Station
        Supplies ``name``, ``ds``, ``min``, ``max``, ``mean``, ``std_low`` and ``std_high``.
    obs : xarray.DataArray
        Observed discharge for the same station.
    title : str
        Text passed to the ``title_format`` option of the spread area.

    Returns
    -------
    holoviews.Overlay
        Areas and curves multiplied together in the order they are built below.
    """
    time = station.ds['time']

    # blue area: range between the ensemble minimum and maximum
    spread = hv.Area((time, station.min, station.max),
                     vdims=[station.name + '_min', station.name + '_max'],
                     kdims=['Date 2017', 'Discharge [m3*s-1]']
                     ).options(color='blue', alpha=0.4, title_format=title)
    # red areas: between the maximum/minimum and the upper/lower std bound
    upper = hv.Area((time, station.max, station.std_high),
                    vdims=['maxima', 'lobstd_high']).options(color='red', alpha=0.2)
    lower = hv.Area((time, station.min, station.std_low),
                    vdims=['maxima', 'lobstd_low']).options(color='red', alpha=0.2)

    mean_curve = hv.Curve(station.mean, label='ens_mean').options(line_width = 5, color='blue')
    low_curve = hv.Curve(station.std_low, label='ens_Std').options(line_width = 2, color='red')
    high_curve = hv.Curve(station.std_high, label='ens_Std').options(line_width = 2, color='red')
    obs_curve = hv.Curve(obs, label='obs').options(line_width = 5, color='purple')

    return spread * upper * lower * mean_curve * low_curve * high_curve * obs_curve


# One observation/simulation overlay per station. All eight go through the same
# helper; the Arctic Red River title previously read 'Arcitc'.
lobith_plot2 = _obs_sim_plot(lobith, lobith_grdc, 'Lobith (Rhine, Europe)')
kiens_plot2 = _obs_sim_plot(kiens, kiens_grdc, 'Kienstock (Danube, Europe)')
arctic_plot2 = _obs_sim_plot(arctic, arctic_grdc, 'Arctic Red River (Mackenzie, North America)')
thebes_plot2 = _obs_sim_plot(thebes, thebes_grdc, 'Thebes (Mississippi, North America)')
jatura_plot2 = _obs_sim_plot(jatura, jatura_grdc, 'Jatuarana (Amazon, South America)')
louth_plot2 = _obs_sim_plot(louth, louth_grdc, 'Louth (Darling, Australia)')
koulik_plot2 = _obs_sim_plot(koulik, koulik_grdc, 'Koulikoro (Niger, Africa)')
stolb_plot2 = _obs_sim_plot(stolb, stolb_grdc, 'Stolb (Lena, Asia)')


In [None]:
%%opts Layout [shared_axes=False] Overlay [width=450, height=400 legend_position='top_right', show_title=True, fontsize={'legend':6, 'title':10, 'xlabel':10, 'ylabel':10, 'ticks':10}] Curve (muted_alpha=0.5 muted_color='black') 
# Combine the eight observation/simulation plots into one layout arranged in two columns.
(lobith_plot2+kiens_plot2+arctic_plot2+thebes_plot2+jatura_plot2+louth_plot2+koulik_plot2+stolb_plot2).cols(2)

In [None]:
#General statistics NSE
# Print he.nse(ensemble mean, GRDC observations) for every station, one line each.
# print(...) with a single argument runs under both Python 2 and Python 3, and
# the 'Norht' typo in two labels is corrected.
nse_cases = [
    (lobith, lobith_grdc, ' Lobith (Rhine, Europe) NSE '),
    (kiens, kiens_grdc, ' Kienstock (Danube, Europe) NSE '),
    (arctic, arctic_grdc, ' Arctic Red River (Mackenzie, North America) NSE '),
    (thebes, thebes_grdc, ' Thebes (Mississippi, North America) NSE '),
    (jatura, jatura_grdc, ' Jatuarana (Amazon, South America) NSE '),
    (louth, louth_grdc, ' Louth (Darling, Australia)'),
    (koulik, koulik_grdc, ' Koulikoro (Niger, Africa)'),
    (stolb, stolb_grdc, ' Stolb (Lena, Asia)'),
]

for nse_station, nse_observed, nse_label in nse_cases:
    sim = nse_station.mean.values
    obs = nse_observed.values
    print(str(he.nse(sim, obs)) + nse_label)

In [None]:
#Ignore this part for Now.

# NOTE(review): this import sits mid-notebook; consider moving it to the import cell at the top.
import warnings
# Ignore all FutureWarning messages from here on.
warnings.simplefilter(action='ignore', category=FutureWarning)

def ensemble_stack(station):
    """Stack all ensemble members of a station into one array.

    Parameters
    ----------
    station : Station, xarray.Dataset or mapping
        A ``Station`` (its ``ds`` attribute is used), an object exposing
        ``data_vars`` (every data variable counts as a member), or a plain
        mapping whose first three keys are not members and are skipped.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_members, ...) with one member per row. The previous
        implementation overwrote its result on every iteration and therefore
        returned only the last member.

    Raises
    ------
    ValueError
        If no member is found (``np.stack`` on an empty list).
    """
    source = getattr(station, 'ds', station)  # unwrap Station objects
    if hasattr(source, 'data_vars'):
        # presumably the members are exactly the data variables - TODO confirm
        names = list(source.data_vars)
    else:
        # list(...) so the slice also works on Python 3 key views
        names = list(source.keys())[3:]

    return np.stack([np.asarray(source[name]) for name in names], axis=0)


# One stacked ensemble array per station, in the fixed station order used below.
(lobith_stack, kiens_stack, arctic_stack, thebes_stack,
 jatura_stack, louth_stack, koulik_stack, stolb_stack) = map(
    ensemble_stack,
    (lobith, kiens, arctic, thebes, jatura, louth, koulik, stolb))

# Stations are joined along a new axis 2 for the ensembles and axis 1 for the observations.
ens_stack = np.stack(
    (lobith_stack, kiens_stack, arctic_stack, thebes_stack,
     jatura_stack, louth_stack, koulik_stack, stolb_stack),
    axis=2)
obs_stack = np.stack(
    (lobith_grdc, kiens_grdc, arctic_grdc, thebes_grdc,
     jatura_grdc, louth_grdc, koulik_grdc, stolb_grdc),
    axis=1)



In [None]:
obs = np.array(obs_stack)
ensemble = np.array(ens_stack)
# Mask convention of rankz: entries that are 0/false are masked out.
# Keep every finite observation. The previous mask was drawn at random
# (np.random.randint, unseeded, fixed shape (9,8)), which discarded a different
# random subset of the data on every run.
mask = np.isfinite(obs).astype(int)

# feed into rankz function
result = rankz(obs, ensemble, mask)

# plot histogram: one bar per possible rank (number of members + 1)
plt.bar(range(1,ensemble.shape[0]+2), result[0])

# view histogram
plt.show()

In [None]:
def ensemble_normalize(station):
    """Min-max normalise every ensemble member of a station and stack the results.

    Each member is rescaled as ``(x - x.min()) / (x.max() - x.min())``.

    Parameters
    ----------
    station : Station, xarray.Dataset or mapping
        A ``Station`` (its ``ds`` attribute is used), an object exposing
        ``data_vars`` (every data variable counts as a member), or a plain
        mapping whose first three keys are not members and are skipped.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_members, ...) with one normalised member per row.
        The previous implementation overwrote its result on every iteration
        and returned only the last member.

    Raises
    ------
    ValueError
        If no member is found (``np.stack`` on an empty list).

    Notes
    -----
    A member whose values are all equal has a zero range, so its normalised
    values are not finite.
    """
    source = getattr(station, 'ds', station)  # unwrap Station objects
    if hasattr(source, 'data_vars'):
        # presumably the members are exactly the data variables - TODO confirm
        names = list(source.data_vars)
    else:
        # list(...) so the slice also works on Python 3 key views
        names = list(source.keys())[3:]

    scaled = []
    for name in names:
        member = source[name]
        low = member.min()
        high = member.max()
        scaled.append(np.asarray((member - low) / (high - low)))
    return np.stack(scaled, axis=0)

# Apply the normalisation to the Lobith station.
lobith_norm = ensemble_normalize(lobith)





In [None]:
  # Display the dimensions of the stacked Lobith ensemble array.
  lobith_stack.shape