In [None]:
# time-series vars

In [3]:
import hbv
import pandas as pd
import numpy as np

from star import star_vars
from itertools import combinations

In [4]:
# helper functions
def scale(df, bounds, axis=1, *args, **kwargs):
    '''Rescale the sampled matrix `df` with the limits given in `bounds`.

    `bounds` is a dict with the keys 'ub' and 'lb'; each value is a list
    holding the upper / lower bound of every parameter/variable/factor.
    A truthy `axis` rescales `df` itself, a falsy `axis` rescales its
    transpose. Extra positional and keyword arguments are accepted and
    ignored.'''

    # arrays rather than lists so the arithmetic below is element-wise
    upper = np.array(bounds['ub'])
    lower = np.array(bounds['lb'])

    frame = df if axis else df.T
    return frame * (upper - lower) + lower
    
    
def pairs_h(iterable):
    '''Group every pair of values in `iterable` by the pair's absolute difference.

    Returns a dict keyed by the lag h = 1, 2, ..., max - min. Each value is
    the list of 2-tuples (in `itertools.combinations` order) whose members
    differ by exactly h. A lag without any matching pair maps to an empty
    list, and an empty `iterable` gives an empty dict.'''
    values = list(iterable)  # materialise once: the input is scanned several times
    if not values:
        return {}

    # lags run from 1 up to the full span, whatever the smallest value is
    span = max(values) - min(values)
    pairs = {h: [] for h in range(1, span + 1)}

    # one pass over the combinations instead of one pass per lag
    for pair in combinations(values, 2):
        h = abs(pair[0] - pair[1])
        if h in pairs:
            pairs[h].append(pair)
    return pairs
    
    
def section_df(df):
    '''Build the table of paired values of one section, keyed by lag.

    The labels of the last index level of `df` are paired with `pairs_h`.
    For every lag h the result holds one row per pair, labelled with
    `str(pair)`, and two columns (0 and 1) with the values of the first and
    second member of the pair. The per-lag tables are concatenated with h
    as the outer index level.

    Each selected row must hold exactly one value (`.item()` is used), so
    `df` is presumably a Series or a single-column DataFrame.'''
    labels = df.index.get_level_values(-1)
    pairs = pairs_h(labels)
    df_values = df.to_numpy()

    # translate index labels into row positions, so the lookup stays correct
    # when the labels are not exactly 0..n-1
    position = {label: pos for pos, label in enumerate(labels)}

    sample = pd.concat({
        h: pd.DataFrame.from_dict(
            {str(idx_tup): [df_values[position[idx_tup[0]]].item(),
                            df_values[position[idx_tup[1]]].item()]
             for idx_tup in idx},
            'index')
        for h, idx in pairs.items()
    })

    return sample
    
    
# VARS estimator helpers
def cov_section(pair_cols, mu_star):
    '''covariogram of each section

    `pair_cols` holds the two members of every pair in columns 0 and 1;
    `mu_star` is subtracted from both columns along the index (pandas
    aligns the two indexes). The product of the centred columns is then
    averaged per ('ts', 'centre', 'param', 'h') group.'''
    # centre both columns with a single aligned subtraction
    centred = pair_cols.sub(mu_star, axis=0)
    products = centred[0] * centred[1]
    return products.groupby(level=['ts', 'centre', 'param', 'h']).mean()

def variogram(pair_cols):
    '''variogram over all sections

    Half of the mean squared difference between columns 0 and 1 of
    `pair_cols`, averaged per ('ts', 'param', 'h') group.'''
    squared_diff = (pair_cols[0] - pair_cols[1]).pow(2)
    mean_squared = squared_diff.groupby(level=['ts', 'param', 'h']).mean()
    return 0.5 * mean_squared

def morris_eq(pair_cols):
    '''morris sensitivity measure equivalent evaluated over all sections

    Returns a 2-tuple of Series grouped by ('ts', 'param', 'h'): first the
    mean absolute difference (column 1 minus column 0), then the mean
    signed difference.'''
    # compute the pair differences once and reuse them for both statistics
    increments = pair_cols[1] - pair_cols[0]
    levels = ['ts', 'param', 'h']
    return (increments.abs().groupby(level=levels).mean(),
            increments.groupby(level=levels).mean())

def covariogram(pair_cols, mu_overall):
    '''covariogram over all sections

    Both pair columns are centred with `mu_overall`, matched on index
    level 0; the product of the centred columns is averaged per
    ('ts', 'param', 'h') group.'''
    first = pair_cols[0].sub(mu_overall, level=0)
    second = pair_cols[1].sub(mu_overall, level=0)
    cross = first * second
    return cross.groupby(level=['ts', 'param', 'h']).mean()

def e_covariogram(cov_section_all):
    '''expected covariogram over all sections

    Mean of `cov_section_all` per ('ts', 'param', 'h') group.'''
    grouped = cov_section_all.groupby(level=['ts', 'param', 'h'])
    return grouped.mean()

def sobol_eq(gamma, ecov, variance):
    '''sobol (total order) sensitivity measure equivalent evaluated over all sections

    Adds `gamma` and `ecov`, divides by `variance` matched on the 'ts'
    level, and keeps only the entries whose third index level equals 1.'''
    total = gamma + ecov
    ratio = total.div(variance, level='ts')
    return ratio.loc[:, :, 1]

# ivars function
def ivars(variogram_array, scale, delta_h):
    '''Integrated Variogram Across a Range of Scales (IVARS).

    Approximates the area under the variogram between h = 0 and h = `scale`
    with right trapezoids of width `delta_h` whose heights are the
    variogram values.

    Parameters
    ----------
    variogram_array : Series or other 1-D sequence of numbers
        Variogram values; entry k is taken to belong to h = (k + 1) * delta_h
        (assumes the values are ordered by increasing lag).
    scale : float
        Upper integration limit, must be >= 0. If it is not a multiple of
        `delta_h`, a final narrower trapezoid ending at `scale` is added.
    delta_h : float
        Lag spacing of the variogram values, must be > 0.

    Returns
    -------
    float
        The trapezoidal area. Beyond the last available lag `np.interp`
        holds the last variogram value constant.

    Raises
    ------
    ValueError
        If `delta_h` is not positive or `scale` is negative.'''
    if delta_h <= 0:
        raise ValueError('`delta_h` must be positive')
    if scale < 0:
        raise ValueError('`scale` must be non-negative')

    # use the values that were passed in, not a notebook-level variable
    gamma = np.asarray(variogram_array, dtype=float).ravel()
    num_h = gamma.size

    # benchmark curve: gamma(0) = 0 followed by one value per multiple of delta_h;
    # integer multiples avoid the length drift np.arange shows with float steps
    x_bench = delta_h * np.arange(num_h + 1)
    y_bench = np.concatenate(([0.0], gamma))

    # integration knots: every full step up to `scale`, then `scale` itself
    tol = 1e-9 * delta_h
    n_full = int(np.floor((scale + tol) / delta_h))
    x_int = delta_h * np.arange(n_full + 1)
    if scale - x_int[-1] > tol:
        x_int = np.append(x_int, scale)
    else:
        x_int[-1] = scale  # snap away floating-point residue

    # interpolated variogram values at the knots
    y_int = np.interp(x_int, x_bench, y_bench)

    # sum of the trapezoid areas
    widths = np.diff(x_int)
    return float(np.sum(0.5 * (y_int[1:] + y_int[:-1]) * widths))

# alias
# shorthand for pd.IndexSlice; not referenced by any of the visible cells
idx = pd.IndexSlice

In [5]:
delta_h = 0.1
rng = np.random.default_rng(seed=100)

# single source of truth for the factor names: used for the sampler call,
# the column labels and the number of sampled dimensions
param_names = ['TT', 'C0', 'ETF', 'LP', 'FC', 'beta',
               'FRAC', 'K1', 'alpha', 'K2', 'UBAS', 'PM']

# one lower / upper bound per entry of `param_names`, in the same order
bounds = {'lb':[-4, 0, 0, 0, 50,  1, 0.1, 0.05, 1, 0,    1, 0.5],
          'ub':[+4,10, 1, 1, 500, 3, 0.9, 1,    3, 0.05, 3, 2  ]}
assert len(bounds['lb']) == len(bounds['ub']) == len(param_names), \
    'bounds must list one value per parameter'

# 5 star centres, one coordinate per parameter
star_centres = rng.random((5, len(param_names)))
star_points  = star_vars(star_centres,
                         delta_h=delta_h,
                         parameters=list(param_names),
                         rettype='DataFrame')

star_points.columns = param_names

star_points_scaled = scale(star_points, bounds)

In [6]:
# one fixed parameter set, passed to `model` as a quick check run
par_values = dict(
    TT=4.0,
    C0=1.0,
    ETF=0.1,
    LP=0.3,
    FC=500.0,
    beta=2.0,
    FRAC=0.7,
    K1=0.05,
    alpha=1.5,
    K2=0.01,
    UBAS=1.0,
    PM=1.0,
)

In [7]:
# display only: the star sample after rescaling with `bounds`
star_points_scaled

Unnamed: 0,Unnamed: 1,Unnamed: 2,TT,C0,ETF,LP,FC,beta,FRAC,K1,alpha,K2,UBAS,PM
0,TT,0,-3.720147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,1,-2.920147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,2,-2.120147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,3,-1.320147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,4,-0.520147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,PM,5,0.716537,5.821629,0.597145,0.148874,302.021883,2.126335,0.564791,0.225702,2.419238,0.003501,1.649730,1.260283
4,PM,6,0.716537,5.821629,0.597145,0.148874,302.021883,2.126335,0.564791,0.225702,2.419238,0.003501,1.649730,1.410283
4,PM,7,0.716537,5.821629,0.597145,0.148874,302.021883,2.126335,0.564791,0.225702,2.419238,0.003501,1.649730,1.560283
4,PM,8,0.716537,5.821629,0.597145,0.148874,302.021883,2.126335,0.564791,0.225702,2.419238,0.003501,1.649730,1.710283


In [8]:
# defining model
import hbv

def model(par_vals):
    '''Call `hbv.HBV_SASK` with `par_vals` for 'banff' and return the
    'Q_cms' entry of the first element of its result.'''
    outputs = hbv.HBV_SASK(par_vals, 'banff')
    return outputs[0]['Q_cms']

# check run with the fixed parameter set
model(par_values)

1950-01-01    9.072358
1950-01-02    8.214854
1950-01-03    7.494817
1950-01-04    6.884789
1950-01-05    6.363708
                ...   
2011-12-27    6.271989
2011-12-28    6.207884
2011-12-29    6.144481
2011-12-30    6.081769
2011-12-31    6.019737
Name: Q_cms, Length: 22645, dtype: float64

In [9]:
# df = apply_unique(ishigami, star_points_scaled, axis=1) this bit needs to be corrected, the block doesn't work.
# df = apply_unique(model, star_points_scaled, axis=1)
# df.index.names=['centre', 'param', 'points']

In [10]:
# serial version
df = star_points_scaled.apply(model, axis=1, result_type='expand')
df.index.names = ['centre', 'param', 'point']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1950-01-01,1950-01-02,1950-01-03,1950-01-04,1950-01-05,1950-01-06,1950-01-07,1950-01-08,1950-01-09,1950-01-10,...,2011-12-22,2011-12-23,2011-12-24,2011-12-25,2011-12-26,2011-12-27,2011-12-28,2011-12-29,2011-12-30,2011-12-31
centre,param,point,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,TT,0,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,3.054605,3.025587,2.996845,2.968377,2.940178,2.912248,2.884583,9.990341,20.236120,9.423414
0,TT,1,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,3.068343,3.039195,3.010324,2.981727,2.953402,2.925346,2.897557,2.870031,2.842767,2.815762
0,TT,2,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,2.866804,2.839197,2.811874,2.784831,2.758064,2.731568,2.705340,2.679376,2.653673,2.628227
0,TT,3,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,2.857068,2.829732,2.802665,2.775864,2.749327,2.723050,2.697030,2.671264,2.645750,2.620485
0,TT,4,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,2.893166,2.865487,2.838081,2.810945,2.784075,2.757468,2.731121,2.705032,2.679198,2.653615
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,PM,5,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,12.605935,12.561265,12.516771,12.472454,12.428311,12.384340,12.340540,12.296910,12.253449,12.210154
4,PM,6,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,15.298070,15.243934,15.190010,15.136297,15.082793,15.029496,14.976404,14.923516,14.870830,14.818345
4,PM,7,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,18.092347,18.028397,17.964696,17.901241,17.838030,17.775062,17.712335,17.649846,17.587593,17.525576
4,PM,8,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,20.974242,20.900179,20.826402,20.752908,20.679694,20.606759,20.534100,20.461715,20.389603,20.317761


In [7]:
# parallel version - the best parallel version that I could end up with - moving forward with this bit!
import mapply

# configure mapply before the `.mapply` call below
# NOTE(review): the meaning of these options (e.g. n_workers=-1 presumably
# selecting all cores) is defined by mapply — confirm against its docs
mapply.init(
    n_workers=-1,
    chunk_size=1,
    max_chunks_per_worker=10,
    progressbar=True
)

# same arguments as the serial `.apply` cell: `model` per row (axis=1),
# results expanded into columns
df_m = star_points_scaled.mapply(model,
                               axis=1,
                               result_type='expand')
# name the three index levels
df_m.index.names = ['centre', 'param', 'point']
df_m

  0%|          | 0/30 [00:00<?, ?it/s]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1950-01-01,1950-01-02,1950-01-03,1950-01-04,1950-01-05,1950-01-06,1950-01-07,1950-01-08,1950-01-09,1950-01-10,...,2011-12-22,2011-12-23,2011-12-24,2011-12-25,2011-12-26,2011-12-27,2011-12-28,2011-12-29,2011-12-30,2011-12-31
centre,param,point,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,TT,0,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,3.054605,3.025587,2.996845,2.968377,2.940178,2.912248,2.884583,9.990341,20.236120,9.423414
0,TT,1,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,3.068343,3.039195,3.010324,2.981727,2.953402,2.925346,2.897557,2.870031,2.842767,2.815762
0,TT,2,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,2.866804,2.839197,2.811874,2.784831,2.758064,2.731568,2.705340,2.679376,2.653673,2.628227
0,TT,3,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,2.857068,2.829732,2.802665,2.775864,2.749327,2.723050,2.697030,2.671264,2.645750,2.620485
0,TT,4,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,2.893166,2.865487,2.838081,2.810945,2.784075,2.757468,2.731121,2.705032,2.679198,2.653615
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,PM,5,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,12.605935,12.561265,12.516771,12.472454,12.428311,12.384340,12.340540,12.296910,12.253449,12.210154
4,PM,6,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,15.298070,15.243934,15.190010,15.136297,15.082793,15.029496,14.976404,14.923516,14.870830,14.818345
4,PM,7,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,18.092347,18.028397,17.964696,17.901241,17.838030,17.775062,17.712335,17.649846,17.587593,17.525576
4,PM,8,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,20.974242,20.900179,20.826402,20.752908,20.679694,20.606759,20.534100,20.461715,20.389603,20.317761


In [13]:
def ts_pair(ts):
    '''Apply `section_df` to every ('centre', 'param') group of `ts`.'''
    sections = ts.groupby(level=['centre', 'param'])
    return sections.apply(section_df)

In [34]:
# serial
a = df.iloc[:, :20]
pair_df = a.groupby(level=0, axis=1).apply(ts_pair)
pair_df.index.names = ['centre', 'param', 'h', 'pair_ind']
pair_df.columns.names = ['ts', None]
pair_df.stack(level=0).reorder_levels([-1,0,1,2,3]).sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,0,1
ts,centre,param,h,pair_ind,Unnamed: 5_level_1,Unnamed: 6_level_1
1950-01-01,0,C0,1,"(0, 1)",19.627838,19.627838
1950-01-01,0,C0,1,"(1, 2)",19.627838,19.627838
1950-01-01,0,C0,1,"(2, 3)",19.627838,19.627838
1950-01-01,0,C0,1,"(3, 4)",19.627838,19.627838
1950-01-01,0,C0,1,"(4, 5)",19.627838,19.627838
...,...,...,...,...,...,...
1950-01-20,4,beta,7,"(1, 8)",0.825860,0.825860
1950-01-20,4,beta,7,"(2, 9)",0.825860,0.825860
1950-01-20,4,beta,8,"(0, 8)",0.825860,0.825860
1950-01-20,4,beta,8,"(1, 9)",0.825860,0.825860


In [26]:
# first 2000 columns of the parallel model output; rebinds `a`
a = df_m.iloc[:, :2000]
a

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1950-01-01,1950-01-02,1950-01-03,1950-01-04,1950-01-05,1950-01-06,1950-01-07,1950-01-08,1950-01-09,1950-01-10,...,1955-06-14,1955-06-15,1955-06-16,1955-06-17,1955-06-18,1955-06-19,1955-06-20,1955-06-21,1955-06-22,1955-06-23
centre,param,point,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
0,TT,0,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,22.999474,51.873190,46.830928,21.733484,23.664201,54.625386,57.791824,23.674548,9.701626,13.319646
0,TT,1,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,23.382479,52.306219,47.253840,22.110609,24.034595,55.043340,58.215943,24.036098,10.034973,13.665672
0,TT,2,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,23.876694,52.816536,47.756985,22.592790,24.510966,55.534978,58.706841,24.499649,10.484522,14.117101
0,TT,3,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,24.670306,53.686437,48.609271,23.371762,25.277536,56.374020,59.553778,25.247118,11.186203,14.836968
0,TT,4,19.627838,44.803788,18.329532,2.327651,2.305540,2.283638,2.261944,2.240457,2.219173,2.198092,...,25.379822,54.736830,49.611191,24.094031,25.971357,57.391589,60.627815,25.932079,11.704271,15.454163
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,PM,5,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,61.963199,68.512804,43.255649,30.235198,68.545601,97.991014,49.043560,24.679042,24.592640,27.613196
4,PM,6,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,79.408105,74.909336,48.163845,35.888316,82.286830,109.725498,54.850955,29.037962,28.936299,33.146003
4,PM,7,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,90.200928,90.061203,54.597033,39.416243,91.515799,123.806306,62.562027,33.361550,33.244750,38.951544
4,PM,8,53.060062,24.345783,0.876596,0.873527,0.870469,0.867422,0.864385,0.861359,0.858343,0.855338,...,101.541691,107.733787,61.042574,42.138994,100.547334,138.472081,70.515860,37.644659,37.512864,45.152848


In [23]:
import time

In [51]:
# rebuild `a` column by column with a single concat:
# `DataFrame.iteritems` was removed in pandas 2.0, and concatenating inside
# the loop copies the growing frame on every iteration
d = (pd.concat([series for _, series in a.items()], axis=1)
     if a.shape[1] else pd.DataFrame([]))

In [53]:
# display only: first two columns of the rebuilt frame
d.iloc[:, 0:2]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,1950-01-01,1950-01-02
centre,param,point,Unnamed: 3_level_1,Unnamed: 4_level_1
0,TT,0,19.627838,44.803788
0,TT,1,19.627838,44.803788
0,TT,2,19.627838,44.803788
0,TT,3,19.627838,44.803788
0,TT,4,19.627838,44.803788
...,...,...,...,...
4,PM,5,53.060062,24.345783
4,PM,6,53.060062,24.345783
4,PM,7,53.060062,24.345783
4,PM,8,53.060062,24.345783


In [27]:
start_time = time.time()

# iteration version: pair every column on its own; the results are discarded,
# only the elapsed time is of interest.
# `items()` replaces `iteritems()`, which was removed in pandas 2.0
for _, column in a.items():
    column.groupby(level=['centre', 'param']).apply(section_df)

print("--- %s seconds ---" % (time.time() - start_time))

--- 1598.510687828064 seconds ---


In [28]:
start_time = time.time()

# chunked parallel version: split the time-series frame into column groups and
# build the pair table of every group through `applyParallel`.
# NOTE: `applyParallel` is defined in a cell further down this notebook, so on
# a fresh kernel that cell has to be run before this one.
pair_df = applyParallel(a.groupby(level=0, axis=1), ts_pair)
pair_df.index.names = ['ts', 'centre', 'param', 'h', 'pair_ind']
# pair_df = pair_df.stack().unstack(level=['ts', -1])
# bare expression in the middle of the cell: evaluated but not displayed
pair_df

print("--- %s seconds ---" % (time.time() - start_time))

--- 1211.3670809268951 seconds ---


In [18]:
# parallel version
from joblib import Parallel, delayed
import multiprocessing

def temp_func(func, name, group):
    '''Evaluate `func(group)` and hand it back together with `name`,
    as the tuple (result, name).'''
    result = func(group)
    return result, name

def applyParallel(dfGrouped, func):
    '''Run `func` on every group of `dfGrouped` through joblib and
    concatenate the results, keyed by the group names.

    `dfGrouped` is iterated as (name, group) pairs; one job is requested
    per CPU reported by `multiprocessing.cpu_count()`.'''
    n_workers = multiprocessing.cpu_count()
    tasks = (delayed(temp_func)(func, name, group) for name, group in dfGrouped)
    outputs = Parallel(n_jobs=n_workers)(tasks)
    # split the (result, name) tuples into two parallel sequences
    retLst, top_index = zip(*outputs)
    return pd.concat(retLst, keys=top_index)

In [None]:
# pair table for the complete model output `df` (all columns), built in
# parallel; the analysis cells below read `pair_df`
pair_df = applyParallel(df.groupby(level=0, axis=1), ts_pair)
pair_df.index.names = ['ts', 'centre', 'param', 'h', 'pair_ind']
# pair_df = pair_df.stack().unstack(level=['ts', -1])
pair_df

# VARS functions
Common VARS functions to be applied to each column

In [None]:
# display only: model output frame
df

In [None]:
# mu_star calculation: mean per (centre, param) group, reshaped to long
# form with the former column labels as the first index level
mu_star_df = (
    df.groupby(level=['centre', 'param'])
    .mean()
    .stack()
    .reorder_levels(order=[2, 0, 1])
    .sort_index()
)
mu_star_df.index.names = ['ts', 'centre', 'param']
mu_star_df

In [None]:
# overall mu (mean) of the unique evaluated function values over all star points,
# computed column by column
mu_overall = df.apply(lambda column: np.unique(column).mean())
mu_overall

In [None]:
# overall var (sample variance, ddof=1) of the unique evaluated function values
# over all star points, computed column by column
var_overall = df.apply(lambda column: np.unique(column).var(ddof=1))
var_overall

In [None]:
# variogram: one value per ('ts', 'param', 'h') group of `pair_df`
variogram_value = variogram(pair_df)
variogram_value

In [None]:
# sectional covariogram: pairs centred with `mu_star_df`, one value per
# ('ts', 'centre', 'param', 'h') group
cov_section_ts_all = cov_section(pair_df, mu_star_df)
cov_section_ts_all

In [None]:
# morris values
# `morris_eq` returns a 2-tuple: the mean absolute pair difference first,
# the mean signed pair difference second
morris_values = morris_eq(pair_df)
display('morris absolute: ', morris_values[0])
display('morris: ', morris_values[1])

In [None]:
# overall covariogram calculation: pairs centred with `mu_overall`
covariogram_value = covariogram(pair_df, mu_overall)
# wide view for display: index levels 0 and 1 moved into the columns
covariogram_value.unstack(level=[0,1])

In [None]:
# expected value of the sectional covariogram: mean of `cov_section_ts_all`
# per ('ts', 'param', 'h') group
e_covariogram_value = e_covariogram(cov_section_ts_all)
# wide view for display: index levels 0 and 1 moved into the columns
e_covariogram_value.unstack(level=[0,1])

In [None]:
# sobol value: (variogram + expected covariogram) / overall variance,
# restricted by `sobol_eq` to the entries with h == 1
sobol_value = sobol_eq(variogram_value, e_covariogram_value, var_overall)
sobol_value

In [None]:
# display only: the variogram values that feed the IVARS cell
variogram_value

In [None]:
# upper integration limits for IVARS and the lag spacing of the variogram
ivars_values = [0.1, 0.3, 0.5]
delta_h = 0.1
# one row per integration limit, one column per ('ts', 'param') group
ivars_df = pd.DataFrame.from_dict(
    {
        upper: variogram_value.groupby(level=['ts', 'param']).apply(ivars, scale=upper, delta_h=delta_h)
        for upper in ivars_values
    },
    'index',
)
ivars_df

In [56]:
# scratch cell: prints the single value 0; nothing else in the visible
# notebook reads `i`
for i in range(1):
    print(i)

0
