In [1]:
# time-series vars

In [2]:
import hbv
import pandas as pd
import numpy as np

from star import star_vars

In [3]:
# helper functions
def apply_unique(func, df, axis=1, *args, **kwargs):
    '''Apply a function to unique rows of a DataFrame
    for efficiency.'''

    applied_df = df.merge(df.drop_duplicates()
                         .assign(**{func.__name__: lambda x: x.apply(func, 
                                                                     axis=axis, 
                                                                     result_type='expand',
                                                                     **kwargs)}), 
                         how='left')
    applied_df.index = df.index
    
    return applied_df
    
    
def scale(df, bounds, axis=1, *args, **kwargs):
    '''scale the sampled matrix
    bounds is a dict with ['ub', 'lb'] keys
    the values are lists of the upper and lower bounds
    of the parameters/variables/factors'''
    
    # numpy equivalent for math operations
    bounds_np = {key:np.array(value) for key,value in bounds.items()}
    
    if axis:
        return df * (bounds_np['ub'] - bounds_np['lb']) + bounds_np['lb']
    else:
        return df.T * (bounds_np['ub'] - bounds_np['lb']) + bounds_np['lb']
    
    
def pairs_h(iterable):
    '''gives the pairs of numbers considering their differences'''
    interval = range(min(iterable), max(iterable)-min(iterable))
    pairs  = {key+1:[j for j in combinations(iterable, 2) if np.abs(j[0]-j[1])==key+1] for key in interval}
    return pairs
    
    
def section_df(df):
    '''gets the paired values of each section based on index'''
    pairs = pairs_h(df.index.get_level_values(-1))
    df_values = df.to_numpy()
    sample = pd.concat({h:
                    pd.DataFrame.from_dict({str(idx_tup): [df_values[idx_tup[0]], df_values[idx_tup[1]]] for idx_tup in idx}, 'index') \
                      for h, idx in pairs.items()})

    return sample
    
    
# lambda functions
'''covariogram of each section'''
cov_section = lambda pair_cols, mu_star: (pair_cols.sub(mu_star, axis=0)[0] * pair_cols.sub(mu_star, axis=0)[1]).groupby(level=[0,1,2]).mean()

'''variogram over all sections'''
variogram = lambda pair_cols: 0.5*(pair_cols[0] - pair_cols[1]).pow(2).groupby(level=[1,2]).mean()

'''morris sensitivity measure equivalent evaluated over all sections'''
morris_eq = lambda pair_cols: ((pair_cols[1] - pair_cols[0]).abs().groupby(level=[1,2]).mean(), \
                               (pair_cols[1] - pair_cols[0]).groupby(level=[1,2]).mean())

'''covariogram over all sections'''
covariogram = lambda pair_cols, mu_overall: ((pair_cols - mu_overall)[0] * (pair_cols - mu_overall)[1]).groupby(level=[1,2]).mean()

'''expected covariogram over all sections'''
e_covariogram = lambda cov_section_all: cov_section_all.groupby(level=[1,2]).mean()

'''sobol (total order) sensitivity measure equivalent evaluated over all sections'''
sobol_eq = lambda gamma, ecov, variance: ((gamma + ecov) / variance).loc[:,1]

# ivars function
def ivars(variogram_array, scale, delta_h):
    '''generate Integrated Variogram Across a Range of Scales (IVARS)
    by approximating area using right trapezoids having width of `delta_h`
    and hights of variogram values'''
    num_h  = len(variogram_value.index.levels[-1].to_list())
    x_bench= np.arange(start=0, stop=delta_h*(num_h+1), step=delta_h)
    x_int  = np.arange(start=0, stop=(scale*10+1)/10, step=delta_h)

    # calculate interpolated values for both x (h) and y (variogram)
    if x_int[-1] < scale:
        x_int.append(scale)
    y_bench= [0] + variogram_array.to_list()

    y_int  = np.interp(x=x_int, xp=x_bench, fp=y_bench)
    
    # for loop for each step size to caluclate the area
    ivars = 0
    for i in range(len(x_int)-1):
        ivars += 0.5*(y_int[i+1] + y_int[i]) * (x_int[i+1] - x_int[i])

    return ivars

# alias
idx = pd.IndexSlice

In [4]:
delta_h = 0.1
rng = np.random.default_rng(seed=100)
star_centres = rng.random((2, 12))
star_points  = star_vars(star_centres, 
                         delta_h=delta_h, 
                         parameters=['TT', 
                                     'C0',
                                     'ETF',
                                     'LP',
                                     'FC',
                                     'beta',
                                     'FRAC',
                                     'K1',
                                     'alpha',
                                     'K2',
                                     'UBAS',
                                     'PM'
                                    ], 
                         rettype='DataFrame')

bounds = {'lb':[-4, 0, 0, 0, 50,  1, 0.1, 0.05, 1, 0,    1, 0.5],
          'ub':[+4,10, 1, 1, 500, 3, 0.9, 1,    3, 0.05, 3, 2  ]}
star_points.columns = ['TT', 
                       'C0',
                       'ETF',
                       'LP',
                       'FC',
                       'beta',
                       'FRAC',
                       'K1',
                       'alpha',
                       'K2',
                       'UBAS',
                       'PM'
                      ]
star_points_scaled = scale(star_points, bounds)

In [5]:
par_values ={
    'TT':4.0,
    'C0':1.0,
    'ETF':0.1,
    'LP':0.3,
    'FC':500.0,
    'beta':2.0,
    'FRAC':0.7,
    'K1':0.05,
    'alpha':1.5,
    'K2':0.01,
    'UBAS':1.0,
    'PM':1.0,
}

In [6]:
star_points_scaled

Unnamed: 0,Unnamed: 1,Unnamed: 2,TT,C0,ETF,LP,FC,beta,FRAC,K1,alpha,K2,UBAS,PM
0,TT,0,-3.720147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,1,-2.920147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,2,-2.120147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,3,-1.320147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
0,TT,4,-0.520147,5.965540,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.009500,2.962958,0.927110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9,PM,5,3.132580,8.165179,0.909649,0.767474,345.876225,2.527589,0.803794,0.861143,1.540960,0.047616,2.985292,1.390501
9,PM,6,3.132580,8.165179,0.909649,0.767474,345.876225,2.527589,0.803794,0.861143,1.540960,0.047616,2.985292,1.540501
9,PM,7,3.132580,8.165179,0.909649,0.767474,345.876225,2.527589,0.803794,0.861143,1.540960,0.047616,2.985292,1.690501
9,PM,8,3.132580,8.165179,0.909649,0.767474,345.876225,2.527589,0.803794,0.861143,1.540960,0.047616,2.985292,1.840501


In [7]:
# defining model
def model(par_vals, basin='banff'):
    return hbv.HBV_SASK(par_vals, basin)[0]['Q_cms']

In [8]:
model(par_values)

1950-01-01    9.072358
1950-01-02    8.214854
1950-01-03    7.494817
1950-01-04    6.884789
1950-01-05    6.363708
                ...   
2011-12-27    6.271989
2011-12-28    6.207884
2011-12-29    6.144481
2011-12-30    6.081769
2011-12-31    6.019737
Name: Q_cms, Length: 22645, dtype: float64

In [39]:
df_sampled = star_points_scaled.iloc[0:3, :]
df_sampled

Unnamed: 0,Unnamed: 1,Unnamed: 2,TT,C0,ETF,LP,FC,beta,FRAC,K1,alpha,K2,UBAS,PM
0,TT,0,-3.720147,5.96554,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.0095,2.962958,0.92711
0,TT,1,-2.920147,5.96554,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.0095,2.962958,0.92711
0,TT,2,-2.120147,5.96554,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.0095,2.962958,0.92711


In [19]:
# df = apply_unique(ishigami, star_points_scaled, axis=1)
df = apply_unique(model, df_sampled, axis=1)
df.index.names=['centre', 'param', 'points']

ValueError: Wrong number of items passed 22645, placement implies 1

In [57]:
def test_model(df):
    return (1,2,3)

In [58]:
results = df_sampled.apply(test_model, axis=1, result_type='expand')

In [59]:
df_sampled

Unnamed: 0,Unnamed: 1,Unnamed: 2,TT,C0,ETF,LP,FC,beta,FRAC,K1,alpha,K2,UBAS,PM
0,TT,0,-3.720147,5.96554,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.0095,2.962958,0.92711
0,TT,1,-2.920147,5.96554,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.0095,2.962958,0.92711
0,TT,2,-2.120147,5.96554,0.288863,0.042952,488.144478,2.192943,0.732211,0.914822,2.376309,0.0095,2.962958,0.92711


In [60]:
results # this is done - easy from now on

Unnamed: 0,Unnamed: 1,Unnamed: 2,0,1,2
0,TT,0,1,2,3
0,TT,1,1,2,3
0,TT,2,1,2,3
