In [1]:
import numpy as np
import pandas as pd

from star import star_vars
from itertools import combinations

In [2]:
def ishigami(x, a=7, b=0.05):
    '''Ishigami test function.

    Evaluates ``sin(x[0]) + a*sin(x[1])**2 + b*x[2]**4*sin(x[0])``.

    Parameters
    ----------
    x : pandas.DataFrame, pandas.Series, numpy.ndarray or list
        Container whose entries ``x[0]``, ``x[1]`` and ``x[2]`` hold the
        three inputs. A list is converted to a numpy array before use.
    a, b : float, optional
        Coefficients of the second and third terms (defaults 7 and 0.05).

    Returns
    -------
    The value of the expression above; its type follows whatever
    ``x[0]``, ``x[1]`` and ``x[2]`` evaluate to (scalar or array-like).

    Raises
    ------
    TypeError
        If ``x`` is not one of the accepted container types.
    ValueError
        If the first dimension of ``x`` is larger than three.
    '''
    if not isinstance(x, (pd.DataFrame, pd.Series, np.ndarray, list)):
        raise TypeError('`x` must be of type pandas.DataFrame, numpy.ndarray, pd.Series, or list')

    # a plain list is an accepted type but has no `.shape` and no
    # element-wise arithmetic, so promote it to an ndarray first
    if isinstance(x, list):
        x = np.asarray(x)

    if x.shape[0] > 3:
        raise ValueError('`x` must have only three arguments at a time')

    return np.sin(x[0]) + a*(np.sin(x[1])**2) + b*(x[2]**4)*np.sin(x[0])

In [3]:
# helper functions
def apply_unique(func, df, axis=1, *args, **kwargs):
    '''Apply a function to the unique rows of a DataFrame for efficiency.

    ``func`` is evaluated once per distinct row of ``df`` and the result is
    merged back so that duplicated rows share the same value.

    Parameters
    ----------
    func : callable
        Function handed to ``DataFrame.apply``; its ``__name__`` becomes
        the name of the new column.
    df : pandas.DataFrame
        Frame whose rows are the inputs of ``func``.
    axis : int, optional
        Axis forwarded to ``DataFrame.apply`` (default 1, i.e. row-wise).
    *args, **kwargs
        Extra positional / keyword arguments forwarded to ``func``.

    Returns
    -------
    pandas.DataFrame
        The columns of ``df`` plus one column named ``func.__name__``,
        carrying the index of ``df``.
    '''
    unique_rows = df.drop_duplicates()
    evaluated = unique_rows.assign(
        **{func.__name__: unique_rows.apply(func, axis=axis, args=args, **kwargs)}
    )

    # left merge on the shared columns keeps the row order and count of `df`;
    # merge resets the index, so the original one is restored afterwards
    applied_df = df.merge(evaluated, how='left')
    applied_df.index = df.index

    return applied_df
    
    
def scale(df, bounds, axis=1, *args, **kwargs):
    '''Rescale a sampled matrix to the given bounds.

    ``bounds`` is a dict with the keys 'ub' and 'lb' whose values are lists
    of the upper and lower bounds of the parameters/variables/factors.
    The result is ``df * (ub - lb) + lb``; when ``axis`` is falsy the
    transpose of ``df`` is scaled instead. ``*args`` and ``**kwargs`` are
    accepted but unused.
    '''
    # arrays instead of lists so the bounds support element-wise arithmetic
    limits = {name: np.array(values) for name, values in bounds.items()}
    upper = limits['ub']
    lower = limits['lb']

    frame = df if axis else df.T
    return frame * (upper - lower) + lower
    
    
def pairs_h(iterable):
    '''Group the pairs of numbers in `iterable` by their absolute difference.

    Parameters
    ----------
    iterable : iterable of int
        Values to pair up (e.g. the point indices of one section).

    Returns
    -------
    dict
        Maps every lag ``h`` in ``1 .. max - min`` to the list of 2-tuples
        (in ``itertools.combinations`` order) whose members differ by ``h``.
        An empty input gives an empty dict.
    '''
    values = list(iterable)
    if not values:
        return {}

    # lags run from 1 up to the full spread, regardless of where the
    # smallest value sits
    max_lag = max(values) - min(values)
    pairs = {h: [] for h in range(1, max_lag + 1)}

    # single pass over the combinations instead of one scan per lag
    for first, second in combinations(values, 2):
        lag = abs(first - second)
        if lag in pairs:
            pairs[lag].append((first, second))

    return pairs
    
    
def section_df(df):
    '''Get the paired values of one section, grouped by lag `h`.

    Parameters
    ----------
    df : pandas.Series or pandas.DataFrame
        Values of one section; the last index level identifies the points.

    Returns
    -------
    pandas.DataFrame
        Outer index level is the lag ``h`` (the keys returned by
        ``pairs_h``), inner level is ``str((i, j))`` for each pair of point
        labels; columns 0 and 1 hold the values at ``i`` and ``j``.
    '''
    labels = df.index.get_level_values(-1)
    pairs = pairs_h(labels)
    df_values = df.to_numpy()

    # `df_values` is positional, so translate point labels to positions
    # rather than assuming the labels are exactly 0..n-1
    position = {label: pos for pos, label in enumerate(labels)}

    sample = pd.concat({
        h: pd.DataFrame.from_dict(
            {
                str(pair): [df_values[position[pair[0]]],
                            df_values[position[pair[1]]]]
                for pair in lag_pairs
            },
            orient='index',
        )
        for h, lag_pairs in pairs.items()
    })

    return sample
    
    
# sensitivity-measure helpers
# `pair_cols` is a frame with columns 0 and 1 holding the two values of each pair
def cov_section(pair_cols, mu_star):
    '''covariogram of each section'''
    centred = pair_cols.sub(mu_star, axis=0)
    return (centred[0] * centred[1]).groupby(level=[0,1,2]).mean()


def variogram(pair_cols):
    '''variogram over all sections'''
    squared_diff = (pair_cols[0] - pair_cols[1]).pow(2)
    return 0.5 * squared_diff.groupby(level=[1,2]).mean()


def morris_eq(pair_cols):
    '''morris sensitivity measure equivalent evaluated over all sections

    Returns a tuple: (mean absolute difference, mean signed difference).
    '''
    diff = pair_cols[1] - pair_cols[0]
    mean_abs = diff.abs().groupby(level=[1,2]).mean()
    mean_signed = diff.groupby(level=[1,2]).mean()
    return (mean_abs, mean_signed)


def covariogram(pair_cols, mu_overall):
    '''covariogram over all sections'''
    centred = pair_cols - mu_overall
    return (centred[0] * centred[1]).groupby(level=[1,2]).mean()


def e_covariogram(cov_section_all):
    '''expected covariogram over all sections'''
    return cov_section_all.groupby(level=[1,2]).mean()


def sobol_eq(gamma, ecov, variance):
    '''sobol (total order) sensitivity measure equivalent evaluated over all sections'''
    ratio = (gamma + ecov) / variance
    # keep only the entries whose second index level equals 1
    return ratio.loc[:,1]

# ivars function
def ivars(variogram_array, scale, delta_h):
    '''Generate IVARS: the integral of the variogram from 0 to `scale`.

    The variogram is treated as piecewise linear through the knots
    ``(0, 0), (delta_h, v[0]), (2*delta_h, v[1]), ...`` and integrated with
    the trapezoidal rule.

    Parameters
    ----------
    variogram_array : pandas.Series or array-like
        Variogram values at lags ``delta_h, 2*delta_h, ...`` in that order.
    scale : float
        Upper limit of the integration.
    delta_h : float
        Spacing between consecutive lags; must be positive.

    Returns
    -------
    float
        Area under the interpolated variogram between 0 and ``scale``.
        Beyond the last knot ``numpy.interp`` holds the last value constant.

    Raises
    ------
    ValueError
        If ``delta_h`` is not positive.
    '''
    if delta_h <= 0:
        raise ValueError('`delta_h` must be positive')

    # use the argument itself (not a notebook-level variable) to size the grid
    gamma = np.asarray(variogram_array, dtype=float).ravel()

    # integer arange scaled by delta_h: a float-step arange can return one
    # element too many, which would break the xp/fp length match in np.interp
    x_bench = delta_h * np.arange(gamma.size + 1)
    y_bench = np.concatenate(([0.0], gamma))

    # whole steps that fit inside [0, scale]; the small tolerance absorbs
    # round-off such as 0.3 / 0.1 == 2.9999999999999996
    n_steps = max(int(np.floor(scale / delta_h + 1e-9)), 0)
    x_int = delta_h * np.arange(n_steps + 1)

    # close the interval with a partial step when scale is off-grid
    if x_int[-1] < scale:
        x_int = np.append(x_int, scale)

    y_int = np.interp(x=x_int, xp=x_bench, fp=y_bench)

    # trapezoidal rule over consecutive grid points
    return float(np.sum(0.5 * (y_int[1:] + y_int[:-1]) * np.diff(x_int)))

# alias: short name for pandas' IndexSlice helper (used for MultiIndex slicing with .loc)
idx = pd.IndexSlice

In [4]:
# step size handed to star_vars here and to the IVARS integration further down
delta_h = 0.1
# fixed seed so the sampled star centres are identical on every run
rng = np.random.default_rng(seed=100)
# 10 star centres with 3 coordinates each, drawn uniformly from [0, 1)
star_centres = rng.random((10, 3))
# star_vars comes from the local `star` module; presumably it generates the
# star points around each centre along x1, x2 and x3 - confirm against that module
star_points  = star_vars(star_centres, delta_h=delta_h, parameters=['x1', 'x2', 'x3'], rettype='DataFrame')

In [5]:
# bounds = {'ub':[3,4,5], 'lb':[-3,-4,-5]}
# star_points_scaled = scale(star_points, bounds)

In [6]:
# scaled variant, disabled together with the scaling step:
# df = apply_unique(ishigami, star_points_scaled, axis=1)
# evaluate ishigami once per distinct row of the unscaled star points;
# the result gains an 'ishigami' column next to the input columns
df = apply_unique(ishigami, star_points, axis=1)
# name the three index levels; later cells group on them by position (level 0, 1, ...)
df.index.names=['centre', 'param', 'points']
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,0,1,2,ishigami
centre,param,points,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0,x1,0,0.034982,0.596554,0.288863,2.244282
0,x1,1,0.134982,0.596554,0.288863,2.343915
0,x1,2,0.234982,0.596554,0.288863,2.442202
0,x1,3,0.334982,0.596554,0.288863,2.538162
0,x1,4,0.434982,0.596554,0.288863,2.630836
...,...,...,...,...,...,...
9,x3,5,0.408518,0.389765,0.531648,1.409483
9,x3,6,0.408518,0.389765,0.631648,1.411058
9,x3,7,0.408518,0.389765,0.731648,1.413588
9,x3,8,0.408518,0.389765,0.831648,1.417398


In [7]:
# getting the paired values of each section based on `h`
# each (centre, param) group of the 'ishigami' column is one section handed to section_df
pair_df = df[ishigami.__name__].groupby(level=[0,1]).apply(section_df)
# the two group keys are followed by the lag and the stringified pair of point labels
pair_df.index.names = ['centre', 'param', 'h', 'pair_ind']
pair_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,0,1
centre,param,h,pair_ind,Unnamed: 4_level_1,Unnamed: 5_level_1
0,x1,1,"(0, 1)",2.244282,2.343915
0,x1,1,"(1, 2)",2.343915,2.442202
0,x1,1,"(2, 3)",2.442202,2.538162
0,x1,1,"(3, 4)",2.538162,2.630836
0,x1,1,"(4, 5)",2.630836,2.719298
...,...,...,...,...,...
9,x3,7,"(1, 8)",1.407902,1.417398
9,x3,7,"(2, 9)",1.407953,1.422860
9,x3,8,"(0, 8)",1.407896,1.417398
9,x3,8,"(1, 9)",1.407902,1.422860


In [8]:
# mu_star calculation
# mean function value of every (centre, param) section
mu_star_df = df[ishigami.__name__].groupby(level=[0,1]).mean()
mu_star_df.index.names = ['centre', 'param']
# display only: one row per centre, one column per parameter
mu_star_df.unstack(level=1)

param,x1,x2,x3
centre,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,2.656646,2.885317,2.95948
1,5.243696,2.068463,4.830351
2,4.864449,2.432892,5.084556
3,5.330994,2.254328,5.028727
4,2.554175,2.655503,2.704976
5,5.378425,2.651379,5.443588
6,2.075819,2.862646,2.280281
7,1.513483,2.958328,1.866293
8,4.072956,2.926255,4.365968
9,1.435236,2.505585,1.411486


In [9]:
# overall mu (mean) of the unique evaluated function values over all stars points
# NOTE: .unique() de-duplicates by function value, so distinct points that
# happen to share a value are counted once
mu_overall = df[ishigami.__name__].unique().mean()
mu_overall

3.2181307939105364

In [10]:
# overall var (variance) of the unique evaluated function values over all stars points
# ddof=1 gives the sample variance (divides by n - 1); values are
# de-duplicated by .unique() first, as for mu_overall
var_overall = df[ishigami.__name__].unique().var(ddof=1)
var_overall

2.625633323232234

In [11]:
# sectional covariogram calculation
# NOTE(review): the original remark says the result matches the MATLAB
# implementation - confirm against that reference code
# pairs are centred on the mean of their own (centre, param) section
cov_section_all = cov_section(pair_df, mu_star_df)
# display only: one column per parameter
cov_section_all.unstack(level=1)

Unnamed: 0_level_0,param,x1,x2,x3
centre,h,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,1,0.048396,2.130395,0.000085
0,2,0.032084,1.415033,0.000045
0,3,0.013009,0.545250,0.000009
0,4,-0.008655,-0.447458,-0.000026
0,5,-0.032701,-1.525340,-0.000059
...,...,...,...,...
9,5,-0.033542,-1.506603,-0.000010
9,6,-0.060431,-2.610690,-0.000016
9,7,-0.089252,-3.710385,-0.000023
9,8,-0.119703,-4.758991,-0.000031


In [12]:
# variogram calculation
# half the mean squared pair difference for every (param, h), pooled over all centres
variogram_value = variogram(pair_df)
# display only: one row per lag h, one column per parameter
variogram_value.unstack(level=0)

param,x1,x2,x3
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.003723,0.157006,9e-06
2,0.014996,0.643154,3e-05
3,0.033887,1.467836,5.5e-05
4,0.060336,2.620288,8.1e-05
5,0.094159,4.06793,0.00011
6,0.135042,5.756585,0.000144
7,0.182547,7.612715,0.000191
8,0.23612,9.547591,0.000259
9,0.295096,11.463071,0.000365


In [13]:
# morris calculation
# morris_eq returns a tuple: (mean absolute difference, mean signed difference) per (param, h)
morris_values = morris_eq(pair_df)
# first element: mean of the absolute pair differences
morris_values[0].unstack(level=0)

param,x1,x2,x3
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.085347,0.531766,0.002697
2,0.171708,1.090234,0.004997
3,0.258653,1.664431,0.00707
4,0.345745,2.242759,0.009086
5,0.432546,2.813301,0.011216
6,0.518617,3.364139,0.013629
7,0.603523,3.883672,0.016496
8,0.686833,4.360934,0.019987
9,0.768126,4.785892,0.024272


In [14]:
# second element: mean of the signed pair differences; the rounded values
# shown below coincide with the absolute version displayed by the previous cell
morris_values[1].unstack(level=0)

param,x1,x2,x3
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.085347,0.531766,0.002697
2,0.171708,1.090234,0.004997
3,0.258653,1.664431,0.00707
4,0.345745,2.242759,0.009086
5,0.432546,2.813301,0.011216
6,0.518617,3.364139,0.013629
7,0.603523,3.883672,0.016496
8,0.686833,4.360934,0.019987
9,0.768126,4.785892,0.024272


In [15]:
# overall covariogram calculation
# pairs are centred on the overall mean (mu_overall) rather than on the section means
covariogram_value = covariogram(pair_df, mu_overall)
# display only: one row per lag h, one column per parameter
covariogram_value.unstack(level=0)

param,x1,x2,x3
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,2.532258,2.555273,2.193087
2,2.517009,1.886561,2.192966
3,2.49801,1.042883,2.192949
4,2.475428,0.054558,2.193032
5,2.449472,-1.04133,2.193213
6,2.420388,-2.202352,2.193492
7,2.388463,-3.382391,2.193875
8,2.354013,-4.533451,2.19437
9,2.317387,-5.607577,2.194989


In [16]:
# expected value of the overall covariogram calculation
# averages the sectional covariograms over the centres for every (param, h)
e_covariogram_value = e_covariogram(cov_section_all)
# display only: one row per lag h, one column per parameter
e_covariogram_value.unstack(level=0)

param,x1,x2,x3
h,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.048206,2.064049,4.9e-05
2,0.03195,1.368743,2.6e-05
3,0.012943,0.524264,5e-06
4,-0.008639,-0.438263,-1.5e-05
5,-0.032586,-1.481585,-3.4e-05
6,-0.058656,-2.56376,-5.5e-05
7,-0.086574,-3.639797,-7.8e-05
8,-0.116039,-4.663456,-0.000104
9,-0.146724,-5.589107,-0.000136


In [17]:
# sobol calculation
# (variogram + expected covariogram) / overall variance; sobol_eq keeps only the h == 1 entries
sobol_value = sobol_eq(variogram_value, e_covariogram_value, var_overall)
sobol_value

param
x1    0.019777
x2    0.845912
x3    0.000022
dtype: float64

In [18]:
# IVARS calculation
# integration limits for which the integrated variogram is evaluated
ivars_values = [0.1, 0.3, 0.5]
# one column per limit; each column holds the integral of every
# parameter's variogram (level 0 of variogram_value) up to that limit
ivars_df = pd.DataFrame({
    scale: variogram_value.groupby(level=0).apply(ivars, scale=scale, delta_h=delta_h)
    for scale in ivars_values
})
ivars_df.T

param,x1,x2,x3
0.1,0.000186,0.00785,4.686609e-07
0.3,0.003566,0.153408,6.656532e-06
0.5,0.016002,0.692225,2.297963e-05
