In [1]:
import numpy as np
from typing import Tuple

In [2]:
def symlhs(sp:int, params:int, seed:int=None, criterion:str='maximin', iterations:int=10) -> np.ndarray:
    '''
    Description:
    ------------
    This function generates symmetrical LHS of `sp`
    datapoints in the `params`-dimensional hypercube
    of [0,1] developed based on [1].

    One candidate sample is drawn first and `iterations` further
    candidates are drawn afterwards; the candidate with the best
    score under `criterion` is returned.

    NOTE(review): reference [1] is cited but not listed in this
    docstring - add the full citation.


    Arguments:
    ----------
    :param sp: the number of sampling points
    :type sp: int, np.int32, np.int64
    :param params: the number of parameters/variables/factors
    :type params: int, np.int32, np.int64
    :param seed: the seed number for randomization; when given
                 (including `0`) the global NumPy random state
                 is reseeded with it
    :type seed: int, np.int32, np.int64, defaults to `None`
    :param criterion: method for evaluation of the generated
                      sampled array, options are `maximin`
                      (largest minimum distance between points wins)
                      and `correlation` (smallest sum of squared
                      column correlations wins)
    :type criterion: str, defaults to maximin
    :param iterations: number of additional candidates generated
                       after the first one to get the optimal
                       sampled array
    :type iterations: int, np.int32, np.int64


    Returns:
    --------
    :return symlhs_sample: the returned symmetrical LHS sampled array
    :rtype symlhs_sample: np.ndarray


    Raises:
    -------
    :raises ValueError: if `criterion` is not one of the
                        available options


    Contributors:
    -------------
    Sheikholeslami, Razi, (2017): code in MATLAB(c)
    Razavi, Saman, (2017): supervision, code in MATLAB(c)
    Keshavarz, Kasra, (2021): code in Python 3
    Matott, Shawn, (2019): code in C/++
    '''

    # Check the inputs first, so that an invalid call does not
    # disturb the global random state
    msg_crt = ("'{}' is not defined; available options: 'maximin', 'correlation'")
    if criterion not in ('maximin', 'correlation'):
        raise ValueError(msg_crt.format(criterion))

    # set the seed number; `is not None` so that seed=0 is honoured
    if seed is not None:
        np.random.seed(seed)

    # `maximin` maximizes its cost function, `correlation` minimizes it
    maximize = (criterion == 'maximin')
    cost_function = _get_min_distance if maximize else _get_corr

    best_sample = _symlhs_sampled(sp, params)
    best_sample_cost = cost_function(best_sample)

    for _ in range(iterations):
        new_sample = _symlhs_sampled(sp, params)
        new_sample_cost = cost_function(new_sample)

        # strict comparison: on ties the earlier candidate is kept
        if maximize:
            improved = new_sample_cost > best_sample_cost
        else:
            improved = new_sample_cost < best_sample_cost

        if improved:
            best_sample = new_sample
            best_sample_cost = new_sample_cost

    return best_sample


def _symlhs_sampled(sp:int, params:int, seed=None) -> np.ndarray:
    '''
    Description:
    ------------
    This function returns a symmetrical LHS sample
    
    
    Arguments:
    ----------
    :param sp: the number of sampling points
    :type sp: int, np.int32, np.int64
    :param params: the number of parameters/variables/factors
    :type params: int, np.int32, np.int64
    
    
    Returns:
    --------
    :return symlhs_sample: the returned sample
    :rtype symlhs_sample: np.ndarray
    
    '''
    
    if seed:
        np.random.seed(seed)
    
    # preparing the array - probably python list is more efficient
    # while being propagated in each loop or within a comprehension
    # list
    
    symlhs = np.ones((sp, params))

    if sp % params == 0:
        start = 0
    else:
        start = 1
        symlhs[0,:] = (sp+1)/2


    for i in np.arange(start, sp, 2):
        symlhs[i,:] = _perm_intv(1, sp, params-1)
        for c in range(symlhs.shape[1]):
            while np.unique(symlhs[0:i+1,c]).size < (i+1):
                symlhs[i,c] = _perm_intv(1, sp, 0)
        symlhs[i+1,:] = sp+1-symlhs[i,:]
    
    symlhs_sample = np.random.uniform(low=symlhs-1, high=symlhs)/sp
    
#     DEBUG
#     print(symlhs)
#     print('-----')
#     print(symlhs_sample)
#     END DEBUG
    
    return symlhs_sample


def _perm_intv(lb:int, ub:int, slices:int, seed:int=None) -> np.ndarray:
    '''
    Description:
    ------------
    A simple random sampling given the lower and upper bounds,
    without permutation, and amongst the integers in the interval


    Arguments:
    ----------
    :param lb: lower bound of the sequence
    :type lb: one of int, np.int32, np.int64
    :param ub: upper bound of the sequence
    :type ub: one of int, np.int32, np.int64
    :param slices: the number of slices
    :type slices: one of int, np.int32, np.int64


    Returns:
    --------
    :return perm: the sampled np.array
    :type perm: np.array


    Contributors:
    -------------
    Sheikholeslami, Razi, (2017): algorithm, code in MATLAB (c)
    Razavi, Saman, (2017): supervision
    Keshavarz, Kasra, (2021): code in Python 3
    Matott, Shawn, (2019): code in C/++
    '''

    # define the randomization seed number
    if seed:
        np.random.seed(seed)

    # a simple sampling without permutation algorithm
    length = np.abs(ub-lb)+1
    perm   = np.arange(start=lb, stop=ub+1, step=1)
    for k in range(2, length+1):
        index1 = np.int(np.ceil(np.random.rand() * k))
        index2 = perm[k-1]
        perm[k-1] = perm[index1-1]
        perm[index1-1] = index2
    perm = perm[0:slices+1]
    
    # DEBUG
    # print('perm is:')
    # print(perm)
    # END DEBUG

    return perm


def _knn(arr1:np.ndarray, arr2:np.ndarray, k:int) -> Tuple[np.ndarray, np.ndarray]:
    
    '''
    Description:
    ------------
    A simple KNN ML algorithm to find the minimum Euclidean distance
    
    
    Arguments:
    ----------
    :param arr1: the first array of data
    :type arr1: np.array, `n` rows and `d` columns
    :param arr2: the second array of data
    :type arr2: np.array, `m` rows and `d` columns
    :param k: the number of neighbors
    :type k: int, np.int32, np.int64
    
    
    Returns:
    --------
    :return distances: Euclidean distances between `arr1` and `arr2` points
    :rtype distances: np.array
    :return indices: the indices of the distances between `arr1` and `arr2` 
                     points
    :rtype indices: np.array
    '''
    
    # calculating the distance between points
    distances = -2 * arr1@arr2.T + np.sum(arr2**2,axis=1) + \
                     np.sum(arr1**2,axis=1)[:, np.newaxis]
    
    # taking into account the floating point discrepancies 
    distances[distances < 0] = 0
    distances = distances**.5
    indices = np.argsort(distances, 0)
    distances = np.sort(distances,0)
    
    # reshaping the arrays
    indices = indices[0:k, : ].T
    
#     DEBUG
#     print(distances)
#     print(distances.shape)
#     END DEBUG
    
    distances = distances[0:k, : ].T.flatten().reshape(arr1.shape[0], k)
    
    return indices, distances


def _get_min_distance(arr:np.ndarray, k:int=3) -> float:
    '''
    Description:
    ------------
    Calculates the minimum Euclidean distance between sample points as a measure
    of sparsity of the sampling space
    
    
    Arguments:
    ----------
    :param arr: the input array of any size
    :type arr: np.array
    
    
    Returns:
    --------
    :return min_distance: the minimum distance calculated
    :rtype min_distance: np.float
    '''
    
    idx, distance = _knn(arr, arr, k) # idx index start from 0
    min_distance = np.min(distance[:, 1])
    
    return min_distance


def _get_corr(arr:np.ndarray) -> float:
    '''
    Description:
    ------------
    Calculates the correlation between the sample columns and
    reports the sum of squared correlation values.
    
    
    Arguments:
    ----------
    :param arr: the input array of any size
    :type arr: np.array
    
    
    Returns:
    --------
    :return sq_corr: sum of the squared correlation values
    :rtype sq_corr: np.float
    '''

    return sum(sum(np.triu(np.corrcoef(arr, rowvar=False)**2, k=1)))

In [3]:
# Example: 20 sampling points over 4 parameters, seeded, keeping the
# candidate whose minimum inter-point distance is the largest ('maximin')
symlhs(sp=20, params=4, seed=5, criterion='maximin', iterations=10)

array([[0.56309969, 0.02507638, 0.7259338 , 0.09834304],
       [0.42185017, 0.99939451, 0.27106745, 0.90559355],
       [0.93110423, 0.42892728, 0.69021433, 0.12889392],
       [0.09117936, 0.57774758, 0.34211922, 0.87550265],
       [0.77623621, 0.82591261, 0.88952078, 0.72324175],
       [0.20962733, 0.18382384, 0.1449957 , 0.25689558],
       [0.71246879, 0.61998818, 0.15733116, 0.43825525],
       [0.28774011, 0.37347709, 0.84646066, 0.59511424],
       [0.14091214, 0.23307661, 0.39792303, 0.83825532],
       [0.89294551, 0.77434105, 0.64695105, 0.18950233],
       [0.62523628, 0.14355961, 0.05706728, 0.22025004],
       [0.3782283 , 0.89107536, 0.94136114, 0.76850585],
       [0.67777999, 0.27204034, 0.21308862, 0.0069396 ],
       [0.32675359, 0.73463862, 0.78563905, 0.99018077],
       [0.53681182, 0.07248267, 0.50746166, 0.6769483 ],
       [0.4701807 , 0.90074473, 0.4956733 , 0.33060316],
       [0.991902  , 0.52517065, 0.40567298, 0.51395242],
       [0.03321939, 0.496157  ,