# universal estimator

### Exp-1: Show that the error decreases when the parameter search space gets smaller.

**Research question**:
Is the search space size of the parameter $d$ of a univariate distribution $f(d)$ has an affect on the learning and the prediction errors?

Let $f(d)$ be a one dimensional function, that returns a samples vector drawn from a univariate distribution (e.g., log-normal).

1. Generate a (256) samples vector using e.g: data = f(d=0.92)
2. Write a function: $estimator(f, data)$ which learns the parameter $d$ of $f$ from the $data$.

> - Init: search_space = (0,1)
> - Iterate (stop when the search-space size reaches some fraction, e.g. 1/128, of the initial search space)
>   1. Generate synthetic data-sets ( train / test ) using $f$ (within the parameter search space)
>   2. Fit a DNN model, error = | pred-set - test-set |
>   3. Predict the parameter $d\_pred$ on the input data
>   4. Narrow the search space:
>           pivot = d_pred
>           margin = 2 * std(error)
>           search_space = ( pivot - margin, pivot  + margin )

3. Plot a graph:
   - $x$: size of search space
   - $y$: error
   - show that the size of the error converges to a lower limit (asymptotically)

In [1]:
# import library
%run lib.ipynb
np.set_printoptions(precision=4)
reset_random_state(SEED)

In [2]:
def next_config(param_search_space):
    """
    return a random (uniform)parameter within param_search_space
    """
    low = param_search_space[0]
    high = param_search_space[1]
    if None != RS:
        return RS.uniform(low, high, size=1)[0]
    return np.random.uniform(low, high, size=1)[0]

def estimator_exp_1(f, data, d_true,
                    initial_param_search_space=np.array([0.0,1.0])):
    """
    Learn parameters of f, from the data.
    Arguments:
        - f: one dimensional function that gives the PMF of a univariate distribution.
        - data: samplesarray generated using f.
    """

    # experiment result: [ (size of search space, test_MAE) ]
    res = {
        'search_space_width': [],
        'test_MAE': [],
        'd_pred_MAE': [],
        'd_pred_best_MAE': [],
        'd_pred': 0,
        'd_pred_best': 0,
    }
   
    # number of samples in data
    M = len(data)
    N = 1000

    d_pred_best = -1
    data_MAE_best = -1
    
    # Iterate
    param_search_space = initial_param_search_space
    while True:
        
        # 1. Generate synthetic learning data-sets (train/test) using f (within param_search_space)
        # -----------------------------------------------------------------------------------------
        print()
        print(f'generating data (M={M}, N={N}) param_search_space: {param_search_space} ...', end=' ')
        raw, H, params = generate_data(N=N, 
                                       M=M, 
                                       sample=f, 
                                       nextConfig=lambda: next_config(param_search_space),
                                       density=False, 
                                       apply_log_scale=False)

        H_train, H_test, train_params, test_params = train_test_split(H, 
                                                                      params, 
                                                                      test_size=0.25, 
                                                                      random_state=RANDOM_STATE)
        print(f'histogram shape: {H_train.shape}')

        # 2. Fit a DNN model to train-set and predict on test-set, error = | pred-set - test-set |
        # -----------------------------------------------------------------------------------------
        
        print(f'fitting dnn model ...', end=' ')
        start_time = time.time()
        dnn_model, history = dnn_fit(X_train=H_train, y_train=train_params)
        train_time = time.time() - start_time

        test_pred = dnn_model.predict(H_test).flatten()
        test_MAE = mean_absolute_error(test_params, test_pred)
        print(f'test_MAE: {test_MAE:.4f}', end=', ')

        # 3. Predict the parameter d on the input data (d_pred)
        # -----------------------------------------------------------------------------------------

        nbins = H_train.shape[1]
        h_data = np.histogram(data, bins=nbins, range=(0,nbins), density=False)[0]
        h_data = np.reshape(h_data, (1, -1))
        d_pred = dnn_model.predict(h_data).flatten()[0]
        print(f'd_pred: {d_pred:.4f}, abs(d_pred - d_true): {abs(d_pred - d_true):.4f}')
        
        #lilo
        data_pred = f(d_pred, size=M)
        data_MAE = mean_absolute_error(data_pred, data)
#         data_pred, _, _ = generate_data(N=10, test_STD_abs_error
#                                        M=M, 
#                                        sample=f, 
#                                        nextConfig=d_pred,
#                                        density=False, 
#                                        apply_log_scale=False)
#         repeat = 100
#         data_MAE = np.mean(mean_absolute_error(data_pred, np.repeat(data, repeat).reshape(repeat,-1)))
        print(f'data_MAE: {data_MAE:.4f}', end= ', ')
        if data_MAE_best < 0 or data_MAE < data_MAE_best:
            data_MAE_best = data_MAE
            d_pred_best = d_pred
        print(f'data_MAE_best: {data_MAE_best:.4f}', end= ', ')
        print(f'd_pred_best: {d_pred_best:.4f}')
        
        # save results
        param_search_space_width = param_search_space[1] - param_search_space[0]
        res['search_space_width'].append(param_search_space_width)
        res['test_MAE'].append(test_MAE)
        res['d_pred_MAE'].append(abs(d_pred - d_true))
        res['d_pred_best_MAE'].append(abs(d_pred_best - d_true))
        res['d_pred'] = d_pred
        res['d_pred_best'] = d_pred_best
        
        # 4. Narrow the search space:
        # -----------------------------------------------------------------------------------------
        #  - margin = MAE + 2 * std(error)
        #  - search_space = [ d_pred - margin, d_pred + margin]
        
        #lilo:
        test_STD_abs_error = np.std( np.abs(test_params, test_pred) )
#         margin_std_factor = 1
        margin_std_factor = 3
        margin = margin_std_factor * test_STD_abs_error
        #margin = test_STD_abs_error
        
        #pivot = d_pred_best
        #print(f'pivot=d_pred_best: {pivot:.4f}, margin={margin_std_factor}*test_STD_abs_error: {margin:.4f}')
        pivot = d_pred
        print(f'pivot=d_pred: {pivot:.4f}, margin={margin_std_factor}*test_STD_abs_error: {margin:.4f}')
        
        next_search_space = np.array([ 
            max(param_search_space[0], pivot - margin), 
            min(param_search_space[1], pivot + margin)])
#         next_search_space = np.array([ 
#             max(initial_param_search_space[0], pivot - margin), 
#             min(initial_param_search_space[1], pivot + margin)])
        
        if np.array_equal(param_search_space, next_search_space):
            # if no change in param_search_space, narrow by epsilon (from both sides)
            epsilon = 0.001
#             epsilon = test_STD_abs_error
#             epsilon = 0.1 * test_STD_abs_error
            print(f'no change in param_search_space. narrowing by epsilon: {epsilon:.4f}')
            param_search_space = np.array([param_search_space[0] + epsilon, param_search_space[1] - epsilon])
        else:
            count_no_change_in_search_space = 0
            param_search_space = next_search_space
        
        # stop condition ?
        # size of the search-space (high-low) is 1/128 of the original
        # -----------------------------------------------------------------------------------------
        
        initial_param_search_space_width = initial_param_search_space[1] - initial_param_search_space[0]
#         if param_search_space_width < 1/128 * initial_param_search_space_width:
        if param_search_space_width < 1/16 * initial_param_search_space_width:
            print('stop -----------------------------------------------------------------')
            print(f'param_search_space_width: {param_search_space_width:.4f}')
            print(f'test_MAE: {test_MAE:.4f}')
            print(f'd_pred: {d_pred:.4f}, abs(d_pred - d_true): {abs(d_pred - d_true):.4f}')
            break
            
    return res

### Plot helper

In [3]:
def plot_search_space_size_vs_error(res):

    param_search_space_width = res['search_space_width']
    test_MAE = res['test_MAE']
    d_pred_MAE = res['d_pred_MAE']
    d_pred_best_MAE = res['d_pred_best_MAE']
    d_pred = res['d_pred']
    d_pred_best = res['d_pred_best']
    
    title = f'search space vs. error \n\
    ( d_true={d_true}, d_pred: {d_pred:.4f}, d_pred_best: {d_pred_best:.4f} )'
    
    plt.figure(figsize=(5,5))
    plt.title(title)
    plt.xlabel('search space')
    plt.ylabel('error')

    x = param_search_space_width
    y = test_MAE
    plt.scatter(x, y, zorder=1, s=2, c='blue', label='test_MSE')
    plt.plot(x, y, zorder=2, c='blue')

    y = d_pred_MAE
    plt.scatter(x, y, zorder=1, s=2, c='orange', label='abs(d_pred - d_true)')
    plt.plot(x, y, zorder=2, c='orange')
    
    y = d_pred_best_MAE
    plt.scatter(x, y, zorder=1, s=2, c='red', label='abs(d_pred_best - d_true)')
    plt.plot(x, y, zorder=2, c='red')
    
    plt.legend()
    plt.show()

# plot_search_space_size_vs_error(res)

## Fit (lognormal)

In [None]:
from scipy import stats
from scipy.stats import lognorm

# reset_random_state(SEED)

# sample from lognormal
def sample_lognormal(config, size):
    return lognorm.rvs(s=config, size=size, random_state=RANDOM_STATE)

# Fit (lognormal)
# d_true = 0.92
# d_true = 0.85
# d_true = 0.66
# d_true = 0.25

for d_true in [0.92, 0.85, 0.66, 0.25]:
    print()
    print(f'param true value: {d_true}')
    data = sample_lognormal(config=d_true, size=256)
    res = estimator_exp_1(f=sample_lognormal, data=data, d_true=d_true)
    plot_search_space_size_vs_error(res)


param true value: 0.92

generating data (M=256, N=1000) param_search_space: [0. 1.] ... histogram shape: (750, 54)
fitting dnn model ... test_MAE: 0.0539, d_pred: 0.9804, abs(d_pred - d_true): 0.0604
data_MAE: 1.6302, data_MAE_best: 1.6302, d_pred_best: 0.9804
pivot=d_pred: 0.9804, margin=3*test_STD_abs_error: 0.8888

generating data (M=256, N=1000) param_search_space: [0.0915 1.    ] ... histogram shape: (750, 49)
fitting dnn model ... test_MAE: 0.0475, d_pred: 0.8721, abs(d_pred - d_true): 0.0479
data_MAE: 1.5366, data_MAE_best: 1.5366, d_pred_best: 0.8721
pivot=d_pred: 0.8721, margin=3*test_STD_abs_error: 0.8022
no change in param_search_space. narrowing by epsilon: 0.0010

generating data (M=256, N=1000) param_search_space: [0.0925 0.999 ] ... histogram shape: (750, 41)
fitting dnn model ... test_MAE: 0.0441, d_pred: 0.9793, abs(d_pred - d_true): 0.0593
data_MAE: 1.7444, data_MAE_best: 1.5366, d_pred_best: 0.8721
pivot=d_pred: 0.9793, margin=3*test_STD_abs_error: 0.8101

generatin