In [None]:
import numpy as np
from scipy.interpolate import CubicSpline
import matplotlib.pyplot as plt
from scipy.interpolate import BSpline
from selectinf.Simulation.spline import b_spline
from selectinf.Simulation.spline_instance import generate_gaussian_instance_nonlinear
from selectinf.group_lasso_query import (group_lasso,
                                         split_group_lasso)
import regreg.api as rr
from selectinf.base import selected_targets
from selectinf.base import restricted_estimator
import scipy.stats

from selectinf.Simulation.test_group_lasso_simulation import (calculate_F1_score,
                                                              naive_inference,
                                                              randomization_inference_fast,
                                                              data_splitting)

In [None]:
# Fancy-indexing sanity check: the positions listed in `b` are switched on in `a`.
b = np.array([2, 1, 0])
a = np.zeros(5)
a[b] = 1.0
a

In [None]:
# Raw inputs for the spline demo: 2000 x 10 uniform draws on [-1, 1) and a
# 2000 x 0 (i.e. empty) normal matrix, both handed to the b_spline helper.
X = np.random.uniform(low=-1, high=1, size=(2000, 10))
Z = np.random.normal(scale=5, size=(2000, 0))
bs = b_spline(data_nl=X[:, :10], nknots=4, degree=1, data_l=Z)

In [None]:
# Build the spline basis on `bs` with the flags below (presumably knots taken at
# data quantiles rather than equally spaced -- confirm against b_spline), then
# fetch the resulting design matrix and display its shape.
bs.construct_splines(equally_spaced=False,use_quantiles=True)
design = bs.get_spline_data()
design.shape

In [None]:
# Standardize the spline design column-wise (subtract column means, divide by
# column standard deviations).
# NOTE(review): `-=` and `/=` modify the array returned by bs.get_spline_data()
# in place; if that is the same object as `design` (or storage held by `bs`),
# those are modified as well -- confirm whether a copy is needed.
X = bs.get_spline_data()
n = X.shape[0]  # number of rows
X -= X.mean(0)[None, :]  # center each column
scaling = X.std(0)  # per-column standard deviation of the centered data
X /= scaling[None, :]  # NOTE(review): a constant column has std 0 and would give inf/nan here

In [None]:
# Inverse of the Gram matrix X'X / n of the standardized spline design.
# The identity term is a ridge placeholder (a coefficient of 0 leaves the matrix
# unchanged); it is sized from X so the cell no longer depends on the design
# having exactly 20 columns.
np.linalg.inv((X.T @ X) / n + 0 * np.eye(X.shape[1]))

In [None]:
# Integer array 0, 1, ..., 9.
np.arange(10)

In [None]:
# Simulate one data set with the project's nonlinear Gaussian generator; it
# returns the design, the response, its mean, the group labels and an
# activity flag (exact semantics are defined in spline_instance).
(design, Y, Y_mean, groups, active_flag) = generate_gaussian_instance_nonlinear(
    n=2000, p_nl=10, p_l=50,
    nknots=10, degree=3,
    center=False, scale=True,
)

In [None]:
# Shape (rows, columns) of the simulated design matrix.
design.shape

In [None]:
def randomization_inference_spline(design, Y, n, p, Y_mean, groups,
                                   randomizer_scale=1.,
                                   weight_frac=1.25, level=0.9, ridge_term=1.):
    """Select groups with a randomized group lasso and run selective-MLE inference.

    Parameters
    ----------
    design : ndarray
        Design matrix, passed as ``X`` to ``group_lasso.gaussian``.
    Y : ndarray
        Response vector.
    n, p : int
        Row and column counts used for the dispersion estimate and for the
        ``sqrt(2 log p)`` penalty weights.
    Y_mean : ndarray
        Vector regressed on the selected columns to define the inferential
        target (presumably the noiseless mean of ``Y`` -- confirm with the
        data generator).
    groups : array-like
        Group label of each design column.
    randomizer_scale : float
        Accepted for signature compatibility; it is currently NOT forwarded
        to ``group_lasso.gaussian``.
    weight_frac : float
        Multiplier applied to the common group penalty weight.
    level : float
        Confidence level passed to ``conv.inference``.
    ridge_term : float
        Ridge term passed to ``group_lasso.gaussian``.

    Returns
    -------
    tuple of length 7
        ``(coverage, interval_lengths, beta_target, nonzero, lower, upper,
        cov_target)``. When no coefficient is selected every entry is
        ``None``; the tuple still has seven entries so callers can unpack
        both outcomes the same way.
    """
    # Noise variance: residual variance of the full least-squares fit when
    # n > p, otherwise the plain variance of Y.
    if n > p:
        residual = Y - design.dot(np.linalg.pinv(design).dot(Y))
        dispersion = np.linalg.norm(residual) ** 2 / (n - p)
    else:
        dispersion = np.std(Y) ** 2
    sigma_ = np.sqrt(dispersion)

    # Every group gets the same penalty weight.
    penalty = weight_frac * sigma_ * np.sqrt(2 * np.log(p))
    weights = {g: penalty for g in np.unique(groups)}

    conv = group_lasso.gaussian(X=design,
                                Y=Y,
                                groups=groups,
                                weights=weights,
                                useJacobian=True,
                                ridge_term=ridge_term)

    signs, _ = conv.fit()
    nonzero = (signs != 0)

    if nonzero.sum() == 0:
        # Empty selection: same arity as the success path (the original
        # returned only six values here, which broke 7-way unpacking).
        return (None,) * 7

    print("MLE |E|:", nonzero.sum())

    conv.setup_inference(dispersion=dispersion)

    target_spec = selected_targets(conv.loglike,
                                   conv.observed_soln,
                                   dispersion=dispersion)

    result, _ = conv.inference(target_spec,
                               method='selective_MLE',
                               level=level)

    intervals = np.asarray(result[['lower_confidence',
                                   'upper_confidence']])

    # Target: least-squares coefficients of Y_mean on the selected columns,
    # obtained by solving the normal equations instead of forming an inverse.
    X_E = design[:, nonzero]
    beta_target = np.linalg.solve(X_E.T @ X_E, X_E.T @ Y_mean)

    coverage = (beta_target > intervals[:, 0]) * (beta_target < intervals[:, 1])

    # Note: this changes NumPy's global print options for the whole session.
    np.set_printoptions(precision=3, suppress=True)
    print(beta_target)
    print(intervals)

    return coverage, (intervals[:, 1] - intervals[:, 0]), beta_target, \
           nonzero, intervals[:, 0], intervals[:, 1], target_spec.cov_target

In [None]:
# Run the randomized group-lasso selective MLE on the simulated design.
n, p = design.shape
noselection = False  # set to True when a method ends up with an empty selected set

if not noselection:
    # MLE inference
    (coverage, length, beta_target, nonzero,
     conf_low, conf_up, cov_target) = randomization_inference_spline(
        design=design, Y=Y, n=n, p=p, Y_mean=Y_mean,
        groups=groups,
        weight_frac=1.25, level=0.9, ridge_term=0.)
    # An empty selection is signalled by `coverage` being None.
    noselection = coverage is None

In [None]:
# Empirical coverage: fraction of selected coefficients whose confidence
# interval contains the target value (`coverage` is set by the inference cell).
np.mean(coverage)

In [None]:
# Fitted values from the selected columns and the target coefficients, shown
# for comparison with Y_mean below.
design[:,nonzero] @ beta_target

In [None]:
# Mean vector returned by the data generator.
Y_mean

In [None]:
# Simulated response returned by the data generator.
Y