In [1]:
import numpy as np
import scipy as sc
import math
from bnb import gen_synthetic
import pandas as pd

In [2]:
# Response vector: the single column "V1" of the y file.
y = pd.read_csv("../../hfs/data/tourism_y.csv")["V1"].to_numpy()

# Data matrix, one column per predictor.
x = pd.read_csv("../../hfs/data/tourism_x.csv").to_numpy()

# Every column of the group file becomes one list of integer predictor indices.
group = pd.read_csv("../../hfs/data/tourism_group.csv")
group_indices = [group[name].astype(int).tolist() for name in group.columns]

# Vector stored in column "x" of its file; used below as the warm start.
ols_G = pd.read_csv("../../hfs/data/tourism_ols_G.csv")["x"].to_numpy()

S = pd.read_csv("../../hfs/data/tourism_S.csv").to_numpy()

# Identity matrix with one row/column per row of S.
W = np.identity(len(S))

In [3]:
from IPython.display import display

# Emit the size of every loaded input as its own output, in loading order.
for dims in (y.shape, x.shape, len(group_indices), ols_G.shape, S.shape, W.shape):
    display(dims)

(111,)

(111, 8436)

111

(8436,)

(111, 76)

(111, 111)

In [17]:
"""This cell demonstrates how to run the BnB algorithm.

The algorithm optimizes the following objective function:

    0.5 * (y-XB)^T * W^{-1} * (y-XB) + lambda_0 * G(B) + lambda_2 ||B||^2
    
    s.t. kronecker(t(S), I_nb) * B = vec(I_nb)
    
where G(B) counts the number of nonzero groups (as defined in the paper).

"""
from bnb import BNBTree

"""
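First, we create a BNBTree object and initialize it with the following:
   x: Data matrix
   y: Response vector
   group_indices: A list of size num_groups. The ith element contains the indices of the predictors in group i.
   W: The matrix W whose inverse weights the residuals in the objective above.
   S: The matrix S that appears in the equality constraint above.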
"""

# Build the branch-and-bound tree from the data, the group structure, and the
# W and S matrices loaded earlier.
tree = BNBTree(
    x,
    y,
    group_indices=group_indices,
    W=W,
    S=S,
)

"""
Call tree.solve(...) to run BnB for a given set of regularization parameters (pass verbose=True to print progress). tree.solve accepts:
    lambda_0: The regularization parameter for the L0 norm.
    lambda_2: The regularization parameter for the L2 norm.
    m: The value of the Big M.
    warm_start: The initial solution.
"""

# Run branch-and-bound from the ols_G warm start, printing progress as it goes.
solver_output = tree.solve(
    lambda_0=100000,
    lambda_2=1e-4,
    m=3,
    warm_start=ols_G,
    verbose=True,
)

used a warm start
initializing took 62.233410120010376 seconds
0 levels of depth used
l: 0, (d: 2603053.6667493144, p: 2603053.6667493144), u: 11100001.401360983, g: 0.765490690259719, t: 194.59952688217163 s
l: 1, (d: 2670128.600558601, p: 2670128.600558601), u: 11000001.379567603, g: 0.7572610667560151, t: 426.70845222473145 s


GurobiError: Unable to retrieve attribute 'x'

In [None]:
# Print the object returned by tree.solve(...); this needs the solve cell to
# have completed, otherwise `solver_output` is undefined.
print(solver_output)
# solver_output.beta contains the solution.

In [11]:
# Preview the first 1000 entries of the solution vector.
solver_output.beta[:1000]

array([ 2.55702475e-02,  2.55702554e-02,  4.27375449e-02,  4.27375061e-02,
        3.12070048e-02,  1.38730505e-02,  1.38730539e-02,  1.38730849e-02,
        1.38730454e-02,  1.31778253e-02,  1.31778799e-02,  1.31779078e-02,
        1.31779214e-02,  3.12070405e-02,  1.14184131e-02,  1.14184469e-02,
        1.14184132e-02,  2.12425894e-02,  1.43320323e-02,  1.43319845e-02,
        1.43320113e-02,  6.83452819e-03,  6.83442566e-03,  6.83440495e-03,
        6.83440884e-03,  6.83440333e-03,  6.83446389e-03,  1.83940378e-03,
        1.83943983e-03,  1.83938061e-03,  1.83937585e-03,  1.83934684e-03,
        1.83932572e-03,  1.83946192e-03,  1.83935615e-03,  1.55137183e-02,
        1.55137065e-02,  1.55136496e-02,  7.80617915e-03,  7.80621496e-03,
        7.80617658e-03,  7.80620831e-03,  1.08173127e-02,  1.08173377e-02,
        1.08172418e-02,  1.29658833e-02,  1.29658495e-02,  1.32463532e-02,
        1.32463981e-02,  1.32463555e-02,  1.17261880e-02,  1.17261982e-02,
        1.17261575e-02,  

In [12]:
# Preview the first 50 entries of the warm-start vector for comparison.
ols_G[:50]

array([0.01131092, 0.01131092, 0.01131092, 0.01131092, 0.01696638,
       0.00678655, 0.00678655, 0.00678655, 0.00678655, 0.00678655,
       0.00678655, 0.00678655, 0.00678655, 0.01696638, 0.00881798,
       0.00881798, 0.00881798, 0.01763595, 0.00881798, 0.00881798,
       0.00881798, 0.00503884, 0.00503884, 0.00503884, 0.00503884,
       0.00503884, 0.00503884, 0.0039191 , 0.0039191 , 0.0039191 ,
       0.0039191 , 0.0039191 , 0.0039191 , 0.0039191 , 0.0039191 ,
       0.01055052, 0.01055052, 0.01055052, 0.00844042, 0.00844042,
       0.00844042, 0.00844042, 0.01055052, 0.01055052, 0.01055052,
       0.01406736, 0.01406736, 0.01055052, 0.01055052, 0.01055052])

In [13]:
# Write the solution vector as comma-separated text alongside the input files.
np.savetxt(
    fname="../../hfs/data/tourism_beta.csv",
    X=solver_output.beta,
    delimiter=",",
)

In [28]:
# Find the groups that contain at least one nonzero warm-start coefficient.
warm_start = ols_G
num_groups = len(group_indices)
support = np.nonzero(warm_start)[0]

# Build the feature -> group lookup once instead of scanning every group list
# for every support feature. setdefault keeps the lowest-numbered group when a
# feature is listed in more than one, matching a first-match scan.
group_of_feature = {}
for group_index in range(num_groups):
    for feature_index in group_indices[group_index]:
        group_of_feature.setdefault(feature_index, group_index)

z_supp = set()
for feature_index in support.tolist():
    group_index = group_of_feature.get(feature_index)
    # Features that belong to no group contribute nothing.
    if group_index is not None:
        z_supp.add(group_index)
z_support = z_supp

In [29]:
# Restrict the group structure to the active groups.
activeset = z_support
group_indices_restricted = [group_indices[g] for g in activeset]

# Renumber the members of each active group consecutively from 0, so the lists
# index into a matrix that keeps only the active columns.
group_indices_restricted_reset_indices = []
start_index = 0
for members in group_indices_restricted:
    stop_index = start_index + len(members)
    group_indices_restricted_reset_indices.append(list(range(start_index, stop_index)))
    start_index = stop_index

# Original column indices of all active groups, concatenated in the same order.
active_coordinate_indices = [
    feature for members in group_indices_restricted for feature in members
]

In [31]:
# Solve the problem restricted to the active groups directly with Gurobi.
# NOTE(review): this cell rebinds `x` and `group_indices` to their restricted
# versions, so it is not idempotent -- reload the data and rebuild the active
# set before running it a second time.
l0 = 100     # weight on the sum of the z variables
l2 = 0.0001  # weight on the squared L2 norm of beta
m = 2        # box bound applied to every coefficient

nb = S.shape[1]  # number of bottom-level series
tS = S.transpose()
I = np.identity(nb)

# Keep only the active columns of the data and of kron(S^T, I).
x = x[:, active_coordinate_indices]
group_indices = group_indices_restricted_reset_indices
W = np.identity(S.shape[0])
kron_tSI = np.kron(tS, I)[:, active_coordinate_indices]

from gurobipy import Model, GRB, QuadExpr, MQuadExpr, LinExpr, quicksum
model = Model()
n, p = x.shape
group_num = len(group_indices)

beta = model.addMVar(shape=(p, ), vtype=GRB.CONTINUOUS,
                     name=['B' + str(feature_index) for feature_index in range(p)],
                     ub=np.repeat(m, p), lb=np.repeat(-m, p))
# lb == ub == 1 pins every z at 1: all groups in the active set are treated as
# selected, so the l0 term is a constant in this restricted problem.
z = model.addMVar(shape=(group_num, ), vtype=GRB.CONTINUOUS,
                  name=['z' + str(group_index) for group_index in range(group_num)],
                  ub=np.repeat(1, group_num), lb=np.repeat(1, group_num))
# Free residual variables, tied to y - x @ beta by the constraint below.
r = model.addMVar(shape=(n, ), vtype=GRB.CONTINUOUS,
                  name=['r' + str(sample_index) for sample_index in range(n)],
                  ub=GRB.INFINITY, lb=-GRB.INFINITY)
model.update()

model.setObjective(0.5*r.T@W@r + l0*quicksum(z) + l2*beta.T@beta, GRB.MINIMIZE)
model.addConstr(r == y - x@beta)
# Per-group bound: the squared norm of the group's coefficients is at most (m * z_g)^2.
for group_index in range(group_num):
    l2_sq = [beta[feature_index]*beta[feature_index] for feature_index in group_indices[group_index]]
    model.addConstr(quicksum(l2_sq) <= m * m * z[group_index]*z[group_index])

# Equality constraint kron(S^T, I) @ beta == vec(I).
vI = I.reshape((nb*nb,))
model.addConstr(vI == kron_tSI@beta)
model.update()
model.setParam('OutputFlag', False)

model.optimize()

# Reading .X on a model that holds no solution fails with
# "Unable to retrieve attribute 'x'", so stop here with a clear message instead
# of letting a later cell fail.
if model.SolCount == 0:
    raise RuntimeError(
        "Gurobi stopped with status %d and no solution is available; "
        "beta and z cannot be read." % model.Status
    )
if model.Status != GRB.OPTIMAL:
    # For example, the solve was interrupted: values can be read but are not proven optimal.
    print("Warning: Gurobi stopped with status %d before proving optimality; "
          "the stored solution may be suboptimal." % model.Status)


Interrupt request received


In [32]:
# Zero-filled buffers, one slot per beta variable and per z variable.
output_beta, output_z = (np.zeros(var.shape[0]) for var in (beta, z))

In [33]:
# Spot-check: solution value (Gurobi `X` attribute) of the second beta variable.
beta[1].X

array(0.02571648)

In [34]:
# Copy the solution values of all beta and z variables into the NumPy buffers
# in one vectorized read per variable block.
output_beta[:] = beta.X
output_z[:] = z.X

In [35]:
# Display the beta values copied out of the Gurobi model.
output_beta

array([ 0.02571658,  0.02571648,  0.04309379, ..., -0.21736572,
       -0.21736572,  0.78263428])

In [36]:
# Display the z values copied out of the Gurobi model (z was bounded to exactly 1).
output_z

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [37]:
# Objective value of the solution currently stored on the Gurobi model.
model.ObjVal

11100.853139086823