In [None]:
import numpy as np
from selection.algorithms.lasso import instance
from selection.algorithms.forward_step import forward_stepwise
# Seed NumPy's global RNG; null_sample below draws from np.random.
np.random.seed(0)



## 10th step, snr=5

In [None]:
# Build one problem instance with `instance` and take ten forward stepwise steps on it.
X, y, beta, active, sigma = instance(n=100, p=40, snr=5, rho=0.3)
n, p = X.shape
# The covariance handed to forward_stepwise is sigma^2 times the n x n identity.
FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))
for _ in range(10):
    FS.next()

# Cell output: shapes of the X and Y attributes held by FS.
FS.X.shape, FS.Y.shape

In [None]:
# Constraint object for the steps taken so far; it is later called on candidate
# draws inside `acceptance`.
con = FS.constraints()
# Cell output: shape of the constraint's linear part.
con.linear_part.shape

In [None]:
# P = A pinv(A) is the orthogonal projector onto the column space of
# A = FS.X[:, FS.variables[:-1]], i.e. every entry of FS.variables except the last
# (presumably the variables entered before the most recent step -- confirm).
P = np.dot(FS.X[:,FS.variables[:-1]], np.linalg.pinv(FS.X[:,FS.variables[:-1]]))
# R is the complementary projector, I - P.
R = np.identity(n) - P

In [None]:
def null_sample(sigma, R, PY):
    """
    Draw one vector of the form ``sigma * R.dot(eps) + PY`` with eps ~ N(0, I).

    Parameters
    ----------
    sigma : float
        Scale applied to the transformed standard normal draw.
    R : ndarray, 2-D
        Matrix applied to the standard normal draw. Callers in this
        notebook pass the residual projector ``I - P``.
    PY : ndarray
        Vector added to the scaled noise. Callers in this notebook pass
        ``P.dot(Y)``.

    Returns
    -------
    ndarray
        The sampled vector.
    """
    # Size the draw from R itself instead of relying on a notebook-global `n`,
    # so the helper also works when called with a different sample size.
    eps = np.random.standard_normal(R.shape[1])
    return sigma * np.dot(R, eps) + PY

In [None]:
def acceptance(con, sigma, R, PY, max_tries=None):
    """
    Count the rejected draws before the first draw that satisfies `con`.

    Repeatedly samples ``Z = null_sample(sigma, R, PY)`` and evaluates
    ``con(Z)``; sampling stops at the first truthy result.

    Parameters
    ----------
    con : callable
        Called on each draw; a truthy return value means the draw is accepted.
    sigma, R, PY :
        Passed through unchanged to `null_sample`.
    max_tries : int, optional
        Upper bound on the number of draws. The default ``None`` keeps
        sampling until a draw is accepted (the original behaviour), which
        can run for a very long time when acceptance is rare.

    Returns
    -------
    int
        Number of rejected draws preceding the first accepted one.

    Raises
    ------
    RuntimeError
        If `max_tries` is given and no draw among the first `max_tries`
        is accepted.
    """
    idx = 0
    while max_tries is None or idx < max_tries:
        Z = null_sample(sigma, R, PY)
        if con(Z):
            return idx
        idx += 1

    raise RuntimeError("no draw accepted within %d tries" % max_tries)
# Projection of FS.Y by P; used as the additive term of every draw.
PY = np.dot(P, FS.Y)
# Cell output: number of rejected draws before the first accepted one.
acceptance(con, sigma, R, PY)

In [None]:
# Ten repeated rejection counts for the same constraint, R and PY.
[acceptance(con, sigma, R, PY) for _ in range(10)]

In [None]:
# Cell output: FS.variables, the index list used above to pick columns of FS.X.
FS.variables

Let's write a function that counts how many draws the accept-reject sampler rejects before its first acceptance.

In [None]:
# Drop the per-instance globals created above. n, p and PY are not listed
# here and therefore stay defined.
del X, y, FS, R, P, con, beta, active, sigma

In [None]:
def howlong(n=100, p=40, snr=5, rho=0.3, maxstep=10):
    """
    Build a fresh instance, take `maxstep` forward stepwise steps and return
    the rejection count reported by `acceptance` for that path.

    Parameters
    ----------
    n, p, snr, rho :
        Forwarded as keyword arguments to `instance`.
    maxstep : int
        Number of times `next()` is called on the forward stepwise object.

    Returns
    -------
    int
        Number of rejected draws before the first accepted one.
    """
    design, response, _beta, _active, noise_sd = instance(n=n, p=p, snr=snr, rho=rho)
    nobs, _nfeature = design.shape
    path = forward_stepwise(design, response,
                            covariance=noise_sd**2 * np.identity(nobs))
    for _ in range(maxstep):
        path.next()

    selection_con = path.constraints()

    # Columns for every entry of path.variables except the last one.
    earlier_cols = path.X[:, path.variables[:-1]]
    projector = np.dot(earlier_cols, np.linalg.pinv(earlier_cols))
    residual_projector = np.identity(nobs) - projector
    projected_response = np.dot(projector, path.Y)

    return acceptance(selection_con, noise_sd, residual_projector, projected_response)

## snr=4

In [None]:
# Rejection counts from ten calls to howlong; each call builds a new instance.
[howlong(snr=4) for _ in range(10)]

## p=60, snr=5

In [None]:
# Rejection counts from ten calls to howlong; each call builds a new instance.
[howlong(snr=5, p=60) for _ in range(10)]

## p=60, snr=5, 12th step

In [None]:
# Same settings as the previous experiment but with twelve steps instead of the default ten.
[howlong(snr=5, p=60, maxstep=12) for _ in range(10)]

## p=80, snr=5

In [None]:
# Rejection counts from ten calls to howlong; each call builds a new instance.
[howlong(snr=5, p=80) for _ in range(10)]

## p=80, snr=4

In [None]:
# Rejection counts from ten calls to howlong; each call builds a new instance.
[howlong(snr=4, p=80) for _ in range(10)]

## p=200, snr=5

In [None]:
# A single run only (not ten as in the cells above).
howlong(snr=5, p=200)

## p=200, snr=7, 8th step -- the first noise variable when selection works perfectly!

In [None]:
# Rejection counts from ten calls to howlong, each stopping after eight steps.
[howlong(snr=7, p=200, maxstep=8) for _ in range(10)]

In [None]:
## p=200, snr=7, 9th step

In [None]:
# Rejection counts from ten calls to howlong, each stopping after nine steps.
[howlong(snr=7, p=200, maxstep=9) for _ in range(10)]

## p=200, snr=7, 10th step

In [None]:
# Rejection counts from ten calls to howlong, each stopping after ten steps.
[howlong(snr=7, p=200, maxstep=10) for _ in range(10)]

## Data for Rob

In [None]:
# Settings for the exported data set.
n, p, snr, rho = 100, 200, 7, 0.3
# Reseed so the exported instance does not depend on the draws made by earlier cells.
np.random.seed(0)
X, y, beta, active, sigma = instance(n=n, p=p, snr=snr, rho=rho)

# Export X, y and sigma as comma-separated text.
np.savetxt("X_100_200.csv", X, delimiter=',')
np.savetxt("y_100.csv", y, delimiter=',')
np.savetxt("sigma.csv", np.array([sigma]), delimiter=',')

n, p = X.shape
FS = forward_stepwise(X, y, covariance=sigma**2 * np.identity(n))
for _ in range(7):
    FS.next()

# Linear part of the constraints after seven steps.
con = FS.constraints()
np.savetxt("A_step7.csv", con.linear_part, delimiter=',')

# NOTE(review): this projector uses every entry of FS.variables, whereas
# `howlong` uses FS.variables[:-1] -- confirm the difference is intended.
P = np.dot(FS.X[:,FS.variables], np.linalg.pinv(FS.X[:,FS.variables]))
R = np.identity(n) - P
PY = np.dot(P, FS.Y)

# `acceptance` returns the number of rejections, so counts + 1 is the number
# of draws per accepted sample; its reciprocal mean estimates the acceptance rate.
counts = np.array([acceptance(con, sigma, R, PY) for _ in range(200)])
# Single parenthesised argument: prints the same text under Python 2 and also
# parses under Python 3.
print('acceptance rate: %s' % (1. / np.mean(counts + 1),))

In [None]:
# Advance the same forward stepwise object by one step (eighth step) and
# export the linear part of the updated constraints.
FS.next()
con = FS.constraints()
np.savetxt("A_step8.csv", con.linear_part, delimiter=',')

# NOTE(review): projector built from all of FS.variables, unlike `howlong`,
# which uses FS.variables[:-1] -- confirm intended.
P = np.dot(FS.X[:,FS.variables], np.linalg.pinv(FS.X[:,FS.variables]))
R = np.identity(n) - P
PY = np.dot(P, FS.Y)

# counts + 1 is the number of draws per accepted sample.
counts = np.array([acceptance(con, sigma, R, PY) for _ in range(200)])
# Single parenthesised argument: same output under Python 2, valid under Python 3.
print('acceptance rate: %s' % (1. / np.mean(counts + 1),))
# Indices are shifted by +1 before saving; variables.csv is rewritten by
# every later step cell as well.
np.savetxt("variables.csv", np.array(FS.variables)+1, delimiter=',')

In [None]:
# Advance the same forward stepwise object by one step (ninth step) and
# export the linear part of the updated constraints.
FS.next()
con = FS.constraints()
np.savetxt("A_step9.csv", con.linear_part, delimiter=',')

# NOTE(review): projector built from all of FS.variables, unlike `howlong`,
# which uses FS.variables[:-1] -- confirm intended.
P = np.dot(FS.X[:,FS.variables], np.linalg.pinv(FS.X[:,FS.variables]))
R = np.identity(n) - P
PY = np.dot(P, FS.Y)

# counts + 1 is the number of draws per accepted sample.
counts = np.array([acceptance(con, sigma, R, PY) for _ in range(200)])
# Single parenthesised argument: same output under Python 2, valid under Python 3.
print('acceptance rate: %s' % (1. / np.mean(counts + 1),))
# Indices are shifted by +1 before saving; this overwrites the variables.csv
# written by the previous step cell.
np.savetxt("variables.csv", np.array(FS.variables)+1, delimiter=',')

In [None]:
# Advance the same forward stepwise object by one step (tenth step) and
# export the linear part of the updated constraints.
FS.next()
con = FS.constraints()
np.savetxt("A_step10.csv", con.linear_part, delimiter=',')

# NOTE(review): projector built from all of FS.variables, unlike `howlong`,
# which uses FS.variables[:-1] -- confirm intended.
P = np.dot(FS.X[:,FS.variables], np.linalg.pinv(FS.X[:,FS.variables]))
R = np.identity(n) - P
PY = np.dot(P, FS.Y)

# counts + 1 is the number of draws per accepted sample.
counts = np.array([acceptance(con, sigma, R, PY) for _ in range(200)])
# Single parenthesised argument: same output under Python 2, valid under Python 3.
print('acceptance rate: %s' % (1. / np.mean(counts + 1),))
# Indices are shifted by +1 before saving; this overwrites the variables.csv
# written by the previous step cell.
np.savetxt("variables.csv", np.array(FS.variables)+1, delimiter=',')

In [None]:
# Advance the same forward stepwise object by one step (eleventh step) and
# export the linear part of the updated constraints.
FS.next()
con = FS.constraints()
np.savetxt("A_step11.csv", con.linear_part, delimiter=',')

# NOTE(review): projector built from all of FS.variables, unlike `howlong`,
# which uses FS.variables[:-1] -- confirm intended.
P = np.dot(FS.X[:,FS.variables], np.linalg.pinv(FS.X[:,FS.variables]))
R = np.identity(n) - P
PY = np.dot(P, FS.Y)

# counts + 1 is the number of draws per accepted sample.
counts = np.array([acceptance(con, sigma, R, PY) for _ in range(200)])
# Single parenthesised argument: same output under Python 2, valid under Python 3.
print('acceptance rate: %s' % (1. / np.mean(counts + 1),))
# Indices are shifted by +1 before saving; this overwrites the variables.csv
# written by the previous step cell.
np.savetxt("variables.csv", np.array(FS.variables)+1, delimiter=',')

In [None]:
# Advance the same forward stepwise object by one step (twelfth step) and
# export the linear part of the updated constraints.
FS.next()
con = FS.constraints()
np.savetxt("A_step12.csv", con.linear_part, delimiter=',')

# NOTE(review): projector built from all of FS.variables, unlike `howlong`,
# which uses FS.variables[:-1] -- confirm intended.
P = np.dot(FS.X[:,FS.variables], np.linalg.pinv(FS.X[:,FS.variables]))
R = np.identity(n) - P
PY = np.dot(P, FS.Y)

# counts + 1 is the number of draws per accepted sample.
counts = np.array([acceptance(con, sigma, R, PY) for _ in range(200)])
# Single parenthesised argument: same output under Python 2, valid under Python 3.
print('acceptance rate: %s' % (1. / np.mean(counts + 1),))
# Indices are shifted by +1 before saving; this is the last write to
# variables.csv among the step cells.
np.savetxt("variables.csv", np.array(FS.variables)+1, delimiter=',')

## p=200, snr=7, 15th step

In [None]:
# really long time!
# [howlong(snr=7, p=200, maxstep=15) for _ in range(10)]