## Perform and pickle cross-validation

In [1]:
import numpy as np
import pickle
import warnings

from sparseRRR import elastic_rrr_cv_honest
from sklearn.exceptions import ConvergenceWarning

### Load data

In [2]:
data = pickle.load(open('../data/purkinje_sRRR.pickle', 'rb'))

X_linear, X_nonlinear, X_full, Y = data['Linear mix'], data['Nonlinear only'], data['Full mixing'], data['Locomotion']
cell_names = data['cell_names']
locomotion_names = data['locomotion_names']

print('Shape of X_linear:', X_linear.shape, 'Shape of X_nonlinear:', X_nonlinear.shape, 'Shape of X_full:', X_full.shape, 'Shape of Y:', Y.shape)

Shape of X_linear: (1816281, 91) Shape of X_nonlinear: (1816281, 91) Shape of X_full: (1816281, 91) Shape of Y: (1816281, 16)


### The main cross-validation setup

Not all timepoints as we have a lot!

In [3]:
slice = np.linspace(0, 1000, 1000, dtype=int) # time points to use

For fast usage and checking:

In [4]:
cvresults_rank=[]
l1_ratios = np.array([1])
alphas = np.concatenate((np.arange(.04,1.01,.1), np.arange(2,5)))
ranks = np.arange(1, Y.shape[1]+1)
cvresults_rank = {}
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=(ConvergenceWarning, RuntimeWarning))
    for r in range(1,4):
        cvresults_rank[r] = elastic_rrr_cv_honest(X_full[slice,:], Y[slice,:], rank=r, reps=1, folds=5, alphas=alphas, l1_ratios=l1_ratios, preprocess=True)

1..... Time: 0.0h  0m  2s
1..... Time: 0.0h  0m  3s
1..... Time: 0.0h  0m  5s


In [None]:
# rank : 2 (index is from dict)
# 1: relaxed r2 results (0: r2 results, 2: corr results, 3: relaxed corr results,
#                       (4: sum of nonzero cells, 5: selected cells, 6: W row L2 norms, 7: relaxed W row L2 norms)
# validation fold=3, rep=0, alpha_id = 8, l1_ratio_id=0 

# test r^2 : 

cvresults_rank[2][1][3,0,8,0]

0.367180715378573

In [None]:
# cells selected : 
cvresults_rank[2][5][3,0,8,0,:]

array([False, False, False, False, False, False,  True, False, False,
       False, False, False, False, False, False, False,  True, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False,  True, False,
       False,  True, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False,  True, False, False, False, False,
       False, False, False, False, False, False,  True, False,  True,
       False, False, False, False, False, False, False, False, False,
       False])

In [9]:
# W row L2 norms
cvresults_rank[2][6][3,0,8,0,:]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.07576618, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.10293011, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.91698458,
       0.        , 0.        , 0.00291831, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.2891748 , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [10]:
# relaxed W row L2 norms
cvresults_rank[2][7][3,0,8,0,:]

array([0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.38538182, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.3885603 , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.59418648,
       0.        , 0.        , 0.4504104 , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.36237504, 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.     

In [12]:
cell_names[cvresults_rank[2][5][3,0,8,0,:]]

array(['MC3808_S7', 'MC3810_S9', 'MC5003_S25', 'MC5003_S28', 'MC5006_S11',
       'MC6001_S13', 'MC6001_S15'], dtype='<U10')

Now let us do more timepoints and more sincere cross-validation:

In [13]:
slice = np.linspace(0, 10000, 10000, dtype=int) # time points to use

In [14]:
alphas = np.concatenate((np.arange(.04,1.01,.1), np.arange(2,5)))
ranks = np.arange(1, Y.shape[1]+1)
l1_ratios = np.array([1])

for i, (model, X) in enumerate(zip(['Linear', 'Nonlinear', 'Full'], [X_linear, X_nonlinear, X_full])):
    print('Model: ', model)
    print('Shape of X:', X[slice,:].shape, '\nShape of Y:', Y[slice,:].shape)

    cvresults_rank = {}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=(ConvergenceWarning, RuntimeWarning))
        for r in ranks:
            cvresults_rank[r] = elastic_rrr_cv_honest(X[slice,:], Y[slice,:], rank=r, reps=1, folds=5, alphas=alphas, l1_ratios=l1_ratios, preprocess=True)
        
    pickle.dump(cvresults_rank, open('../pickles/cvresults-{}.pickle'.format(model), 'wb'))

Model:  Linear
Shape of X: (10000, 91) 
Shape of Y: (10000, 16)
1..... Time: 0.0h  0m 14s
1..... Time: 0.0h  6m 58s
1..... Time: 0.0h 33m 44s
1..... Time: 0.0h 27m 40s
1..... Time: 0.0h 23m 32s
1..... Time: 0.0h 10m 11s
1..... Time: 0.0h  5m  7s
1..... Time: 0.0h  3m 26s
1..... Time: 0.0h  5m 39s
1..... Time: 0.0h  5m 55s
1..... Time: 0.0h  4m 21s
1..... Time: 0.0h  4m 46s
1..... Time: 0.0h  3m 41s
1..... Time: 0.0h  3m 48s
1..... Time: 0.0h  3m 21s
1..... Time: 0.0h  1m 29s
Model:  Nonlinear
Shape of X: (10000, 91) 
Shape of Y: (10000, 16)
1..... Time: 0.0h  0m 12s
1..... Time: 0.0h  4m 25s
1..... Time: 0.0h  3m 34s
1..... Time: 0.0h  3m 36s
1..... Time: 0.0h  3m 16s
1..... Time: 0.0h  3m 44s
1.

KeyboardInterrupt: 

#### Template code for nested CV

In [10]:
# alphas = np.concatenate((np.arange(.04,1.01,.1), np.arange(2,4)))
# l1_ratios = np.array([.25, .5, .75, 1])

# with warnings.catch_warnings():
#    warnings.simplefilter("ignore", category=(ConvergenceWarning, RuntimeWarning))
#    sparseRRR.nested_cv(X[slice,:], Y[slice,:], alphas, l1_ratios, target_n_predictors=10)