## Perform and pickle cross-validation

In [12]:
import numpy as np
import pickle
import warnings

import sparseRRR
from sklearn.exceptions import ConvergenceWarning

In [13]:
def _standardize(values):
    """Center ``values`` along axis 0 and scale to unit standard deviation.

    Entries along axis 0 whose standard deviation is exactly zero are only
    centered (they come out as zeros) instead of being divided by zero,
    which would yield NaN/inf.
    """
    centered = values - np.mean(values, axis=0)
    scale = np.std(centered, axis=0)
    # Replace zero scales by one so constant columns stay finite (all zeros).
    scale = np.where(scale == 0, np.ones_like(scale), scale)
    return centered / scale


def preprocess(data, slice_idx, loc_features_idx):
    """Select and standardize the firing-rate and locomotion arrays.

    Parameters
    ----------
    data : mapping
        Must provide the entries ``'Firing rate'`` and ``'Locomotion'``,
        both indexable as ``arr[rows, :]``.
    slice_idx : index
        Row selector (time slice) applied to both arrays.
    loc_features_idx : index
        Column selector applied to the locomotion array only.

    Returns
    -------
    X, Y : arrays
        The selected firing rates and locomotion features, each centered
        along axis 0 and divided by its standard deviation along axis 0.
        Constant columns are returned as zeros rather than NaN.
    """
    X = _standardize(data['Firing rate'][slice_idx, :])  # select the time slice
    # select the time slice first, then the locomotion features
    Y = _standardize(data['Locomotion'][slice_idx, :][:, loc_features_idx])
    return X, Y

In [14]:
# Paths of the pickled datasets; they share one naming pattern and differ
# only in the trailing variant tag.
file_names = [
    '../data/purkinje_extended_{}.pickle'.format(variant)
    for variant in ('full', 'nonlinear', 'linear')
]

## The main cross-validation setup

In [15]:
# Load the pickled dataset and keep its list of locomotion feature names.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open('../data/purkinje_extended.pickle', 'rb') as f:  # handle is closed on exit
    data = pickle.load(f)
locomotion_names = data['locomotion_names']

We use only a subset of the time points and a subset of the locomotion features:

In [16]:
# Indices of the time points to use: 0, 1, ..., 99999.
# np.arange replaces np.linspace(0, 100000, 100000, dtype=int), whose step is
# slightly above 1: after truncation to int it skipped index 99999 and ended
# on index 100000.
# NOTE: this name shadows the built-in `slice`; it is kept because other
# cells refer to it.
slice = np.arange(100000) # time points to use

In [17]:
# Locomotion features to use: columns 5..12 followed by the last eight columns.
selected_features = list(range(5, 13)) + list(range(-8, 0))
# Show the names of the selected features as the cell output.
np.asarray(locomotion_names)[selected_features]

array(['X-p FR', 'X-p HR', 'X-p FL', 'X-p HL', 'X-s FR', 'X-s HR',
       'X-s FL', 'X-s HL', 'Z-p FR', 'Z-p HR', 'Z-p FL', 'Z-p HL',
       'Z-s FR', 'Z-s HR', 'Z-s FL', 'Z-s HL'], dtype='<U11')

In [None]:
# Regularization strengths passed to every cross-validation call below
# (loop-invariant, so built once).
alphas = np.concatenate((np.arange(.04,1.01,.1), np.arange(2,5)))

# Run both cross-validations for every dataset, in reverse list order, and
# pickle the results to '../pickles/cvresults-<dataset name>.pickle'.
for file_name in file_names[::-1]:
    # Dataset name = file name without directory and extension.
    dataset_name = file_name.split('/')[-1].split('.')[0]
    print('Processing: ', dataset_name)

    # The entries of file_names already contain the '../data/' prefix, so the
    # path is used as is; the context manager closes the file handle.
    with open(file_name, 'rb') as f:
        data = pickle.load(f)
    X,Y = preprocess(data, slice, selected_features)
    print('Shape of X:', X.shape, '\nShape of Y:', Y.shape)

    # 1) Fixed rank 2, grid over alphas and l1_ratios.
    l1_ratios = np.array([.25, .5, .75, 1])
    with warnings.catch_warnings():
        # simplefilter expects a single Warning class, hence one call per class.
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        warnings.simplefilter("ignore", category=RuntimeWarning)
        cvresults = sparseRRR.elastic_rrr_cv(X, Y, rank=2, reps=1, folds=10, alphas=alphas, l1_ratios=l1_ratios)

    # 2) l1_ratio fixed to 1, one run per rank from 1 to the number of Y columns.
    l1_ratios = np.array([1])
    ranks = np.arange(1, Y.shape[1]+1)
    cvresults_rank = {}
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=ConvergenceWarning)
        warnings.simplefilter("ignore", category=RuntimeWarning)
        for r in ranks:
            cvresults_rank[r] = sparseRRR.elastic_rrr_cv(X, Y, rank=r, reps=1, folds=10, alphas=alphas, l1_ratios=l1_ratios)

    # Write both result sets to one pickle; the context manager flushes and
    # closes the output file.
    with open('../pickles/cvresults-{}.pickle'.format(dataset_name), 'wb') as f:
        pickle.dump([cvresults, cvresults_rank], f)

#### Template code for nested CV

In [None]:
# Hyperparameter grid handed to the nested cross-validation.
alphas = np.concatenate((np.arange(.04,1.01,.1), np.arange(2,4)))
l1_ratios = np.array([.25, .5, .75, 1])

with warnings.catch_warnings():
    # simplefilter expects a single Warning class, hence one call per class.
    warnings.simplefilter("ignore", category=ConvergenceWarning)
    warnings.simplefilter("ignore", category=RuntimeWarning)
    # X and Y as returned by preprocess() are already restricted to `slice`;
    # indexing them with `slice` again is redundant and can run out of bounds,
    # so they are passed as they are.
    sparseRRR.nested_cv(X, Y, alphas, l1_ratios, target_n_predictors=10)