# Importing data


In [1]:
import numpy as np
import os 
import tensorflow as tf
from time import time
import mneflow


## 1. from MNE epochs

#### If you use MNE-python, all you need is to provide your epochs file (or list of epoch files) to mneflow.produce_tfrecords


In [2]:
#import mne
#from mne.datasets import multimodal
# mne.set_log_level(verbose='CRITICAL')
# #print(__doc__)

# fname_raw = os.path.join(multimodal.data_path(), 'multimodal_raw.fif')
# raw = mne.io.read_raw_fif(fname_raw)

# #event_id = {}
# cond = raw.acqparser.get_condition(raw, None)
# epochs_list = [mne.Epochs(raw, **c) for c in cond]
# #here we concatenate epochs because each input file contains just one condition
# #otherwise mneflow.produce_tfrecords can handle a list of epochs objects
# epochs = mne.concatenate_epochs(epochs_list)

In [3]:
#Specify import options
import_opt = dict(savepath='../my_TFRs/', #path to where the TFR files will be saved
           out_name='mne_sample_epochs', #name of TFRecords files
           input_type='epochs', #can also be "epochs"
           picks={'meg':'grad'}, #used only if input_type is mne.epochs.Epochs or path to saved '*-epo.fif'
           scale=True, #apply baseline_scaling?
           crop_baseline=True,
           bp_filter = (1.,45.),
           decimate = 2,
           scale_interval=78, #baseline, TODO: define automatically for epochs objects
           savebatch=8, # number of input files per TFRecord file           
           save_orig=False, # whether to produce separate TFR-file for inputs in original order
           val_size=0.1)

#whenever you import a dataset a copy of meta is also saved to savepath/meta.pkl so it can be restored at any time
if os.path.exists(import_opt['savepath']+'meta.pkl'):
    meta = mneflow.load_meta(import_opt['savepath'])
else:
    meta = mneflow.produce_tfrecords(epochs,**import_opt)   

Alternatively, if your epochs are saved to disk provide a str (or list of str) with path(s) to your -epo.fif files

e.g. this will work

```python
epochs.save('test_saved_epochs.fif')
meta = mneflow.produce_tfrecords('test_saved_epochs.fif',**opt)
```

if the first argument is str this function can also accept *.mat or *.npz format

e.g.

```python
data_path = '/m/nbe/scratch/braindata/izbrv/detection_data/'
filenames = [data_path +'sub' + str(i) + '-grad.npz' for i in range(1,4)]
meta = mneflow.produce_tfrecords(filenames,**opt)
```
In this case, specify iput_type='array', and also provide array_keys keyword argument

e.g. 

```python
array_keys={'X':'my_data_samples','y':'my_labels'}
```
#note that "picks" works only for input_type="epochs"

# Choose from already implemented models

In [4]:
#specify model parameters
params = dict(l1_lambda=1e-7,
              learn_rate=3e-4,
              dropout = .5,
              patience = 3,# patientce for early stopping
              min_delta = 5e-6,
              test_upd_batch = 20,#pseudo-real time test batch size
              n_epochs = 1000, #total training epochs
              eval_step = 50, #evaluate validation loss each 10 epochs
              n_batch = 200,
              #these are specific to LF-CNN]
              n_ls=32, #number of latent factors
              nonlin_in = tf.identity, #input layer activation for var-cnn and lf-cnn
              nonlin_hid = tf.nn.relu, #convolution layer activation for var-cnn and lf-cnn
              nonlin_out = tf.identity, #output layer activation for var-cnn and lf-cnn
              filter_length=32, #convolutional filter length for var-cnn and lf-cnn
              pooling = 6, #convlayer pooling factor for var-cnn and lf-cnn
              stride = 1, #stride parameter for convolution filter
              ) #training batch size) 

#specify the path to store the saved model
model_path = '/m/nbe/scratch/braindata/izbrv/detection_data/tfr/'

model = mneflow.models.LFCNN(meta,params,model_path)

Instructions for updating:
Colocations handled automatically by placer.
lf-conv _init
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
dense _init
Initialization complete!


In [5]:
#train the model
start = time()
model.train()
stop = time() - start
print('Trained in {:.2f}s'.format(stop))

epoch 0, train_loss 2.21819, train acc 0.152174 val loss 2.49855, val acc 0.117021
epoch 50, train_loss 0.568426, train acc 0.869565 val loss 1.3934, val acc 0.446809
epoch 100, train_loss 0.108891, train acc 1 val loss 0.690103, val acc 0.776596
epoch 150, train_loss 0.0415108, train acc 1 val loss 0.600958, val acc 0.797872
epoch 200, train_loss 0.0132537, train acc 1 val loss 0.44546, val acc 0.87234
* Patience count 1
epoch 300, train_loss 0.0137334, train acc 1 val loss 0.381117, val acc 0.93617
* Patience count 2
* Patience count 3
* Patience count 4
* Patience count 5
early stopping...
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from /m/nbe/scratch/braindata/izbrv/detection_data/tfr/lf-cnn-mne_sample_epochs
stopped at: epoch 500, val loss 0.381117, val acc 0.93617
Trained in 137.08s


In [6]:
#evaluate performance
#TODO: add across-subject example
#test_accs = model.evaluate_performance(meta['orig_paths'], batch_size=120)
#prt_test_acc, prt_logits = model.evaluate_realtime(meta['orig_paths'], batch_size=120, step_size=params['test_upd_batch'])
#results = {'val_acc':model.v_acc[0], 'test_init':np.mean(test_accs), 'test_upd':np.mean(prt_test_acc), 'sid':meta['architecture']} # 'train_time':stop,

In [7]:
model.compute_patterns(output='patterns')
#explore output layer weights
#TODO: Fix bug related to varying sampling rates and pooling factors
#f = model.plot_out_weihts()

#explore informative spatial patterns(LF-CNN only)
#TODO: Fix visualizations
f = model.plot_patterns(sensor_layout='Vectorview-grad', sorting='best', spectra=True, scale=True)

<Figure size 640x480 with 16 Axes>

# Specify your own neural network

In [10]:
#let's make a simple linear classifier using all channels*timepoints as features with keras
params = dict(l1_lambda=0,
              learn_rate=3e-4,
              dropout = .5,
              patience = 3,# patientce for early stopping
              min_delta = 5e-3, #note the increased convergence threshold1
              test_upd_batch = 20,#pseudo-real time test batch size
              n_epochs = 1000, #total training epochs
              #nonlin_out=tf.identity,
              eval_step = 50, #evaluate validation loss each 10 epochs
              n_batch = 200) #training batch size) 

from tensorflow.keras.layers import Dense, Flatten, Activation
from tensorflow.keras.constraints import max_norm



class MyNetwork(mneflow.models.Model):
    #all you need to do is to override the computational graph with your own
    def _build_graph(self):
        self.h_params['architecture'] = 'my_own'
        input_main   = self.X
        flatten      = Flatten()(input_main)
        dense        = Dense(self.h_params['n_classes'], kernel_constraint = max_norm(0.5))(flatten)
        y_pred      = Activation('softmax')(dense)
        return y_pred
    
m2 = MyNetwork(meta,params,model_path)

start = time()
m2.train()
stop = time() - start
print('Trained in {:.2f}s'.format(stop))


# #evaluate performance
# test_accs = m2.evaluate_performance(meta['orig_paths'], batch_size=120)

Initialization complete!
epoch 0, train_loss 1.81289, train acc 0.630435 val loss 1.73704, val acc 0.691489
epoch 50, train_loss 1.27426, train acc 1 val loss 1.39943, val acc 0.93617
epoch 100, train_loss 1.27419, train acc 1 val loss 1.39228, val acc 0.93617
* Patience count 1
* Patience count 2
epoch 250, train_loss 1.27413, train acc 1 val loss 1.38655, val acc 0.93617
* Patience count 3
early stopping...
INFO:tensorflow:Restoring parameters from /m/nbe/scratch/braindata/izbrv/detection_data/tfr/my_own-mne_sample_epochs
stopped at: epoch 300, val loss 1.38655, val acc 0.93617
Trained in 113.67s


We observe that our "custom" model performed equally well as LF-CNN in terms of accuracy on the validation set. Yet, the loss function estimate on the validation set is much lower for LF-CNN. This result is not very surprising, since LF-CNN has much more constrained solution space optimized for across-subjects decoding.

#TODO: across-subject/leave-one-subject-out example