# Importing data


In [2]:
import numpy as np
import os
import tensorflow as tf
from time import time
import mneflow


## 1. from MNE epochs

#### If you use MNE-python, all you need is to provide your epochs file (or list of epoch files) to mneflow.produce_tfrecords


In [3]:
#get to epochs using your mne-python pipeline
import mne
from mne.datasets import multimodal
mne.set_log_level(verbose='CRITICAL')

fname_raw = os.path.join(multimodal.data_path(), 'multimodal_raw.fif')
raw = mne.io.read_raw_fif(fname_raw)

cond = raw.acqparser.get_condition(raw, None)
epochs_list = [mne.Epochs(raw, **c) for c in cond]

#here we concatenate epochs because each input file contains just one condition
#otherwise mneflow.produce_tfrecords can handle a list of epochs objects
epochs = mne.concatenate_epochs(epochs_list)

In [5]:
#Specify import options
import_opt = dict(savepath='../tfr/', #path where TFR files will be saved
           out_name='mne_sample_epochs', #name of TFRecords files
           input_type='epochs', #can also be "array"
           picks={'meg':'grad'}, #used only if input_type is mne.epochs.Epochs or path to saved '*-epo.fif'
           scale=True, #apply baseline_scaling?
           crop_baseline=True,
           decimate = 2,
           scale_interval=(0,60), #indices in time axis corresponding to baseline interval
           savebatch=1, # number of input files per TFRecord file           
           save_origs=False, # whether to produce separate TFR-file for inputs in original order
           val_size=0.1) #validations set size set to 10% of all data

   
if not os.path.exists(import_opt['savepath']):
    os.mkdir(import_opt['savepath'])
        
if os.path.exists(import_opt['savepath']+'meta.pkl'):
    meta = mneflow.load_meta(import_opt['savepath'])
else:
    meta = mneflow.produce_tfrecords(epochs,**import_opt)   

processing epochs
labels (940,)
(940, 204, 151)
Saving TFRecord# 0


# 2. Other data import options

### 2.1 Saved mne.epochs (*-epo.fif) files
Alternatively, if your epochs are saved to disk provide a str (or list of str) with path(s) to your -epo.fif files

e.g. this will work

```python
epochs.save('test_saved_epochs.fif')
meta = mneflow.produce_tfrecords('test_saved_epochs.fif',**opt)
```
### 2.2. Arrays in *.mat or *.npz format
if the first argument is str mneflow.produce_tfrecords can also accept *.mat or *.npz format

e.g.

```python
data_path = '/m/nbe/scratch/braindata/izbrv/detection_data/'
filenames = [data_path +'sub' + str(i) + '-grad.npz' for i in range(1,4)]
meta = mneflow.produce_tfrecords(filenames,**opt)
```
In this case, specify iput_type='array', and also provide array_keys keyword argument

e.g. 

```python
array_keys={'X':'my_data_samples','y':'my_labels'}
```
#note that "picks" works only for input_type="epochs"

# 3.1. Choose from already implemented models

In [12]:
#specify optimizer parmeters
optimizer_params = dict(l1_lambda=3e-7,
              learn_rate=3e-4,
              dropout = .5,
              patience = 3,# patientce for early stopping
              min_delta = 5e-6,
              test_upd_batch = 20,#pseudo-real time test batch size
              n_epochs = 1000, #total training epochs
              eval_step = 25, #evaluate validation loss each 10 epochs
              n_batch = 200,
              
              ) 

#specify parameters specific for the model
#these are specific to LF-CNN]

lf_params = dict(n_ls=64, #number of latent factors
              nonlin_in = tf.identity, #input layer activation for var-cnn and lf-cnn
              nonlin_hid = tf.nn.relu, #convolution layer activation for var-cnn and lf-cnn
              nonlin_out = tf.identity, #output layer activation for var-cnn and lf-cnn
              filter_length=32, #convolutional filter length for var-cnn and lf-cnn
              pooling = 8, #convlayer pooling factor for var-cnn and lf-cnn
              stride = 2, #stride parameter for convolution filter
              )
#specify the path for saving the trained model
#here we will use the same folder as for the TFRecords
model_path = import_opt['savepath'] 

#initialize the model
model = mneflow.models.LFCNN(meta,optimizer_params,model_path,lf_params)

#this will initialize the iterators over the dataset,the computational graph and the optimizer
model.build()

lf-conv _init
dense _init
Initialization complete!


In [13]:
#train the model
start = time()
model.train()
stop = time() - start
print('Trained in {:.2f}s'.format(stop))

epoch 0, train_loss 2.78795, train acc 0.0652174 val loss 2.78331, val acc 0.106383
epoch 25, train_loss 0.429146, train acc 0.869565 val loss 1.17445, val acc 0.553191
epoch 50, train_loss 0.0394581, train acc 1 val loss 0.595269, val acc 0.712766
epoch 75, train_loss 0.0107133, train acc 1 val loss 0.510771, val acc 0.851064
epoch 100, train_loss 0.00499535, train acc 1 val loss 0.341246, val acc 0.882979
* Patience count 1
epoch 150, train_loss 0.003583, train acc 1 val loss 0.305725, val acc 0.893617
* Patience count 2
* Patience count 3
early stopping...
Instructions for updating:
Use standard file APIs to check for files with this prefix.
INFO:tensorflow:Restoring parameters from ../tfr/lf-cnn-mne_sample_epochs
stopped at: epoch 200, val loss 0.305725, val acc 0.893617
Trained in 146.07s


In [14]:
model.compute_patterns(output='patterns')
#explore output layer weights
#TODO: Fix bug related to varying sampling rates and pooling factors
#f = model.plot_out_weihts()

#explore informative spatial patterns(LF-CNN only)
#TODO: Fix visualizations
f = model.plot_patterns(sensor_layout='Vectorview-grad', sorting='best', spectra=True, scale=True)

<Figure size 640x480 with 16 Axes>

# 3.2. Or specify your own neural network

In [15]:
#let's make a simple linear classifier using all channels*timepoints as features with keras
params = dict(l1_lambda=0,
              learn_rate=3e-4,
              dropout = .5,
              patience = 3,# patientce for early stopping
              min_delta = 5e-3, #note the increased convergence threshold1
              test_upd_batch = 20,#pseudo-real time test batch size
              n_epochs = 1000, #total training epochs
              #nonlin_out=tf.identity,
              eval_step = 50, #evaluate validation loss each 10 epochs
              n_batch = 200) #training batch size) 

from tensorflow.keras.layers import Dense, Flatten, Activation
from tensorflow.keras.constraints import max_norm



class MyNetwork(mneflow.models.Model):
    #all you need to do is to override the computational graph with your own
    def _build_graph(self):
        self.h_params['architecture'] = 'my_own'
        input_main   = self.X
        flatten      = Flatten()(input_main)
        dense        = Dense(self.h_params['n_classes'], kernel_constraint = max_norm(0.5))(flatten)
        y_pred      = Activation('softmax')(dense)
        return y_pred
    
m2 = MyNetwork(meta,params,model_path)
m2.build()
start = time()
m2.train()
stop = time() - start
print('Trained in {:.2f}s'.format(stop))


# #evaluate performance
# test_accs = m2.evaluate_performance(meta['orig_paths'], batch_size=120)

Initialization complete!
epoch 0, train_loss 1.75461, train acc 0.630435 val loss 1.8381, val acc 0.531915
epoch 50, train_loss 1.27403, train acc 1 val loss 1.48677, val acc 0.829787
epoch 100, train_loss 1.27403, train acc 1 val loss 1.47608, val acc 0.808511
* Patience count 1
* Patience count 2
epoch 250, train_loss 1.27402, train acc 1 val loss 1.46621, val acc 0.819149
* Patience count 3
early stopping...
INFO:tensorflow:Restoring parameters from ../tfr/my_own-mne_sample_epochs
stopped at: epoch 300, val loss 1.46621, val acc 0.819149
Trained in 118.53s


#TODO: across-subject/leave-one-subject-out example