# NAPE Calcium Imaging Preprocessing Pipeline

Finds any .tif, .tiff, or .h5 files in the requested directory and performs SIMA-based motion correction and FFT-based bidirectional 
offset correction. This code parallelizes the computation at the session level by passing the file paths (if there is 
more than one recording) to the multiprocessing map function. The script sima_motion_correction contains the wrapping
code for SIMA and the custom-created class for bidirectional offset correction.

Two simple ways to execute this in command line:  
A) sima_motion_correct_batch; then input the path to the directory containing your raw files

B) sima_motion_correct_batch.batch_process(path_to_directory)

__Otherwise, in this jupyter notebook, just run all cells in order (shift + enter)__

See the following documentation for details
-------------------------------------------

https://github.com/losonczylab/sima
http://www.losonczylab.org/sima/1.3.2/
https://www.frontiersin.org/articles/10.3389/fninf.2014.00080/full

Required Packages
-----------------
sima, glob, multiprocessing, numpy, h5py, pickle (optional; only needed if you want to save the displacement file)

Custom code requirements: sima_motion_correction, bidi_offset_correction

Parameters
----------

fdir : string
    root file directory containing the raw tif, tiff, h5 files. Note: leave off the last backslash.

Optional Parameters
-------------------

max_disp : list of two entries
    Each entry is an int. First entry is the y-axis maximum allowed displacement, second is the x-axis max allowed displacement.
    The pixel shift applied to each line cannot exceed these values.
    Note: 50 pixels is approximately 10% of the FOV (512x512 pixels)
    
    Defaults to [30, 50]
    
save_displacement : bool 
    Whether to have SIMA save the calculated displacements over time. NOTE: if this is switched to True,
    it can double the time needed to perform motion correction.
    
    Defaults to False
    
Output
-------
Motion-corrected file (in HDF5 format, .h5) with "\_sima_mc" appended to the end of the file name


In [None]:
import sima_motion_bidi_correction # reload(sima_motion_bidi_correction)
import calculate_neuropil
import glob
import multiprocessing as mp
import os


In [99]:
def processes_single_file_unpack(args):
    """Adapter that lets ``Pool.map`` drive ``processes_single_file``.

    ``Pool.map`` hands each worker a single object, so the per-file
    arguments travel as one sequence and are spread back into positional
    arguments here.

    Parameters
    ----------
    args : sequence
        Positional arguments for ``processes_single_file``.

    Returns
    -------
    Whatever ``processes_single_file`` returns for those arguments.
    """
    outcome = processes_single_file(*args)
    return outcome

def processes_single_file(fpath, max_disp, save_displacement):
    """Run the per-recording processing chain on a single file.

    Three calls are made, in this order:

    1. ``sima_motion_bidi_correction.full_process`` on ``fpath``
    2. ``sima_extract_roi_sig.extract`` on ``fpath``
    3. ``calculate_neuropil.calculate_neuropil_signals_for_session`` on the
       directory that contains ``fpath``

    Parameters
    ----------
    fpath : string
        Path to the recording to process.
    max_disp : list of two entries
        Forwarded unchanged to ``full_process``.
    save_displacement : bool
        Forwarded unchanged to ``full_process``.

    Returns
    -------
    None
    """
    # The ROI-extraction module is referenced below but is not part of the
    # module-level imports, so bring it into scope here; without this the
    # call raises NameError in every worker.
    import sima_extract_roi_sig

    # Neuropil calculation works on the containing folder, not the file.
    fdir = os.path.dirname(fpath)

    sima_motion_bidi_correction.full_process(fpath, max_disp, save_displacement)

    sima_extract_roi_sig.extract(fpath)

    calculate_neuropil.calculate_neuropil_signals_for_session(fdir)

In [101]:
def batch_process(root_dir, max_disp = [30, 50], save_displacement = False):
    """Process every raw recording found one level below ``root_dir``.

    Each immediate subdirectory of ``root_dir`` is searched for ``.tif``,
    ``.tiff`` and ``.h5`` files. Files whose path contains one of the
    exclusion markers (``spatialweights``, ``_sima_mc``, ``_trim_dims``) are
    skipped so that outputs of a previous run are not processed again. The
    remaining paths are distributed over a multiprocessing pool, one task
    per file, each handled by ``processes_single_file_unpack``.

    Parameters
    ----------
    root_dir : string
        Directory whose immediate subdirectories contain the raw files.
    max_disp : list of two entries
        Forwarded unchanged to every worker. Defaults to [30, 50]; the
        default list is only read here, never modified.
    save_displacement : bool
        Forwarded unchanged to every worker. Defaults to False.

    Raises
    ------
    Exception
        If no file to analyze is found (including when ``root_dir`` does not
        exist or has no subdirectories).
    """
    patterns = ['*.tif', '*.tiff', '*.h5']
    exclude_strs = ['spatialweights', '_sima_mc', '_trim_dims']

    # The next() builtin works on Python 2 and 3; the default tuple turns a
    # missing/unreadable root into "no subdirectories" instead of a bare
    # StopIteration.
    subdirs = next(os.walk(root_dir), (root_dir, [], []))[1]

    # Collect candidate files, testing the exclusion markers against the full
    # file path so both marked folders and marked file names are skipped.
    fpaths = []
    for subdir in subdirs:
        subdir_path = os.path.join(root_dir, subdir)
        for pattern in patterns:
            for fpath in glob.glob(os.path.join(subdir_path, pattern)):
                if not any(marker in fpath for marker in exclude_strs):
                    fpaths.append(fpath)

    # print info to console
    if not fpaths:
        raise Exception("No files to analyze!")
    print(str(len(fpaths)) + ' files to analyze')

    # Never start more workers than there are files to hand out.
    num_processes = min(mp.cpu_count(), len(fpaths))
    print('Total CPU cores for parallel processing: ' + str(num_processes))
    pool = mp.Pool(processes=num_processes)

    try:
        # One argument tuple per file; the unpack wrapper spreads it back out.
        pool.map(processes_single_file_unpack,
                 [(fpath, max_disp, save_displacement) for fpath in fpaths])
    finally:
        # Release the worker processes even if a task raised.
        pool.close()
        pool.join()

In [None]:
%%time
# Interactive entry point: ask for the root directory and process everything
# found in its immediate subdirectories.
if __name__ == "__main__":
    fdir = raw_input(r"Input root directory of tif, tiff, h5 files to analyze; note: Use FORWARD SLASHES to separate folder and leave the last backlash off!!  ")
    # batch_process builds every path with os.path.join, so no separator has
    # to be appended by hand (a hard-coded backslash becomes part of the
    # folder name on non-Windows systems). normpath also drops a trailing
    # separator if the user typed one anyway.
    batch_process(os.path.normpath(fdir))

Input root directory of tif, tiff, h5 files to analyze; note: Use FORWARD SLASHES to separate folder and leave the last backlash off!!C:\2pData\Ivan\test
2 files to analyze
Total CPU cores for parallel processing: 2


In [37]:
import sima
from sima.ROI import ROIList
import numpy as np

# Extract ROI signals from one motion-corrected recording and save them as a
# .npy file next to the raw data.
fpath = r'C:\2pData\Ivan\itp_lhganiii_p7ml_920_0001\itp_lhganiii_p7ml_920_0001_tiff.h5'

# Split the path into its folder and the file name without extension.
fdir, fname_with_ext = os.path.split(fpath)
fname = os.path.splitext(fname_with_ext)[0]

# Refuse to continue when the motion-corrected output is missing.
sima_mc_path = os.path.join(fdir, fname + '_sima_mc.h5')
if not os.path.exists(sima_mc_path):
    raise Exception("Data not motion corrected yet; can't extract ROI data")

# The ROI set and the SIMA dataset are looked up in the recording's own
# folder (fdir); the previously used name `indir` was never defined.
roi_zip_path = os.path.join(fdir, fname + '_mc_RoiSet.zip')
sima_dataset_path = os.path.join(fdir, fname + '_mc.sima')

rois = ROIList.load(roi_zip_path, fmt='ImageJ')  # ROIs in ImageJ format
dataset = sima.ImagingDataset.load(sima_dataset_path)  # reload the saved dataset
dataset.add_ROIs(rois, 'from_ImageJ')
signals = dataset.extract(rois)

# Convert the 'raw' entry of the extraction result to an array and save it.
extracted_signals = np.asarray(signals['raw'])
np.save(os.path.join(fdir, fname + '_extractedsignals.npy'), extracted_signals)
print('Done with %s'%fdir)