# Whole brain data processing

In [1]:
import numpy as np
import os, sys
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
from cellProcessing import *

%matplotlib inline
%load_ext autoreload
%autoreload 2

## Set up environments

* Install `fish_proc` from github https://github.com/zqwei/fish_processing

In [2]:
## STEP 1: specify data and save path
# Folder with the raw imaging data; it is passed to the preprocessing and df/f calls.
dir_root = (
    '/groups/ahrens/ahrenslab/jing/giving_up/20190219/fish2/'
    '7dpf-huc_gcamp7ff_gfap_rgeco-GU_slow_fwd-fish02-exp02_20190219_174013/im'
)
# Alternative output location, kept for reference (disabled):
# save_root = '/nrs/ahrens/jing/20190219/fish2/7dpf-huc_gcamp7ff_gfap_rgeco-GU_slow_fwd-fish02-exp02_20190219_174013/processed'

### Set save folder to Ziqiang's folder

In [3]:
# Output folder for every file produced by the steps in this notebook.
save_root = './processed'
# exist_ok=True keeps the cell safe to re-run and avoids the race between a
# separate os.path.exists() check and the directory creation. It also raises
# if the path exists but is a regular file instead of silently continuing.
os.makedirs(save_root, exist_ok=True)

## Preprocessing
This will generate the following files in the `save_root` folder:
* `motion_fix_.h5` -- reference image
* `trans_affs.npy` -- affine transform
* `Y_2dnorm_ave.h5` -- average image after detrend
* `local_pca_data.zarr` -- local PCA denoised image (this may take up about 4x the file size of the raw data); it can be removed afterwards

### Notes
* ZW -- (Rechunk from image to time) I used 500 cores; it broke down several times but still worked (41 minutes for the rechunk). Try at least 700 cores as a start
* ZW -- (later steps) I used number of blocks + 1 as number of cores (this can be precomputed)
* ZW -- computation time of baseline linearly increases with x, y, and baseline_window in each block (about 30 min for 256 x 256 x 400)

In [None]:
%%time
nsplit = 8 # number of split on x and y -- about 256x256 is good
baseline_percentile = 20  
baseline_window = 400   # number of frames
numCores = 1345
preprocessing(dir_root, save_root, numCores=numCores, window=baseline_window, percentile=baseline_percentile, nsplit=nsplit)

Scheduler: tcp://10.36.111.12:36756
Dashboard link: http://10.36.111.12:40711/status
Compute reference image ---
--- Done computing reference image
Registration to reference image ---
--- Done registration reference image
Apply registration ---
dask.array<zarr, shape=(21, 2048, 2048, 5603), dtype=float32, chunksize=(1, 256, 256, 5603)>
Save average data ---
dask.array<zarr, shape=(21, 2048, 2048, 1), dtype=float32, chunksize=(1, 256, 256, 1)>


## Mask
Make a mask for the data to reduce the amount of computation in the demix step:
* remove low-intensity pixels
* remove low-SNR pixels
* remove pixels with low local correlation

This will generate the following files in the `save_root` folder:
* `mask_map.h5` -- mask
* `local_correlation_map.h5` -- local correlation
* `masked_local_pca_data.zarr` -- df/f computation
* `masked_downsampled_local_pca_data.zarr` -- downsampled in time -> used for cell segmentation, which can be removed afterwards

### Note
* ZW -- intensity_percentile can be experimented using notebook `Test_for_pipeline_mask`
* ZW -- I used 50 for the example data

In [None]:
## mask parameters
dt = 5  # time downsample for cell segmentation
intensity_percentile = 50
# NOTE(review): is_skip_snr=True appears to skip the SNR criterion listed in
# the section text above -- confirm this is intended.
mask_brain(
    save_root,
    percentile=intensity_percentile,
    dt=dt,
    numCores=20,
    is_skip_snr=True,
    save_masked_data=True,
)

## Demix
This will generate the following files in the `save_root` folder:
* `demix_rlt` for each block

In [None]:
# Demix with a quarter of the cores currently stored in `numCores`.
# The value goes into its own name instead of overwriting `numCores`: the
# in-place `numCores = numCores//4` divided by 4 again on every re-run of this
# cell, so the core count depended on how many times it had been executed.
# `numCores` itself is left unchanged for later cells.
demix_numCores = numCores // 4
demix_cells(save_root, nsplit=nsplit, numCores=demix_numCores)

### Check demix results

In [None]:
# Block whose demix result is inspected.
# NOTE(review): the meaning of the four indices is not visible here -- confirm
# against check_demix_cells.
block_id = (0, 2, 0, 0)
# Pass the notebook-level `nsplit` (set in the preprocessing cell) rather than a
# literal 8, so the check cannot drift from the split used for demixing.
check_demix_cells(save_root, block_id, nsplit=nsplit, plot_global=True)

## Compute df/f
One of three choices
* df is from NMF components (where background is not included) -- preferred
* df is on raw data based on the recomputation of cell F using NMF weights
* df/f on pixels without using NMF weights

### df/f on pixels

In [None]:
# Settings for the df/f step; these reassign names first set in the
# preprocessing cell, and the cells below read them too.
numCores = 200
baseline_window = 1000
baseline_percentile = 20
dff = compute_cell_dff_pixels(
    dir_root,
    save_root,
    numCores=numCores,
    window=baseline_window,
    percentile=baseline_percentile,
)

### df/f on raw cell data

In [None]:
# No parameters are set here: `numCores`, `baseline_window`,
# `baseline_percentile` and `nsplit` are whatever earlier cells last assigned.
compute_cell_dff_raw(
    dir_root,
    save_root,
    nsplit=nsplit,
    numCores=numCores,
    window=baseline_window,
    percentile=baseline_percentile,
)

### df/f on denoised cell data

In [None]:
# No parameters are set here: `numCores`, `baseline_window`,
# `baseline_percentile` and `nsplit` are whatever earlier cells last assigned,
# and `dt` is the time-downsample factor from the mask cell.
compute_cell_dff_NMF(
    dir_root,
    save_root,
    nsplit=nsplit,
    dt=dt,
    numCores=numCores,
    window=baseline_window,
    percentile=baseline_percentile,
)