## Plot widefield calcium imaging data
This notebook demonstrates how to plot the data for a selected trial after preprocessing has been performed with the notebook Widefield_Preproc_Spark_Swift. The data (DFF and movie arrays) are read from the corresponding HDF5 files stored on Swift. The following graphics are generated: first frame and average of movie, mask used for processing, ROI locations, DFF traces for selected ROIs, movie of DFF array. Since we only process a single selected trial here, Spark is not required and the notebook runs entirely on the driver node.

### Imports

In [None]:
# Import Python modules
import os, sys
import numpy as np
from matplotlib import pylab as plt
import matplotlib.animation as animation
from __future__ import print_function
import h5py
import getpass
import tempfile
import shutil
import re

%matplotlib inline

# the notebook backend: 'local' or 'openstack'
# (in the cells shown here it is only referenced by the commented-out Spark setup at the end)
nbBackend = 'openstack'

# add folder 'utils' to the Python path
# this folder contains custom written code that is required for data import and analysis
# NOTE: the path is resolved relative to the current working directory, so the
# notebook has to be started from the folder that contains 'utils'
utils_dir = os.path.join(os.getcwd(), 'utils')
sys.path.append(utils_dir)
# must come after the sys.path change above, otherwise the module is not found
import SwiftStorageUtils

In [None]:
# Import custom-written modules
import WidefieldDataUtils as wf
import PickleUtils as pick
import CalciumAnalysisUtils as calciumTools

### Data and Analysis Parameters

In [None]:
# --- Swift object storage ---
# name of the container in Swift (do not use _ etc.)
swift_container = 'ariel'
# provider name; in general, this should not change
swift_provider = 'SparkTest'
# URL prefix of the form swift://<container>.<provider>/
swift_basename = ''.join(["swift://", swift_container, ".", swift_provider, "/"])

# --- folders (relative to swift_basename) written by Widefield_Preproc_Spark_Swift ---
output_folder_mov, output_folder_dff = 'mov_out', 'dff_out'

# --- base name (without extension) of the trial file to be displayed ---
selected_file = '20152310_092225_4'

In [None]:
# OpenStack credentials for accessing Swift storage
# NOTE(review): user name, tenant and auth URL are hard-coded for one account;
# change them here when running the notebook as a different user
os_username = 'hluetc'
os_tenant_name = 'helmchen.hifo.uzh'
os_auth_url = 'https://cloud.s3it.uzh.ch:5000/v2.0'
# provide OS password
# (prompted for interactively, so the password is never stored in the notebook source)
os_password = getpass.getpass()

In [None]:
# bundle the storage settings and credentials in one dict that is handed to
# the helper functions further down
file_params = {
    'swift_container': swift_container,
    'swift_provider': swift_provider,
    'swift_basename': swift_basename,
    'os_username': os_username,
    'os_tenant_name': os_tenant_name,
    'os_auth_url': os_auth_url,
    'os_password': os_password,
}

### Load data from HDF5 file
This part loads the data from the HDF5 file stored on Swift. To do this, the file is first downloaded from Swift storage to a temporary folder. Then, we use standard Python libraries to read the HDF5 file. Finally, the temporary folder is deleted.

In [None]:
def getArrayFromH5(h5file, dataset_name):
    """
    Read one dataset from an HDF5 file into a numpy array.

    Input arguments:
    h5file ......... path of the HDF5 file (opened read-only)
    dataset_name ... name of the dataset to read

    Returns the content of the dataset as a numpy array.
    Raises KeyError if the file contains no dataset called dataset_name
    (previously a meaningless 0-d object array was returned in that case).
    """
    with h5py.File(h5file, 'r') as hf:
        # list() so that the names themselves are printed, whatever keys() returns
        print('List of arrays in HDF5 file: ', list(hf.keys()))
        if dataset_name not in hf:
            raise KeyError('Dataset %r not found in %s' % (dataset_name, h5file))
        # copy into memory while the file is still open
        data = np.array(hf[dataset_name])
    print('Shape of the array %s: ' % (dataset_name), data.shape)
    return data

In [None]:
# local storage directory; it is removed again in the finally clause below,
# also when the download or the file import fails
temp_dir = tempfile.mkdtemp()
try:
    # objects to download from Swift (movie and DFF file of the selected trial)
    objects_to_download = [
        '%s/%s.h5' % (output_folder_mov, selected_file),
        '%s/%s.h5' % (output_folder_dff, selected_file)
    ]

    # download options
    down_opts = {
        'skip_identical': True,
        'out_directory': temp_dir,
    }

    # download files to local directory
    from SwiftStorageUtils import downloadItems
    downloadItems(swift_container, objects_to_download, file_params, down_opts)

    # read files from local directory
    mov_file = os.path.join(temp_dir, output_folder_mov, selected_file + '.h5')
    dff_file = os.path.join(temp_dir, output_folder_dff, selected_file + '.h5')

    mov_data = getArrayFromH5(mov_file, 'mov')
    dff_data = getArrayFromH5(dff_file, 'dff')
finally:
    # delete temp dir (the arrays have been copied into memory by now)
    shutil.rmtree(temp_dir)

Now run some checks and get the image dimensions and number of frames.

In [None]:
# sanity check: movie and DFF array have to agree in every dimension
assert np.array_equal(dff_data.shape, mov_data.shape)

# the third axis holds the timepoints, the first two axes the image
timepoints = mov_data.shape[2]
dims_analysis = (mov_data.shape[0], mov_data.shape[1])

Set up the time vector, define trial times and specify the files with ROI and trial data.

In [None]:
# --- time axis ---
sample_rate = 20.0 # Hz
# frame index converted to seconds and shifted by 3 s, so that the first frame is at t = -3
t = np.arange(timepoints) / sample_rate - 3.0

# --- trial times (on the time axis t) ---
t_base = -2 # baseline end (for F0 calculation)
t_stim = -1.9 # stimulus cue (auditory)
t_textIn = 0 # texture in (i.e. stimulus onset)
t_textOut = 2 # texture starting to move out (stimulus offset)
t_response = 4.9 # response cue for licking (auditory)

# --- ROI definitions ---
roi_file = 'rois_OCIA.mat'
# image dimensions on which coordinates in roi_file are based
dims_roi = (256,256)

# --- file with trial indices ---
trials_index_file = 'trials_ind.mat'

### Display frames from movie

In [None]:
# display from selected file (frame 1, mean, DFF mask)
print('File: %s' % (selected_file))

# annotation anchor in data coordinates: matplotlib expects (x, y), i.e.
# (column, row). x is therefore derived from the image width (shape[1]) and
# y from the image height (shape[0]); for square images nothing changes.
n_rows, n_cols = mov_data.shape[0], mov_data.shape[1]
xy = (n_cols / 1.05, n_rows - (n_rows / 1.1))

# average over time ignoring NaNs; computed once, used for the 'Mean' and 'Mask' panels
mov_mean = np.nanmean(mov_data, axis=2)

# calculate mask: the plain mean propagates NaN, so every pixel with at least
# one NaN in its DFF trace is flagged and painted with the brightest value
dff_data_mean = np.mean(dff_data, axis=2)
masked = mov_mean.copy()
masked[np.isnan(dff_data_mean)] = np.nanmax(masked)

# (image, label, label color) for each of the three panels
panels = [
    (mov_data[:,:,0], 'Frame %1.0f' % 0, 'yellow'),
    (mov_mean, 'Mean', 'yellow'),
    (masked, 'Mask', 'black'),
]
f, axes = plt.subplots(1, 3, figsize=(15, 5))
for ax, (img, label, label_color) in zip(axes, panels):
    ax.imshow(img, cmap='gray', interpolation='none')
    ax.annotate(label, xy=xy, fontsize=14, color=label_color, horizontalalignment='right')

### Import Rois and trial indices
The approach for importing mat-files from Swift storage is the same as for HDF5 files: first download files from Swift storage to a temporary folder. Then, we use custom-written code to read the mat-files. Finally, the temporary folder is deleted.

In [None]:
# download and import Roi and trial index files
objects_to_download = [
    roi_file,
    trials_index_file
]
# local storage directory; it is removed again in the finally clause below,
# also when the download or the import fails
temp_dir = tempfile.mkdtemp()
try:
    # download options
    down_opts = {
        'skip_identical': True,
        'out_directory': temp_dir,
    }

    from SwiftStorageUtils import downloadItems
    downloadItems(swift_container, objects_to_download, file_params, down_opts)

    # local copies of the downloaded files. roi_file / trials_index_file keep
    # holding the object names on Swift, so this cell can be run repeatedly.
    trials_index_path = os.path.join(temp_dir, trials_index_file)
    roi_path = os.path.join(temp_dir, roi_file)

    trial_ind = wf.importTrialIndices(trials_index_path)

    # Specify ROIs to pull out
    roi_dict = {'roi_S1BC': [], 'roi_A1': [], 'roi_EC': [], 'roi_M2': []}
    roi_dict = wf.importMatlabRois(roi_path, roi_dict, dims_roi, dims_analysis)
finally:
    # delete temp dir
    shutil.rmtree(temp_dir)

Figure out the trial type of the currently selected file.

In [None]:
def getTrialType(selected_file, trial_ind):
    """
    Return trial type of input file from trial_ind.

    Input arguments:
    selected_file ... file name; its third group of (up to 8) digits is taken
                      as the trial number, e.g. 4 for '20152310_092225_4'
    trial_ind ....... mapping from trial type to a collection of trial numbers

    Returns the first trial type whose collection contains the trial number,
    or 'void' if there is none.
    Raises IndexError if the file name has fewer than three groups of digits.
    """
    # parse file name to get trial_no
    # (raw string: backslash-d is not a valid escape sequence in a normal string literal)
    digit_groups = re.findall(r'\d{1,8}', selected_file)
    trial_no = int(digit_groups[2])
    # search trial_ind for trial_type
    for trial_type in trial_ind:
        if trial_no in trial_ind[trial_type]:
            return trial_type
    return 'void'

In [None]:
# look up the trial type of the selected file ('void' if its trial number is not listed in trial_ind)
trial_type = getTrialType(selected_file, trial_ind)
print('%s Trial type: %s' % (selected_file, trial_type))

Plot ROI positions on average of all frames.

In [None]:
# Sanity check: Plot position of Rois
%matplotlib inline
print('File: %s' % (selected_file))
# squeeze=False always returns a 2d array of axes; flattened here so that
# axes[ix] also works when roi_dict contains a single ROI
f, axes_grid = plt.subplots(1, len(roi_dict), figsize=(20,5), squeeze=False)
axes = axes_grid.ravel()
# the average over all frames is the same for every ROI, so compute it once
avg_img = np.mean(mov_data, axis=2)
for ix, roi in enumerate(roi_dict):
    # work on a copy, otherwise the blanked pixels of one ROI would show up in the next panel
    avg_img_roi = avg_img.copy()
    # mark the ROI by setting its pixels to NaN
    avg_img_roi[roi_dict[roi][0], roi_dict[roi][1]] = np.nan
    axes[ix].imshow(avg_img_roi, cmap='gray', interpolation='none', vmin=100, vmax=25000)
    axes[ix].annotate('%s' % (roi), xy=xy, fontsize=14, color='white', horizontalalignment='right')

### Plot DFF traces
Now we can pull out and plot the dF/F traces for selected ROIs. For plotting we use the Bokeh interactive visualization library, which offers a number of nice features for plotting and interacting with plots.

In [None]:
# Import Bokeh library
from bokeh.plotting import Figure, show
from bokeh.models import Range1d, CrosshairTool, HoverTool
from bokeh.io import output_notebook
%matplotlib inline

In [None]:
# This has to be in a separate cell, otherwise it won't work.
from bokeh import resources
output_notebook(resources=resources.INLINE)

Define some functions for plotting.

In [None]:
def getHover():
    """Create the hover tool that is attached to the time series plots and return it."""
    # one (label, value template) pair per line of the tooltip
    tooltip_rows = [
        ("index", "$index"),
        ("(x,y)", "($x, $y)"),
        ("fill color", "$color[hex, swatch]:fill_color"),
    ]
    hover_tool = HoverTool()
    hover_tool.tooltips = tooltip_rows
    return hover_tool

In [None]:
def plotTimeseries(p, t, y, legend):
    """
    Plot a timeseries in Figure p using the Bokeh library

    Input arguments:
    p ........ Bokeh figure
    t ........ 1d time axis vector (numpy array)
    y ........ 2d data numpy array (number of traces x time)
    legend ... sequence with one legend label per trace (same order as the rows of y)

    The figure is shown and also returned.
    """
    colors_list = ['red', 'green', 'blue', 'yellow', 'black']
    p.add_tools(CrosshairTool(), getHover())
    for i in range(y.shape[0]):
        # cycle through the colors so that more than len(colors_list) traces can be plotted
        trace_color = colors_list[i % len(colors_list)]
        p.line(t, y[i, :], line_width=2, legend=legend[i], color=trace_color)
    # limit the x axis to the time range of the data; the range is assigned
    # directly because p.set() is not available in newer Bokeh releases
    p.x_range = Range1d(np.min(t), np.max(t))
    show(p)

    return p

Now we loop through the ROIs in roi_dict, pull out the corresponding dFF traces and plot them. The resulting figure allows interactive zooming, scrolling and inspection of data points.

In [None]:
# number of frames is taken from the DFF array instead of a hard-coded value,
# so trials of any length can be plotted
n_frames = dff_data.shape[2]
legend = list(roi_dict)
# one row per ROI; allocated once instead of growing by repeated concatenation
roi_data_cat = np.empty((len(legend), n_frames))
for ix, i_roi in enumerate(legend):
    roi_xy = roi_dict[i_roi]
    # presumably roi_xy holds the row and column indices of the ROI pixels, so the
    # indexing yields (pixels x time); average over the pixels ignoring NaNs
    roi_data = np.nanmean(dff_data[roi_xy[0], roi_xy[1], :], axis=0)
    roi_data_cat[ix, :] = roi_data
p = Figure(plot_width=800, plot_height=300, title=('dF/F Traces %s - %s' % (selected_file, trial_type)))
plotTimeseries(p, t, roi_data_cat, legend)

### DFF movie
Finally, we create a movie from the dFF array and save it under the folder animations on Swift. 

In [None]:
# create the dF/F movie with the project helper wf.saveMovie; the movie name is '<selected_file>_dFF'
# NOTE(review): file_params is passed along, presumably for the upload to Swift — confirm in WidefieldDataUtils
wf.saveMovie(dff_data, trial_type, '%s_dFF' % (selected_file), sample_rate, t, file_params)

## Alternative: reading from Spark pickle file into RDD (do NOT use for now)

In [None]:
# Initialize Spark
# specify the number of cores and the memory of the workers
# each worker VM has 8 cores and 32 GB of memory
# the status of the cluster (ie. how many workers are available) can be checked in the admin console:
# https://cloud.s3it.uzh.ch/project/instances/
# when changing these settings, keep in mind that other people might be using the cluster
# returns the SparkContext object 'sc' which tells Spark how to access the cluster
# from setupSpark import initSpark
# sc = initSpark(nbBackend, max_cores=8, worker_memory='10G')

# # provide OS credentials to the Hadoop configuration
# sc._jsc.hadoopConfiguration().set('fs.swift.service.SparkTest.username', os_username)
# sc._jsc.hadoopConfiguration().set('fs.swift.service.SparkTest.tenant', os_tenant_name)
# sc._jsc.hadoopConfiguration().set('fs.swift.service.SparkTest.password', os_password)

# # add Python files in 'utils' folder to the SparkContext 
# # this is required so that all files are available on all the cluster workers
# for filename in os.listdir(utils_dir):
#     if filename.endswith('.py'):
#         sc.addPyFile(os.path.join(utils_dir, filename))

# # load movie RDD
# mov_rdd = sc.pickleFile('%s%s' % (file_params['swift_basename'], output_folder_mov))

# # load DFF RDD
# dff_rdd = sc.pickleFile('%s%s' % (file_params['swift_basename'], output_folder_dff))

# # Collect data for selected file
# mov_rdd = mov_rdd.filter(lambda (k,v): selected_file in k)
# dff_rdd = dff_rdd.filter(lambda (k,v): selected_file in k)

# # get selected movie (return key-value tuple)
# selected_mov = mov_rdd.first()
# mov_data = selected_mov[1]

# # get DFF data for selected file
# selected_dff = dff_rdd.first()
# dff_data = selected_dff[1]