In [None]:
import numpy as np
import pylab as plt
import h5py
import os, sys
import seaborn as sns
# use seaborn's 'darkgrid' style and 'notebook' context for the figures in this notebook
sns.set_style('darkgrid')
sns.set_context('notebook')

%matplotlib inline

In [None]:
# add folder utils to the Python path
# (resolved against the current working directory, so the notebook has to be
#  started from the folder that contains 'utils')
utils_dir = os.path.join(os.getcwd(), 'utils')
# append only once, so that re-running this cell does not pile up duplicate entries
if utils_dir not in sys.path:
    sys.path.append(utils_dir)

In [None]:
# starting Spark depends on where the notebook is running
# choose 'local' or 'openstack'
nbBackend = 'local'
# a parenthesised single-argument print is valid both as a Python 2 statement
# and as a Python 3 function call
print("Running notebook on " + nbBackend + " backend")

In [None]:
# initSpark lives in the setupSpark module (presumably one of the files in utils/ — TODO confirm)
# it is given the backend name chosen above; its result `sc` is the SparkContext used below
from setupSpark import initSpark
sc = initSpark(nbBackend)

In [None]:
# register every .py file from the utils folder with the SparkContext
# (this way all required modules are available on all workers)
pyFiles = [entry for entry in os.listdir(utils_dir) if entry.endswith('.py')]
for pyFile in pyFiles:
    sc.addPyFile(os.path.join(utils_dir, pyFile))

In [None]:
# folder that holds the HDF5 recordings
directory = '/Users/Henry/polybox/Data_temp/NeuroPipeline/LEC_Data'
# name of the selected H5 file inside that folder
h5name = 'Monyer_Leitner_F296_spot01.h5'
# full path to the HDF5 file (this is what the following cells use)
h5file = os.path.join(directory, h5name)

In [None]:
# obtain further information about the dataset (size, sampling rate, number of trials)
# sampF is used further down to convert sample numbers into seconds
from NeuroH5Utils import getFileInfo
dsetSz, sampF, nTrials = getFileInfo(h5file)

In [None]:
# build the Spark RDD from the HDF5 file and turn it into a Thunder series
import thunder as td
from NeuroH5Utils import convert2RDD

numPartitions = 10 # how many partitions?
rdd = convert2RDD(sc, h5file, numPartitions=numPartitions)
# wrap the key of every (key, value) record in a 1-tuple before handing the RDD to Thunder
keyedRdd = rdd.map(lambda kv: ((kv[0],), kv[1]))
series = td.series.fromrdd(keyedRdd)

In [None]:
# number of records in the series and number of samples per record
nNeurons = series.count()
nTimepoints = len(series.index)
# time axis in seconds: sample numbers 1..nTimepoints divided by the sampling rate
sampleNumbers = np.linspace(1, nTimepoints, nTimepoints)
t = sampleNumbers / sampF

In [None]:
# import stimulus data
# stimData is treated below as a per-sample stimulus ID where values > 0 mark a stimulus;
# the exact layout is defined by getStimData — TODO confirm
from NeuroH5Utils import getStimData
stimData, stimNames = getStimData(h5file)

### Regression analysis

Linear regression of each ROI's time series onto the stimulus design matrix (one regressor per stimulus, convolved with a canonical calcium-indicator response).

In [None]:
# build design matrix from stimulus vector
# X has one row per distinct stimulus ID and one column per timepoint;
# an entry is 1 at every sample where that stimulus is flagged (stimData > 0)
stimMask = stimData > 0
stimId = stimData[stimMask].astype(np.int64) # need integer for indexing later
stimIx = np.where(stimMask)
stimIx = stimIx[0]
# Assign each stimulus ID to a row by its rank among the distinct IDs.
# For IDs 1..nStims this is the same as stimId-1, but it also works when the IDs
# have gaps or do not start at 1 (indexing with stimId-1 would then run past the
# last row of X and raise an IndexError).
uniqueIds, stimRow = np.unique(stimId, return_inverse=True)
nStims = len(uniqueIds)
X = np.zeros((nStims, nTimepoints), int)
X[stimRow, stimIx] = 1
# plot stimulus matrix
fig = plt.figure(figsize=(20,5))
plt.plot(t, X.T);
plt.xlim((0, np.max(t)));
plt.xlabel('Time [s]');
plt.show()

In [None]:
def convolveStimWithCalciumResponse(stim, t, tauOn, tauOff):
    """Convolve a stimulus trace with a canonical calcium indicator response.

    The response kernel is (1 - exp(-t/tauOn)) * exp(-t/tauOff), evaluated at
    the time points in `t` exactly as passed in (no normalisation is applied).

    Parameters
    ----------
    stim : 1-D array-like
        Stimulus trace (e.g. one row of the design matrix).
    t : 1-D array-like
        Time points in s at which the kernel is evaluated.
    tauOn : number
        Onset time in s.
    tauOff : number
        Offset time in s.

    Returns
    -------
    numpy.ndarray
        The first len(stim) samples of the full convolution of `stim`
        with the kernel.
    """
    # Coerce to a float array: allows plain lists and keeps integer time
    # vectors from being floor-divided by integer taus under Python 2.
    t = np.asarray(t, dtype=float)
    # canonical calcium indicator response
    signal = (1 - np.exp(-t / tauOn)) * np.exp(-t / tauOff)
    # convolve stimulus with the response; the full convolution is longer than
    # the stimulus, so keep only the part that lines up with it
    stim_conv = np.convolve(stim, signal)
    return stim_conv[:len(stim)]

In [None]:
# time constants of the calcium response (both in s)
tauOn, tauOff = 0.5, 5
# convolve every row of X (one stimulus each) with the calcium response
Xconv = np.zeros((nStims, nTimepoints), float)
for iStim, stimTrace in enumerate(X):
    Xconv[iStim] = convolveStimWithCalciumResponse(stimTrace, t, tauOn, tauOff)
# plot convolved stimulus matrix
fig = plt.figure(figsize=(20, 5))
plt.plot(t, Xconv.T)
plt.xlim((0, np.max(t)))
plt.xlabel('Time [s]')
plt.show()

In [None]:
# create and fit the model
# the convolved design matrix is passed transposed (timepoints x stimuli);
# fit_and_score returns the fitted model (its betas are plotted below) and a
# score per record (plotted below as model fit / R^2)
from regression import LinearRegression
algorithm = LinearRegression(fit_intercept=True, normalize=True)
model,score = algorithm.fit_and_score(Xconv.T, series)

In [None]:
# fetch the fitted betas as an array
betas = model.betas.toarray()

# show the beta matrix as a grey-scale image
plt.figure(figsize=(20, 6))
imageOpts = dict(cmap='gray', interpolation='nearest',
                 aspect=0.25, origin='lower', vmin=0, vmax=100)
fig = plt.imshow(betas, **imageOpts)
plt.xticks([])
plt.xlabel('StimID')
plt.ylabel('Neuron ID')
plt.colorbar()
plt.show()
# plt.savefig('GLM_betaImage.tiff', interpolation='nearest')

In [None]:
# bar chart of the model score, one bar per neuron
neuronIds = range(nNeurons)
scores = score.toarray()
plt.figure(figsize=(20, 5))
plt.bar(neuronIds, scores)
plt.xlim((0, nNeurons))
plt.xlabel('Neuron ID')
plt.ylabel('Model fit / R^2');