# Ratterdam Beltway Decoding / ML  / Permutation-Shuffling Approaches
## Mid August 2020 - Last attempts at finding a decoding method that works and we have confidence in the results
### Ideas: 
### 1) RF at a single alley per cell, so we can go back to 'template' approach that is invalid when using multiple alleys/cells
### 2) Cluster-based metrics compared to shuffle (not classification)
### 3) sliding window bayesian decoder (started this in another file, should dump what i've done here)
### 4) Late Sep 2020 - new shuffling approach to capture envelope fluctuations within field

In [1]:
import sklearn as skl
from sklearn import svm, preprocessing, metrics
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, recall_score, f1_score, accuracy_score
from sklearn.metrics import roc_curve, auc
from sklearn.multiclass import OneVsRestClassifier
from sklearn.metrics import confusion_matrix
from sklearn.utils.multiclass import unique_labels
from sklearn import neighbors
from sklearn.ensemble import RandomForestClassifier

import matplotlib.pyplot as plt
from scipy import interp
from scipy.integrate import simps
from scipy.ndimage import center_of_mass
import math
import numpy as np, random, json, pickle, datetime, copy, socket, os, sys, scipy
from scipy.stats import sem
import matplotlib.colors as colors
from importlib import reload
from matplotlib.backends.backend_pdf import PdfPages


import utility_fx as util
import ratterdam_ParseBehavior as Parse
import ratterdam_CoreDataStructures as Core
import ratterdam_Defaults as Def
import ratterdam_DataFiltering as Filt

In [2]:
%matplotlib qt5
%qtconsole --style native

In [186]:
def createVisitSummaryFeatures(unit, alley, visit, features):
    """
    For a given pass for a given unit summarize the 1d ratemap into a simpler,
    explicit vector of attributes. Which attributes to use are given by
    the 'features' list. Visit is the visitnum not the data itself
    """
    feats = np.empty((0))
    rm = unit.alleys[alley][visit]['ratemap1d']
    # i dont know of a better way of doing this other than to just check param name in list and add it if present
    if 'rm' in features:
        feats = np.append(feats, rm)
    if 'time' in features:
        feats = np.append(feats, visit)
    if 'max95' in features:
        maximum = np.nanpercentile(rm, 95)
        feats = np.append(feats, maximum)
    if 'locmax95' in features:
        locmax = np.searchsorted(np.sort(rm), np.percentile(rm, 95))
        feats = np.append(feats, locmax)
    if 'mean' in features:
        mean = np.nanmean(rm)
        feats = np.append(feats, mean)
    if 'median' in features:
        feats = np.append(feats, np.nanmedian(rm))
    if 'auc' in features:
        auc = simps(rm)
        feats = np.append(feats, auc)
    if 'avgdrds' in features:
        avgdrds = np.mean(np.abs(np.diff(rm))) # avg dr/ds change in rate / change in pos. 
        feats = np.append(feats, avgdrds)
    if 'maxdrds' in features:
        maxdrds = np.percentile(np.abs(np.diff(rm)), 95)
        feats = np.append(feats, maxdrds)
    if 'com' in features:
        try:
            com = center_of_mass(np.nan_to_num(rm))[0]
            feats = np.append(feats, com)
        except:
            com = int(round(Def.singleAlleyBins[1]-1)/2)
            feats = np.append(feats, com)
    if 'comval' in features:
        try:
            comval = rm[int(np.round(com))]
            feats  = np.append(feats, comval)
        except:
            comval = np.nanpercentile(rm, 95)
            feats  = np.append(feats, comval)
    if 'boundMaxs' in features:
        # think there may be something going on at entrace/exit to alley so get the max val 
        # within each trisector of alley. NB real intersection ends with alleybounds_manuallyshifted2
        # gives a 3:6:3 ratio of approach L:alley:approach/exit R but I want to squeeze in the bounds to give
        # more space to the flanks (4:4:4 ratio) to capture whats happening at boundary itself as well.
        max1,max2,max3 = np.nanmax(rm[:4]), np.nanmax(rm[4:8]), np.nanmax(rm[9:]) # assumes 12bin rm. make generalized later
        feats = np.append(feats,(max1,max2,max3))
    if 'isi' in features:
        begin, end, bsize = 0, 0.075e6, 5000
        bins = np.arange(begin,end,bsize)
        spikes = unit.alleys[alley][visit]['spikes']
        hist = np.histogram(np.diff(spikes[:,0]),bins=bins)[0]
        feats = np.append(feats, hist)
        
    if 'gamma_params':
        rm = rm[~np.isnan(rm)]
        rm = rm/rm.sum()
        a,loc,b = scipy.stats.gamma.fit(rm)
        feats = np.append(feats, [a,loc,b])
        
    return feats

In [4]:
# Load data into population dict. Each cell will be decoded separately. Within each cell each alley will be decoded separately.
rat = 'R859'
expCode = "BRD5"
datafile = f"E:\\Ratterdam\\{rat}\\{rat}{expCode}\\"

alleyTracking, alleyVisits,  txtVisits, p_sess, ts_sess = Parse.getDaysBehavioralData(datafile, expCode)
population = {}
for subdir, dirs, fs in os.walk(datafile):
    for f in fs:
        if 'cl-maze1' in f and 'OLD' not in f and 'Undefined' not in f:
            clustname = subdir[subdir.index("TT"):] + "\\" + f
            unit = Core.UnitData(clustname, datafile, expCode, Def.alleyBounds, alleyVisits, txtVisits, p_sess, ts_sess)
            unit.loadData_raw()
            rm = util.makeRM(unit.spikes, unit.position)            
            if np.nanpercentile(rm,Def.wholetrack_imshow_pct_cutoff) >= 1.:
                print(clustname)
                population[unit.name] = unit

  n = (hs*np.reciprocal(ho))*30
  n = (hs*np.reciprocal(ho))*30
  n = (ls* np.reciprocal(lo)) * 30
  n = (ls* np.reciprocal(lo)) * 30
  Z=VV/WW
  n = (hs*np.reciprocal(ho))*30
  n = (hs*np.reciprocal(ho))*30


TT1\cl-maze1.1
TT1\cl-maze1.2
TT1\cl-maze1.3
TT1\cl-maze1.4
TT1\cl-maze1.5
TT1\cl-maze1.6
TT10\cl-maze1.10
TT10\cl-maze1.12
TT10\cl-maze1.2
TT10\cl-maze1.3
TT10\cl-maze1.5
TT10\cl-maze1.6
TT10\cl-maze1.7
TT10\cl-maze1.8
TT13\cl-maze1.1
TT13\cl-maze1.2
TT13\cl-maze1.3
TT13\cl-maze1.4
TT13\cl-maze1.5
TT13\cl-maze1.6
TT13\cl-maze1.7
TT13\cl-maze1.8
TT6\cl-maze1.10
TT6\cl-maze1.11
TT6\cl-maze1.12
TT6\cl-maze1.14
TT6\cl-maze1.15
TT6\cl-maze1.16
TT6\cl-maze1.2
TT6\cl-maze1.3
TT6\cl-maze1.5
TT6\cl-maze1.6
TT6\cl-maze1.7
TT6\cl-maze1.8
TT6\cl-maze1.9


In [216]:
def setupAlleyData(unit, alley, repFx, features):
    """
    Create a matrix (n,b) where n is the number of trials at that alley 
    (usually with rewards removed, but that's done in the unit.loadRawData fx)
    and b are the number of spatial bins in the 1d ratemap of each trial at that alley
    """
    X = [] # dont know how size of feature vec ahead of time so array it later
    Y = np.empty((0))
    
    for visitNum,visit in enumerate(unit.alleys[alley]):
        reprm = repFx(unit, alley, visitNum, features)
        X.append(reprm)
        Y = np.append(Y, unit.alleys[alley][visitNum]['metadata']['stimulus'])
    
    X = np.asarray(X)
    X[np.where(~np.isfinite(X))] = 0
    #X = preprocessing.StandardScaler().fit_transform(X)
    
    return X, Y

In [207]:
def runRandomForest(X, Y, parmdict):
    oobs = []
#     fimps = []
#     paths = []
    for i in range(parmdict['nRuns']):
        clf = RandomForestClassifier(n_estimators=parmdict['nTrees'], 
                                     oob_score=True,
                                     max_features = parmdict['Max Feats'],
                                     max_depth = parmdict['Max Depth']
                                    )       
        clf.fit(X,Y)
        oobs.append(clf.oob_score_)
#         fimps.append(clf.feature_importances_)
#         paths.append(clf.decision_path(X))
        
    return oobs

In [78]:
def parmString(parmdict, features):
    string = ''
    for k,v in parmdict.items():
        string += f"{k}:{v}\n"
    for f in features:
        string +=f"{f}\n"
    return string

In [211]:
parmdict = {
    'nRuns':3, # reps of decoding for a given dataset. tech replicates. 
    'nTrees':10000, 
    'Max Depth':None, 
    'Max Feats':'auto',
    'Cell inclusion in population': '95pctile >=1Hz overall',
    'Visit inclusion in data matrix': '12x visits Mean alley activity >=1 Hz, 3 contig bins >=20% max avg field',
    'Bootstrap': 'None',
    'Shuffle': 200
    }

features = [
    'rm'
           ]

In [15]:
clust = 'TT6cl-maze1.8'
unit = population[clust]

string = parmString(parmdict, features)
stamp = util.genTimestamp()

fig, axes = plt.subplots(3,3,figsize=(10,10))
plt.suptitle(f"{clust} RF Decoding by Alley")
plt.text(0.005, 0.2, string, fontsize=6, transform=plt.gcf().transFigure)

for i,alley in enumerate(Def.beltwayAlleys):
    valid = Filt.checkMinimumPassesActivity(unit, alley)
    if valid:
        ax = fig.axes[i]
        print(alley)
        X,Y = setupAlleyData(unit, alley, createVisitSummaryFeatures, features)
        realoobs = runRandomForest(X,Y, parmdict)
        realmean = np.mean(realoobs)
        nulloobs = np.zeros((0,1))
        for i in range(parmdict['Shuffle']):
            Ys = np.random.permutation(Y)
            ssoobs = runRandomForest(X,Ys, parmdict)
            nulloobs = np.vstack((nulloobs, np.mean(ssoobs)))
        
        ax.hist(nulloobs)
        ax.vlines(np.percentile(nulloobs, 95),0,150,'k')
        ax.vlines(realmean,0,100,'r')
        ax.set_title(f"Alley {alley}, mean {realmean} vs {np.percentile(nulloobs, 95)} 95% null pct")
    else:
        fig.axes[i].set_title(f"Insufficient Activity in Alley {alley} for Decoding")

1


  for dir in range(input.ndim)]


KeyboardInterrupt: 

## Cluster Metrics
#### Treat trials under a given texture as a cluster with a centroid. Use basic metrics like avg distance to centroid, intercentroid distance,
#### and intersample distance (compared to shuffle w corrected pvalue) to test effect of stimulus on neural representation

In [217]:
from sklearn import decomposition
from sklearn.neighbors import NearestCentroid
from sklearn import preprocessing
from scipy.spatial.distance import pdist
import numpy as np
import ratterdam_Defaults as Def

def interCentroidDistance(ncObj):
    """
    Input a NearestCentroid object (sklearn)
    and return pairwise Euclidian distances 
    between them
    """
    return pdist(ncObj.centroids_)

def avgDistToCentroid(ncObj,sa,sb,sc):
    """
    Input: ncObj - NearestCentroid object (sklearn)
           sa,sb,sc - vectors which produced centroids
    """
    avga = np.mean(np.linalg.norm(ncObj.centroids_[0]-sa,axis=1))
    avgb = np.mean(np.linalg.norm(ncObj.centroids_[1]-sb,axis=1))
    avgc = np.mean(np.linalg.norm(ncObj.centroids_[2]-sc,axis=1))
    return avga, avgb, avgc

def interSampleDistance(sa,sb,sc):
    """
    Input sa,sb,sc - samples from a,b,c trials
    Return avg Euclidian distance within each set of samples
    """
    avga = np.mean(pdist(sa))
    avgb = np.mean(pdist(sb))
    avgc = np.mean(pdist(sc))
    return avga, avgb, avgc
	

def splitSamplesbyLabel(X,y):
    """
    Given input matrix X with samples of different
    labels, stored in Y, split them into arrays by label
    A=0, B=1, C=2 for texture labels by convention
    """
    a = X[y==0]
    b = X[y==1]
    c = X[y==2]
    return a,b,c

def findCentroids(X,Y):
    """
    Given X (n,f) and Y (n,)
    create NearestCentroid object 
    and return it
    """
    nc = NearestCentroid(metric='euclidean')
    nc.fit(X,Y)
    return nc

def outlierRemoval(X,Y):
    """
    Removes outliers from whole data matrix X
    using IsolationForest (sklearn). 
    Returns X,Y with those samples removed
    """
    clf = IsolationForest()
    clf.fit(X)
    yo = clf.predict(X)
    X = X[yo==1]
    Y = Y[yo==1]
    return X,Y

In [218]:
# Intersample distance	
npca=3
features = ['rm']
X,Y = setupAlleyData(unit, alley, createVisitSummaryFeatures, features)
#X,Y = outlierRemoval(X,Y)
cent = findCentroids(X,Y)
at,bt,ct = splitSamplesbyLabel(X,Y)
ad,bd,cd  = interSampleDistance(at,bt,ct)
mind = min(ad,bd,cd)

nshuff=1000
ss = np.empty((0))
for s in range(nshuff):
    Ys = np.random.permutation(Y)
    scent = findCentroids(X,Ys)
    sat,sbt,sct = splitSamplesbyLabel(X,Ys)
    ssd = np.min(interSampleDistance(sat,sbt,sct))
    ss = np.append(ss,ssd)
plt.figure()
plt.hist(ss)
plt.vlines(mind,0,200,'r')
plt.vlines(np.percentile(ss,1),0,200,'k')
plt.title(f"{unit.name} ISD pca comp = {npca}")


# Distance to centroids
npca=False
features = ['rm']
X,Y = setupAlleyData(unit, alley, createVisitSummaryFeatures, features)
#X,Y = outlierRemoval(X,Y)
cent = findCentroids(X,Y)
at,bt,ct = splitSamplesbyLabel(X,Y)
ad,bd,cd  = avgDistToCentroid(cent,at,bt,ct)
mind = min(ad,bd,cd)

nshuff=1000
ss = np.empty((0))
for s in range(nshuff):
    Ys = np.random.permutation(Y)
    scent = findCentroids(X,Ys)
    sat,sbt,sct = splitSamplesbyLabel(X,Ys)
    ssdc = np.min(avgDistToCentroid(scent,sat,sbt,sct))
    ss = np.append(ss,ssdc)

plt.figure()
plt.hist(ss)
plt.vlines(mind,0,200,'r')
plt.vlines(np.percentile(ss,1),0,200,'k')
plt.title(f"{unit.name} MDC pca comp = {npca}")

# intercentroid distance
npca=False
features = ['rm']
X,Y = setupAlleyData(unit, alley, createVisitSummaryFeatures, features)
#X,Y = outlierRemoval(X,Y)
cent = findCentroids(X,Y)
icd  = interCentroidDistance(cent)

nshuff=1000
ss = np.empty((0))
for s in range(nshuff):
    Ys = np.random.permutation(Y)
    scent = findCentroids(X,Ys)
    sicd = np.max(interCentroidDistance(scent))
    ss = np.append(ss,sicd)

plt.figure()
plt.hist(ss)
plt.vlines(max(icd),0,200,'r')
plt.vlines(np.percentile(ss,99),0,200,'k')
plt.title(f"{unit.name} ICD pca comp = {npca}")



ValueError: A 2-dimensional array must be passed.

### Outlier Detection

In [443]:
a,b,c = splitSamplesbyLabel(X,Y)


In [444]:
# Running isolation forest on trials by txt, one each per loop
s = c
txt = 'C'

clf = IsolationForest()
clf.fit(s)
yo = clf.predict(s)
plt.figure()
plt.xlim([-2,6])
plt.ylim([-2,6])
plt.scatter(s[:,0], s[:,1])
plt.scatter(s[yo==-1][:,0], s[yo==-1][:,1],c='r')
plt.title(txt)

Text(0.5,1,'C')

In [445]:
# Running on all trials. seems like similar results and will do it this way for now
clf.fit(X)
yo = clf.predict(X)
plt.figure()
plt.xlim([-2,6])
plt.ylim([-2,6])
plt.title("All trials")
plt.scatter(X[:,0],X[:,1])
plt.scatter(X[yo==-1][:,0],X[yo==-1][:,1],c='r')

<matplotlib.collections.PathCollection at 0x1f713723cf8>

### Clustering Algs - Spectral Clustering Decoding, KMeans, 

In [495]:
stamp = util.genTimestamp()
parmdict = {
    'name':unit.name,
    'alley':5,
    'npca':False,
    'doSuffle':True,
    'nShuffle':500,
    'sliding':True,
    'ts':stamp,
    'alg':KMeans,
    'algParms':{'n_clusters':3},
    'metric':metrics.adjusted_rand_score,
    'nTrees':700,
    'nRuns':10,
    'Max Feats':None,
    'Max Depth':'auto'
            }

#features = ['com', 'max95', 'auc','locmax95', 'comval','mean']
features = ['rm']
string = parmString(parmdict, features)


X,Y = setupData(unit, parmdict['alley'], features, parmdict['npca'])
X,Y = outlierRemoval(X,Y)

if parmdict['sliding']:
    idx = slidingCArrWindow(X)
    allS = []
    allR = []
    for i in idx:
        Xw = X[:,i[0]:i[1]]
        model = parmdict['alg'](**parmdict['algParms'])
        l=model.fit_predict(Xw,Y)
        r = parmdict['metric'](Y,l)
        allR.append(r)

        if shuffle:
            s = []
            for i in range(nShuffle):
                s.append(parmdict['metric'](np.random.permutation(Y),l))
        allS.append(s)
        
# plt.figure()
# plt.text(0.005, 0.2, string, fontsize=6, transform=plt.gcf().transFigure)
# plt.hist(s)
# plt.vlines(r,0,200,'r')
# npct = np.percentile(s,95)
# plt.vlines(npct,0,150,'k')
# plt.title(f"{unit.name} A{alley} {parmdict['alg']}, ARI {round(r,2)} vs Shuffle 95% {round(npct,2)}")

In [388]:
def slidingCArrWindow(X,stepSize=1,winSize=4):
    """return col idx of arrays to slice in sliding window fashion"""
    _max = X.shape[1]
    idx = []
    for i in range(_max):
        a,b = 0+(i*stepSize), winSize+(i*stepSize)
        if b <= _max:
            idx.append([a,b])
    return idx

In [496]:
plt.plot([np.percentile(i,95) for i in allS],'k--')
plt.plot([min(i) for i in allS],'k--')
plt.plot(allR,'r')
plt.plot([np.percentile(i,99.2) for i in allS],'k')

[<matplotlib.lines.Line2D at 0x1f717707748>]

### Sliding Window RF

In [500]:
stamp = util.genTimestamp()
parmdict = {
    'name':unit.name,
    'alley':5,
    'npca':False,
    'doSuffle':True,
    'nShuffle':100,
    'sliding':True,
    'ts':stamp,
    'alg':KMeans,
    'algParms':{'n_clusters':3},
    'metric':metrics.adjusted_rand_score,
    'nTrees':700,
    'nRuns':10,
    'Max Feats':None,
    'Max Depth':6
            }

#features = ['com', 'max95', 'auc','locmax95', 'comval','mean']
features = ['rm']
string = parmString(parmdict, features)

X,Y = setupData(unit, parmdict['alley'], features, parmdict['npca'])
X,Y = outlierRemoval(X,Y)

if parmdict['sliding']:
    idx = slidingCArrWindow(X)
    allS = []
    allR = []
    for i in idx:
        Xw = X[:,i[0]:i[1]]
        oob = np.mean(runRandomForest(Xw,Y,parmdict)) 
        
        if shuffle:
            s = []
            for i in range(parmdict['nShuffle']):
                s.append(np.mean(runRandomForest(Xw, np.random.permutation(Y), parmdict)))
            allS.append(s)

## Shuffling Test of Envelope Fluctuations 

In [3]:
# Function declarations
def computeTestStatistic_Diffs(groupX, groupY):
    """
    Takes two arrays. Each of which is a stack
    of single trial {RM or avg? decide}. 
    
    Avgs them to a summary trace and returns their bin-wise diff
    """    
    
    maskX= np.ma.masked_invalid(groupX)
    avgX = maskX.mean(axis=0) # ignores inf and nan
    maskY= np.ma.masked_invalid(groupY)
    avgY = maskY.mean(axis=0) # ignores inf and nan
    return avgX-avgY

def shuffleArray(array):
    for row in range(len(array)):
        array[row] = np.random.permutation(array[row])
    return array

In [74]:
# Step 1 - Select and load data
rat = "R859"
expCode = "BRD3"
datafile = f"E:\\Ratterdam\\{rat}\\{rat}{expCode}\\"

alleyTracking, alleyVisits,  txtVisits, p_sess, ts_sess = Parse.getDaysBehavioralData(datafile, expCode)

In [76]:
unit = Core.UnitData('TT6\\cl-maze1.8', datafile, expCode, Def.alleyBounds, alleyVisits, txtVisits, p_sess, ts_sess)
unit.loadData_raw()
alley = 7
labels = np.asarray([visit['metadata']['stimulus'] for visit in unit.alleys[alley]])

  n = (hs*np.reciprocal(ho))*30
  n = (hs*np.reciprocal(ho))*30
  n = (ls* np.reciprocal(lo)) * 30
  n = (ls* np.reciprocal(lo)) * 30
  Z=VV/WW


In [77]:
# Step 2 - Create mean field
rms = np.empty((0, Def.singleAlleyBins[0]-1))
for visit in unit.alleys[alley]:
    rms = np.vstack((rms, np.nan_to_num(visit['ratemap1d'])))
mean = np.nanmean(rms,axis=0)

In [78]:
# Step 3 - Determine spatial extend of field.
# Any bin that is at least 20% of the max, for now. This is standard practice but won't 
# handle certain cases well. Like two fields would be treated as one
thresh = 0.15# pct 
field = mean[np.where(mean>=(thresh*np.nanmax(mean)))] # find bins at least thresh% of max
field_idx = np.where(mean>=(thresh*np.nanmax(mean)))[0]
rmsin = rms[:,field_idx]

In [79]:
# Step 4 Define real test stats
a,b,c = np.nanmean(rmsin[np.where(labels=='A')[0]],axis=0), np.nanmean(rmsin[np.where(labels=='B')[0]],axis=0), np.nanmean(rmsin[np.where(labels=='C')[0]],axis=0)
ab, bc, ac = a-b, b-c, a-c

In [80]:
# Step 5 - shuffle bins within field for each visit. Create array of null test stats
# Note this is for PILOT version of alg. May do more fine-grained test of shuffling spikes
# against behavior if deemed necessary/useful
rmsshuffle = copy.deepcopy(rms)
rmsshuffle = rmsshuffle[:,field_idx]
sab_array, sbc_array, sac_array = np.empty((0, field_idx.shape[0])), np.empty((0, field_idx.shape[0])), np.empty((0, field_idx.shape[0]))

for i in range(5000):
    shuffleArray(rmsshuffle) # shuffle elements of each row in place. 
    sa,sb,sc = np.nanmean(rmsshuffle[np.where(labels=='A')[0]],axis=0), np.nanmean(rmsshuffle[np.where(labels=='B')[0]],axis=0), np.nanmean(rmsshuffle[np.where(labels=='C')[0]],axis=0)
    sab, sbc, sac = sa-sb, sb-sc, sa-sc
    sab_array = np.vstack((sab_array, sab))
    sbc_array = np.vstack((sbc_array, sbc))
    sac_array = np.vstack((sac_array, sac))

In [81]:
# Step 6 - Plot results
plt.figure()
plt.plot(sab_array.T,color='k',alpha=0.2)
plt.plot(ab,'r')
fwer, lower, upper = Perm.global_FWER_alpha(sab_array,unit)
plt.plot(upper,'r--')
plt.plot(lower,'r--')

plt.figure()
plt.plot(sbc_array.T,color='k',alpha=0.2)
plt.plot(bc,'b')
fwer, lower, upper = Perm.global_FWER_alpha(sbc_array,unit)
plt.plot(upper,'r--')
plt.plot(lower,'r--')

plt.figure()
plt.plot(sac_array.T,color='k',alpha=0.2)
plt.plot(ac,'g')
fwer, lower, upper = Perm.global_FWER_alpha(sac_array,unit)
plt.plot(upper,'r--')
plt.plot(lower,'r--')

[<matplotlib.lines.Line2D at 0x13665307d30>]

In [152]:
# sep 21 2020 - this is possibly promising. but this just tests for spatial pattern within a visit but not an overall different between
# textures that is spatially uniform across alley. so integrated this into ratterdam_PermutationTests.py

## Spike Timing Analysis

In [3]:
rat = 'R859'
expCode = "BRD5"
datafile = f"E:\\Ratterdam\\{rat}\\{rat}{expCode}\\"
figpath = f'E:\\Ratterdam\\{rat}\\spike_timing\\{expCode}\\'
    
alleyTracking, alleyVisits,  txtVisits, p_sess, ts_sess = Parse.getDaysBehavioralData(datafile, expCode)
population = {}
for subdir, dirs, fs in os.walk(datafile):
    for f in fs:
        if 'cl-maze1' in f and 'OLD' not in f and 'Undefined' not in f:
            clustname = subdir[subdir.index("TT"):] + "\\" + f
            unit = Core.UnitData(clustname, datafile, expCode, Def.alleyBounds, alleyVisits, txtVisits, p_sess, ts_sess)
            unit.loadData_raw()
            validalleys = []
            for a in [16, 17, 3, 1, 5, 7, 8, 10, 11]:
                valid = Filt.checkMinimumPassesActivity(unit, a, pass_thresh=12)
                validalleys.append(valid)
            if sum(validalleys) > 0:         
                print(clustname)  
                population[clustname] = unit

  n = (hs*np.reciprocal(ho))*30
  n = (hs*np.reciprocal(ho))*30
  n = (ls* np.reciprocal(lo)) * 30
  n = (ls* np.reciprocal(lo)) * 30
  Z=VV/WW


TT1\cl-maze1.1
TT1\cl-maze1.2
TT1\cl-maze1.3
TT1\cl-maze1.4
TT1\cl-maze1.5
TT1\cl-maze1.6
TT10\cl-maze1.10
TT10\cl-maze1.11
TT10\cl-maze1.12
TT10\cl-maze1.5
TT10\cl-maze1.6
TT10\cl-maze1.7
TT13\cl-maze1.1
TT13\cl-maze1.2
TT13\cl-maze1.3
TT13\cl-maze1.4
TT13\cl-maze1.5
TT13\cl-maze1.6
TT13\cl-maze1.7
TT13\cl-maze1.8
TT6\cl-maze1.10
TT6\cl-maze1.11
TT6\cl-maze1.12
TT6\cl-maze1.14
TT6\cl-maze1.15
TT6\cl-maze1.16
TT6\cl-maze1.2
TT6\cl-maze1.4
TT6\cl-maze1.6
TT6\cl-maze1.7
TT6\cl-maze1.8
TT6\cl-maze1.9


In [86]:
stamp = util.genTimestamp()
cmap = util.makeCustomColormap()
plt.rc('xtick',labelsize=8)
plt.rc('ytick',labelsize=8)
for clust in population.keys():
    include, adjAlpha, alleys = checkInclusion(population[clust])
    if include:
        print(clust)
        unit = population[clust]
        with PdfPages(figpath+f"{stamp}_{unit.name}_{Def.velocity_filter_thresh}vfilt_{Def.includeRewards}R_ISI.pdf") as pdf:
            for alley in alleys:          
                valid = Filt.checkMinimumPassesActivity(unit, alley, pass_thresh=12)
                if valid:        

                    begin, end, bsize = 0, 0.150e6, 2000
                    spikes = {txt: [visit['spikes'] for visit in unit.alleys[alley] if visit['metadata']['stimulus']==txt] for txt in ['A','B','C']}   
                    isis = {txt: [np.diff(trial[:,0]) for trial in spikes[txt]] for txt in ['A','B','C']}
                    bins = np.arange(begin,end,bsize)
                    hists = {txt: np.asarray([np.histogram(np.clip(trial,bins[0],bins[-1]),bins=bins)[0] for trial in isis[txt]]) for txt in ['A','B','C']}

                    # for the heatmaps in col 1
                    normed = {txt: hists[txt][:,:-1]/np.sum(hists[txt][:,:-1]) for txt in ['A','B','C']}
                    _vmax = np.nanmax(np.vstack(([hists[txt][:,:-1] for txt in ['A','B','C']])))

                    s = f"MW-U test (alpha={round(adjAlpha,5)}\n"
                    pairs = ['AB','BC','CA']
                    ps = []
                    for pair in pairs:
                        txtX,txtY = pair[0], pair[1]
                        x,y = np.sum(hists[txtX][:,:-1],axis=0)/np.sum(hists[txtX][:,:-1]), np.sum(hists[txtY][:,:-1],axis=0)/np.sum(hists[txtY][:,:-1])
                        _,p = mannwhitneyu(x,y)
                        s += f"{pair}: {round(p,5)}\n"
                        ps.append(p)


                    fig, ax = plt.subplots(3,2, figsize=(7,8), sharey='col')
                    for i, txt in enumerate(['A','B','C']):
                        ax[i,0].bar(range(hists[txt].shape[1]-1), np.sum(hists[txt][:,:-1],axis=0)/np.sum(hists[txt][:,:-1]), color='k') #dont include last bin, its the overflow one and has a much higher freq then the rest. 
                        ax[i,0].set_title(f"{txt}, {pairs[i]} MW-U p={round(ps[i],6)}",fontsize=10)
                        ax[i,1].imshow(hists[txt][:,:-1],aspect='auto',cmap=cmap,vmin=0,vmax=_vmax)
                    ax[0,1].set_title(f"ISIs by Trial, max:{round(_vmax,4)}",fontsize=10)
                    ax[1,0].set_ylabel('Normalized Frequency', fontsize=10)
                    ax[2,0].set_xlabel('Inter-spike Interval', fontsize=10)

                    plt.suptitle(f"{alley} ISIs {begin}-{end/1000}ms, {bsize/1000}ms bins, adj alpha={round(adjAlpha,5)}")


                    validalleys = []
                    for a in [16, 17, 3, 1, 5, 7, 8, 10, 11]:
                        valid = Filt.checkMinimumPassesActivity(unit, a, pass_thresh=12)
                        validalleys.append(valid)
                    adjAlpha = 0.05/(3*sum(validalleys))


                    pdf.savefig()
                    plt.close()

                    velocities = computeInstSpeed(unit,alley)
                    fig,ax = plt.subplots(3,1,figsize=(7,8), sharey=True)
                    for i,txt in enumerate(['A','B','C']):
                        fig.axes[i].hist(velocities[txt],color='k',bins=50,normed=True)
                        fig.axes[i].set_title(txt)
                    plt.suptitle(f"{unit.name} Alley {alley} Instantaneous Speeds")
                    plt.xlabel("Speed (cm/s)")
                    fig.axes[1].set_ylabel("Frequency")

                    pdf.savefig()
                    plt.close()


TT1\cl-maze1.1
TT1\cl-maze1.2
TT1\cl-maze1.3
TT1\cl-maze1.4




TT1\cl-maze1.5
TT1\cl-maze1.6
TT10\cl-maze1.10
TT10\cl-maze1.11
TT10\cl-maze1.12
TT10\cl-maze1.6
TT10\cl-maze1.7
TT13\cl-maze1.1
TT13\cl-maze1.2
TT13\cl-maze1.3
TT13\cl-maze1.4
TT13\cl-maze1.7
TT13\cl-maze1.8
TT6\cl-maze1.10
TT6\cl-maze1.11
TT6\cl-maze1.12
TT6\cl-maze1.14
TT6\cl-maze1.15
TT6\cl-maze1.16
TT6\cl-maze1.2
TT6\cl-maze1.4
TT6\cl-maze1.6
TT6\cl-maze1.7
TT6\cl-maze1.8
TT6\cl-maze1.9


In [80]:
def computeInstSpeed(unit, alley):
    """
    Give unit and alley
    Compute point-to-point speed as inter-sample
    time difference in seconds and distance of LED mvmt in cm.
    return a dict velocities[A,B,C] = arr(n,) where n is 
    number of samples-1 for all trials under a given txt
    """
    
    occs = {'A':np.empty((0,3)), 'B':np.empty((0,3)), 'C':np.empty((0,3))}
    for visit in unit.alleys[alley]:
        occs[visit['metadata']['stimulus']] = np.vstack((occs[visit['metadata']['stimulus']], visit['occs']))
    
    velocities = {txt:np.empty((0)) for txt in ['A','B','C']}
    for txt in ['A','B','C']:
        for i in range(occs[txt].shape[0]-1):
            tsA,tsB = occs[txt][i,0], occs[txt][i+1,0]
            dist = distance(occs[txt][i,1:],occs[txt][i+1,1:])
            dist = dist/Def.ptsCm_macaulay
            v = (dist/(tsB-tsA))*1e6
            velocities[txt] = np.hstack((velocities[txt], v))
    return velocities

In [77]:
def distance(p0, p1):
    return math.sqrt((p0[0] - p1[0])**2 + (p0[1] - p1[1])**2)

In [79]:
def consecutive(data, stepsize=1):
    return np.split(data, np.where(np.diff(data) != stepsize)[0]+1)

def findField(unit,alley,sthresh=3,rthresh=0.5,pctThresh=None):
    """
    Identify a field as a set of sthresh or more contiguous bins
    greater than some thresh
    rthresh - an absolute thresh in Hz
    pct thresh - a pct of max 
    One of these must be None, cant have both
    """
    rms = np.empty((0, Def.singleAlleyBins[0]-1))
    for visit in unit.alleys[alley]:
        rm = visit['ratemap1d']
        rms = np.vstack((rms, rm))
        
    if rthresh is not None and pctThresh is not None:
        print("Error - conflicting thresh definitions")
    mean = np.nanmean(rms, axis=0)
    if rthresh is not None:
        thresh = rthresh        
        fi = np.where(mean>=rthresh)[0]
    elif pctThresh is not None:
        thresh = pctThresh
        fi = np.where(mean>=(pctthresh*np.nanmax(mean)))[0]        
    
    field = True
    try:
        field_idx = np.concatenate(([i for i in consecutive(fi) if len(i)>=sthresh]))
    except:
        field = False
        field_idx = None
    return field, field_idx

def checkInclusion(unit):
    """
    Apply inclusion criteria to a unit, deciding which(if any)
    alleys will be included in analysis. If 0, cell is not used.
    If >0, return: inclusion bool, adj alpha, alley(s) to be included
    adj alpha just takes #alleys into account
    """
    validalleys = []
    for alley in Def.beltwayAlleys:
        passesCheck = Filt.checkMinimumPassesActivity(unit, alley, pass_thresh=12)
        fieldCheck, _ = findField(unit, alley)
        if passesCheck is True and fieldCheck is True:
            validalleys.append(alley)
    if len(validalleys)>0:
        alphaCorr = 0.05/(len(validalleys))
        include = True
    else:
        alphaCorr = None
        include = False
    return include, alphaCorr, validalleys

### 10-1-20 Log transform and other tests. 

In [38]:
unit = population['TT6\\cl-maze1.2']
alley=3

In [50]:
begin, end, bsize = 0, 0.100e6, 1000
spikes = {txt: [visit['spikes'] for visit in unit.alleys[alley] if visit['metadata']['stimulus']==txt] for txt in ['A','B','C']}   
isis = {txt: [np.diff(trial[:,0]) for trial in spikes[txt]] for txt in ['A','B','C']}
bins = np.arange(begin,end,bsize)
hists = {txt: np.asarray([np.histogram(np.clip(trial,bins[0],bins[-1]),bins=bins)[0] for trial in isis[txt]]) for txt in ['A','B','C']}

# for the heatmaps in col 1
normed = {txt: np.sum(hists[txt][:,:-1],axis=0)/np.sum(hists[txt][:,:-1]) for txt in ['A','B','C']}
summed = {txt: np.sum(hists[txt][:,:-1],axis=0) for txt in ['A','B','C']}
_vmax = np.nanmax(np.vstack(([hists[txt][:,:-1] for txt in ['A','B','C']])))

In [161]:
def trialRM(spikes, position, alley, bins):
    longDimBins, shortDimBins = bins
    alleyBins = {i:{'rows':None,'cols':None} for i in range(17)}
    dim=1
    for i,v in enumerate(unit.alleyBounds.values()):
        x,y = v
        if i in [i-1 for i in [1,5,7,2,13,9,16,14,11]]:
            bins = [shortDimBins, longDimBins] #again, np.2dhist takes [x,y] which means [c, r]
        elif i in [i-1 for i in [3,4,6,8,17,15,12,10]]:
            bins = [longDimBins, shortDimBins]
        else:
            print("error")
        alleyBins[i]['rows'] = np.linspace(unit.alleyBounds[i][1][0], unit.alleyBounds[i][1][1],num=bins[0])
        alleyBins[i]['cols'] = np.linspace(unit.alleyBounds[i][0][0], unit.alleyBounds[i][0][1],num=bins[1])


    rbins,cbins = alleyBins[alley-1]['rows'], alleyBins[alley-1]['cols']
    hs = np.histogram2d(spikes[:,2],spikes[:,1],bins=[rbins, cbins])[0]
    ho = np.histogram2d(position[:,2],position[:,1],bins=[rbins, cbins])[0]
    if dim == 2:
        n = (hs*np.reciprocal(ho))*30
        n[np.where(ho==0)] = np.nan
        n = util.weird_smooth(n,Def.smoothing_2d_sigma)
        n[np.where(ho==0)] = np.nan
    elif dim == 1:
        ls,lo  = np.sum(hs,axis=util.getAxType(ho)), np.sum(ho,axis=util.getAxType(ho))
        n = (ls* np.reciprocal(lo)) * 30
        if np.count_nonzero(~np.isnan(n))>1:
            n = util.stepsmooth(n,Def.smoothing_1d_sigma)
        n[np.where(lo==0)] = np.nan
    return n,rbins,cbins

### 10-8-20 Fitting empirical dist to trial and using trial-by-trial fitted params to identify txt 

In [166]:
unit = population['TT6\\cl-maze1.2']
alley=3

In [167]:
alphas, locations, betas, txts = [], [], [], []
for visit in unit.alleys[alley]:
    rm = visit['ratemap1d'][~np.isnan(visit['ratemap1d'])] # flip if field is at far end of alley
    a,loc,b = scipy.stats.gamma.fit(rm)
    alphas.append(a)
    locations.append(loc)
    betas.append(b)
    txts.append(visit['metadata']['stimulus'])
    
alphas = np.asarray(alphas)
locations = np.asarray(locations)
betas = np.asarray(betas)
txts  = np.asarray(txts)

In [177]:
plt.scatter(betas, alphas,c=c)
# plt.yscale('log')
# plt.xscale('log')

<matplotlib.collections.PathCollection at 0x2873eaf4630>

## kNN 