# Ratterdam Data Filtering
### Notebook to test code related to velocity filtering, EEG filtering, normalization,
### and other tertiary pre-processing steps that affect what data enters 
### the core data structures

In [1]:
import numpy as np, matplotlib.pyplot as plt, random, json, pickle, datetime, copy, socket, os
from numpy.linalg import norm as npNorm
from scipy.stats import sem
import matplotlib.colors as colors
from scipy.ndimage import gaussian_filter as gauss # for smoothing ratemaps
import sys
from matplotlib.colors import LinearSegmentedColormap
import matplotlib.gridspec as GS
from matplotlib.backends.backend_pdf import PdfPages
from importlib import reload

# Machine-specific locations of the code checkout (codeDirBase) and the raw
# data drive (dataDrive), selected by hostname. Backslashes are escaped
# explicitly: sequences such as "\R" and "\K" are invalid string escapes and
# only produce the intended path by accident (with a warning on newer Pythons).
if socket.gethostname() == 'Tolman':
    codeDirBase = 'C:\\Users\\whockei1\\Google Drive'
    dataDrive = "E:\\Ratterdam"
elif socket.gethostname() == 'DESKTOP-BECTOJ9':
    codeDirBase = 'C:\\Users\\whock\\Google Drive'
    dataDrive = "D:\\Knierim_Lab\\Ratterdam"
else:
    # Fail with an explicit message rather than a NameError on codeDirBase below.
    raise RuntimeError(
        f"Unknown host '{socket.gethostname()}': add its codeDirBase and dataDrive paths to this cell"
    )

# Make the project modules imported right after this block discoverable.
sys.path.insert(0, codeDirBase + '\\KnierimLab\\Ratterdam\\Code')
sys.path.insert(0, codeDirBase + '\\Python_Code\\KLab\\mts_analysis')
import utility_fx as util
import ratterdam_ParseBehavior as pBehav
import ratterdam_CoreDataStructures as core
from ratterdam_Defaults import *

In [2]:
%qtconsole --style native

In [22]:
# Select the recording session: rat ID and experiment/day code.
rat = "R765"
exp = "RFD7"
# Session directory on the data drive: <dataDrive>\<rat>\<rat><exp>\
datafile = f"{dataDrive}\\{rat}\\{rat}{exp}\\"
# Cluster file for the unit to analyse, given relative to the session directory.
clustname = 'TT4\\cl-maze1.9'
# presumably the list of all cluster files found for this session - TODO confirm in utility_fx
clustList = util.getClustList(datafile)
# Load behavioural data; the meaning of the third argument (0) is defined by core.BehavioralData.
behav = core.BehavioralData(datafile, exp, 0)
ts, position, alleyTracking, alleyVisits,  txtVisits = behav.loadData()
# Build the unit object. alleyBounds is not defined in this notebook;
# presumably it comes from the ratterdam_Defaults star import - verify.
unit = core.UnitData(clustname, datafile, exp, alleyBounds, alleyVisits, txtVisits, position, ts)
unit.loadData_raw()

  n = (hs*np.reciprocal(ho))*33
  n = (hs*np.reciprocal(ho))*33
  Z=VV/WW
  n = (ls* np.reciprocal(lo)) * 33
  n = (ls* np.reciprocal(lo)) * 33
  W=0*U.copy()+1


## Velocity Filtering

In [14]:
## Velocity filtering: searching for a workable speed threshold for Ratterdam
winsz = 50     # forward-looking smoothing window, in samples
vthresh = 3    # speed threshold applied to the smoothed speed
ptsCm = 4.85   # presumably camera points per cm - TODO confirm

# Per-sample differences of the three position columns (col 0 is used as time, cols 1-2 as x, y).
gradts, gradx, grady = (np.gradient(position[:, col]) for col in range(3))

# Smooth each spatial gradient with a forward window (truncated near the end
# of the session), then convert from points to cm.
gradx = np.asarray([np.mean(gradx[start:start + winsz]) for start in range(len(gradx))]) / ptsCm
grady = np.asarray([np.mean(grady[start:start + winsz]) for start in range(len(grady))]) / ptsCm

# Component velocities; the 1e6 factor presumably converts microsecond timestamps to seconds - verify.
vx = 1e6 * gradx / gradts
vy = 1e6 * grady / gradts
v = np.sqrt(np.square(vx) + np.square(vy))

# Second smoothing pass, this time on the speed itself, using the same forward window.
sv = np.asarray([np.mean(v[start:start + winsz]) for start in range(len(v))])

# Split the position samples by smoothed speed.
vf_pos = position[sv > vthresh]
belowthresh_pos = position[sv <= vthresh]

## Directionality
The logic is: get the midpoints of the alley's end bounds and check the distance from the entry point to each; which is closer?
Then see whether that end is, by convention, the NE or SW end, and add the visit to that group.
To determine which end is NE/SW, check whether its x (or y) is smaller
than that of the other alley end, depending on the orientation of the alley,
which in turn can be checked by seeing if the alley's x bound range > its y bound range.

In [4]:
def extractCorners(givenAlleyBounds):
    """Convert alley bounds [[x1, x2], [y1, y2]] into its four corner points.

    Corner layout (each corner is an [x, y] list):
        ul = [x1, y2]
        ll = [x1, y1]
        ur = [x2, y2]
        lr = [x2, y1]

    Returns the tuple (ul, ll, ur, lr).
    """
    xRange, yRange = givenAlleyBounds[0], givenAlleyBounds[1]
    x1, x2 = xRange[0], xRange[1]
    y1, y2 = yRange[0], yRange[1]

    upperLeft = [x1, y2]
    lowerLeft = [x1, y1]
    upperRight = [x2, y2]
    lowerRight = [x2, y1]
    return upperLeft, lowerLeft, upperRight, lowerRight

In [5]:
def checkCloserPoint(p1, p2, pt):
    """
    Decide which of two reference points is closer to a query point.

    p1, p2 : reference points, each [x, y] (list, tuple or array)
    pt     : query point, also [x, y]

    Returns the string "first" if pt is closer to p1, "second" if it is
    closer to p2, and None if the two Euclidean distances are equal
    (or cannot be ordered, e.g. because of NaNs).
    """
    # Coerce to float arrays so plain lists (as produced by extractCorners)
    # can be subtracted, and so unsigned-integer inputs cannot wrap around.
    target = np.asarray(pt, dtype=float)
    d1 = npNorm(np.asarray(p1, dtype=float) - target)
    d2 = npNorm(np.asarray(p2, dtype=float) - target)
    if d1 < d2:
        return "first"
    if d2 < d1:
        return "second"
    return None

In [6]:
def checkVisitEntrySide(visitOccs, bounds):
    """
    Label one alley visit by the side it was entered from.

    visitOccs : [ts, x, y] array for a single visit; only the first row is used
    bounds    : [ul, ll, ur, lr] corner list for the alley in question

    Returns "SW" when the first position sample is closer to the LL corner,
    "NE" when it is closer to the UR corner, and None when it is equidistant.
    """
    entryXY = visitOccs[0,1:]  # drop the timestamp column, keep x, y
    lowerLeft, upperRight = bounds[1], bounds[2]
    nearest = checkCloserPoint(lowerLeft, upperRight, entryXY)

    if nearest is None:
        return None
    if nearest == "first":
        return "SW"
    if nearest == "second":
        return "NE"

In [7]:
def groupVisitsByDir(alley):
    """
    Split the visits to one alley (1-indexed) by entry side.

    Each visit's first occupancy point is compared against the LL and UR
    corners of the alley (see checkVisitEntrySide); visits whose side cannot
    be decided are left out.

    Reads `unit` and `alleyBounds` from the enclosing namespace.

    Returns a dict {"SW": [...], "NE": [...]} of visit indices into
    unit.alleys[alley].
    """
    corners = extractCorners(alleyBounds[alley-1]) # (ul, ll, ur, lr); alleyBounds is 0-indexed
    visitDirs = {"SW":[], "NE":[]}
    for visitIdx, visit in enumerate(unit.alleys[alley]):
        entrySide = checkVisitEntrySide(visit['occs'], corners)
        if entrySide is None:
            continue
        visitDirs[entrySide].append(visitIdx)

    return visitDirs

In [8]:
def visitsByDir_LinearRM(alley):
    """
    Given an alley, separate its visits by entry side (SW or NE) and stack
    the linear rate maps of each group into an n x c matrix.

    Reads `unit` and `singleAlleyBins` from the enclosing namespace.

    Returns a dict {"SW": ndarray, "NE": ndarray}; a side with no visits
    maps to an empty (0, c) array.
    """
    c = singleAlleyBins[1]-1 #-1 because the nominal val is +1 for histgr reasons
    visitDirs = groupVisitsByDir(alley)
    groupedLinRMs = {}
    for side in ["SW", "NE"]:
        # The previous `is not []` guard compared identity against a fresh
        # list and was always True; an empty group needs no special case
        # because the (0, c) seed array is returned unchanged.
        rateMaps = [unit.alleys[alley][visitIdx]['ratemap1d'] for visitIdx in visitDirs[side]]
        # Stack once instead of re-allocating the matrix for every visit.
        groupedLinRMs[side] = np.vstack([np.empty((0,c))] + rateMaps)
    return groupedLinRMs

In [9]:
def plot_VisitsDirLinRM(ax, alley, groupedLinRMs):
    """
    Draw the mean +/- sem linear rate map for each entry direction of one
    alley on the externally supplied axis `ax`.

    groupedLinRMs is a dict of two matrices (one row per visit), keyed by
    side; the first key is drawn in blue and the second in red. These
    colors are unrelated to the long-running texture color codes.

    Requires scipy.stats.sem to be imported as `sem`. A text box with the
    per-texture pass counts for each direction is added in the upper left.
    """
    sides = list(groupedLinRMs.keys()) # SW, NE by default, but arbitrary if the keys change
    dir1 = groupedLinRMs[sides[0]]
    dir2 = groupedLinRMs[sides[1]]

    # Compute all summary statistics before drawing anything.
    summaries = [
        (np.nanmean(rateMaps, axis=0), sem(rateMaps, axis=0, nan_policy='omit'))
        for rateMaps in (dir1, dir2)
    ]
    for (mean, err), color in zip(summaries, ['b', 'r']):
        ax.plot(mean, color)
        ax.fill_between(range(len(mean)), mean-err, mean+err, color=color, alpha=0.5)

    ax.set_title(f"Alley {alley}, {sides[0]} (b): {dir1.shape[0]}, {sides[1]} (r): {dir2.shape[0]}",fontsize=12)
    ax.set_xticks([])

    # Text box of pass counts; counts is nested as texture -> side.
    counts = tabulateTxtByDir(getTxtVisitsByDir(alley))
    annot = "\n".join(
        f"{txt}: SW: {counts[txt]['SW']}, NE:{counts[txt]['NE']}"
        for txt in ("A", "B", "C")
    )
    boxStyle = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
    ax.text(0.05, 0.95, annot, transform=ax.transAxes, fontsize=10,
        verticalalignment='top', bbox=boxStyle)


In [10]:
def plotRoutine_VisitsDirLin():
    """
    Plot the direction-split linear rate maps for alleys 1-17 of the
    current `unit` on a 5x4 grid of subplots (the last three panels stay
    empty), titled with the experiment code and unit name.

    Reads `unit` and `exp` from the enclosing namespace.
    """
    fig, ax = plt.subplots(5,4, figsize=(10,10))
    # fig.axes lists the panels in creation order, so alley k lands on panel k-1.
    for alley, axis in zip(range(1,18), fig.axes):
        plot_VisitsDirLinRM(axis, alley, visitsByDir_LinearRM(alley))
    plt.suptitle(f"{exp} {unit.name}")

In [11]:
def getTxtVisitsByDir(alley):
    """
    Given an alley, group its visits by entry side (SW/NE) and cross-reference
    each visit with the texture present, giving a table of visit indices
    by texture and direction.

    Reads `unit` from the enclosing namespace.

    Returns a nested dict: texture ("A", "B", "C") -> side ("SW", "NE") ->
    list of visit indices into unit.alleys[alley].
    """
    txtDirTable = {txt:{"SW":[], "NE":[]} for txt in ["A", "B", "C"]}
    visitDirs = groupVisitsByDir(alley)
    for side in ["SW", "NE"]:
        # No emptiness guard needed: looping over an empty list is a no-op
        # (the old `is not []` identity test was always True anyway).
        for visitIdx in visitDirs[side]:
            txt = unit.alleys[alley][visitIdx]['metadata']['stimulus'][0] # 0 bc it has dtype as entry 1
            txtDirTable[txt][side].append(visitIdx)
    return txtDirTable

In [12]:
def tabulateTxtByDir(txtDirTable):
    """
    Helper fx to count passes along each direction by texture present.

    txtDirTable : nested dict texture -> side -> list of visit indices,
                  as produced by getTxtVisitsByDir

    Returns a nested dict texture ("A", "B", "C") -> side ("SW", "NE") -> count.
    """
    # len() of an empty list is already 0, so no emptiness check is needed
    # (the old `is not []` identity test was always True).
    return {
        txt: {side: len(txtDirTable[txt][side]) for side in ["SW", "NE"]}
        for txt in ["A", "B", "C"]
    }

In [32]:
# Re-load the behavioural data for the current session (same call as in the session-loading cell).
behav = core.BehavioralData(datafile, exp, 0)
ts, position, alleyTracking, alleyVisits,  txtVisits = behav.loadData()

In [33]:
# Switch to a different cluster on the same tetrode, rebuild the unit object
# and draw the direction-split linear rate maps for every alley.
clustname = "TT4\\cl-maze1.1"
unit = core.UnitData(clustname, datafile, exp, alleyBounds, alleyVisits, txtVisits, position, ts)
unit.loadData_raw()
plotRoutine_VisitsDirLin()

  n = (hs*np.reciprocal(ho))*33
  n = (hs*np.reciprocal(ho))*33
  Z=VV/WW
  n = (ls* np.reciprocal(lo)) * 33
  n = (ls* np.reciprocal(lo)) * 33
  


In [13]:
def groupTrials(alley, trialList):
    """
    Given an alley and a list of visit indices, gather those visits'
    linear rate maps and stack them into one matrix (one row per visit).

    Reads `unit` and `singleAlleyBins` from the enclosing namespace.

    Returns None if trialList is None or empty (any empty sequence or
    array counts), otherwise an (n, singleAlleyBins[1]-1) array.
    """
    # `is None` plus len() is safe for lists, tuples and numpy arrays alike;
    # the old `== []` / `== None` comparisons missed empty tuples and are
    # ambiguous for arrays.
    if trialList is None or len(trialList) == 0:
        return None
    seed = np.empty((0, singleAlleyBins[1]-1))
    rateMaps = [unit.alleys[alley][trial]['ratemap1d'] for trial in trialList]
    # Stack once instead of re-allocating the matrix for every trial.
    return np.vstack([seed] + rateMaps)


In [14]:
def calcSmartMax(array2d, cutoff=0.98, scale=2.5,bins=100):
    """
    Given an array where each row is a sample (e.g. a linear rate map),
    find a good max value for visualization (e.g. imshow vmax) across all
    samples: take a percentile-like cutoff on the histogram of all finite
    values and boost it by a scale factor.

    array2d : iterable of rows; rows may differ in length
    cutoff  : cumulative proportion of values to stay below (0-1)
    scale   : multiplier applied to the selected bin edge
    bins    : passed to np.histogram. A good value depends on how many rate
              maps go into the analysis; 100 bins with a 0.98 cutoff works well.

    Returns a scalar: the left edge of the last histogram bin whose
    cumulative proportion is still below `cutoff`, times `scale`. If no bin
    qualifies (the first bin already holds >= cutoff of the data, or there
    is no finite data at all) the middle bin edge is used instead.
    """
    frs = np.asarray([fr for row in array2d for fr in row], dtype=float)
    frs = frs[np.isfinite(frs)]
    h,b = np.histogram(frs, bins=bins)

    total = h.sum()
    belowCutoff = np.array([], dtype=int)
    if total > 0:  # avoid 0/0 when there is no finite data
        propExp = np.cumsum(h) / total
        belowCutoff = np.where(propExp < cutoff)[0]

    if belowCutoff.size:
        thresh = belowCutoff[-1]
    else:
        # Fall back to the middle bin edge as a plain integer index so the
        # result is a scalar. The old fallback indexed with np.where's tuple,
        # giving a 1-element array, or an empty one for an even edge count.
        thresh = (len(b) - 1) // 2
    return b[thresh]*scale

In [15]:
# Colormap built by the project utility module; used for the imshow panels in the raster cell.
cmap = util.makeCustomColormap()

In [24]:
# Per-visit linear rate maps for one alley, split by texture (rows A/B/C)
# and entry side (columns SW/NE), all drawn on a shared color scale.
alley = 9
table = getTxtVisitsByDir(alley)

fig, ax = plt.subplots(3,2, figsize=(7,7))

# Build each texture x side matrix once, in the same row-major order as
# fig.axes. groupTrials returns None for a combination with no visits.
panelMats = []
for txt in ["A", "B", "C"]:
    for side in ["SW", "NE"]:
        panelMats.append(groupTrials(alley, table[txt][side]))

# Gather every non-empty matrix so a single overall max can be computed;
# None entries are skipped because np.vstack cannot stack them.
allmats = np.vstack(
    [np.empty((0, singleAlleyBins[1]-1))] + [mat for mat in panelMats if mat is not None]
)
mymax = calcSmartMax(allmats, cutoff=0.90, scale=2.5, bins=100)

for axis, mat in zip(fig.axes, panelMats):
    if mat is None:
        continue  # no visits for this texture/side: leave the panel empty
    axis.imshow(mat, interpolation='None', aspect='auto', cmap = cmap, vmax = mymax)

# annotate with labels
fig.axes[0].set_title("SW Entry")
fig.axes[1].set_title("NE Entry")
fig.axes[0].set_ylabel("A")
fig.axes[2].set_ylabel("B")
fig.axes[4].set_ylabel("C")

Text(0,0.5,'C')