# Ratterdam Place Field Repetition
# Temporal Decoding 

In [1]:
import itertools
import os

import alphashape
import matplotlib.gridspec as gridspec
import more_itertools
import numpy as np
import scipy
from descartes import PolygonPatch
from matplotlib import pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from scipy.interpolate import splrep, splev
from scipy.spatial import ConvexHull
from scipy.stats import sem
from sklearn.metrics import auc
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, NearestNeighbors

import RateMapClass_William_20190308 as RateMapClass
import ratterdam_CoreDataStructures as Core
import ratterdam_Defaults as Def
import ratterdam_ParseBehavior as Parse
import ratterdam_visBasic as Vis
import utility_fx as util
import williamDefaults as wmDef

In [2]:
%qtconsole --style native
%matplotlib qt5

In [46]:
def loadRepeatingUnit(df, clustName):
    """Load one unit's position and spike data, clipped to the session epoch.

    df        - path prefix of the data directory; file names are appended to
                it by plain string concatenation, so it must end with a
                path separator.
    clustName - cluster file name relative to df.

    The first line of sessionEpochInfo.txt supplies "start,end" timestamps
    (comma separated integers). Position samples and spike timestamps outside
    [start, end] (inclusive) are discarded.

    Returns (position, spikes):
      position - array with columns ts, x, y
      spikes   - spike timestamps column-stacked with the output of
                 util.getPosFromTs (presumably the x,y location of each
                 spike -- TODO confirm against utility_fx)

    NOTE(review): camera-orientation handling is delegated to
    Parse.adjustPosCamera; presumably it flips axes based on
    cameraOrientationInfo.txt -- confirm in ratterdam_ParseBehavior.
    """
    with open(df + "sessionEpochInfo.txt", "r") as epochFile:
        epochLines = epochFile.readlines()
    epochTokens = epochLines[0].split(',')
    start = int(epochTokens[0])
    end = int(epochTokens[1])

    # Position: sorted timestamps, camera-adjusted x/y, then clip to session
    rawPos = util.read_pos(df)
    ts = np.asarray(sorted(rawPos.keys()))
    posx, posy = Parse.adjustPosCamera(df, rawPos, ts)
    allPos = np.column_stack((ts, posx, posy))
    inSession = (allPos[:, 0] >= start) & (allPos[:, 0] <= end)
    position = allPos[inSession]

    # Spikes: clip to session, then look up where the animal was at each spike
    spikeTs = np.asarray(util.read_clust(df + clustName))
    spikeTs = spikeTs[(spikeTs >= start) & (spikeTs <= end)]
    spikexy = util.getPosFromTs(spikeTs, position)

    return position, np.column_stack((spikeTs, spikexy))

class Unit():
    """
    Wrapper class because rep field ID algorithm looks
    for instance.spikes and instance.position.

    On construction the place-field algorithm (RateMapClass.RateMap) is run
    on this object and findFields() converts each detected field into a
    per-visit firing-rate time series.

    Attributes filled by findFields():
      fields     - list of (nVisits, 2) arrays, columns [visit time, firing rate]
      visits     - nested list; one entry per field, holding the list of visits
                   (each visit is a list of position timestamps)
      perimeters - list of closed convex-hull outlines, one (m, 2) array per field
    """

    def __init__(self, s, p, clustname):
        """
        s         - spikes array; column 0 is read as the spike timestamp
        p         - position array; column 0 is the timestamp, columns 1: are x,y
        clustname - label stored as self.name
        """
        self.name = clustname
        self.spikes = s
        self.position = p
        self.fields = []
        self.visits = [] # nested list. each list is a subfield and values are themselves lists of points in visit
        self.perimeters = [] # has had the convex alg run on it
        self.colors = cnames
        self.smoothing = 2
        self.repUnit = RateMapClass.RateMap(self) # a different unit class from the pf alg someone else wrote
        # self.repUnit.PF is a list of pf objects. each object has pf.perimeter as an attribute which is the well-fitting but out of order [x,y] border lists
        self.alphaHullFactor = 1
        self.alpha = 0
        self.findFields()

    def PolyArea(self,x,y):
        """
        Shoelace-formula area of the polygon with vertex coordinates x, y.
        Found at https://stackoverflow.com/questions/24467972/calculate-area-of-polygon-given-x-y-coordinates
        """
        return 0.5*np.abs(np.dot(x,np.roll(y,1))-np.dot(y,np.roll(x,1)))

    def findFields(self):
        """
        Rebuild self.fields, self.visits and self.perimeters from self.repUnit.PF.

        For every detected place field: outline it with a convex hull, split
        the position samples inside the outline into visits (getVisits), and
        compute one firing rate per visit. Rates are smoothed with
        util.weird_smooth using self.smoothing.
        """
        # Reset all three per-field lists together so that re-running this
        # method keeps fields[i], visits[i] and perimeters[i] aligned.
        self.fields = []
        self.visits = []
        self.perimeters = []

        sessionStart = self.position[0, 0]
        spikeTs = self.spikes[:, 0]

        for pf in self.repUnit.PF:
            # go from 2d hist coords to camera coords (bin index -> bin center)
            px = pf.perimeter[1]*binWidth + binWidth/2
            py = pf.perimeter[0]*binWidth + binWidth/2
            points = np.column_stack((px, py))

            # Boundary via convex hull. (An alphashape concave hull was tried
            # previously but did not work all that well as is.)
            hull = ConvexHull(points)
            vertices = np.append(hull.vertices, hull.vertices[0]) # add the first point to close the contour
            outline = points[vertices]
            contour = path.Path(outline)

            PinC = self.position[contour.contains_points(self.position[:, 1:])]
            posVisits = getVisits(PinC[:, 0])
            self.visits.append([posVisits])

            field_FR = []
            field_TS = [] # time of the first sample of each visit
            for visit in posVisits:
                # spikes strictly between the first and last sample of the visit
                nspk = np.count_nonzero((spikeTs > visit[0]) & (spikeTs < visit[-1]))
                # use elapsed time rather than (number of samples / frame rate) because any lost
                # frames from, e.g. occlusions will introduce a discrepancy.
                # NOTE(review): a visit consisting of a single sample has vdur == 0
                # and yields an inf/nan rate -- confirm whether that can occur.
                vdur = (visit[-1]-visit[0])/1e6
                field_FR.append(nspk/vdur)
                field_TS.append(((visit[0]-sessionStart)/1e6)/60)

            field_FR = util.weird_smooth(np.asarray(field_FR), self.smoothing)

            # Diagnostic only: summed rate per unit area.
            # NOTE(review): the shoelace formula assumes ordered vertices, but
            # pf.perimeter is described as out of order -- confirm this area.
            totalRate = sum(field_FR)
            area = self.PolyArea(px, py)
            print(totalRate/area)

            self.fields.append(np.column_stack((field_TS, field_FR)))
            self.perimeters.append(outline)
                
def getVisits(data, maxgap=2*1e6):
    """
    Group values into visits: runs of sorted values in which each value is
    at most maxgap larger than the one before it.

    data   - iterable of timestamps (list or 1-D array); it is not modified
    maxgap - largest gap between neighbouring values that still counts as the
             same visit (same units as data)

    Returns a list of lists, one inner list per visit, in ascending order.
    Returns [] when data is empty.
    """
    # sorted() works on a copy, so the caller's array/list keeps its order
    ordered = sorted(data)
    if not ordered:
        return []

    groups = [[ordered[0]]]
    for x in ordered[1:]:
        # values are ascending, so the difference is never negative
        if x - groups[-1][-1] <= maxgap:
            groups[-1].append(x)
        else:
            groups.append([x])
    return groups

from matplotlib import path

# Per-field plotting colours; Unit stores this list as unit.colors and the
# plotting routine indexes it by field number.
cnames = [
    'black', 'blue', 'green', 'red', 'brown', 'purple',
    'cornflowerblue', 'orchid', 'darkcyan', 'midnightblue',
    'saddlebrown', 'darkviolet', 'seagreen', 'indianred',
    'goldenrod', 'orange', 'olive',
]
# Bin width used to convert place-field perimeter bins to camera coordinates
binWidth = wmDef.binWidth
# Colormap shared by the rate-map and matrix plots
cmap = util.makeCustomColormap()

def plotRoutine_RepPF_TempDyn(unit,smoothing=0, nf=99, time='time', save=False):
    """
    Plotting routine using unit and ratemapclass classes in local namespace
    Create a ratemap of fields with detected fields outlined
    Below, plot visit FR over time for each field, color coded to match

    unit      - Unit instance (uses .name, .repUnit.rateMap2D, .fields,
                .perimeters, .colors, .smoothing)
    smoothing - extra smoothing passed to util.weird_smooth for display; it is
                applied on top of the smoothing already done by the unit
    nf        - number of fields to draw; 99 (or None) means all fields
    time      - 'time' plots against the field's time column, 'visit'/'visits'
                plots against visit number
    save      - if True, write <savepath><unit name with "\\" replaced by "_">.png
                and close the figure. savepath is read from module scope.

    Raises ValueError if `time` is not one of the accepted strings.
    """
    if time == 'time':
        timeUnit = 'min'
    elif time in ('visit', 'visits'):
        timeUnit = 'visits'
    else:
        raise ValueError(f"time must be 'time', 'visit' or 'visits', got {time!r}")

    fig, ax = plt.subplots(2,1, figsize=(10,14))
    mapAx, dynAx = ax

    rateMap = unit.repUnit.rateMap2D
    cutoff = np.nanpercentile(rateMap, 98)  # colour scale saturates at the 98th percentile
    mapAx.imshow(rateMap, origin='lower', aspect='auto', interpolation='None',
                 cmap=cmap, vmax=cutoff,
                 extent=[wmDef.xedges[0], wmDef.xedges[-1], wmDef.yedges[0], wmDef.yedges[-1]])
    # Label with the unit being drawn rather than whatever the global `clust` currently is
    mapAx.set_title(f"{unit.name}, cutoff = {round(cutoff,2)}Hz", fontsize=20)
    mapAx.axis('equal')
    mapAx.set_ylim([0,480])
    mapAx.set_xlim([0, 640])
    mapAx.set_xticks([])
    mapAx.set_yticks([])
    for side in ('top', 'right', 'bottom', 'left'):
        mapAx.spines[side].set_visible(False)

    #option to not visualize garbage fields. 99 is an 'infinity' value as no cell will have 99 fields.
    end = len(unit.fields) if nf in (99, None) else nf

    ncolors = len(unit.colors)
    for i, field in enumerate(unit.fields[:end]):
        f = util.weird_smooth(field[:,1],smoothing)
        xval = field[:,0] if time == 'time' else range(field.shape[0])
        color = unit.colors[i % ncolors]  # wrap around instead of failing on many fields
        dynAx.plot(xval, f, color=color, marker='.',alpha=0.8)
        mapAx.plot(unit.perimeters[i][:,0], unit.perimeters[i][:,1],color=color)

    # Axis styling does not depend on the field, so do it once
    dynAx.tick_params(axis='y', labelsize=14)
    dynAx.tick_params(axis='x', labelsize=14)
    dynAx.set_xlabel(f"Time in session ({timeUnit})", fontsize=24)
    dynAx.set_ylabel("Firing Rate (Hz, smoothed)", fontsize=24)
    dynAx.spines['right'].set_visible(False)
    dynAx.spines['top'].set_visible(False)
    dynAx.set_title(f"gaussian smoothing sigma = {smoothing+unit.smoothing}", fontsize=12)

    if save:
        clustname = unit.name.replace("\\","_")
        fig.savefig(savepath+clustname+".png", format='png')
        plt.close(fig)

In [240]:
# Select one recording (rat / day / cluster), load its position and spikes,
# and build the Unit (which runs place-field detection on construction).
# savepath is where plotRoutine_RepPF_TempDyn writes figures when save=True.
rat = "R808"
day = "D6"
savepath = f'E:\\Ratterdam\\{rat}\\ratterdam_plots\\{day}\\'
# every backslash is escaped; a bare "\R" is an invalid escape sequence
df = f'E:\\Ratterdam\\{rat}\\{rat}_RatterdamOpen_{day}\\'
clust = 'TT15\\cl-maze1.1'
p, s = loadRepeatingUnit(df, clust)
unit = Unit(s,p, clust)

0.034339063273122784
0.10316680101289039
0.6842628520340124


  aboveThreshold = np.where(rateMap >= max(fieldThreshold,0), True, False)
C:\Users\whockei1\AppData\Roaming\Python\Python36\site-packages\skimage\morphology\_deprecated.py:5: skimage_deprecation: Function ``watershed`` is deprecated and will be removed in version 0.19. Use ``skimage.segmentation.watershed`` instead.
  def watershed(image, markers=None, connectivity=1, offset=None, mask=None,
C:\Users\whockei1\AppData\Roaming\Python\Python36\site-packages\skimage\morphology\_deprecated.py:5: skimage_deprecation: Function ``watershed`` is deprecated and will be removed in version 0.19. Use ``skimage.segmentation.watershed`` instead.
  def watershed(image, markers=None, connectivity=1, offset=None, mask=None,


In [4]:
def interpolateField(unit,wnSize=5, wnStep=2,s=5,k=3,plot=True, ret=False):
    """
    Input: A unit object with attribute fields (list of [ts,fr] arrays])
           wnSize - size of sliding window in time units (minutes)
           wnStep - shift of sliding window in time units (minutes)
           s - smoothing of spline
           k - degree of spline
           plot - if True, draw the interpolated points over the raw field
                  traces in a new figure (requires unit.name for the title)
           ret - if True, return (xs, ys); otherwise return None

    Diagnostic function to view spline interpolation performance. Interp is also done in the analysis itself.

    Uses scipy.interpolate.splrep and splev to create an interpolated representation
    of each field. This is because field visits are asynchronous (a rat cannot be in two places at once)
    and so to compare activity between fields at a given time point we need to interpolate (unless time window is large)

    One spline is fit per field over the whole session and then sampled at
    100 evenly spaced points inside each sliding window.
    Returns (optional) xs, ys which are lists (one per field) of the interpolated
    points, concatenated across windows (w overlap bc sliding window)
    """
    fmax = int(np.ceil(max(max(field[:,0]) for field in unit.fields)))
    # Windows start at 0 and advance by wnStep; keep those ending before fmax
    wins = [(i*wnStep, wnSize + i*wnStep)
            for i in range(fmax) if wnSize + i*wnStep < fmax]
    # Sample locations depend only on the window, so build them once
    winX = [np.linspace(start, end, 100) for start, end in wins]

    # For each field, get the spline params so two fields can be interpolated to same # pts within a window, allowing for a pearson R calc
    fieldFx = [splrep(d[:,0], d[:,1], k=k, task=0, s=s) for d in unit.fields]

    ## sample spline fx in wins as would be done in analysis and view
    nf = len(unit.fields)
    xs, ys = [], []
    for tck in fieldFx:
        xc, yc = [], []
        for x in winX:
            xc.extend(x)
            yc.extend(splev(x, tck))
        xs.append(xc)
        ys.append(yc)

    if plot:
        palette = ['b','r','g','k']
        plt.figure()
        for i in range(nf):
            # cycle the palette so fields beyond the fourth are still drawn
            c = palette[i % len(palette)]
            plt.plot(xs[i], ys[i],'.',color=c,markersize=4)
            plt.plot(unit.fields[i][:,0], unit.fields[i][:,1], color=c)
        plt.title(unit.name)
    if ret:
        return xs, ys

In [5]:
def plotMats(mats,mattype='diff',shuff='False', wins=None, name=None, wnSize=5, wnStep=2):
    """
    Draw every matrix in `mats` as one panel of a 10-column grid, all on a
    shared colour scale, with a single colorbar on the right.

    mats    - sequence of 2-D arrays (e.g. the output of makeSemaphores)
    mattype - text inserted into the figure title
    shuff   - text/flag inserted into the figure title
    wins    - optional list of (start, end) windows used for panel titles.
              When omitted, window i is taken to be
              (i*wnStep, wnSize + i*wnStep), which matches the defaults used
              by makeSemaphores, instead of relying on a global `wins`.
    name    - unit label for the figure title; when omitted the module-level
              `unit.name` is used (the original behaviour)
    wnSize, wnStep - window size/step used only to derive `wins` when it is
              not supplied

    Uses the module-level colormap `cmap`.
    """
    ncol=10
    nrow = int(np.ceil(len(mats)/ncol))
    fig, ax = plt.subplots(nrow,ncol,figsize=(8,8))
    panels = np.atleast_1d(ax).ravel()  # row-major, same order as fig.axes

    if wins is None:
        wins = [(i*wnStep, wnSize + i*wnStep) for i in range(len(mats))]
    if name is None:
        name = unit.name

    _max, _min = max(arr.max() for arr in mats), min(arr.min() for arr in mats)
    for i, mat in enumerate(mats):
        im = panels[i].imshow(mat, aspect='auto',interpolation='None', cmap=cmap, vmin=_min, vmax=_max)
        panels[i].set_title(f"Mins {wins[i][0]}-{wins[i][1]}")
    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.85, 0.15, 0.05, 0.7])
    fig.colorbar(im, cax=cbar_ax)
    # Strip ticks from the matrix panels only; looping over fig.axes would
    # also wipe the colorbar's tick labels.
    for panel in panels:
        panel.set_xticks([])
        panel.set_yticks([])
    fig.suptitle(f"{name} Time Varying Unsigned Mean {mattype} Matrices, shuff = {shuff}", fontsize=20)

In [6]:
def plotCorrOfMats(mats):
    """
    Show, in a new figure, the matrix of Pearson correlations between every
    pair of matrices in `mats` (each matrix is flattened to a vector first).

    mats - sequence of equally sized arrays
    """
    vectors = [m.flatten() for m in mats]
    nmats = len(vectors)
    corrOfcorrs = np.empty((nmats, nmats))

    for row, a in enumerate(vectors):
        for col, b in enumerate(vectors):
            # pearsonr returns (r, p-value); only r is kept
            corrOfcorrs[row, col] = scipy.stats.pearsonr(a, b)[0]

    plt.figure()
    plt.imshow(corrOfcorrs, origin='lower', interpolation='None')
    plt.title("Autocorrelation Matrix of Difference Matrices",fontsize=22)
    plt.xlabel("Difference matrices",fontsize=16)
    plt.ylabel("Difference matrices", fontsize=16)

In [7]:
def makeSemaphores(fieldArray, s=1, k=3, wnSize=5, wnStep=2, nsamples=100):
    """
    Semaphore Plot Analysis.

    For each sliding time window, build an (nf, nf) matrix whose [i, j] entry
    is the absolute difference between the mean interpolated firing rate of
    field i and of field j inside that window.

    fieldArray - list of (n, 2) arrays, columns [ts, fr], one per field
    s          - spline smoothing factor passed to splrep
    k          - spline degree (should be 3 usually)
    wnSize     - window length, in the units of the ts column
    wnStep     - shift between consecutive windows, same units
    nsamples   - number of evenly spaced points sampled per window

    Returns an array of shape (nWindows, nf, nf). If no window fits before
    the last timestamp, an empty array is returned.
    """
    fieldFx = [splrep(d[:,0], d[:,1], k=k, task=0, s=s) for d in fieldArray]
    fmax = int(np.ceil(max(max(field[:,0]) for field in fieldArray)))
    # Windows start at 0 and advance by wnStep; keep those ending before fmax
    wins = [(i*wnStep, wnSize + i*wnStep)
            for i in range(fmax) if wnSize + i*wnStep < fmax]

    diffmats = []
    for start, end in wins:
        x = np.linspace(start, end, nsamples)
        # One spline evaluation per field per window; the pairwise matrix is
        # then just the outer absolute difference of the window means.
        means = np.array([np.mean(splev(x, tck)) for tck in fieldFx])
        diffmats.append(np.abs(means[:, None] - means[None, :]))
    return np.asarray(diffmats)

In [8]:
def shuffleFields(unit):
    """
    Build a time-shuffled copy of every field in unit.fields.

    Each field is an (n,2) array of [ts, fr]. The ts column is kept as is and
    the fr column is randomly permuted (np.random.permutation), so firing
    rates are reassigned to visits within the same field. The unit itself is
    not modified.

    Returns a list of (n,2) numpy arrays, one per field (n varies by field).
    """
    return [
        np.column_stack((field[:, 0], np.random.permutation(field[:, 1])))
        for field in unit.fields
    ]

## k-Nearest Neighbor Temporal Analysis
### For each semaphore (difference matrix), find its nearest neighbors in vector space and compute the mean distance between their positions (indices) in the time-ordered array
### Then shuffle the subfield visits, regenerate (null) semaphore plots and repeat (important not to shuffle the semaphores themselves, because overlapping sliding windows would contaminate the null)

In [241]:
# Create real and 1 null semaphore and plot, just to look
# Real semaphores: per-window difference matrices from the unit's actual fields
diffmats = makeSemaphores(unit.fields)

# Null semaphores: firing rates permuted across visits within each field,
# then the same per-window difference matrices are rebuilt
sfields = shuffleFields(unit)
smats = makeSemaphores(sfields)

# shuff is only interpolated into the figure title
plotMats(diffmats,shuff=False)
plotMats(smats,shuff=True)



In [242]:
# kNN analysis
# For every semaphore, find its nearest neighbour among all semaphores and
# record (a) how far apart the two are in window index and (b) their vector
# distance. Compare against the same statistics from visit-shuffled fields.
ns = 1000 # num shuffles
# real
flatmats = [i.flatten() for i in diffmats]
# n_neighbors=2: kneighbors is queried with the fitted points, so column 0 is
# normally the point itself and column 1 its nearest other semaphore
neigh = NearestNeighbors(n_neighbors=2).fit(flatmats)
d,idx = neigh.kneighbors(flatmats)
meanDist = np.mean(np.abs(np.diff(idx)))
meanvecDist = np.mean(d[:,1])

# shuffle
shuffmeanDists = []  # mean index distance per shuffle (name used by the plotting cell)
shuffmeanIdxDists = shuffmeanDists  # previous name kept as an alias of the same list
shuffmeanVecDists = []
# loop variable is deliberately not `n`, which other cells use for the epoch count
for _shuffle in range(ns):
    sfields = shuffleFields(unit)
    smats = makeSemaphores(sfields)
    flatsmats = [i.flatten() for i in smats]
    sneigh = NearestNeighbors(n_neighbors=2).fit(flatsmats)
    sd,sidx = sneigh.kneighbors(flatsmats)
    smeanDist = np.mean(np.abs(np.diff(sidx)))
    shuffmeanDists.append(smeanDist)
    smeanvecDist = np.mean(sd[:,1])
    if smeanvecDist < 1000: # some shuff values are like 10e19. Maybe related to the 'impossible result' runtime error?
        shuffmeanVecDists.append(smeanvecDist)

with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)


In [243]:
# Null (shuffle) distributions with the real value (red) and the 5th
# percentile of the null (black). axvline spans the full axis height, so the
# markers stay visible whatever the histogram counts are.
plt.figure()
plt.hist(shuffmeanDists,bins=15)
plt.axvline(meanDist, color='r')
null5pct = np.percentile(shuffmeanDists,5)
plt.axvline(null5pct, color='k')
plt.title("Average Index Distance kNN vs Shuffle")

plt.figure()
plt.hist(shuffmeanVecDists,bins=100)
plt.axvline(meanvecDist, color='r')
null5pct = np.percentile(shuffmeanVecDists,5)
plt.axvline(null5pct, color='k')
plt.title("Average Vector Distance kNN vs Shuffle")

Text(0.5,1,'Average Vector Distance kNN vs Shuffle')

### kNN Classification - Temporal Epoch
### Divide session into n epochs, try to decode. Standard train/test classification

In [299]:
# Setup parameters and setup data
n = 3  # number of temporal epochs to decode
testProp = 0.33  # fraction of semaphores held out for testing
# Build the semaphores first so the epoch boundaries below are derived from
# this exact set of matrices rather than from whatever diffmats held before.
diffmats = makeSemaphores(unit.fields)
flatmats = np.asarray([i.flatten() for i in diffmats])
# n+1 boundaries splitting the windows into n consecutive epochs
# (builtin int: the np.int alias no longer exists in current NumPy)
idx = np.ceil(np.linspace(0,len(diffmats),n+1)).astype(int)
# NOTE(review): idx includes 0 and len(diffmats), so the first and last
# pieces of `split` are empty -- confirm whether idx[1:-1] was intended.
split = np.split(flatmats,idx)
# label each semaphore with the index of the epoch it falls in
labels = np.repeat(np.arange(n), np.diff(idx))

In [300]:
# real
# Repeated random train/test splits of the real semaphores; each repeat
# records the fraction of held-out semaphores whose epoch is predicted correctly.
rep = 500
perfs = []
for _ in range(rep):
    train, test = train_test_split(list(range(len(diffmats))), test_size=testProp)
    Xtrain, Xtest, Ytrain, Ytest = flatmats[train], flatmats[test], labels[train], labels[test]
    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(Xtrain, Ytrain)
    perfs.append(np.mean(Ytest == neigh.predict(Xtest)))
real5pct = np.percentile(perfs,5)
plt.figure()
plt.hist(perfs)
# axvline spans the axis regardless of bin counts
plt.axvline(real5pct, color='r')
plt.axvline(1/n, color='grey')  # chance level for n epochs
plt.title(f"kNN Classifier of Semaphore Plots, {n} Epochs, {rep} runs", fontsize=18)

Text(0.5,1,'kNN Classifier of Semaphore Plots, 3 Epochs, 500 runs')

In [302]:
# shuffle
# Same decoding as for the real data, but each repeat first permutes firing
# rates across visits within every field and rebuilds the semaphores.
rep = 500
perfs = []
for _ in range(rep):
    sfields = shuffleFields(unit)
    smats = makeSemaphores(sfields)
    flatsmats = np.asarray([i.flatten() for i in smats])

    train, test = train_test_split(list(range(len(flatsmats))), test_size=testProp)
    Xtrain, Xtest, Ytrain, Ytest = flatsmats[train], flatsmats[test], labels[train], labels[test]
    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(Xtrain, Ytrain)
    perfs.append(np.mean(Ytest == neigh.predict(Xtest)))

# stored as null5pct (as in the kNN-vs-shuffle histograms) so the real-data
# percentile in real5pct is not overwritten by a shuffle value
null5pct = np.percentile(perfs,5)
plt.figure()
plt.hist(perfs)
# axvline spans the axis regardless of bin counts
plt.axvline(null5pct, color='r')
plt.axvline(1/n, color='grey')  # chance level for n epochs
plt.title(f"kNN Classifier of Semaphore Plots, {n} Epochs, {rep} runs, shuff visits within field", fontsize=18)

with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)


Text(0.5,1,'kNN Classifier of Semaphore Plots, 3 Epochs, 500 runs, shuff')

## kNN Classification - Same cell fields vs different cell fields
### Perform a kNN classification (predicting which third of the session a semaphore came from) using
### the actual fields of a cell (as done above) or fields shuffled between cells (maintaining the real number of fields per cell)

In [34]:
# Pool the fields of every unit in `population` into one flat list;
# createShuffledUnit draws randomly from this pool.
allfields = []
for u in population.values():
    allfields += list(u.fields)

In [36]:
class GuttedUnit():
    """
    Stripped-down stand-in for a unit: it carries only a `fields` list, which
    is the sole attribute makeSemaphores-style analyses need.
    """

    def __init__(self, fields=None):
        """
        fields - optional iterable of field arrays; copied into a new list.
                 Defaults to an empty list.
        """
        self.fields = [] if fields is None else list(fields)

In [37]:
def createShuffledUnit(unit, allfields):
    """
    Build a 'field shuffled unit' with as many fields as `unit` has, each one
    picked at random from `allfields`.

    Input - unit: a unit object. unit.fields is a list of (n,2) arrays. Each entry is [ts,fr] where ts is in min and fr is in Hz
          - allfields: a list of all fields from the population of repeating units, in the same [ts,fr] format

    Indices are drawn independently with np.random.randint, i.e. with
    replacement: the same field can be picked more than once, and the unit's
    own fields are not excluded.

    Return - a stripped down unit object (only has fields as attribute) with the scrambled fields
    """
    nfields = len(unit.fields)
    picks = np.random.randint(0, len(allfields), nfields)
    sunit = GuttedUnit()
    sunit.fields = [allfields[j] for j in picks]
    return sunit

In [65]:
# Setup parameters and setup data
clust = 'TT12\\cl-maze1.7'
n = 3 # number of temporal epochs to decode
#nshuffles = int(scipy.special.comb(len(allfields), len(population[clust].fields))) # shuffling fields between cells is nCr and is usually not that big given the data so compute what it is and use that
nshuffles = 1000
testProp = 0.33  # fraction of semaphores held out for testing
diffmats = makeSemaphores(population[clust].fields)
# n+1 boundaries splitting the windows into n consecutive epochs
# (builtin int: the np.int alias no longer exists in current NumPy)
idx = np.ceil(np.linspace(0,len(diffmats),n+1)).astype(int)
flatmats = np.asarray([i.flatten() for i in diffmats])
# label each semaphore with the index of the epoch it falls in
labels = np.repeat(np.arange(n), np.diff(idx))

In [60]:
# real
diffmats = makeSemaphores(population[clust].fields)
flatmats = np.asarray([i.flatten() for i in diffmats])
rep = 500 # this is technical replicates - seeing variability due to e.g. which samples in train/test. In no way comparable to shuffling distribution. Just report avg or something
rperfs = []
for _ in range(rep):
    train, test = train_test_split(list(range(len(diffmats))), test_size=testProp)
    Xtrain, Xtest, Ytrain, Ytest = flatmats[train], flatmats[test], labels[train], labels[test]
    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(Xtrain, Ytrain)
    rperfs.append(np.mean(Ytest == neigh.predict(Xtest)))
real5pct = np.percentile(rperfs,5)
plt.figure()
plt.hist(rperfs)
# axvline spans the axis regardless of bin counts
plt.axvline(real5pct, color='r')
plt.axvline(1/n, color='grey')  # chance level for n epochs
plt.title(f"kNN Classifier of Semaphore Plots, {n} Epochs, {rep} runs, real", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

(array([  0.,  25.,  50.,  75., 100., 125., 150., 175., 200.]),
 <a list of 9 Text yticklabel objects>)

In [67]:
# shuffled fields between units (each field real, ownership is shuffled)
# Shuffle results are kept in their own names (smats / flatsmats) so the real
# unit's diffmats / flatmats are not overwritten by the last shuffle.
sfperfs = []
for r in range(nshuffles):
    sunit = createShuffledUnit(population[clust], allfields)
    smats = makeSemaphores(sunit.fields)
    flatsmats = np.asarray([i.flatten() for i in smats])

    # Fields borrowed from other cells can end at a different time, which
    # changes the number of windows; derive the epoch labels from this
    # shuffle's own window count so labels and semaphores always line up.
    epochEdges = np.ceil(np.linspace(0, len(smats), n+1)).astype(int)
    slabels = np.repeat(np.arange(n), np.diff(epochEdges))

    train, test = train_test_split(list(range(len(smats))), test_size=testProp)
    Xtrain, Xtest, Ytrain, Ytest = flatsmats[train], flatsmats[test], slabels[train], slabels[test]
    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(Xtrain, Ytrain)
    sfperfs.append(np.mean(Ytest == neigh.predict(Xtest)))

# null-distribution percentile; kept out of real5pct so the real value survives
null5pct = np.percentile(sfperfs,5)
plt.figure()
plt.hist(sfperfs)
# axvline spans the axis regardless of bin counts
plt.axvline(null5pct, color='r')
plt.axvline(1/n, color='grey')  # chance level for n epochs
plt.title(f"kNN Classifier of Semaphore Plots, {n} Epochs, {nshuffles} runs, Shuff Fields Between Cells", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

(array([  0.,  50., 100., 150., 200., 250.]),
 <a list of 6 Text yticklabel objects>)

In [73]:
# shuffle visits within fields for a neuron
# Uses population[clust], the same unit that the labels and the real /
# between-cell results of this section were computed from.
# NOTE(review): this previously shuffled the global `unit`; confirm that
# `unit` was not intentionally a different cell.
nvisitshuffles = 25000  # number of shuffles actually run (reported in the title)
svperfs = []
for r in range(nvisitshuffles):
    sfields = shuffleFields(population[clust])
    smats = makeSemaphores(sfields)
    flatsmats = np.asarray([i.flatten() for i in smats])

    train, test = train_test_split(list(range(len(flatsmats))), test_size=testProp)
    Xtrain, Xtest, Ytrain, Ytest = flatsmats[train], flatsmats[test], labels[train], labels[test]
    neigh = KNeighborsClassifier(n_neighbors=2)
    neigh.fit(Xtrain, Ytrain)
    svperfs.append(np.mean(Ytest == neigh.predict(Xtest)))

# null-distribution percentile; kept out of real5pct so the real value survives
null5pct = np.percentile(svperfs,5)
plt.figure()
plt.hist(svperfs)
# axvline spans the axis; with this many shuffles bin counts reach the thousands
plt.axvline(null5pct, color='r')
plt.axvline(1/n, color='grey')  # chance level for n epochs
plt.title(f"kNN Classifier of Semaphore Plots, {n} Epochs, {nvisitshuffles} runs, shuff visits within field", fontsize=18)
plt.xticks(fontsize=18)
plt.yticks(fontsize=18)

spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable caus

with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
spline with fp=s has been reached. Probable cause: s too small.
(abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(f

with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)
with fp = s. Probable cause: s too small. (abs(fp-s)/s>0.001)


(array([   0., 1000., 2000., 3000., 4000., 5000., 6000., 7000., 8000.]),
 <a list of 9 Text yticklabel objects>)

In [70]:
# Compare decoding accuracy distributions: real data, fields shuffled
# between cells (sfperfs), and visits shuffled within fields (svperfs)
plt.figure()
plt.violinplot([rperfs, sfperfs, svperfs])
plt.xticks([1,2,3],['Real','Shuffle Fields between Cells','Shuffle Visits within Fields'])
plt.title(clust)

Text(0.5,1,'TT12\\cl-maze1.7')