<a href="https://colab.research.google.com/github/yqwang1/Computational_Neuro/blob/main/Allen_natural_images_population_code.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

To get the data, copy the file at https://drive.google.com/file/d/1jp1NRQuidfjRkwJUVd59pLmmzn8oPeC5/view?usp=sharing
into your Colab drive folder, then run the next cell.

In [None]:
!unzip siegle_791319847.zip


Archive:  siegle_791319847.zip
  inflating: All_images.npy          
  inflating: clusters.brainLocationAcronyms_ccf_2017.npy  
  inflating: frame_plus_one.spike_histograms.npy  
  inflating: stims_natural_scenes.frame.npy  
  inflating: stims_natural_scenes.intervals.npy  
  inflating: stims_natural_scenes.repeat.npy  
  inflating: stims_natural_scenes.spike_counts.npy  
  inflating: stims_natural_scenes.stimulus_block.npy  
  inflating: stims_natural_scenes.stimulus_condition_id.npy  
  inflating: units.csv               
  inflating: neurocode.py            


In [None]:
pip install rastermap

Collecting rastermap
  Downloading rastermap-1.0-py3-none-any.whl.metadata (20 kB)
Downloading rastermap-1.0-py3-none-any.whl (90 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.5/90.5 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rastermap
Successfully installed rastermap-1.0


In [None]:
import os
import warnings
import neurocode as nc
import numpy as np

import matplotlib.pyplot as plt

from rastermap import Rastermap


In [None]:
# load data: resolve the session folder once, then read each object from it
FileDir = r'.'
Experiment = ''
session_dir = os.path.join(FileDir, Experiment)

stims, frame_plus_one, clusters = (
    nc.load_object(os.path.join(session_dir, object_name))
    for object_name in ('stims_natural_scenes', 'frame_plus_one', 'clusters')
)



In [None]:
# Build frame_plus_one.spike_counts, a 3-d array of size nFrames x nClusters x nRepeats.
# "Frame" here means which stimulus was shown; Allen uses -1 for the blank frame,
# so frame numbers are shifted by one (hence "frame_plus_one").

nStims = len(stims)
nClusters = len(clusters)
nFrames = len(frame_plus_one)
nRepeats = 50

# One (frame+1, cluster, repeat) coordinate per entry of the flattened spike-count matrix.
# The repeat/tile ordering assumes stims.spike_counts is nStims x nClusters — TODO confirm.
frame_coord = stims.frame.repeat(nClusters) + 1
cluster_coord = np.tile(np.arange(nClusters), nStims)
repeat_coord = stims.repeat.repeat(nClusters)

# Integer bin edges 0..n along each axis
bin_edges = [np.arange(n_bins + 1) for n_bins in (nFrames, nClusters, nRepeats)]

# histogramdd returns (histogram, edges); only the weighted histogram is kept
frame_plus_one.spike_counts = np.histogramdd(
    [frame_coord, cluster_coord, repeat_coord],
    bins=bin_edges,
    weights=stims.spike_counts.ravel(),
)[0]

In [None]:
# show stimuli on a 7 x 17 grid: the first panel is a uniform gray image standing in
# for the blank frame (-1), the remaining panels are images 0..117
images = np.load('All_images.npy')
gray_blank = 128*np.ones_like(images[0,:,:])
fig, ax = plt.subplots(7,17,figsize=(34*1.174,14*.918), sharex=True, sharey=True, gridspec_kw={'wspace':.05, 'hspace':.05})
for panel, frame_id in zip(ax.flat, range(-1,118)):
    plt.sca(panel)
    picture = gray_blank if frame_id == -1 else images[frame_id,:,:]
    plt.imshow(picture, cmap='gray',vmin=0,vmax=255)
    plt.axis('off')

In [None]:
# visualize the mean responses of all cells to all stimuli with Rastermap

# average the spike counts over the last axis of the 3-d array
mean_counts = frame_plus_one.spike_counts.mean(axis=2)
frame_plus_one.mean_spkcnt = mean_counts

# first pass: sort so correlated neurons are close
model = Rastermap().fit(mean_counts.T)
cells_sorted = model.X_embedding

# second pass on the transposed embedding: sort so correlated stimuli are close
model2 = Rastermap().fit(cells_sorted.T)

plt.imshow(model2.X_embedding, cmap="gray_r", vmin=0, vmax=3)
plt.colorbar()
plt.xlabel('Cell')
plt.ylabel('Stimulus')

In [None]:
# Prepare data, split into training and testing sets

# Features are the spike counts; labels are the frame numbers shifted by one,
# because stims.frame starts from -1
X = stims.spike_counts
y = stims.frame + 1

# Trials with repeat number below 40 are used for training, the rest for testing
train = stims.repeat<40
test = stims.repeat>=40

X_train, X_test = (X[mask,:] for mask in (train, test))
y_train, y_test = (y[mask] for mask in (train, test))

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

# Nearest-neighbour decoder (k = 1): fit on the training trials, predict the test trials
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)
y_pred = knn.predict(X_test)

# Fraction of test trials whose stimulus was decoded correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Nearest-neighbor decoding accuracy: {accuracy}")

In [None]:
# Permutation test to see if prediction is significantly better than random.
# The classifier is not refit: the test labels are shuffled and scored against
# the predictions already held in y_pred.
n_permutations = 999
permuted_accuracies = np.array([
    accuracy_score(np.random.permutation(y_test), y_pred)
    for _ in range(n_permutations)
])

# p-value: number of shuffles scoring at least as well as the true labels,
# with +1 added to both numerator and denominator
n_at_least_as_good = np.sum(permuted_accuracies >= accuracy)
p_value = (n_at_least_as_good + 1) / (n_permutations + 1)

print(f"Permutation test p-value: {p_value}")

QUESTION: why is permuting the test set labels a better idea than permuting the training set labels?

In [None]:
from sklearn.linear_model import LogisticRegression

# Logistic regression is a classical statistical method that can be thought of as
# a 1-layer neural network.

# Build the classifier and train it on the training trials in one step
lr_classifier = LogisticRegression(max_iter=1000, solver='lbfgs').fit(X_train, y_train)

# Decode the stimuli shown on the held-out test trials
y_pred = lr_classifier.predict(X_test)

# Fraction of test trials decoded correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Logistic regression decoding accuracy: {accuracy}")

EXERCISE: Run a permutation test on the results of the LR classifier

In [None]:
# Permutation test (permuting test set)
n_permutations = 999

# Each draw shuffles the test labels and scores them against the stored predictions
# of the already-trained model (no refitting)
shuffled_scores = (
    accuracy_score(np.random.permutation(y_test), y_pred)
    for _ in range(n_permutations)
)
permuted_accuracies = np.fromiter(shuffled_scores, dtype=float, count=n_permutations)

# p-value, computed the same way as for the nearest-neighbour decoder
n_at_least_as_good = np.sum(permuted_accuracies >= accuracy)
p_value = (n_at_least_as_good + 1) / (n_permutations + 1)

print(f"Permutation test p-value: {p_value}")

EXERCISE: run the logistic regression classifier predicting from a random selection of 10 cells.  Is it still significant?

How few cells do you need to have before you lose significance?

HINT: run this multiple times. Why can you get a different p-value each time?


In [None]:
# Decode from a random subset of cells and test whether decoding is still significant.
# The subset is drawn without a fixed seed, so the accuracy and p-value
# change from run to run.
n_cells = 5  # number of randomly chosen cells to decode from
use_cells = np.random.choice(nClusters, size=n_cells, replace=False)

# Logistic regression is a classical statistical method that can be thought of as
# a 1-layer neural network

# Create and train the logistic regression classifier on the selected cells only
lr_classifier = LogisticRegression(solver='lbfgs', max_iter=1000)
lr_classifier.fit(X_train[:,use_cells], y_train)

# Decode stimuli
y_pred = lr_classifier.predict(X_test[:,use_cells])

# Evaluate performance
accuracy = accuracy_score(y_test, y_pred)
print(f"Logistic regression decoding accuracy: {accuracy}")

# Permutation test. The permutation count and the result buffer are defined here
# so this cell does not depend on an earlier permutation-test cell having been run.
n_permutations = 999
permuted_accuracies = np.zeros(n_permutations)

for i in range(n_permutations):
    # Shuffle the test set labels (y_test)
    y_test_permuted = np.random.permutation(y_test)

    # Calculate accuracy on permuted test data (using original trained model)
    permuted_accuracies[i] = accuracy_score(y_test_permuted, y_pred)

# p-value: number of shuffles scoring at least as well as the true labels,
# with +1 added to both numerator and denominator
p_value = (np.sum(permuted_accuracies >= accuracy) + 1) / (n_permutations + 1)

print(f"Permutation test p-value: {p_value}")

In [None]:
# Now we will try a neural network classifier

from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier


# Settings for the backpropagation classifier (MLPClassifier): one hidden layer of
# 100 ReLU units, Adam solver, fixed random_state
mlp_settings = dict(
    hidden_layer_sizes=(100,),
    activation='relu',
    solver='adam',
    max_iter=1000,
    random_state=0,
)
bp_classifier = MLPClassifier(**mlp_settings).fit(X_train, y_train)

# Decode the stimuli shown on the held-out test trials
y_pred = bp_classifier.predict(X_test)

# Fraction of test trials decoded correctly
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Neural network decoding accuracy: {accuracy}")

EXERCISE: try running the neural network classifier with fewer cells. How well does it work?  Can you get it to be better than logistic regression?