In [0]:
from pathlib import Path
import pandas as pd
import numpy as np
from tqdm import tqdm_notebook as tqdm
from pathlib import Path
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
import scipy.io as sio
from scipy.sparse.linalg import eigsh

In [0]:
from google.colab import drive
drive.mount('/content/drive')

path = Path('drive/My Drive/Colab Notebooks/Neuroscience')
kind = 'normal'
filenames = [file.name for file in os.scandir(path/f'data/{kind}') if file.name.endswith('mat')]

In [0]:
def divide_space(path, filename, kind, denoising=True):
  mt = sio.loadmat(path/f'data/{kind}/{filename}')
  
  responses = mt['stim'][0]['resp'][0] # stim x neurons
  spont = mt['stim'][0]['spont'][0] # timepts x neurons
  istim = (mt['stim'][0]['istim'][0]).astype(np.int32) # stim ids 
  spatial = mt['med'] # spatial information

  if denoising:
    istim -= 1
    istim = istim[:,0]
    nimg = istim.max() # these are blank stims (exclude them)
    responses = responses[istim<nimg, :]
    istim = istim[istim<nimg]

    mu = spont.mean(axis=0) # get rid of spontaneous noise
    sd = spont.std(axis=0) + 1e-6
    responses = (responses - mu) / sd
    spont = (spont - mu) / sd
    sv,u = eigsh(spont.T @ spont, k=32)
    responses = responses - (responses @ u) @ u.T
    # mean center each neuron
    responses -= responses.mean(axis=0)

  classes = pd.read_csv(path/'class_map.csv')

  human_labels = classes.iloc[istim.flatten() - 1, -1].reset_index(drop=True).to_numpy().flatten()
  
  xs = spatial[:, 0].astype('int')
  ys = spatial[:, 1].astype('int')
  zs = spatial[:, 2].astype('int')

  bins = [(0+i*200, 200+i*200) for i in range(6)]
  subspaces = [(x, y, z) for x in bins for y in bins for z in np.unique(zs)]

  X_subspaces = len(set([subspace[0] for subspace in subspaces]))
  Y_subspaces = len(set([subspace[1] for subspace in subspaces]))
  Z_subspaces = len(set([subspace[2] for subspace in subspaces]))

  filename = path/f"data/{kind}/{''.join(filename.split('_')[1:3])}"

  data = np.zeros((len(responses), Z_subspaces, Y_subspaces, X_subspaces))
  for response in tqdm(range(len(responses))):
    current_X = 0
    current_Y = 0
    current_Z = 0

    for i, subspace in enumerate(subspaces):  
      x, y, z = subspace
      X_idx = (xs > x[0]) & (xs < x[1])
      Y_idx = (ys > y[0]) & (ys < y[1])
      Z_idx = zs == z
      if responses[response, X_idx & Y_idx & Z_idx].size == 0:
        subspace_feat = 0
      else: 
        subspace_feat = responses[response, X_idx & Y_idx & Z_idx].mean()
      
      data[response][current_Z][current_Y][current_X] = subspace_feat
      current_Z += 1
      if current_Z >= len(np.unique(zs)):
        current_Z = 0
        current_Y += 1
        if current_Y >= len(set([subspace[1] for subspace in subspaces])):
          current_Y = 0
          current_X += 1

  np.save(f"{filename}.npy", data)
  labels = np.array(human_labels)
  np.save(f"{filename}_labels.npy", labels)

In [0]:
divide_space(path, filenames[0], kind)

HBox(children=(IntProgress(value=0, max=6645), HTML(value='')))

In [0]:
for file in tqdm(filenames):
  divide_space(path, file, kind)

HBox(children=(IntProgress(value=0, max=7), HTML(value='')))

HBox(children=(IntProgress(value=0, max=6980), HTML(value='')))

HBox(children=(IntProgress(value=0, max=6956), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5880), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5426), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5658), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5880), HTML(value='')))

HBox(children=(IntProgress(value=0, max=5880), HTML(value='')))