In [1]:
!python -m pip install --upgrade pip --quiet
!pip install mindscope_utilities --upgrade --quiet
!pip install icecream --quiet

import pandas as pd
import numpy as np
from icecream import ic # icecream is an alternative to print useful for debugging code
# https://github.com/gruns/icecream
import os

[0m

In [2]:
# Mount Google Drive inside the Colab runtime so the data can be kept there
# and does not need to be downloaded again on every session.
# You might get a prompt asking you to authorize access.
from google.colab import drive
drive.mount('/content/gdrive')
# Background on keeping datasets in Google Drive from Colab:
# https://towardsdatascience.com/downloading-datasets-into-google-drive-via-google-colab-bcb1b30b0166

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [3]:
# Copy the data to your own Google Drive - this whole folder:
# https://drive.google.com/drive/folders/18_Bin6vrtffX5iAim1KxCh1SJkvKYpM0?usp=sharing

# os.listdir returns names in arbitrary order and also lists hidden entries
# (e.g. '.ipynb_checkpoints'), so hidden names are skipped and the rest is sorted.
# This keeps the row order of everything built from these files reproducible.
etr_filenames = sorted(
  name
  for name in os.listdir('/content/gdrive/MyDrive/neuromatch/mean_cell_etr_data') # replace with path where you copied mean etr cell data from me
  if not name.startswith('.')
)
# each file in this folder is one random experiment, event triggered responses for all cells in that experiment for a total of 25 experiments and about 125 cells

In [4]:
# Read every experiment file into its own DataFrame, then stack them into one
# long table with a fresh 0..n-1 row index.
etr_frames = []
for name in etr_filenames:
  etr = pd.read_csv(f'/content/gdrive/MyDrive/neuromatch/mean_cell_etr_data/{name}')
  # 'Unnamed: 0' is presumably the row index that was saved with the csv - not needed here
  etr = etr.drop(columns = ['Unnamed: 0'])
  etr_frames.append(etr)
all_etrs = pd.concat(etr_frames, ignore_index=True)

In [5]:
# Inspect the combined long-format table: one row per cell and time point
# (pandas only renders the first and last rows of a long frame).
all_etrs

Unnamed: 0,ophys_experiment_id,cell_specimen_id,time,dff,ophys_session_id,ophys_container_id,mouse_id,cre_line,session_type,imaging_depth,experience_level
0,1081264129,1120091486,-3.00,-0.024646,1081012515,1079027842,546605,Sst-IRES-Cre,OPHYS_2_images_A_passive,229,Familiar
1,1081264129,1120091486,-2.98,-0.024209,1081012515,1079027842,546605,Sst-IRES-Cre,OPHYS_2_images_A_passive,229,Familiar
2,1081264129,1120091486,-2.96,-0.022281,1081012515,1079027842,546605,Sst-IRES-Cre,OPHYS_2_images_A_passive,229,Familiar
3,1081264129,1120091486,-2.94,-0.018495,1081012515,1079027842,546605,Sst-IRES-Cre,OPHYS_2_images_A_passive,229,Familiar
4,1081264129,1120091486,-2.92,-0.012587,1081012515,1079027842,546605,Sst-IRES-Cre,OPHYS_2_images_A_passive,229,Familiar
...,...,...,...,...,...,...,...,...,...,...,...
37620,945586431,1086619236,2.92,0.021127,944888114,1022731531,453991,Vip-IRES-Cre,OPHYS_3_images_A,225,Familiar
37621,945586431,1086619236,2.94,0.022124,944888114,1022731531,453991,Vip-IRES-Cre,OPHYS_3_images_A,225,Familiar
37622,945586431,1086619236,2.96,0.023012,944888114,1022731531,453991,Vip-IRES-Cre,OPHYS_3_images_A,225,Familiar
37623,945586431,1086619236,2.98,0.023498,944888114,1022731531,453991,Vip-IRES-Cre,OPHYS_3_images_A,225,Familiar


In [6]:
# Reshape from long format (one row per cell and time point) to wide format
# (one row per cell, one column per time point holding the dff value).
id_cols = ['ophys_experiment_id', 'cell_specimen_id', 'cre_line', 'experience_level']
long_etrs = all_etrs[id_cols + ['time', 'dff']]
pivoted_etrs = long_etrs.pivot(index = list(id_cols), columns = 'time', values = 'dff')
# the two id columns are only needed as part of the pivot index; drop them afterwards
wide_etrs = pivoted_etrs.reset_index().drop(columns = ['ophys_experiment_id', 'cell_specimen_id'])
wide_etrs

time,cre_line,experience_level,-3.0,-2.98,-2.96,-2.94,-2.92,-2.9,-2.88,-2.86,...,2.8200000000000003,2.84,2.8600000000000003,2.88,2.9000000000000004,2.92,2.9400000000000004,2.96,2.9800000000000004,3.0
0,Vip-IRES-Cre,Novel 1,-0.083928,-0.082117,-0.074059,-0.055879,-0.026093,0.015488,0.063915,0.110883,...,-0.026403,-0.035437,-0.043578,-0.051179,-0.059871,-0.067221,-0.072654,-0.076958,-0.079442,-0.079739
1,Vip-IRES-Cre,Novel 1,-0.009587,-0.008460,-0.004084,0.004617,0.017525,0.034205,0.051518,0.066048,...,-0.019113,-0.020991,-0.022453,-0.023364,-0.023958,-0.023461,-0.022795,-0.022323,-0.021877,-0.021403
2,Vip-IRES-Cre,Novel 1,-0.044358,-0.043281,-0.038294,-0.027171,-0.007935,0.019344,0.050802,0.080198,...,-0.017904,-0.020069,-0.022871,-0.027302,-0.032668,-0.038312,-0.043111,-0.046742,-0.048420,-0.048953
3,Vip-IRES-Cre,Novel 1,-0.026337,-0.025343,-0.021658,-0.013720,-0.000437,0.017417,0.037148,0.056304,...,-0.008880,-0.011763,-0.014029,-0.016573,-0.019104,-0.021638,-0.023740,-0.025141,-0.025941,-0.026186
4,Vip-IRES-Cre,Novel 1,-0.016216,-0.015359,-0.012423,-0.006652,0.002333,0.014424,0.027083,0.038435,...,-0.007983,-0.009843,-0.011842,-0.013850,-0.015749,-0.017249,-0.018320,-0.019165,-0.019692,-0.019805
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120,Sst-IRES-Cre,Novel >1,-0.011811,-0.011732,-0.011038,-0.009358,-0.006679,-0.002804,0.001711,0.005866,...,-0.000027,-0.001472,-0.002777,-0.003869,-0.004930,-0.005745,-0.006396,-0.006832,-0.007132,-0.007167
121,Sst-IRES-Cre,Novel >1,-0.023547,-0.023410,-0.022553,-0.020198,-0.015408,-0.007874,0.002278,0.014155,...,0.013075,0.007597,0.002909,-0.001281,-0.004870,-0.007981,-0.010544,-0.012229,-0.013270,-0.013704
122,Sst-IRES-Cre,Novel >1,-0.017820,-0.017716,-0.016750,-0.014288,-0.009595,-0.002419,0.006767,0.016819,...,-0.003390,-0.005782,-0.007706,-0.009192,-0.010213,-0.011164,-0.011878,-0.012484,-0.012924,-0.013090
123,Sst-IRES-Cre,Novel >1,-0.024020,-0.023208,-0.019716,-0.012983,-0.003325,0.008519,0.020491,0.030227,...,-0.013762,-0.016383,-0.018364,-0.020226,-0.021916,-0.023385,-0.024724,-0.025785,-0.026349,-0.026532


In [8]:
# Replacing experience level with just novel / familiar.
# The comparison is case-insensitive on purpose: this cell overwrites the column in place,
# so on a re-run the values are already lowercase. Comparing against 'Familiar' exactly
# would then turn every row into 'novel'.
new_vals = ['familiar' if str(level).lower() == 'familiar' else 'novel'
            for level in wide_etrs['experience_level']] # this trick is called list comprehension it is kind of a for loop in one line for lists
wide_etrs['experience_level'] = new_vals
wide_etrs.head(20) # expect a mix of 'novel' and 'familiar' rows here

time,cre_line,experience_level,-3.0,-2.98,-2.96,-2.94,-2.92,-2.9,-2.88,-2.86,...,2.8200000000000003,2.84,2.8600000000000003,2.88,2.9000000000000004,2.92,2.9400000000000004,2.96,2.9800000000000004,3.0
0,Vip-IRES-Cre,novel,-0.083928,-0.082117,-0.074059,-0.055879,-0.026093,0.015488,0.063915,0.110883,...,-0.026403,-0.035437,-0.043578,-0.051179,-0.059871,-0.067221,-0.072654,-0.076958,-0.079442,-0.079739
1,Vip-IRES-Cre,novel,-0.009587,-0.00846,-0.004084,0.004617,0.017525,0.034205,0.051518,0.066048,...,-0.019113,-0.020991,-0.022453,-0.023364,-0.023958,-0.023461,-0.022795,-0.022323,-0.021877,-0.021403
2,Vip-IRES-Cre,novel,-0.044358,-0.043281,-0.038294,-0.027171,-0.007935,0.019344,0.050802,0.080198,...,-0.017904,-0.020069,-0.022871,-0.027302,-0.032668,-0.038312,-0.043111,-0.046742,-0.04842,-0.048953
3,Vip-IRES-Cre,novel,-0.026337,-0.025343,-0.021658,-0.01372,-0.000437,0.017417,0.037148,0.056304,...,-0.00888,-0.011763,-0.014029,-0.016573,-0.019104,-0.021638,-0.02374,-0.025141,-0.025941,-0.026186
4,Vip-IRES-Cre,novel,-0.016216,-0.015359,-0.012423,-0.006652,0.002333,0.014424,0.027083,0.038435,...,-0.007983,-0.009843,-0.011842,-0.01385,-0.015749,-0.017249,-0.01832,-0.019165,-0.019692,-0.019805
5,Sst-IRES-Cre,familiar,-0.038428,-0.038008,-0.036131,-0.031397,-0.022822,-0.010479,0.004808,0.021834,...,-0.004231,-0.009645,-0.014305,-0.018472,-0.021807,-0.024443,-0.026274,-0.027543,-0.028241,-0.028438
6,Sst-IRES-Cre,familiar,-0.037555,-0.037229,-0.035603,-0.03218,-0.026323,-0.018015,-0.007693,0.003786,...,-0.008853,-0.012475,-0.015595,-0.018669,-0.021391,-0.023792,-0.025529,-0.026761,-0.027475,-0.027793
7,Sst-IRES-Cre,familiar,0.004075,0.004549,0.005734,0.007764,0.010187,0.013015,0.015688,0.018153,...,-0.002084,-0.001742,-0.001522,-0.001386,-0.001655,-0.001822,-0.001852,-0.00187,-0.001664,-0.001523
8,Sst-IRES-Cre,familiar,-0.013727,-0.013189,-0.010995,-0.006574,0.00014,0.009138,0.018523,0.026657,...,-0.001242,-0.00345,-0.005274,-0.007189,-0.0092,-0.010844,-0.012135,-0.013091,-0.013586,-0.013707
9,Sst-IRES-Cre,familiar,-0.118615,-0.117741,-0.112992,-0.101228,-0.080441,-0.048937,-0.009388,0.033699,...,-0.022044,-0.034592,-0.045794,-0.05603,-0.065276,-0.073983,-0.081038,-0.086088,-0.089018,-0.08999


In [10]:
# y holds the label column (experience level)
y = wide_etrs['experience_level']
# x holds every remaining column: cre_line plus one dff column per time point
x = wide_etrs.drop('experience_level', axis = 1)
x.shape

(125, 302)

In [13]:
# dtype is given explicitly so the dummy columns are always 0-1:
# pandas >= 2.0 would otherwise produce True/False columns (older versions defaulted to uint8).
x = pd.get_dummies(x, dtype = np.uint8) # this splits cre line into two columns with 0-1
# it is a very simplified approach to one hot encoding 
# one hot encoding is explained here at the beginning https://www.kaggle.com/code/dansbecker/using-categorical-data-with-one-hot-encoding/notebook
x.head()

Unnamed: 0,-3.0,-2.98,-2.96,-2.94,-2.92,-2.9,-2.88,-2.86,-2.84,-2.82,...,2.8600000000000003,2.88,2.9000000000000004,2.92,2.9400000000000004,2.96,2.9800000000000004,3.0,cre_line_Sst-IRES-Cre,cre_line_Vip-IRES-Cre
0,-0.083928,-0.082117,-0.074059,-0.055879,-0.026093,0.015488,0.063915,0.110883,0.149909,0.178006,...,-0.043578,-0.051179,-0.059871,-0.067221,-0.072654,-0.076958,-0.079442,-0.079739,0,1
1,-0.009587,-0.00846,-0.004084,0.004617,0.017525,0.034205,0.051518,0.066048,0.07598,0.080846,...,-0.022453,-0.023364,-0.023958,-0.023461,-0.022795,-0.022323,-0.021877,-0.021403,0,1
2,-0.044358,-0.043281,-0.038294,-0.027171,-0.007935,0.019344,0.050802,0.080198,0.104242,0.120103,...,-0.022871,-0.027302,-0.032668,-0.038312,-0.043111,-0.046742,-0.04842,-0.048953,0,1
3,-0.026337,-0.025343,-0.021658,-0.01372,-0.000437,0.017417,0.037148,0.056304,0.071407,0.08117,...,-0.014029,-0.016573,-0.019104,-0.021638,-0.02374,-0.025141,-0.025941,-0.026186,0,1
4,-0.016216,-0.015359,-0.012423,-0.006652,0.002333,0.014424,0.027083,0.038435,0.046435,0.050468,...,-0.011842,-0.01385,-0.015749,-0.017249,-0.01832,-0.019165,-0.019692,-0.019805,0,1
