In [None]:
# This notebook downloads the data in meaningful chunks for making comparisons, so that Google Colab does not crash.

In [None]:
!python -m pip install --upgrade pip --quiet
!pip install mindscope_utilities --upgrade --quiet
!pip install icecream --quiet

import pandas as pd
import numpy as np
from icecream import ic # icecream is an alternative to print useful for debugging code
# https://github.com/gruns/icecream
import matplotlib.pyplot as plt
import seaborn as sns
import os
sns.set_theme(style="darkgrid") # sets the overall plot theme (how plots look)
# https://www.python-graph-gallery.com/104-seaborn-themes
sns.set_palette('colorblind') # colorblind-friendly palette; swap in any other seaborn palette name if you prefer
# https://www.codecademy.com/article/seaborn-design-ii

from allensdk.brain_observatory.behavior.behavior_project_cache import VisualBehaviorOphysProjectCache
import mindscope_utilities.visual_behavior_ophys as ophys

[0m

In [None]:
# Open the S3-backed project cache; files are stored under the local 'data' directory.
cache = VisualBehaviorOphysProjectCache.from_s3_cache(cache_dir='data')
session_table = cache.get_ophys_session_table()
# reset_index() turns the table's index into a regular column.
experiment_table = cache.get_ophys_experiment_table().reset_index()

# Keep only the experiments whose project_code is VisualBehaviorMultiscope.
multiscope_experiment_ids = (
  experiment_table
  .loc[experiment_table['project_code'] == 'VisualBehaviorMultiscope', 'ophys_experiment_id']
  .tolist()
)
multiscope_experiment_table = experiment_table[
  experiment_table['ophys_experiment_id'].isin(multiscope_experiment_ids)
]

# reminder: an experiment is a single imaging plane acquired in a single session
# https://allensdk.readthedocs.io/en/latest/visual_behavior_optical_physiology.html

In [None]:
# Mount Google Drive and save the data there so it does not need to be downloaded again.
# You might get a prompt asking you to authorize access.
from google.colab import drive
drive.mount('/content/gdrive')
# https://towardsdatascience.com/downloading-datasets-into-google-drive-via-google-colab-bcb1b30b0166
# exist_ok=True makes re-running this cell a no-op when the folder is already there,
# without a separate (racy) existence check.
os.makedirs('/content/gdrive/MyDrive/neuromatch', exist_ok=True)

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
len(multiscope_experiment_ids) # number of multiscope experiment ids collected from the experiment table

867

In [None]:
# Narrowing down the data we need:
# focus on layer 2/3, i.e. 175 <= 'imaging_depth' <= 330 (both bounds included).
multiscope_experiment_table = multiscope_experiment_table[
  multiscope_experiment_table['imaging_depth'].between(175, 330)
]
multiscope_experiment_table.shape

(438, 26)

In [None]:
# Keep only the rows whose targeted_structure is 'VISp'.
multiscope_experiment_table = multiscope_experiment_table.loc[
  lambda table: table['targeted_structure'] == 'VISp'
]
multiscope_experiment_table.shape

(205, 26)

In [None]:
# Number of distinct mouse_id values in the filtered table.
multiscope_experiment_table['mouse_id'].nunique()

22

In [None]:
# Number of rows (experiments) per cre_line.
multiscope_experiment_table['cre_line'].value_counts()

Slc17a7-IRES2-Cre    86
Sst-IRES-Cre         60
Vip-IRES-Cre         59
Name: cre_line, dtype: int64

In [None]:
# Number of rows (experiments) per session_type.
multiscope_experiment_table['session_type'].value_counts()

OPHYS_4_images_B            42
OPHYS_1_images_A            37
OPHYS_3_images_A            36
OPHYS_6_images_B            32
OPHYS_2_images_A_passive    29
OPHYS_5_images_B_passive    28
OPHYS_2_images_B_passive     1
Name: session_type, dtype: int64

In [None]:
# List the columns currently available in the table.
multiscope_experiment_table.columns

Index(['ophys_experiment_id', 'equipment_name', 'full_genotype', 'mouse_id',
       'reporter_line', 'driver_line', 'sex', 'age_in_days', 'cre_line',
       'indicator', 'session_number', 'prior_exposures_to_session_type',
       'prior_exposures_to_image_set', 'prior_exposures_to_omissions',
       'ophys_session_id', 'behavior_session_id', 'ophys_container_id',
       'project_code', 'imaging_depth', 'targeted_structure',
       'date_of_acquisition', 'session_type', 'experience_level', 'passive',
       'image_set', 'file_id'],
      dtype='object')

In [None]:
# Reduce the table to the identifier and descriptor columns listed below.
multiscope_experiment_table = multiscope_experiment_table.loc[:, [
  'ophys_experiment_id',
  'ophys_session_id',
  'ophys_container_id',
  'mouse_id',
  'cre_line',
  'session_type',
  'imaging_depth',
  'experience_level',
]]

In [None]:
# (rows, columns) of the table at this point.
multiscope_experiment_table.shape

(205, 8)

In [None]:
# Distinct imaging_depth values present in the table.
multiscope_experiment_table['imaging_depth'].unique()

array([225, 263, 267, 269, 177, 259, 219, 221, 211, 231, 217, 213, 300,
       306, 229, 298, 232, 175, 275, 285, 179, 270, 272, 251, 302, 227,
       278, 274, 276, 223, 215, 218, 273, 261, 230, 284, 233, 304, 292,
       235, 310, 280, 187, 183, 279, 320, 321, 228, 220, 322, 226, 315,
       181])

In [None]:
# Count experiments for every (cre_line, experience_level, session_type) combination.
(
  multiscope_experiment_table
  .groupby(['cre_line', 'experience_level', 'session_type'])['ophys_experiment_id']
  .count()
  .reset_index()
)

Unnamed: 0,cre_line,experience_level,session_type,ophys_experiment_id
0,Slc17a7-IRES2-Cre,Familiar,OPHYS_1_images_A,14
1,Slc17a7-IRES2-Cre,Familiar,OPHYS_2_images_A_passive,14
2,Slc17a7-IRES2-Cre,Familiar,OPHYS_3_images_A,16
3,Slc17a7-IRES2-Cre,Novel 1,OPHYS_4_images_B,11
4,Slc17a7-IRES2-Cre,Novel >1,OPHYS_4_images_B,8
5,Slc17a7-IRES2-Cre,Novel >1,OPHYS_5_images_B_passive,12
6,Slc17a7-IRES2-Cre,Novel >1,OPHYS_6_images_B,11
7,Sst-IRES-Cre,Familiar,OPHYS_1_images_A,13
8,Sst-IRES-Cre,Familiar,OPHYS_2_images_A_passive,7
9,Sst-IRES-Cre,Familiar,OPHYS_3_images_A,10


In [None]:
# Distinct experience_level values present in the table.
multiscope_experiment_table['experience_level'].unique()

array(['Familiar', 'Novel 1', 'Novel >1'], dtype=object)

In [None]:
# Save the filtered table to the neuromatch folder on Google Drive.
multiscope_experiment_table.to_csv(
  path_or_buf='/content/gdrive/MyDrive/neuromatch/multiscope_experiment_table_filtered.csv'
)

In [None]:
  # Example selection: experiment ids for Sst-IRES-Cre / Familiar / OPHYS_2_images_A_passive.
  multiscope_experiment_table.loc[
      (multiscope_experiment_table['cre_line'] == 'Sst-IRES-Cre')
      & (multiscope_experiment_table['experience_level'] == 'Familiar')
      & (multiscope_experiment_table['session_type'] == 'OPHYS_2_images_A_passive'),
      'ophys_experiment_id',
  ].tolist()

[953659743, 1081264133, 1081264129, 867410509, 977247468, 851093285, 857698006]

In [None]:
def _build_neural_data(experiment, experiment_id):
  """Return the tidy cell dataframe of one experiment with selected metadata added as columns."""
  # experiment_id is unused here; it is accepted so all builders share one signature.
  metadata_keys = ['cre_line', 'imaging_depth', 'mouse_id', 'ophys_experiment_id', 'session_type', 'targeted_structure', 'ophys_session_id']
  neural_data = ophys.build_tidy_cell_df(experiment)
  for metadata_key in metadata_keys:
    neural_data[metadata_key] = experiment.metadata[metadata_key]
  return neural_data


def _build_stimulus_presentations(experiment, experiment_id):
  """Return the stimulus presentations of one experiment, minus 'image_set', tagged with its id."""
  stimulus_presentations = experiment.stimulus_presentations.reset_index().drop(columns = ['image_set'])
  stimulus_presentations.insert(0, 'ophys_experiment_id', [experiment_id] * stimulus_presentations.shape[0])
  return stimulus_presentations


def _build_eye_tracking(experiment, experiment_id):
  """Return frame, timestamps, pupil_area and likely_blink of one experiment, tagged with its id."""
  eye_tracking = experiment.eye_tracking.reset_index()
  eye_tracking = eye_tracking[['frame', 'timestamps', 'pupil_area', 'likely_blink']]
  eye_tracking.insert(0, 'ophys_experiment_id', [experiment_id] * eye_tracking.shape[0])
  return eye_tracking


def _build_trials(experiment, experiment_id):
  """Return the trials table of one experiment, tagged with its id."""
  trials = experiment.trials.reset_index()
  trials.insert(0, 'ophys_experiment_id', [experiment_id] * trials.shape[0])
  return trials


# Maps each supported data_type to the helper that builds its per-experiment dataframe.
_DATA_TYPE_BUILDERS = {
  'neural_data': _build_neural_data,
  'stimulus_presentations': _build_stimulus_presentations,
  'eye_tracking': _build_eye_tracking,
  'trials': _build_trials,
}


def download_data_chunk_to_google_drive(multiscope_experiment_table,
                                        cre_line : str,
                                        experience_level : str,
                                        session_type : str,
                                        data_type : str = 'neural_data'):

  """
  Function that downloads data and saves it to google drive.

  Selects the experiments matching cre_line, experience_level and session_type,
  loads each one through the module-level `cache`, concatenates the requested
  data into one dataframe and writes it to
  /content/gdrive/MyDrive/neuromatch/{cre_line}_{experience_level}_{session_type}/{data_type}.csv

  This is best effort: if anything goes wrong while downloading or saving, the
  error is reported with ic and no file is written for this data_type, so a
  loop over many chunks keeps going. KeyboardInterrupt is not caught, so a long
  download can still be stopped by hand.

    Args:
      multiscope_experiment_table (pd.DataFrame): experiment table with at least the columns
        'cre_line', 'experience_level', 'session_type' and 'ophys_experiment_id'
      cre_line (str) : one of 'Sst-IRES-Cre', 'Vip-IRES-Cre', 'Slc17a7-IRES2-Cre'
      experience_level (str) : one of 'Familiar', 'Novel 1', 'Novel >1'
      session_type (str): one of 'OPHYS_1_images_A', 'OPHYS_2_images_A_passive', 'OPHYS_3_images_A',
        'OPHYS_4_images_B', 'OPHYS_5_images_B_passive', 'OPHYS_6_images_B', 'OPHYS_2_images_B_passive'
      data_type (str): one of 'neural_data', 'stimulus_presentations', 'eye_tracking', 'trials'

    Returns:
      nothing, saves data files to google drive
  """

  drive_folder = f'/content/gdrive/MyDrive/neuromatch/{cre_line}_{experience_level}_{session_type}'
  # os.mkdir (not makedirs) on purpose: if the parent neuromatch folder is missing
  # this raises instead of quietly creating the whole path.
  if not os.path.exists(drive_folder):
    os.mkdir(drive_folder)

  ic(cre_line, experience_level, session_type, data_type)
  selected_experiments_ids = \
  multiscope_experiment_table[(multiscope_experiment_table['cre_line'] == cre_line) &
                              (multiscope_experiment_table['experience_level'] == experience_level) &
                              (multiscope_experiment_table['session_type'] == session_type)]['ophys_experiment_id'].tolist()
  ic(selected_experiments_ids)

  build_dataframe = _DATA_TYPE_BUILDERS.get(data_type)
  if build_dataframe is None:
    # An unknown data_type used to be ignored silently; say so, but still do not raise.
    ic('UNKNOWN data_type, nothing saved', data_type)
    return

  if not selected_experiments_ids:
    # Nothing to concatenate (pd.concat would fail on an empty list), so report and stop.
    ic('NO EXPERIMENTS match this selection, nothing saved')
    return

  try:
    per_experiment_frames = []
    for experiment_id in selected_experiments_ids:

      ic('EXPERIMENT', experiment_id)
      experiment = cache.get_behavior_ophys_experiment(experiment_id)

      experiment_frame = build_dataframe(experiment, experiment_id)
      ic(experiment_frame.shape)
      per_experiment_frames.append(experiment_frame)

    combined = pd.concat(per_experiment_frames).reset_index()
    ic(combined.shape)
    combined.to_csv(f'{drive_folder}/{data_type}.csv')
  except Exception as error:
    # Best effort: report the failure instead of hiding it, and let the caller continue.
    ic('DOWNLOAD FAILED, nothing saved', data_type, error)

In [None]:
# Download all four data types for one chunk: Vip-IRES-Cre / Familiar / OPHYS_2_images_A_passive.
for d in ('neural_data', 'stimulus_presentations', 'eye_tracking', 'trials'):
  download_data_chunk_to_google_drive(
    multiscope_experiment_table,
    cre_line='Vip-IRES-Cre',
    experience_level='Familiar',
    session_type='OPHYS_2_images_A_passive',
    data_type=d,
  )

In [None]:
# for c in ['Sst-IRES-Cre', 'Vip-IRES-Cre', 'Slc17a7-IRES2-Cre']:
#   for e in ['Familiar', 'Novel 1', 'Novel >1']:
#     for s in ['OPHYS_1_images_A', 'OPHYS_2_images_A_passive', 'OPHYS_3_images_A', 'OPHYS_4_images_B', 'OPHYS_5_images_B_passive', 'OPHYS_6_images_B', 'OPHYS_2_images_B_passive']:
#       for d in ['neural_data', 'stimulus_presentations', 'eye_tracking', 'trials']:
#         download_data_chunk_to_google_drive(multiscope_experiment_table, cre_line = c, experience_level = e, session_type = s, data_type = d)