#  Intro

This notebook tests the conversion from raw neural data to processed xarray objects.

---
# Setup

In [2]:
#%% Imports
import pandas as pd

from popy.io_tools import load_behavior, load_neural_data, load_metadata, load_neural_metadata
from popy.behavior_data_tools import *
from popy.neural_data_tools import *
from popy.decoding.population_decoders import linear_decoding
from popy.plotting.plotting_tools import plot_keypoints


---
# Compare neural data to Clement's

## Load metadata

In [3]:
# Load the summary table of all data handed over by Clement (behavior and neural).
behav_metadata = load_metadata()

# Rewrite `session` as '{monkey}_{session}' by pairing the two columns row by row.
new_session = list(map('{}_{}'.format, behav_metadata['monkey'], behav_metadata['session']))
behav_metadata['session'] = new_session
behav_metadata

Unnamed: 0,monkey,recording_day,session,depth,session_of_day,block_len_valid,n_blocks,position_MCC,coords_MCC,position_LPFC,coords_LPFC,LPFC_subregion,MCC_spikes_exist,MCC_file_name,LPFC_spikes_exist,LPFC_file_name,behav_file_name,interrupted_trials,session_complete
0,ka,2020-05-23,ka_230520,7042,1,True,10.0,gm,"(3, 3)",ki,"(-1, -1)",vLPFC,True,spk_dataset_MCC_ka230520_001.txt,False,,ka230520_gmki_7042001.mat,1.0,True
1,ka,2020-06-05,ka_050620,6914,2,True,7.0,ej,"(5, 0)",jf,"(0, -4)",vLPFC,True,spk_dataset_MCC_ka050620_002.txt,True,spk_dataset_LPFC_ka050620_002.txt,ka050620_ejjf_6914002.mat,0.0,True
2,ka,2020-06-08,ka_080620,7621,3,True,9.0,fn,"(4, 4)",kh,"(-1, -2)",vLPFC,True,spk_dataset_MCC_ka080620_003.txt,True,spk_dataset_LPFC_ka080620_003.txt,ka080620_fnkh_7621003.mat,1.0,False
3,ka,2020-06-10,ka_100620,7639,1,True,10.0,el,"(5, 2)",lg,"(-2, -3)",vLPFC,True,spk_dataset_MCC_ka100620_001.txt,True,spk_dataset_LPFC_ka100620_001.txt,ka100620_ellg_7639001.mat,0.0,True
4,ka,2020-06-16,ka_160620,8565,1,True,10.0,go,"(3, 5)",lj,"(-2, 0)",vLPFC,True,spk_dataset_MCC_ka160620_001.txt,True,spk_dataset_LPFC_ka160620_001.txt,ka160620_golj_8565001.mat,1.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
124,po,2022-07-20,po_200722,5782,2,True,9.0,di,"(6, -1)",le,"(-2, -5)",vLPFC,True,spk_dataset_MCC_po200722_002.txt,True,spk_dataset_LPFC_po200722_002.txt,po200722_dile_5782002.mat,0.0,True
125,po,2022-07-28,po_280722,5869,2,True,11.0,gh,"(3, -2)",pg,"(-6, -3)",vLPFC,True,spk_dataset_MCC_po280722_002.txt,True,spk_dataset_LPFC_po280722_002.txt,po280722_ghpg_5869002.mat,1.0,True
126,po,2022-08-09,po_090822,5184,3,True,7.0,gp,"(3, 6)",ne,"(-4, -5)",vLPFC,True,spk_dataset_MCC_po090822_003.txt,True,spk_dataset_LPFC_po090822_003.txt,po090822_gpne_5184003.mat,0.0,True
127,po,2022-08-31,po_310822,5522,1,True,10.0,gj,"(3, 0)",ld,"(-2, -6)",vLPFC,True,spk_dataset_MCC_po310822_001.txt,True,spk_dataset_LPFC_po310822_001.txt,po310822_gjld_5522001.mat,2.0,True


In [4]:
# Load the CSV that lists all neurons that we processed.
neural_metadata_zsombi = load_neural_metadata()

# Combine monkey and session so that '{monkey}_{session}' serves as the unique identifier.
new_session = list(map('{}_{}'.format, neural_metadata_zsombi['monkey'], neural_metadata_zsombi['session']))
neural_metadata_zsombi['session'] = new_session
neural_metadata_zsombi

Unnamed: 0,monkey,session,area,subregion,channel,unit,unit_zs
0,ka,ka_010720,LPFC,dLPFC,4,8,LPFC_04_01
1,ka,ka_010720,LPFC,dLPFC,5,12,LPFC_05_01
2,ka,ka_010720,LPFC,dLPFC,5,68,LPFC_05_02
3,ka,ka_010720,LPFC,dLPFC,5,18,LPFC_05_03
4,ka,ka_010720,LPFC,dLPFC,6,74,LPFC_06_01
...,...,...,...,...,...,...,...
5083,po,po_310822,MCC,MCC,12,29,MCC_12_02
5084,po,po_310822,MCC,MCC,12,30,MCC_12_01
5085,po,po_310822,MCC,MCC,13,166,MCC_13_01
5086,po,po_310822,MCC,MCC,13,165,MCC_13_02


In [5]:
# Load the table of neurons that Clement analysed (space-separated file).
floc = '/Users/zsombi/ZSOMBI/SBRI/PoPy/data/neural_summary_Clement.csv'
neural_metadata_clement = pd.read_csv(floc, sep=" ")

# Each raw `session` label is split on '_': the first piece is taken as the area,
# the second piece as a two-character monkey code followed by the session code.
new_df = []
for label, unit in zip(neural_metadata_clement['session'], neural_metadata_clement['unit']):
    pieces = label.split('_')
    area, monkey_session = pieces[0], pieces[1]
    monkey, session = monkey_session[:2], monkey_session[2:]
    new_df.append(dict(
        monkey=monkey,
        session=f'{monkey}_{session}',
        area=area,
        unit_id=unit,
    ))

# Replace the raw table with the reformatted one (columns: monkey, session, area, unit_id).
neural_metadata_clement = pd.DataFrame(new_df)
neural_metadata_clement

Unnamed: 0,monkey,session,area,unit_id
0,ka,ka_010720,LPFC,8
1,ka,ka_010720,LPFC,12
2,ka,ka_010720,LPFC,18
3,ka,ka_010720,LPFC,24
4,ka,ka_010720,LPFC,33
...,...,...,...,...
5655,po,po_310822,MCC,159
5656,po,po_310822,MCC,161
5657,po,po_310822,MCC,163
5658,po,po_310822,MCC,165


## Create stats

In [22]:
def _print_neuron_counts(neuron_df):
    """Print the number of distinct sessions and of rows (neurons), per monkey and pooled.

    Parameters
    ----------
    neuron_df : pd.DataFrame
        Table with one row per neuron; must provide the `monkey` and `session` columns.
    """
    # `nunique()` is the pandas-native distinct count, so this cell no longer depends on
    # `np`, which the notebook's import cell does not import explicitly.
    # NOTE: `nunique()` ignores missing values; session labels are built as strings
    # earlier in the notebook, so none are expected here.
    for monkey_name, monkey_df in neuron_df.groupby('monkey'):
        print(f'\t{monkey_name}: # Sessions: {monkey_df.session.nunique()}, # Neurons: {len(monkey_df)}')
    print(f'\tboth: # Sessions: {neuron_df.session.nunique()}, # Neurons: {len(neuron_df)}')


def _print_session_validity(session_df):
    """Print total / invalid / valid session counts, per monkey and pooled.

    Parameters
    ----------
    session_df : pd.DataFrame
        Table with one row per session; must provide the `monkey` and
        `block_len_valid` columns.
    """
    def _report(label, df):
        # A row counts as invalid when `block_len_valid == 0` and valid when `== 1`.
        n_invalid = int((df.block_len_valid == 0).sum())
        n_valid = int((df.block_len_valid == 1).sum())
        print(f'\tSessions {label}: {len(df)} in total; {n_invalid} invalid, {n_valid} valid')

    for monkey_name, monkey_df in session_df.groupby('monkey'):
        _report(monkey_name, monkey_df)
    _report('both', session_df)


print('\nNumber of sessions & neurons per monkey in Clement data:')
_print_neuron_counts(neural_metadata_clement)

print('\nNumber of sessions per monkey that we received from Clement:')
_print_session_validity(behav_metadata)

print('\nNumber of sessions & neurons per monkey in neural data we analyse:')
_print_neuron_counts(neural_metadata_zsombi)

# Restrict each neuron table to the sessions that also occur in the other one.
print('\nNumber of sessions & neurons per monkey in Clement data (if we consider only sessions that we have as well):')
neural_metadata_clement_temp = neural_metadata_clement[neural_metadata_clement.session.isin(neural_metadata_zsombi.session)]
_print_neuron_counts(neural_metadata_clement_temp)

print('\nNumber of sessions & neurons per monkey in Zsombi data (if we consider only sessions that Cldment has as well):')
neural_metadata_zsombi_temp = neural_metadata_zsombi[neural_metadata_zsombi.session.isin(neural_metadata_clement.session)]
_print_neuron_counts(neural_metadata_zsombi_temp)



Number of sessions & neurons per monkey in Clement data:
	ka: # Sessions: 71, # Neurons: 3108
	po: # Sessions: 60, # Neurons: 2552
	both: # Sessions: 131, # Neurons: 5660

Number of sessions per monkey that we received from Clement:
	Sessions ka: 69 in total; 7 invalid, 62 valid
	Sessions po: 60 in total; 5 invalid, 55 valid
	Sessions both: 129 in total; 12 invalid, 117 valid

Number of sessions & neurons per monkey in neural data we analyse:
	ka: # Sessions: 62, # Neurons: 2715
	po: # Sessions: 55, # Neurons: 2373
	both: # Sessions: 117, # Neurons: 5088

Number of sessions & neurons per monkey in Clement data (if we consider only sessions that we have as well):
	ka: # Sessions: 61, # Neurons: 2704
	po: # Sessions: 55, # Neurons: 2351
	both: # Sessions: 116, # Neurons: 5055

Number of sessions & neurons per monkey in Zsombi data (if we consider only sessions that Cldment has as well):
	ka: # Sessions: 61, # Neurons: 2687
	po: # Sessions: 55, # Neurons: 2373
	both: # Sessions: 116, # N

In [None]:
# Which sessions appear in Clement's neural metadata but not in the session
# (behavior) metadata, and the other way round?
sessions_clement = set(np.unique(neural_metadata_clement['session']))
sessions_zsombi = set(np.unique(behav_metadata['session']))

print(
    f'\nNumber of sessions in neural metadata Clement: {len(sessions_clement)}',
    f'Number of sessions in behavior metadata: {len(sessions_zsombi)}',
    sep='\n',
)

# Present in Clement's neural metadata, absent from the behavior metadata.
missing_sessions = sessions_clement.difference(sessions_zsombi)
print(f'sessions in clement but not in zsombi: {missing_sessions}')

# Present in the behavior metadata, absent from Clement's neural metadata.
# (`missing_sessions` is overwritten here and keeps this second result.)
missing_sessions = sessions_zsombi.difference(sessions_clement)
print(f'sessions in zsombi but not in clemet: {missing_sessions}')



Number of sessions in neural metadata Clement: 131
Number of sessions in behavior metadata: 129
sessions in clement but not in zsombi: {'ka_300721', 'ka_190722', 'ka_130820', 'ka_150322', 'ka_310522'}
sessions in zsombi but not in clemet: {'ka_220121', 'ka_290721', 'ka_120820'}


---
# Check one processed session file

In [8]:
monkey, session = 'ka', '010720'

neural_data = load_neural_data(monkey, session, hz=1000)  # load neural data (spikes)
#neural_data_100 = load_neural_data(monkey, session, hz=100)  # load neural data (spikes)

# The `.min()` / `.max()` / `.sum()` reductions come back as 0-d DataArrays, which print
# as a multi-line array repr; `.item()` extracts the plain scalar so that each summary
# value is printed on a single line.
print(f'N units 1000Hz: {neural_data.unit.shape[0]}')
print(f'T min 1000Hz: {neural_data.time.min().item()}')
print(f'T max 1000Hz: {neural_data.time.max().item()}')
print(f'N_spikes 1000Hz: {neural_data.spike_trains.sum().item()}')

neural_data

N units 1000Hz: 38
T min 1000Hz: <xarray.DataArray 'time' ()>
array(11.354)
T max 1000Hz: <xarray.DataArray 'time' ()>
array(3615.047)
N_spikes 1000Hz: <xarray.DataArray 'spike_trains' ()>
array(832432.)
