# Photometry Preprocessing Notebook
1. import packages
2. load functions
3. process your folders!

### 1. import packages

In [1]:
import pandas as pd
import numpy as np
import os
import pyarrow
from pyarrow import csv
import shutil

### 2. load functions (run both function definitions)

In [5]:
def remove_consecutive(input_list, limit=5):
    result = [input_list[0]] 

    for value in input_list[1:]:
        if abs(value - result[-1]) > limit:
            result.append(value)

    return result

def process_one_folder(folder, method, trialtype):
    dcol = ['D' + str(i) for i in range(128)]

    rewardtrials = []
    ititrials = []
    puntrials = []

    
    if trialtype == 'rt':
        rewardtrials = [x for x in range(1, 38)]
    
    elif trialtype == 'rn':
        rewardtrials = [0, 2, 3, 4, 6, 8, 10, 12, 14, 15, 16, 18, 20, 22, 24, 26, 27, 28, 29, 30, 32, 34, 36, 38, 40, 42, 44, 46, 47, 48, 50]
        ititrials = [1, 5, 7, 9, 11, 13, 17, 19, 21, 23, 25, 31, 33, 35, 37, 39, 41, 43, 45, 49]
        
        rewardtrials = [x + 1 for x in rewardtrials]
        ititrials = [x + 1 for x in ititrials]
    
    elif trialtype == 'rnp':
        
        df2 = pd.read_csv(r"C:\Users\tsaivs\Downloads\Rew_Neutral_Pun_HEY1.csv", header=None)
        trials = df2.iloc[:,1]
        for i in range(0, len(df2.iloc[:,1])):
            if (df2.iloc[:,1][i] == 'p'):
                puntrials.append(i + 1)
            elif (df2.iloc[:,1][i] == 'i'):
                ititrials.append(i + 1)
            elif (df2.iloc[:,1][i] == 't'):
                rewardtrials.append(i + 1)
    
    if method == 'guppy':
        for file in os.listdir(folder):
            cur_csv = os.path.join(folder,file)
            df = pd.read_csv(cur_csv)
            print(file)

            if file.startswith('TTL'):

                df['ttls'] = df[dcol].max(axis=1)

                newdf = pd.DataFrame()
                t = list(df['TIME'].loc[df['ttls'] == 1])
                if (len(t) == 0):
                    continue
                else:
                    newdf['timestamps'] = remove_consecutive(t, limit=8.5)
                    newdf = newdf.reset_index(drop=True)

                    rew = newdf.iloc[rewardtrials]
                    iti = newdf.iloc[ititrials]
                    pun = newdf.iloc[puntrials]

                    newdf.to_csv(os.path.join(folder,'TTL_guppy_processed_2.csv'), index=None)
                    rew.to_csv(os.path.join(folder, 'rew_TTL_guppy_processed.csv'), index=None)
                    iti.to_csv(os.path.join(folder, 'neutral_TTL_guppy_processed.csv'), index=None)
                    pun.to_csv(os.path.join(folder, 'pun_TTL_guppy_processed.csv'), index=None)
                
            elif file.startswith('405') or file.startswith('470'):
                hz =  1 / df['Sampling_Freq'][0] 
                newdf = pd.DataFrame({'data': df[dcol].to_numpy().flatten(), 'sampling_rate': df['Sampling_Freq'][0]})
                newdf['timestamps'] = newdf.index * hz
                newdf['sampling_rate'][1:] = None
                newdf = newdf[['timestamps', 'data' , 'sampling_rate']]
                newdf_ = pyarrow.Table.from_pandas(newdf)
                pyarrow.csv.write_csv(newdf_, os.path.join(folder, file.split('.')[0] + '_guppy_processed.csv') )
    
    elif method == 'pmat':
        df_405 = pd.read_csv(os.path.join(folder,'405.csv'))
        df_470 =  pd.read_csv(os.path.join(folder,'470.csv'))

        newdf = pd.DataFrame()
        newdf['TimeStamp'] = df_470['TIME'].copy()
        newdf['Signal'] = df_470[dcol].mean(axis=1)
        newdf['Control'] = df_405[dcol].mean(axis=1)

        newdf.to_csv(os.path.join(folder,'pmat_processed.csv'))
    
    elif method == 'pmatttl':
        df_ttl = pd.read_csv(os.path.join(folder,'TTL.csv'))
        df_405 = pd.read_csv(os.path.join(folder,'405.csv'))
        df_470 =  pd.read_csv(os.path.join(folder,'470.csv'))

        newdf = pd.DataFrame()
        newdf['TimeStamp'] = df_470['TIME'].copy()
        newdf['Signal'] = df_470[dcol].mean(axis=1)
        newdf['Control'] = df_405[dcol].mean(axis=1)
        newdf['ttls'] = df_ttl[dcol].max(axis=1)

        firstidx = np.where(newdf.ttls == 1)[0][0]

        newdf['TimeStamp'] -= newdf['TimeStamp'][firstidx]
        newdf = newdf[['TimeStamp', 'Signal', 'Control']][firstidx:].reset_index(drop=True)

        newdf.to_csv(os.path.join(folder,'pmat_ttl_processed.csv'))

In [4]:
def process_all_folders(folders, method, trialtype):
    
    for root, directories, files in os.walk(folders):
        for directory in directories:
            directory_path = os.path.join(root, directory)
            # if all three files exists
            required_strings = ('405', '470', 'TTL')
            if all(any(string in filename for filename in os.listdir(directory_path)) for string in required_strings):
                process_one_folder(directory_path, method, trialtype)
            
            print(f'Processed: {directory_path} with {method}')

### 3. processing
- there are two functions: `process_one_folder()` and `process_all_folders()`
- `process_one_folder()` will process the contents of a folder of raw data extracted from synapse
- `process_one_folder()` must be given 3 arguments: (**filepath**, **processing_method**, **trialtype**)
- **filepath**: path to your folder or directory of folders
- **processing_method**: either *'guppy'*, *'pmat'*, or *'pmatttl'*
-*'guppy'* formats for processing in GuPPy
- *'pmat'* formats for processing in PMAT,
-*'pmatttl'* formats for processing in pmat and aligns data to the first ttl
- **trialtype**: exclusive to guppy processing, specify if the trialtypes are reward only (*'rt'*), reward/neutral (*'rn'*), or reward/neutral/punishment (*'rnp'*)
        -folder must contain files named 405.csv, 470.csv, and TTL.csv
- `process_all_folders()` will iteratively process all folders and subfolders within a directory that can be processed with `process_one_folder()`

In [112]:
%%time
process_one_folder(r'C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1018_cohort\test\rew_neutral_punish\RNP_D3\99_3', 'guppy', 'rnp')

CPU times: total: 4.58 s
Wall time: 4.46 s


In [5]:
%%time
folder = r"C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass"

process_all_folders(folder, 'guppy', 'rnp')

Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD1_umass with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD2_umass with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD3_umass with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD1_umass\47_1 with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD1_umass\47_3 with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD1_umass\48_1 with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD1_umass\48_2 with guppy
Processed: C:\Users\Valerie\Documents\Tejeda_Lab\headfixed\1130_umass\Raw_Photom_Data\RTD2_umass\47_1 with guppy
Processed: C:\