In [56]:
import os
import re
from pathlib import Path

import numpy as np
from scipy.io import loadmat
import pandas as pd

# Data locations. Each directory can be overridden through an environment
# variable so the notebook runs on machines other than the original author's;
# the defaults are unchanged. os.path.join(path, "") guarantees a trailing
# separator, so plain string concatenation with these values keeps working.
DATASET_DIR = os.path.join(os.environ.get("CBD_DATASET_DIR", "/home/nero/datasets/CBD/"), "")
OUTPUT_DIR  = os.path.join(os.environ.get("CBD_OUTPUT_DIR", "/home/nero/datasets/processed/"), "")
# The overview table sits inside the dataset directory.
OVERVIEW_PATH = os.path.join(DATASET_DIR, "overview.csv")

def rem_extract(lfp, sleep_trans):
    """
    Slice an LFP recording into one segment per REM episode.

    Parameters:
        lfp (numpy.ndarray): Recording to slice along its first axis.
        sleep_trans (numpy.ndarray): Rows whose first two entries are the
            start and end positions of an episode; both are truncated to
            integers with int() before slicing.

    Returns:
        list of numpy.ndarray: ``lfp[start:end]`` for every row of
        ``sleep_trans``, in row order.
    """
    return [lfp[int(bounds[0]):int(bounds[1])] for bounds in sleep_trans]

def create_name(file, overview_df):
    """
    Build the output name for a recording from its path and the overview table.

    The rat number, study day, condition and post-trial number are parsed from
    ``file``; the treatment flag is looked up in ``overview_df``.

    Parameters:
        file (str): Path containing ``Rat<N>_..._SD<N>_<CONDITION>...posttrial<N>``.
        overview_df (pandas.DataFrame): Table with the columns 'Rat no.',
            'Study Day', 'Condition' and 'Treatment'.

    Returns:
        str: ``Rat<rat>_SD<day>_<condition>_<treatment>_posttrial<n>``, where
        treatment is '0' when the looked-up value equals 0 and '1' otherwise.

    Raises:
        ValueError: If ``file`` does not match the expected naming scheme, or
            if no overview row matches the parsed rat, study day and condition.
    """
    # Pattern for matching the information on the rat.
    pattern = r'Rat(\d+)_.*_SD(\d+)_([A-Z]+).*posttrial(\d+)'

    match = re.search(pattern, file)
    if match is None:
        raise ValueError(f"Cannot parse rat/study-day/condition/posttrial from path: {file!r}")

    rat_num = int(match.group(1))
    sd_num = int(match.group(2))
    condition = match.group(3)
    posttrial_num = int(match.group(4))

    # Boolean mask selecting the overview row(s) that describe this recording.
    mask = (
        (overview_df['Rat no.'] == rat_num)
        & (overview_df['Study Day'] == sd_num)
        & (overview_df['Condition'] == condition)
    )
    treatment_values = overview_df.loc[mask, 'Treatment'].values
    if len(treatment_values) == 0:
        raise ValueError(
            f"No overview row for Rat {rat_num}, Study Day {sd_num}, Condition {condition!r}"
        )

    # When several rows match, the first one is used. Any non-zero treatment
    # value is collapsed to '1'.
    treatment = '0' if treatment_values[0] == 0 else '1'

    return f"Rat{rat_num}_SD{sd_num}_{condition}_{treatment}_posttrial{posttrial_num}"

In [57]:
# A post-trial directory name starts with word/hyphen characters followed by
# "posttrial" and at least one more such character (matched case-insensitively).
pattern = r"[\w-]+posttrial[\w-]+"
# Maps each sleep-states file to the HPC recording found in the same directory.
mapped = {}

for root, subdirs, _files in os.walk(DATASET_DIR):
    for subdir in subdirs:
        if not re.match(pattern, subdir, flags=re.IGNORECASE):
            continue

        posttrial_dir = Path(root) / subdir
        # next(..., None) avoids an opaque StopIteration that would abort the
        # whole scan when a directory lacks one of the two files.
        hpc_file = next(posttrial_dir.glob("*HPC*continuous*"), None)
        states_file = next(posttrial_dir.glob('*-states*'), None)
        if hpc_file is None or states_file is None:
            print(f"Skipping {posttrial_dir}: missing HPC or states file")
            continue

        mapped[states_file] = hpc_file

len(mapped)

170

In [58]:
# Preview a few entries rather than dumping the whole mapping into the output.
dict(list(mapped.items())[:3])

{PosixPath('/home/nero/datasets/CBD/Rat5/Rat_OS_Ephys_cbd_chronic_Rat5_411358_SD8_HC_20210721/2021-07-21_13-33-49_posttrial3/2021-07-21_13-33-49_posttrial3-states_ES.mat'): PosixPath('/home/nero/datasets/CBD/Rat5/Rat_OS_Ephys_cbd_chronic_Rat5_411358_SD8_HC_20210721/2021-07-21_13-33-49_posttrial3/HPC_100_CH14_0.continuous.mat'),
 PosixPath('/home/nero/datasets/CBD/Rat5/Rat_OS_Ephys_cbd_chronic_Rat5_411358_SD8_HC_20210721/2021-07-21_11-50-58_posttrial1/2021-07-21_11-50-58_posttrial1-states_ES.mat'): PosixPath('/home/nero/datasets/CBD/Rat5/Rat_OS_Ephys_cbd_chronic_Rat5_411358_SD8_HC_20210721/2021-07-21_11-50-58_posttrial1/HPC_100_CH14_0.continuous.mat'),
 PosixPath('/home/nero/datasets/CBD/Rat5/Rat_OS_Ephys_cbd_chronic_Rat5_411358_SD8_HC_20210721/2021-07-21_12-41-46_posttrial2/2021-07-21_12-41-46_posttrial2-states_ES.mat'): PosixPath('/home/nero/datasets/CBD/Rat5/Rat_OS_Ephys_cbd_chronic_Rat5_411358_SD8_HC_20210721/2021-07-21_12-41-46_posttrial2/HPC_100_CH14_0.continuous.mat'),
 PosixPath

In [61]:
# State code used to select REM in both the 'states' and 'transitions' arrays.
REM_STATE = 5
# Factor converting transition times to LFP sample indices
# (presumably a 2500 Hz sampling rate with times in seconds — TODO confirm).
TIME_TO_SAMPLE_FACTOR = 2500

overview_df = pd.read_csv(OVERVIEW_PATH)
# np.savez does not create missing directories.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for states_path, hpc_path in mapped.items():
    sleep = loadmat(states_path)
    states = np.squeeze(sleep['states'])

    # Skip recordings without REM before loading the (larger) LFP file.
    if not np.any(states == REM_STATE):
        continue

    transitions = sleep['transitions']
    # Keep the REM rows; the last two columns hold the episode start and end.
    rem_transitions = transitions[transitions[:, 0] == REM_STATE][:, -2:]
    rem_transitions = np.floor(rem_transitions * TIME_TO_SAMPLE_FACTOR)

    lfp = np.squeeze(loadmat(hpc_path)['HPC'])
    lfpREM = rem_extract(lfp, rem_transitions)

    title = create_name(str(hpc_path), overview_df)
    fname = os.path.join(OUTPUT_DIR, title)
    # Segments are stored positionally (arr_0, arr_1, ...); savez appends ".npz".
    np.savez(fname, *lfpREM)
    print(fname)

/home/nero/datasets/processed/Rat5_SD8_HC_0_posttrial3
/home/nero/datasets/processed/Rat5_SD8_HC_0_posttrial2
/home/nero/datasets/processed/Rat5_SD8_HC_0_posttrial4
/home/nero/datasets/processed/Rat5_SD8_HC_0_posttrial5
/home/nero/datasets/processed/Rat5_SD16_OR_1_posttrial5
/home/nero/datasets/processed/Rat5_SD16_OR_1_posttrial4
/home/nero/datasets/processed/Rat5_SD2_OR_0_posttrial4
/home/nero/datasets/processed/Rat5_SD2_OR_0_posttrial2
/home/nero/datasets/processed/Rat5_SD2_OR_0_posttrial5
/home/nero/datasets/processed/Rat5_SD2_OR_0_posttrial3
/home/nero/datasets/processed/Rat5_SD2_OR_0_posttrial1
/home/nero/datasets/processed/Rat5_SD1_OR_1_posttrial5
/home/nero/datasets/processed/Rat5_SD1_OR_1_posttrial3
/home/nero/datasets/processed/Rat5_SD1_OR_1_posttrial4
/home/nero/datasets/processed/Rat5_SD3_OD_0_posttrial4
/home/nero/datasets/processed/Rat5_SD3_OD_0_posttrial3
/home/nero/datasets/processed/Rat5_SD3_OD_0_posttrial5
/home/nero/datasets/processed/Rat5_SD14_OR_0_posttrial5
/home/n