# Rebuild a Dataset from raw Data

### from a folder like this:

```
raw/
    2023_04_20/
        TSeries-04202023-001/
        TSeries-04202023-003/
        TSeries-04202023-005/
        13-06-20/
        15-26-40/
        17-06-20/
    2023_04_21/
        TSeries-04212023-001/
        TSeries-04212023-003/
        TSeries-04212023-005/
        14-06-20/
        16-26-40/
        18-06-20/
```

### builds the NWB files under the following structure:

```
raw/
    sub-01/
        sub-01_ses-01_suffix.nwb
        sub-01_ses-02_suffix.nwb
        sub-01_ses-03_suffix.nwb
    sub-02/
        sub-02_ses-01_suffix.nwb
        sub-02_ses-02_suffix.nwb
```

In [None]:
import sys, os, json
import numpy as np
sys.path.append('../../..')
import physion

# Get the default `args` options for the `build_NWB` function

In [None]:
%run ~/work/physion/src/physion/assembling/build_NWB.py

## Modify some metadata for this dataset

In [None]:
# --- subject / preparation metadata written in the NWB file ---
args.species = "Mus musculus"
args.genotype = "SST-IRES-Cre"
args.virus = "AAV9 Syn-Flex-GCaMP6s-WPRE-SV40"
args.surgery = "headplate fixation, cranial window, viral injection"
# --- calcium imaging options ---
args.with_raw_CaImaging = False
args.silent = False

In [None]:

def _load_metadata(datafolder):
    """Load the metadata dictionary stored in a recording folder.

    `metadata.json` is read when it exists in `datafolder`, otherwise
    the function falls back to `metadata.npy`.
    """
    json_path = os.path.join(datafolder, 'metadata.json')
    if os.path.isfile(json_path):
        with open(json_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    # NOTE(security): allow_pickle=True unpickles arbitrary Python objects,
    # only load `metadata.npy` files coming from a trusted source.
    return np.load(os.path.join(datafolder, 'metadata.npy'),
                   allow_pickle=True).item()


def extract_subject(datafolder):
    """Return the 'Subject-ID' entry of the 'subject_props' metadata of `datafolder`."""
    return _load_metadata(datafolder)['subject_props']['Subject-ID']


def extract_protocol(datafolder):
    """Return the 'protocol' entry of the metadata of `datafolder`."""
    return _load_metadata(datafolder)['protocol']

def _list_recordings(day_path):
    """Return the sorted `(TSeries folders, time folders)` names found in `day_path`.

    TSeries folders are the entries containing 'TSeries'; time folders are
    the other entries made of three '-'-separated fields (e.g. '13-06-20').
    """
    entries = os.listdir(day_path)
    tseries = sorted(f for f in entries if 'TSeries' in f)
    times = sorted(f for f in entries
                   if ('TSeries' not in f) and (len(f.split('-')) == 3))
    return tseries, times


def rebuild_dataset(datafolder, 
                    iSubject_start=1,
                    filename_suffix='V1-ffGratingStim-2Prec'):
    """Build one NWB file per recording found in the day folders of `datafolder`.

    Day folders are the entries of `datafolder` that are 10 characters long
    and made of three '_'-separated fields (e.g. '2023_04_20'). Within a day,
    the i-th TSeries folder is paired with the i-th time folder (both in
    sorted order).

    Subjects are renamed 'sub-XX' in their order of first appearance, starting
    at `iSubject_start`, and sessions are numbered 'ses-XX' per subject.
    Files are written as
    `<datafolder>/<sub-XX>/<sub-XX>_<ses-XX>_<filename_suffix>.nwb`.

    The function modifies the global `args` object and calls `build_NWB_func`
    and `append_to_NWB`, none of which is defined in this file.
    # NOTE(review): presumably they are set by the `%run build_NWB.py` cell -- confirm.
    """

    dayfolders = sorted(f for f in os.listdir(datafolder)
                        if (len(f) == 10) and (len(f.split('_')) == 3))

    # each day folder is listed only once and re-used in the two passes below
    recordings = {day: _list_recordings(os.path.join(datafolder, day))
                  for day in dayfolders}

    # first pass: find the subject remapping (over *all* the time folders)
    iSubject = iSubject_start-1
    renaming_dict, subject_of = {}, {}
    for day in dayfolders:
        for rec_time in recordings[day][1]:
            subject = extract_subject(os.path.join(datafolder, day, rec_time))
            subject_of[(day, rec_time)] = subject  # cached for the second pass
            if subject not in renaming_dict:
                iSubject += 1
                renaming_dict[subject] = 'sub-%.2i' % iSubject

    # second pass: build the files
    i = 0
    session_count = dict.fromkeys(renaming_dict, 0)
    for day in dayfolders:
        TSeries, times = recordings[day]
        if len(TSeries) != len(times):
            # zip() below stops at the shortest list, make this visible
            print(' [!!] in "%s": %i TSeries folders for %i time folders,' % (
                        day, len(TSeries), len(times)),
                  'unpaired folders are ignored [!!]')
        for Ts, rec_time in zip(TSeries, times):
            session_folder = os.path.join(datafolder, day, rec_time)
            # subject and session info, renamed
            subject = subject_of[(day, rec_time)]
            new_id = renaming_dict[subject]
            # `os` is imported by this file, unlike `pathlib`
            os.makedirs(os.path.join(datafolder, new_id), exist_ok=True)
            session_count[subject] += 1
            session = 'ses-%.2i' % session_count[subject]
            # build the NWB file
            args.filename = os.path.join(datafolder, new_id,
                                         new_id+'_'+session+'_'+filename_suffix+'.nwb')
            args.datafolder = session_folder
            args.subject_id = new_id
            print(i+1, ')', 'building', args.filename, ' [...]')
            protocol = extract_protocol(session_folder)

            #########################################
            #### some protocol specific curation ####
            #########################################

            if rec_time == '13-30-47':
                args.max_episode = 119 # problem of synchro afterwards...(frozen pc)
            else:
                args.max_episode = -1

            if protocol == 'GluN3-BlankFirst':
                args.indices_forced = [0,1,2]
                args.times_forced = [30,350,660]
                args.durations_forced = [250,250,250]
            elif protocol == 'GluN3-BlankLast':
                args.indices_forced = [160,161,162]
                args.times_forced = [1000,1300,1600]
                args.durations_forced = [250,250,250]
            else:
                args.indices_forced = []
                args.times_forced = []
                args.durations_forced = []

            #########################################
            #### build the nwb file              ####
            #########################################
            build_NWB_func(args)

            #########################################
            #### add the calcium imaging         ####
            #########################################
            args.imaging = os.path.join(datafolder, day, Ts)
            args.nwb = args.filename
            append_to_NWB(args)
            i += 1
            
# rebuild the 'Morabito-et-al-WT' dataset, subjects numbered from sub-01
datafolder = os.path.expanduser(
    os.path.join('~', 'DATA', 'redo', 'Morabito-et-al-WT'))
rebuild_dataset(datafolder, iSubject_start=1)


In [None]:
# only the genotype differs from the previous dataset
args.genotype = "SST-IRES-Cre X GluN1-Flox"
# rebuild from the 'Morabito-et-al-KO' datafolder, subjects numbered from sub-08
datafolder = os.path.expanduser(
    os.path.join('~', 'DATA', 'redo', 'Morabito-et-al-KO'))
rebuild_dataset(datafolder, iSubject_start=8)