In [5]:
# CREATED: 6-SEP-2023
# LAST EDIT: 20-SEP-2023
# AUTHOR: DUANE RINEHART, MBA (drinehart@ucsd.edu)

import os, sys, math, pynwb, re, glob
from pathlib import Path, PurePath
import argparse
import pandas as pd
from pandas_ods_reader import read_ods
from datetime import datetime
from dateutil.tz import tzlocal
from scipy.io import loadmat
import pytz

from pynwb import NWBHDF5IO, NWBFile
from pynwb.image import ImageSeries

import inspect

# Locate the file that defines this code (a lambda's code object records the
# filename it was compiled from) and make an ancestor directory importable.
src_file_path = inspect.getfile(lambda: None)
parent = Path(src_file_path).parents[1]  # parent of the folder that contains this file
# sys.path entries must be strings: the import machinery ignores any other
# type, so appending the Path object itself would have no effect.
if str(parent) not in sys.path:
    sys.path.append(str(parent))
print(f'USING PARENT PATH REFERENCES FOR IMPORTS: {parent}')

# Put the relative 'lib' directory near the front of the module search path.
# Because the entry is relative, it is resolved against the current working
# directory at import time.
# NOTE(review): `import lib.utils` needs the directory that CONTAINS `lib` on
# sys.path, not `lib` itself -- confirm whether this entry is still required.
sys.path.insert(1, 'lib')
#sys.path.insert(1, 'converters')

# Project-local helper modules (lib.behavior supplies get_video_reference_data).
import lib.utils
import lib.behavior
#from converters.ConvertIntanToNWB import convert_to_nwb


def load_data(input_file: str) -> pd.DataFrame:
    '''
    READS DATA SOURCE SPREADSHEET INTO PANDAS DATAFRAME
    SUPPORTS EXCEL (.xlsx), COMMA-SEPARATED VALUES (.csv) OR
    OPEN DOCUMENT FORMAT [OPENOFFICE, LIBREOFFICE] (.ods)

    Only the columns needed to build the NWB files are kept. Rows that have a
    missing value in any of those columns are dropped, and duplicate rows are
    collapsed to a single row.

    :param input_file: str (or path-like) location of the spreadsheet; the
        extension is compared case-insensitively
    :return pd.DataFrame restricted to the required NWB columns
    :raises KeyError: if the spreadsheet lacks one or more required columns
    '''

    lstNWBFields = ['session_id', 'subject_id', 'subject_description', 'sex', 'species', 'subject_strain', 'session_description', 'src_folder_directory', 'session_start_time', 'institution', 'stimulus_notes', 'experimenters', 'experiment_description', 'identifier', 'video_sampling_rate', 'device_name', 'device_description', 'device_manufacturer']

    file_extension = Path(input_file).suffix.lower()
    if file_extension == '.xlsx':
        df = pd.read_excel(input_file)
    elif file_extension == '.csv':
        df = pd.read_csv(input_file)
    else:  # EXPECTED .ods FILE
        df = read_ods(input_file)

    # Fail early with the list of absent columns instead of pandas' generic
    # "not in index" message.
    missing_fields = [field for field in lstNWBFields if field not in df.columns]
    if missing_fields:
        raise KeyError(f"{input_file} is missing required column(s): {', '.join(missing_fields)}")

    filtered_df = df[lstNWBFields]
    # KEEP ONLY ROWS WHERE EVERY REQUIRED FIELD HAS A VALUE; THEN GET UNIQUE ROWS
    filtered_df = filtered_df[filtered_df.notnull().all(axis=1)].drop_duplicates()

    return filtered_df


def _format_subject_age(age):
    '''Return *age* as an ISO 8601 day-duration string ("P<days>D"), or '' when it is missing (None/NaN).'''
    if age is None:
        return ''
    if isinstance(age, str):
        # Already correctly formatted (e.g. "P90D"): keep it. Any other text
        # cannot be interpreted reliably, so fall back to the generic default.
        return age if re.fullmatch(r'P\d+D', age) else 'P0D'
    try:
        if math.isnan(age):
            return ''
        return f'P{int(age)}D'  # numeric ages are assumed to be in days
    except (TypeError, ValueError, OverflowError):
        return 'P0D'  # generic default for values that are not numbers


def _normalize_date_of_birth(date_of_birth):
    '''Return *date_of_birth* as a timezone-aware datetime at midnight local time, or None if unusable.'''
    if date_of_birth is None or isinstance(date_of_birth, str):
        # '' is the caller's "not provided" marker; other text is not parsed here.
        return None
    # pandas Timestamps are converted to plain datetimes; other objects are used as-is.
    dob = date_of_birth.to_pydatetime() if hasattr(date_of_birth, 'to_pydatetime') else date_of_birth
    parts = (getattr(dob, 'year', None), getattr(dob, 'month', None), getattr(dob, 'day', None))
    if all(isinstance(part, int) for part in parts):
        return datetime(parts[0], parts[1], parts[2], tzinfo=tzlocal())
    return None  # e.g. NaT, whose year/month/day are not integers


def get_subject(age, subject_description, genotype, sex, species, subject_id, subject_weight, date_of_birth, subject_strain):
    '''
    Used for meta-data: builds the pynwb Subject describing the experimental animal.

    :param age: number of days (int/float), an ISO 8601 day duration string
        such as "P90D", or None/NaN when unknown
    :param subject_description: free-text description of the subject
    :param genotype: genotype (converted to str)
    :param sex: sex code passed through to pynwb unchanged
    :param species: species name
    :param subject_id: subject identifier
    :param subject_weight: weight (converted to str)
    :param date_of_birth: pandas Timestamp / datetime, or '' / None when unknown
    :param subject_strain: strain name
    :return pynwb.file.Subject
    '''

    subject_age = _format_subject_age(age)
    # None is pynwb's own default for date_of_birth, so an unknown date
    # behaves exactly like omitting the argument.
    date_of_birth = _normalize_date_of_birth(date_of_birth)

    subject = pynwb.file.Subject(age=subject_age,
                             description=subject_description,
                             genotype=str(genotype),
                             sex=sex,
                             species=species,
                             subject_id=subject_id,
                             weight=str(subject_weight),
                             date_of_birth=date_of_birth,
                             strain=subject_strain
                            )
    return subject


##################################################################################
# LOCATIONS USED BY THE CONVERSION (ALL ON THE Z: DRIVE)
#
# META-DATA: folder holding the spreadsheet, and the spreadsheet itself
input_path = Path('Z:/') / 'U19' / 'Deschenes_Group' / 'Grimace ammonia' / 'Figure 3 Awake-rat measurement'
input_file = input_path / 'Link Between Movie Labchart file and Matlab.ods'

# MOVIES: base folder for the per-session video directories (same folder as the meta-data)
input_movie_path = Path(input_path)

# OUTPUT: sibling 'output' folder next to the meta-data folder
output_path = input_path.parent / 'output'
##################################################################################

# LOAD THE META-DATA ROWS; ONE NWB FILE IS WRITTEN PER ROW
lstRecords = load_data(input_file)

for index, dataset in lstRecords.iterrows():
    print(f"PROCESSING DATASET #{index}")
    print(f"\tsession_id: {dataset['session_id']}")

    session_id = dataset['session_id']
    subject_description = dataset['subject_description']

    # Fields with no spreadsheet column are passed to get_subject as empty placeholders.
    age = ''
    genotype = ''
    if dataset['sex'] in ('Male', 'M'):
        sex = 'M'
    elif dataset['sex'] in ('Female', 'F'):
        sex = 'F'
    else:
        sex = 'U'  # unknown
    species = dataset['species']
    subject_id = dataset['subject_id']
    subject_weight = ''
    date_of_birth = ''
    subject_strain = ''

    # Keep an `institution` that already exists in this session (set by an
    # earlier row or earlier in the notebook); otherwise take it from this row.
    try:
        institution
    except NameError:
        institution = dataset['institution']

    ##################################################################################
    # CREATE EXPERIMENTAL SUBJECT OBJECT
    # NOTE(review): the subject is built but not attached to the NWB file
    # (the `subject=` argument below is disabled) -- confirm this is intended.
    subject = get_subject(age,
                          subject_description,
                          genotype,
                          sex,
                          species,
                          subject_id,
                          subject_weight,
                          date_of_birth,
                          subject_strain)
    ##################################################################################

    # capture experiment date and add time zone info (required for NWB)
    # The time of day is discarded: the session start is midnight (UTC) of the recorded date.
    raw_start_time = dataset['session_start_time']
    if isinstance(raw_start_time, str):
        parsed_start_time = datetime.strptime(raw_start_time, '%Y-%m-%dT%H:%M:%S')
    else:
        # Spreadsheet readers may return date cells as datetime-like objects.
        parsed_start_time = pd.Timestamp(raw_start_time).to_pydatetime()
    session_start_time = datetime(parsed_start_time.year, parsed_start_time.month, parsed_start_time.day)
    session_start_time = pytz.utc.localize(session_start_time)  # adds timezone info

    nwbfile = pynwb.NWBFile(session_description = dataset['session_description'],
                                    identifier = str(dataset['identifier']),
                                    session_start_time = session_start_time,
                                    experiment_description = dataset['experiment_description'],
                                    # keywords = keywords,
                                    # surgery=surgery,
                                    # pharmacology=pharmacology,
                                    stimulus_notes=dataset['stimulus_notes'],
                                    experimenter = dataset['experimenters'],
                                    institution = institution,
                                    # subject=subject,
                                    # notes=notes
                                    )

    # Convert each part to text before joining so a non-string identifier
    # (e.g. a number) does not raise a TypeError.
    output_filename = str(dataset['session_id']) + str(dataset['identifier']) + '.nwb'
    dest_path = Path(output_path, output_filename)

    video_folder = Path(str(input_movie_path), str(dataset['src_folder_directory']))

    print(f'\tINPUT FOLDER: {video_folder}')
    print(f'\tOUTPUT FILE: {dest_path}')

    video_files = glob.glob(str(Path(video_folder, '*.avi')), recursive=False)
    for video_file in video_files:
        print(f'\tINCLUDING/REFERENCING VIDEO FILE: {video_file}')

    if not video_files:
        # Without a video there is nothing to reference; do not hand the
        # folder itself to the video helper as if it were a file.
        print(f'\tNO .avi FILES FOUND IN {video_folder}; SKIPPING DATASET')
        continue

    # Only one video is referenced per NWB file: the last match returned by glob.
    video_file_path = video_files[-1]

    video_sampling_rate = dataset['video_sampling_rate']

    relative_path_video_location_file = lib.behavior.get_video_reference_data(video_file_path, dest_path)
    print(f'RELATIVE PATH: {relative_path_video_location_file}')

    device = nwbfile.create_device(
        name=dataset['device_name'],
        description=dataset['device_description'],
        manufacturer=dataset['device_manufacturer']
    )
    # nwbfile.add_device(device)
    ##################################################################################
    # https://pynwb.readthedocs.io/en/stable/tutorials/domain/images.html
    # Note: This approach references the video files and does not include them in nwb file
    # NOTE(review): the series name "IxmageSeries" looks like a typo for
    # "ImageSeries"; it is left unchanged because readers of existing files
    # may look the acquisition up by this name -- confirm before renaming.
    behavior_external_file = ImageSeries(
        name="IxmageSeries",
        external_file=[relative_path_video_location_file],
        description=dataset['session_description'],
        format="external",
        rate=float(video_sampling_rate),
        comments=""
    )
    nwbfile.add_acquisition(behavior_external_file)
    ################################################################################

    # WRITE NWB FILE TO STORAGE
    # The path is passed as text, which every pynwb version accepts.
    with pynwb.NWBHDF5IO(str(dest_path), 'w') as io:
        io.write(nwbfile)
        
# mat_data = loadmat(input_file)
# spike_data = mat_data['thissdata']['Spike']
# breathing_data = mat_data['thissdata']['Breathing']
#
# #print(mat_data['thissdata'])
# #print(spike_data)
# df = pd.DataFrame(spike_data)
# df

USING PARENT PATH REFERENCES FOR IMPORTS: C:\Users\duane\AppData\Local\Temp
PROCESSING DATASET #4
	session_id: 3.0
	INPUT FOLDER: Z:\U19\Deschenes_Group\Grimace ammonia\Figure 3 Awake-rat measurement\Movies\22september_rat332_20220922_144545
	OUTPUT FILE: Z:\U19\Deschenes_Group\Grimace ammonia\output\3.0sept_22_2022_Rat332_s1.nwb
	INCLUDING/REFERENCING VIDEO FILE: Z:\U19\Deschenes_Group\Grimace ammonia\Figure 3 Awake-rat measurement\Movies\22september_rat332_20220922_144545\22september_rat332_20220922_132641_20220922_144545.avi
	ATTEMPTING TO CREATE SYMBOLIC LINK
windows detected
	TESTING FILE IN DESTINATION TO SEE IF IDENTICAL TO SOURCE
	SKIPPING FILE COPY; FILE ALREADY MOVED TO DESTINATION
RELATIVE PATH: external_files\22september_rat332_20220922_132641_20220922_144545.avi
PROCESSING DATASET #5
	session_id: 4.0
	INPUT FOLDER: Z:\U19\Deschenes_Group\Grimace ammonia\Figure 3 Awake-rat measurement\Movies\22september_rat332_20220922_144545
	OUTPUT FILE: Z:\U19\Deschenes_Group\Grimace amm

KeyboardInterrupt: 