# Various File Cleanup

### Sort .bag images by creation date
There was a bug in earlier versions of the bag conversion code. Remove any images that were created before the date of the frame-number fix, which was in early 2024.

In [69]:
import os
import datetime
from pathlib import Path
import numpy as np

#function c/o chatgpt
def remove_old_files(directory_path,ymd):
    """Delete the regular files directly inside a folder that predate a cutoff.

    Only the immediate entries of ``directory_path`` are examined;
    sub-directories are left alone. The number of deleted files is printed.

    Args:
        directory_path: folder whose files are checked.
        ymd: cutoff as a ``(year, month, day)`` triple; files stamped
            strictly before midnight of that day are deleted.

    Returns:
        An empty tuple.
    """
    year, month, day = ymd
    cutoff = datetime.datetime(year=year, month=month, day=day)

    def _stamp(entry_path):
        """Return the creation time of a file, or its modification time where creation time is unavailable."""
        if os.name == 'nt':
            # Windows: getctime is used as the creation time
            seconds = os.path.getctime(entry_path)
        else:
            info = os.stat(entry_path)
            try:
                seconds = info.st_birthtime
            except AttributeError:
                # No birth time exposed here (probably Linux), so fall back
                # to the last-modified time
                seconds = info.st_mtime
        return datetime.datetime.fromtimestamp(seconds)

    removed = 0
    for entry in os.listdir(directory_path):
        candidate = os.path.join(directory_path, entry)
        if not Path(candidate).is_file():
            continue  # skip sub-directories and anything else that is not a file
        if _stamp(candidate) >= cutoff:
            continue  # recent enough to keep
        os.remove(candidate)
        removed += 1
    print(removed)
    return ()


# #test out function on a single path
# #path = "/hmet_analysis/dd/book/0/rgb_aligned_depth/"  # Replace with the directory path you want to list files from
# path = "/hmet_analysis/dd/book/0/ximea_aligned_depth/"  # Replace with the directory path you want to list files from
# date = (2024,1,1)

# remove_old_files(path,date)

In [52]:
#run this for all subjects, all trials
subs = ['dd','ad','sm','bu']
basedirs = ['/hmet_analysis','/hmet_analysis_2']
# Each task is listed exactly once: a repeated entry only triggers a redundant second pass over the same folders
tasks = ['book','movie','phone','computer','sandwich','chat','laundry','indoor_stand','indoor_walk','catch','patio_stand','patio_walk','road_stand','road_walk']
cameras = ['ximea_aligned_depth','rgb_aligned_depth']
date = (2024,1,1)  # cutoff (year, month, day) handed to remove_old_files

# Try every basedir/subject/task/camera combination for trial '0';
# combinations with no folder on disk are reported and skipped.
for bd in basedirs:
    for sub in subs:
        for task in tasks:
            for camera in cameras:
                path = os.path.join(bd,sub,task,'0',camera)
                if os.path.isdir(path):
                    print(path)
                    remove_old_files(path,date)
                else:
                    print('not a path: ',path)
print('alldone!')

In [57]:
#some bonus ones (trial 1 instead of 0 because 0 failed):
extra_paths = ["/hmet_analysis_2/sm/sandwich/1/ximea_aligned_depth/",
               "/hmet_analysis_2/sm/sandwich/1/rgb_aligned_depth/",
               "/hmet_analysis_2/ad/movie/1/ximea_aligned_depth/",
               "/hmet_analysis/ad/movie/1/rgb_aligned_depth/",
               "/hmet_analysis/bu/road_stand/1/ximea_aligned_depth/",
               "/hmet_analysis/bu/road_stand/1/rgb_aligned_depth/"]
# `date` is the (year, month, day) cutoff tuple defined in an earlier cell
for path in extra_paths:
    # remove_old_files lists the folder with os.listdir, which raises on a
    # missing path; report and skip so one bad entry doesn't abort the rest
    if os.path.isdir(path):
        print(path)
        remove_old_files(path,date)
    else:
        print('not a path: ',path)

/hmet_analysis_2/sm/sandwich/1/ximea_aligned_depth/
4662
/hmet_analysis_2/sm/sandwich/1/rgb_aligned_depth/
4662
/hmet_analysis_2/ad/movie/1/ximea_aligned_depth/
14612
/hmet_analysis/ad/movie/1/rgb_aligned_depth/
14612
/hmet_analysis/bu/road_stand/1/ximea_aligned_depth/
2
/hmet_analysis/bu/road_stand/1/rgb_aligned_depth/
18638


In [67]:
#do this for all calibrations:
subs = ['dd','ad','sm']
basedirs = ['/hmet_analysis','/hmet_analysis','/hmet_analysis_2']  # paired with subs by position
cameras = ['ximea_aligned_depth','rgb_aligned_depth']
date = (2024,1,1)  # cutoff (year, month, day) handed to remove_old_files

for sub, bd in zip(subs, basedirs):
    #calibration folders
    calib_folder = os.path.join(bd,sub,'calib')
    if not os.path.isdir(calib_folder):
        # os.listdir would raise here and abort the sweep for the remaining subjects
        print('not a path: ',calib_folder)
        continue
    for calib_filename in os.listdir(calib_folder):
        for camera in cameras:
            path = os.path.join(calib_folder,calib_filename, camera)
            if os.path.isdir(path):
                print(path)
                remove_old_files(path,date)
            else:
                print('not a path: ',path)
print('alldone!')

not a path:  /hmet_analysis/dd/calib/2021_05_09_14/ximea_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_05_09_14/rgb_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_06_29_41/ximea_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_06_29_41/rgb_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_05_09_22/ximea_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_05_09_22/rgb_aligned_depth
/hmet_analysis/dd/calib/2021_06_29_23/ximea_aligned_depth
8664
/hmet_analysis/dd/calib/2021_06_29_23/rgb_aligned_depth
3136
/hmet_analysis/dd/calib/2021_06_29_25/ximea_aligned_depth
2488
/hmet_analysis/dd/calib/2021_06_29_25/rgb_aligned_depth
2484
/hmet_analysis/dd/calib/2021_06_29_4/ximea_aligned_depth
768
/hmet_analysis/dd/calib/2021_06_29_4/rgb_aligned_depth
230
not a path:  /hmet_analysis/dd/calib/2021_05_09_33/ximea_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_05_09_33/rgb_aligned_depth
not a path:  /hmet_analysis/dd/calib/2021_05_09_8/ximea_aligned_depth
no

### Now find the missing files for a given trial.
Despite many months of trying to address this, the bag conversion script still drops frames. However, we've now modified the script to run a single frame per bag file, so it now names the frames correctly, and we know which frames have been dropped.   

So, to fix this, we can run the bag alignment script the first time, and then run it again only on the dropped frames. Each time we run it we'll get ~90% of the remaining frames aligned. To do this we need a script to create a list of missing frames from the alignment.  

In [10]:
# Toy example: pull selected entries out of a list by their indices
numbers = range(1, 6)
letters = ['a','b','c','d','e']
full_list = list(zip(numbers, letters))

keep_idxes = [1,4]

# Look each wanted index up in the full list, in the order given
keep_list = list(map(full_list.__getitem__, keep_idxes))
print(full_list)
keep_list

[(1, 'a'), (2, 'b'), (3, 'c'), (4, 'd'), (5, 'e')]


[(2, 'b'), (5, 'e')]

In [12]:
# Sanity check: int() ignores leading zeros, so a zero-padded number string parses directly
int('000016')

16

In [100]:
def find_largest_framenum(aligned_frames_folder):
    """Return the largest frame number found among a trial's raw depth frames.

    The raw frames are read from the ``depth`` folder that sits next to
    ``aligned_frames_folder`` (same parent directory). Frame numbers are
    parsed from file names of the form ``depth_frame_<number>.npy``; entries
    whose name does not reduce to an integer are ignored.

    Args:
        aligned_frames_folder: path to an aligned-frames folder, with or
            without a trailing path separator.

    Returns:
        The largest frame number, or 0 if no frame file is found. The value
        is also printed.
    """
    # Normalise first: os.path.dirname('/a/b/') is '/a/b', so a trailing
    # separator would otherwise make us look for 'depth' *inside* the aligned folder
    trial_folder = os.path.dirname(os.path.normpath(aligned_frames_folder))
    raw_depth_folder = os.path.join(trial_folder,'depth')
    max_frame = 0
    for f in os.listdir(raw_depth_folder):
        num = f.replace('depth_frame_','').replace('.npy','')
        try:
            frame_number = int(num)
        except ValueError:
            # not a frame file (stray or hidden file); don't let it abort the scan
            continue
        max_frame = max(max_frame,frame_number)
    print('max: ', max_frame)
    return(max_frame)
          
def list_missing_frames(aligned_frames_folder):
    """List the frame numbers that have no aligned frame file.

    Every frame number from 0 up to and including the largest raw frame
    number (as reported by ``find_largest_framenum``) is checked for a file
    named ``depth_frame_<8-digit zero-padded number>.npy`` inside
    ``aligned_frames_folder``.

    Args:
        aligned_frames_folder: folder holding the aligned frame files.

    Returns:
        Sorted list of the frame numbers whose aligned file is absent.
    """
    largest_frame = find_largest_framenum(aligned_frames_folder)
    # largest_frame is the highest frame *number*, not a count, so the range
    # has to include it or the final frame would never be checked.
    # NOTE(review): the helper also returns 0 when it finds no raw frames at
    # all, in which case frame 0 is still checked here.
    return [
        i for i in range(largest_frame + 1)
        if not os.path.isfile(
            os.path.join(aligned_frames_folder,f'depth_frame_{str(i).zfill(8)}.npy')
        )
    ]

# Try the two helpers on a single trial's aligned-frames folder
trialpath = '/hmet_analysis/dd/book/0/rgb_aligned_depth'
#calibpath = '/hmet_analysis/dd/calib/2021_05_09_1/rgb_aligned_depth'

# Prints the largest raw frame number for this trial (return value unused here)
find_largest_framenum(trialpath)
#maxframe = find_largest_framenum()
  
# list_missing_frames calls find_largest_framenum itself, so the 'max' line is printed a second time
missing_list = list_missing_frames(trialpath)
#print(missing_list)
#print(missing_list)

max:  25999
max:  25999


Yay, this works. We'll now incorporate it within our main analysis script, where the script for a single trial can run recursively until the missing frames list is empty!



In [None]:
# #run this for all subjects, all trials
# subs = ['dd','ad','sm']
# basedirs = ['/hmet_analysis','/hmet_analysis','/hmet_analysis_2'] #these are paired to subjects
# tasks = ['book','movie','phone','computer','sandwich','movie','chat','laundry','indoor_stand','indoor_walk','catch','patio_stand','patio_walk','road_stand','road_walk']
# cameras = ['ximea_aligned_depth','rgb_aligned_depth']
# date = (2024,1,1)

# for i, sub in enumerate(subs):
#     bd = basedirs[i] #basedirs are paired to subjects
#     for task in tasks:
#         for camera in cameras:
#             path = os.path.join(bd,sub,task,'0',camera)
#             if os.path.isdir(path):
#                 print(path)
#                 list_missing_frames(path)
#             else:
#                 print('not a path: ', path)