# Step1: Extract time from photos

## 1.1 Extract a photo's timestamp from its Exif data

In [1]:
from exiftool import ExifTool # only need the ExifTool Class within the exiftool package
from datetime import datetime, timedelta # convert time strings into timestamps

In [2]:
# Extract all metadata tags of one example photo.
# get_metadata() is an instance method, so it has to be called on an ExifTool
# object; the `with` statement provides that object as `et`.
with ExifTool() as et: 
    metadata = et.get_metadata("../example-data/photos-sdcards/071224/bottom/IMG_0851.CR3")

In [3]:
# Capture date and time as a 'YYYY:MM:DD HH:MM:SS' string (second resolution).
# .get() returns None instead of raising if the tag is absent.
datetime_original = metadata.get("EXIF:DateTimeOriginal")
datetime_original

'2024:07:03 07:55:23'

In [4]:
# Get the sub-second part of the capture time.
# NOTE(review): the next cells append this value after the decimal point
# (25 -> .25 s), so it is presumably "digits of the fractional second", not a
# millisecond count — confirm against the EXIF tag documentation.
subsec_time_original = metadata.get("EXIF:SubSecTimeOriginal")
subsec_time_original

25

In [5]:
# Join the two tags into one 'YYYY:MM:DD HH:MM:SS.fraction' string.
# NOTE(review): the sub-second value was displayed above as the number 25, not
# the string '25'; if the tag ever holds leading zeros (e.g. '05') a numeric
# value would presumably lose them and shift the fraction — verify.
precise_timestamp = f"{datetime_original}.{subsec_time_original}"
precise_timestamp

'2024:07:03 07:55:23.25'

In [6]:
# Convert the time string into a datetime object.
# %f reads the digits after the dot as a fraction of a second ('.25' -> 250000 µs).
datetime.strptime(precise_timestamp, '%Y:%m:%d %H:%M:%S.%f')

datetime.datetime(2024, 7, 3, 7, 55, 23, 250000)

def extract_time(photo_path):
    """Return the capture time of a single photo.

    Reads the ``EXIF:DateTimeOriginal`` tag through ExifTool and parses it.
    The result has second resolution; the sub-second tag is not read.

    Args:
        photo_path: Path of the photo file passed to ``ExifTool.get_metadata``.

    Returns:
        datetime.datetime: The parsed ``EXIF:DateTimeOriginal`` value.

    Raises:
        ValueError: If the photo has no ``EXIF:DateTimeOriginal`` tag, or the
            tag does not match the ``'%Y:%m:%d %H:%M:%S'`` format.
    """
    with ExifTool() as et:
        metadata = et.get_metadata(photo_path)

    datetime_original = metadata.get("EXIF:DateTimeOriginal")

    # Without this check a missing tag reaches strptime() as None and fails
    # with a TypeError that does not say which photo is affected.
    if datetime_original is None:
        raise ValueError(f"{photo_path} has no EXIF:DateTimeOriginal tag")

    return datetime.strptime(datetime_original, '%Y:%m:%d %H:%M:%S')

## 1.2 extract timestamp for all the photos within a folder

In [10]:
import os
import glob # to sort CR3 files

In [58]:
def extract_time_for_photos_in_one_camera(date_folder, camera, pattern="*.CR3"):
    """Collect the capture time of every photo taken by one camera.

    Args:
        date_folder: Folder of one SD-card date; it contains one sub-folder
            per camera.
        camera: Name of the camera sub-folder inside ``date_folder``.
        pattern: Glob pattern selecting the photo files inside the camera
            folder. Defaults to ``"*.CR3"``.

    Returns:
        list[tuple[str, datetime.datetime]]: ``(photo_name, timestamp)``
        pairs ordered by file path. An empty list if nothing matches.

    Raises:
        ValueError: If a photo has no ``EXIF:DateTimeOriginal`` tag, or the
            tag does not match the ``'%Y:%m:%d %H:%M:%S'`` format.
    """
    camera_folder = os.path.join(date_folder, camera)

    # NOTE(review): ordering is by file name, not by capture time; the grouping
    # step presumably relies on names sorting chronologically — confirm.
    photo_paths = sorted(glob.glob(os.path.join(camera_folder, pattern)))

    timestamps = []

    # One ExifTool instance is shared by all photos of the camera.
    with ExifTool() as et:
        for photo_path in photo_paths:
            metadata = et.get_metadata(photo_path)
            dt_orig = metadata.get("EXIF:DateTimeOriginal")

            # A missing tag would otherwise hit strptime() as None and raise a
            # TypeError that does not identify the offending photo.
            if dt_orig is None:
                raise ValueError(f"{photo_path} has no EXIF:DateTimeOriginal tag")

            dt_format = datetime.strptime(dt_orig, '%Y:%m:%d %H:%M:%S')
            timestamps.append((os.path.basename(photo_path), dt_format))

    return timestamps

In [34]:
# Try the function on one camera: every .CR3 photo in the "top" sub-folder of the 071224 folder.
timestamps = extract_time_for_photos_in_one_camera(date_folder="../example-data/photos-sdcards/071224", camera = "top")

In [35]:
timestamps

[('IMG_6953.CR3', datetime.datetime(2024, 7, 3, 8, 2, 25)),
 ('IMG_6954.CR3', datetime.datetime(2024, 7, 3, 8, 4, 4)),
 ('IMG_6955.CR3', datetime.datetime(2024, 7, 3, 8, 4, 37)),
 ('IMG_6956.CR3', datetime.datetime(2024, 7, 3, 8, 7, 26)),
 ('IMG_6958.CR3', datetime.datetime(2024, 7, 3, 8, 12, 35)),
 ('IMG_6959.CR3', datetime.datetime(2024, 7, 3, 8, 14, 7)),
 ('IMG_6960.CR3', datetime.datetime(2024, 7, 3, 8, 14, 9)),
 ('IMG_6961.CR3', datetime.datetime(2024, 7, 3, 8, 14, 11)),
 ('IMG_6962.CR3', datetime.datetime(2024, 7, 3, 8, 14, 12)),
 ('IMG_6963.CR3', datetime.datetime(2024, 7, 3, 8, 14, 14)),
 ('IMG_7009.CR3', datetime.datetime(2024, 7, 3, 8, 19)),
 ('IMG_7010.CR3', datetime.datetime(2024, 7, 3, 8, 19, 12)),
 ('IMG_7011.CR3', datetime.datetime(2024, 7, 3, 8, 23, 27)),
 ('IMG_7012.CR3', datetime.datetime(2024, 7, 3, 8, 24, 12)),
 ('IMG_7013.CR3', datetime.datetime(2024, 7, 3, 8, 24, 14)),
 ('IMG_7014.CR3', datetime.datetime(2024, 7, 3, 8, 24, 15)),
 ('IMG_7015.CR3', datetime.datetime

# Step2: create new folders to store images

In [14]:
import os

In [15]:
# Root folder that will receive the organized copies of the photos
organized_photo_folder_path = "../example-data/photos-organized"

# Create the folder; exist_ok=True keeps this cell from failing when it is re-run
os.makedirs(organized_photo_folder_path, exist_ok=True)

# Step3: Sort photos based on the timestamps

In [16]:
import shutil

colorprofiles_namelabels/ → Contains the earlier photos before the continuous sequence.\
bottom/ → Contains the continuous set of images that have very close timestamps.

In [59]:
# Function to group photos into sequences based on timestamp continuity
def group_photos_by_timestamp(timestamps, max_time_diff=timedelta(minutes=3)):
    """Split a sequence of photos into runs of consecutive shots.

    A photo stays in the current run when the time elapsed since the photo
    directly before it is at most ``max_time_diff``; otherwise it opens a new
    run.

    Args:
        timestamps: Indexable sequence of ``(photo_name, timestamp)`` pairs,
            in the order in which the photos should be compared.
        max_time_diff: Largest gap between two neighbouring photos that still
            keeps them in the same run.

    Returns:
        list[list[str]]: Photo names, one inner list per run, in input order.
        Empty when ``timestamps`` is empty.
    """
    groups = []

    for position, (photo, taken_at) in enumerate(timestamps):
        # The very first photo has no predecessor and always opens a run.
        continues_run = (
            position > 0
            and taken_at - timestamps[position - 1][1] <= max_time_diff
        )

        if continues_run:
            groups[-1].append(photo)
        else:
            groups.append([photo])

    return groups


In [36]:
# Group the "top" camera photos; a gap of more than 3 minutes between neighbouring photos starts a new group.
groups = group_photos_by_timestamp(timestamps, max_time_diff=timedelta(minutes=3))

In [37]:
groups

[['IMG_6953.CR3', 'IMG_6954.CR3', 'IMG_6955.CR3', 'IMG_6956.CR3'],
 ['IMG_6958.CR3',
  'IMG_6959.CR3',
  'IMG_6960.CR3',
  'IMG_6961.CR3',
  'IMG_6962.CR3',
  'IMG_6963.CR3'],
 ['IMG_7009.CR3', 'IMG_7010.CR3'],
 ['IMG_7011.CR3',
  'IMG_7012.CR3',
  'IMG_7013.CR3',
  'IMG_7014.CR3',
  'IMG_7015.CR3',
  'IMG_7016.CR3',
  'IMG_7017.CR3',
  'IMG_7018.CR3']]

# Step 4: copy photos from sd card folders to new folders based on the sorted result

In [60]:
def sort_photos(groups, camera, date_folder, sorted_folder, min_flower_photos=6):
    """Copy grouped photos of one camera into per-flower folders.

    Groups are processed in order. A group with fewer than
    ``min_flower_photos`` photos is treated as colour-profile / name-label
    shots and copied to
    ``<sorted_folder>/colorprofiles_namelabels/flower<N>/<camera>``.
    A larger group is the photo sequence of flower ``N`` and is copied to
    ``<sorted_folder>/flower<N>/<camera>``; ``N`` then advances, so small
    groups are filed under the flower whose sequence follows them.

    Destination folders are created only when photos are copied into them.
    This avoids leaving an empty ``flower<N>/<camera>`` folder behind for
    trailing small groups, and empty colour-profile folders for flowers that
    have no such photos.

    Args:
        groups: List of lists of photo file names, e.g. the result of
            ``group_photos_by_timestamp``.
        camera: Camera sub-folder name, used for both source and destination.
        date_folder: Source folder; photos are read from
            ``<date_folder>/<camera>/<photo>``.
        sorted_folder: Root folder of the organized output.
        min_flower_photos: Smallest group size that counts as a flower
            sequence. Defaults to 6.

    Returns:
        None. The photos are copied with ``shutil.copy``.
    """
    source_folder = os.path.join(date_folder, camera)

    # Index of the flower the current groups belong to (flower1, flower2, ...)
    flower_idx = 1

    for group in groups:
        if not group:
            # Nothing to copy, and no folder should be created for it.
            continue

        is_flower_sequence = len(group) >= min_flower_photos

        if is_flower_sequence:
            destination = os.path.join(sorted_folder, f"flower{flower_idx}", camera)
        else:
            destination = os.path.join(
                sorted_folder, "colorprofiles_namelabels", f"flower{flower_idx}", camera
            )

        os.makedirs(destination, exist_ok=True)
        for photo in group:
            shutil.copy(os.path.join(source_folder, photo), destination)

        # Only a full sequence completes a flower; move on to the next one.
        if is_flower_sequence:
            flower_idx += 1

In [39]:
# Copy the grouped "top" camera photos from the SD-card folder into the organized folder.
sort_photos(groups, camera = "top", date_folder = "../example-data/photos-sdcards/071224", 
                sorted_folder="../example-data/photos-organized/")

# Step 5: Check that each flower folder has all three camera folders

In [64]:
# List what Step 4 produced. `sorted_folder` is only assigned later in the
# Summary loop, so on a fresh kernel this cell must use the path from Step 2.
sorted(os.listdir(organized_photo_folder_path))

['colorprofiles_namelabels', 'flower1', 'flower2']

In [75]:
def check_bugs(sorted_folder):
    """Log every flower folder that lacks one of the three camera folders.

    Each entry of ``sorted_folder`` whose name starts with ``"flower"`` is
    expected to contain the sub-folders ``top``, ``middle`` and ``bottom``.
    For a flower with missing sub-folders, exactly one message listing all of
    them is appended to ``bug.txt``.

    Args:
        sorted_folder: Folder holding the ``flower<N>`` folders to check.

    Returns:
        None. Messages are appended to ``bug.txt`` in
        ``os.path.dirname(sorted_folder)``; the file is opened in append mode,
        so it is created even when nothing is missing.

    Raises:
        OSError: If ``sorted_folder`` cannot be listed or ``bug.txt`` cannot
            be opened.
    """
    required_cameras = ["top", "middle", "bottom"]
    bug_path = os.path.join(os.path.dirname(sorted_folder), 'bug.txt')

    with open(bug_path, 'a') as bug_file:
        # Sorted so the log order does not depend on the file system.
        for subfolder in sorted(os.listdir(sorted_folder)):
            if not subfolder.startswith("flower"):
                continue

            subfolder_dir = os.path.join(sorted_folder, subfolder)

            # Check every required camera before reporting, so each flower
            # gets a single, complete message.
            missing_cameras = [
                camera
                for camera in required_cameras
                if not os.path.isdir(os.path.join(subfolder_dir, camera))
            ]

            if missing_cameras:
                bug_file.write(f"{subfolder_dir} is missing: {', '.join(missing_cameras)} camera\n. Please go to species's corresponding colorprofile_namelabels to check.\n")


# Test

Three kinds of time difference need to be distinguished:
- within color profiles
- within the same flower between color profile and flowers
- between flowers

# NOTE(review): unfinished draft — this cell does not parse and cannot be run as is:
#   * the signature is missing a comma after the `flower_time` default, and
#     parameters without defaults (camera, date_folder, sorted_folder) follow
#     parameters with defaults;
#   * the `if` inside the loop is followed by two `else:` branches;
#   * `flower_idx`, `current_group`, `groups` and `max_time_diff` are used but
#     never defined inside the function.
# The second half repeats the body of group_photos_by_timestamp. The apparent
# goal is to tell apart the three time differences listed above using two
# thresholds (camera_time, flower_time) — TODO confirm and finish or delete.
def test (timestamps, camera_time = timedelta(minutes=1), flower_time = timedelta(minutes=3)
          camera, date_folder, sorted_folder):
    
    # iterate through each photo
    for idx, (photo, timestamp) in enumerate(timestamps):
        # put the first photo into color profiles
        if idx == 0:
            # create a new flower folder
            flower_folder = os.path.join(sorted_folder, f"flower{flower_idx}")
            colorprofiles_folder = os.path.join(sorted_folder, "colorprofiles_namelabels", f"flower{flower_idx}", camera)
            #camera_folder = os.path.join(flower_folder, camera)

            # create if not exist
            os.makedirs(colorprofiles_folder, exist_ok=True)
            #os.makedirs(camera_folder, exist_ok=True)
            
            photo_path = os.path.join(date_folder, camera, photo)
            shutil.copy(photo_path, colorprofiles_folder)
        
        else:
            # check time differences, if time difference is smaller than 1 min
            # cr
            
            
            
            
            current_group.append(photo)
        # for the rest of the photos, check timestamp to decide if put this photo into the current group or a new group
        else:
            # get the timestamp of the previous photo
            prev_timestamp = timestamps[idx-1][1]

            # If time difference is within threshold, add to current group
            if timestamp - prev_timestamp <= max_time_diff:
                current_group.append(photo)
            else:
                groups.append(current_group)  # Store completed group
                current_group = [photo]  # Start new group

    # Append the last group if not empty
    if current_group:
        groups.append(current_group)

# Summary

In [62]:
# Root folder of the raw SD-card copies; it holds one sub-folder per date
sdcard_folder = "../example-data/photos-sdcards/"

# Keep only directories: os.listdir() also returns stray files (e.g. .DS_Store),
# for which nothing would be sorted and check_bugs() would then fail on a
# missing output folder. Sorted so the dates are processed in a stable order.
dates = sorted(
    entry
    for entry in os.listdir(sdcard_folder)
    if os.path.isdir(os.path.join(sdcard_folder, entry))
)

# Camera sub-folders expected inside every date folder
cameras = ["bottom", "middle", "top"]

# Root folder that receives one organized sub-folder per date
output_folder = "../example-data/photos-organized/"

In [74]:
# Run the whole pipeline: for every date, sort the photos of each camera into
# <output_folder>/<date>, then check the result once all cameras are done.
for date in dates:
    date_folder = os.path.join(sdcard_folder, date)
    sorted_folder = os.path.join(output_folder, date)
    for camera in cameras:
        # Step 1: timestamps -> Step 3: groups -> Step 4: copy into flower folders
        timestamps = extract_time_for_photos_in_one_camera(date_folder = date_folder, camera = camera)
        groups = group_photos_by_timestamp(timestamps, max_time_diff=timedelta(minutes=3))
        sort_photos(groups, camera = camera, date_folder = date_folder, 
                    sorted_folder=sorted_folder)
    # Step 5: runs after the camera loop, so all three camera folders had a chance to be created
    check_bugs(sorted_folder)