In [None]:
import pandas as pd
import numpy as np

In [None]:
# Absolute path of the Excel workbook that the next cell reads into `df`
# with pd.read_excel. The path is machine-specific (Windows drive D:).
actual = r"D:\Wild deserts photos\concatenated_Dave.xlsx"

In [None]:
# Read every workbook column as text, then rebuild typed columns:
# "Time" becomes a timedelta and "Date" a datetime, so that their sum is a
# full timestamp stored in "datetime_converted".
df = pd.read_excel(actual, dtype=str)
df = df.assign(
    Time=pd.to_timedelta(df["Time"]),
    Date=pd.to_datetime(df["Date"]),
)
df["datetime_converted"] = df["Date"] + df["Time"]

In [None]:
from pathlib import Path
import pandas as pd

root_dir = Path(r"D:\Wild deserts photos\2024_12_WCAM_originals\WCAM01\100RECNX\night_no_fences")

all_dataframes = []

# Recursively collect every .csv under root_dir. The paths are sorted so the
# row order of the combined frame is the same on every run.
for csv_file in sorted(root_dir.rglob("*.csv")):
    # Deliberately NOT named `df`: that name already holds the Excel frame
    # loaded earlier and is read again by later cells. Reusing it here would
    # silently replace the Excel data with the last CSV that was read.
    csv_df = pd.read_csv(csv_file)

    # Tag each row with the name of the folder that directly contains the CSV.
    csv_df["model_name"] = csv_file.parent.name

    # First path component below root_dir.
    # NOTE(review): for a CSV sitting directly in root_dir this is the file
    # name itself rather than a folder name - confirm that case cannot occur.
    relative_path = csv_file.relative_to(root_dir)
    csv_df["Location"] = relative_path.parts[0]

    all_dataframes.append(csv_df)

# Combine everything into a single frame with a fresh 0..n-1 index.
if all_dataframes:
    master_df = pd.concat(all_dataframes, ignore_index=True)
    print(master_df)
else:
    # master_df is left undefined in this case; later cells require it.
    print("No CSV files found.")


In [None]:
# Parse the "time" strings (written as "YYYY:MM:DD HH:MM:SS") once, then
# derive the calendar date and the time of day from the parsed timestamps.
parsed_time = pd.to_datetime(master_df["time"], format="%Y:%m:%d %H:%M:%S")
master_df["datetime_converted"] = parsed_time
master_df["Date"] = parsed_time.dt.date
master_df["Time"] = parsed_time.dt.time


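The next cell joins the two DataFrames (`master_df`, built from the CSV files, and `df`, loaded from the Excel workbook) on their timestamps so that the `species` and `Species` columns can be compared.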

In [None]:

# Keep only the `df` rows whose timestamp lies inside the span covered by
# `master_df`. The bounds are inclusive: a row stamped exactly at the first
# or last master_df timestamp is a valid match and must not be dropped.
first_seen = master_df["datetime_converted"].min()
last_seen = master_df["datetime_converted"].max()
df = df[
    (df["datetime_converted"] >= first_seen) &
    (df["datetime_converted"] <= last_seen)
]

# Right join: every remaining `df` row is kept, and master_df columns are
# attached wherever the timestamps are exactly equal.
joined_df = pd.merge(master_df, df, on="datetime_converted", how="right")
cleaned = joined_df[[
    "species",
    "confidence",
    "Species",
    "path_original",
    "datetime_converted",
    "Location_x",
    "Location_y",
    "model_name"
]]

# Applied to every column: cells equal to "Red Kangaroo" become "Kangaroo".
cleaned = cleaned.replace("Red Kangaroo", "Kangaroo")

# Rows where the two species columns differ; a missing value on either side
# also counts as a difference.
not_correct = cleaned.loc[cleaned["Species"] != cleaned["species"]]
night = not_correct.loc[not_correct["model_name"] == "night_0402"]


In [None]:

# Discard rows whose confidence is the literal string "none".
has_confidence = master_df["confidence"] != "none"
master_df = master_df.loc[has_confidence]


In [None]:
# Cast the confidence scores to float. `assign` returns a new frame instead
# of writing a column into `master_df` in place; the in-place write can raise
# SettingWithCopyWarning when `master_df` is the result of a boolean filter.
master_df = master_df.assign(confidence=master_df["confidence"].astype(float))

In [None]:
# Keep only rows whose confidence is strictly greater than 0.8.
is_confident = master_df["confidence"] > 0.8
master_df = master_df.loc[is_confident]


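Keep only the highest-confidence image within each time window. The window length is set by `time_window_sec` (currently 600 s in the code, not 30 s).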

In [None]:
# Window length in seconds used by the selection loop (600 s = 10 minutes).
time_window_sec = 600

# Order the rows by timestamp and turn them into a list of per-row dicts.
images_sorted = master_df.sort_values("datetime_converted")
records = images_sorted.to_dict("records")

# State consumed by the selection loop.
selected_images = []
current_anchor = None

In [None]:
# Walk the time-ordered records and keep one image per run of nearby images.
# State is reset here so that re-running this cell on its own does not append
# the same selections a second time.
selected_images = []
current_anchor = None

for img in records:
    if current_anchor is None:
        # First image in the list becomes our anchor
        current_anchor = img
        continue

    # The gap is measured from the current anchor, so the reference time
    # moves whenever a higher-confidence image replaces the anchor.
    time_diff = (img['datetime_converted'] - current_anchor['datetime_converted']).total_seconds()

    if time_diff <= time_window_sec:
        # If within the window, pick the higher-confidence image
        if img['confidence'] > current_anchor['confidence']:
            current_anchor = img
    else:
        # Time window exceeded => finalize current_anchor & start a new one
        selected_images.append(current_anchor)
        current_anchor = img

# Flush the final anchor: the loop only appends when a later image falls
# outside the window, so without this the last group would be lost.
if current_anchor is not None:
    selected_images.append(current_anchor)

In [None]:
# One line per selected image: timestamp, confidence and species.
for img in selected_images:
    print(*(img[field] for field in ("datetime_converted", "confidence", "species")))

In [None]:
# Build a DataFrame from the list of selected record dicts (one row each).
# If the list is empty the frame has no columns, and the later merges on
# "datetime_converted" will fail.
selected_images_df = pd.DataFrame(selected_images)

In [None]:

# Keep only the `df` rows whose timestamp lies inside the span covered by
# `master_df` (which the preceding cells have filtered by confidence). The
# bounds are inclusive: a row stamped exactly at the first or last master_df
# timestamp is a valid match and must not be dropped.
first_seen = master_df["datetime_converted"].min()
last_seen = master_df["datetime_converted"].max()
df = df[
    (df["datetime_converted"] >= first_seen) &
    (df["datetime_converted"] <= last_seen)
]

# Outer join: rows from both sides are kept, paired only where the
# timestamps are exactly equal.
joined_df = pd.merge(selected_images_df, df, on="datetime_converted", how="outer")
cleaned = joined_df[[
    "species",
    "confidence",
    "Species",
    "path_original",
    "datetime_converted",
    "Location_x",
    "Location_y",
    "model_name"
]]

# Applied to every column: cells equal to "Red Kangaroo" become "Kangaroo".
cleaned = cleaned.replace("Red Kangaroo", "Kangaroo")

# Rows where the two species columns differ; rows present on only one side
# of the outer join have a missing value and therefore also land here.
not_correct = cleaned.loc[cleaned["Species"] != cleaned["species"]]
night = not_correct.loc[not_correct["model_name"] == "night_0402"]


In [None]:
# Nearest-timestamp match of each selected image against `df`.
# merge_asof(on=...) returns a single key column holding the LEFT timestamp,
# so the timestamp of the matched right-hand row would be lost. Copy each
# side's key into its own column first (datetime_converted_x = left,
# datetime_converted_y = matched right) so the distance between the two can
# be inspected afterwards.
left_keyed = selected_images_df.assign(
    datetime_converted_x=selected_images_df["datetime_converted"]
)
right_keyed = df.sort_values(by=["datetime_converted"]).assign(
    datetime_converted_y=lambda frame: frame["datetime_converted"]
)
test = pd.merge_asof(left_keyed, right_keyed, on="datetime_converted", direction="nearest")

In [None]:
# Columns that must exist in the merge_asof result; a missing one should
# still raise KeyError so that typos are not hidden.
required_columns = [
    "species",
    "confidence",
    "Species",
    "path_original",
    "datetime_converted",
    "Location_x",
    "Location_y",
    "model_name",
]

# pd.merge_asof(..., on="datetime_converted") yields one key column, so the
# suffixed per-side timestamps exist only if the merge step added them
# explicitly. Selecting them unconditionally raises KeyError, so include them
# only when they are present.
optional_columns = [
    column
    for column in ("datetime_converted_x", "datetime_converted_y")
    if column in test.columns
]

cleaned_test = test[required_columns + optional_columns]

In [None]:
# Write the selected images to "december_ai.csv" in the current working
# directory. The DataFrame index is written as the first, unnamed column
# because index=False is not passed.
selected_images_df.to_csv("december_ai.csv")