In [15]:
# Dataset preparation of complete frames

import os
import pandas as pd
from PIL import Image
import shutil

class DatasetPreprocessor:
    """Merge several per-folder CSV files and their frame images into one dataset.

    Every ``v*.csv`` file in ``csv_folder`` is paired with the sub-folder of
    ``base_frames_folder`` that has the same stem (``v1.csv`` -> ``v1``). Rows are
    renumbered consecutively across all CSV files and the matching
    ``frame_XXXX.jpg`` images are copied to ``output_frames_folder`` under the
    new numbers.
    """

    def __init__(self, csv_folder, base_frames_folder, output_csv, output_frames_folder):
        """
        Args:
            csv_folder (str): Path to the folder containing the CSV files.
            base_frames_folder (str): Path to the base folder containing subfolders for frames.
            output_csv (str): Path to save the combined CSV file.
            output_frames_folder (str): Path to save the renamed frames.
        """
        self.csv_folder = csv_folder
        self.base_frames_folder = base_frames_folder
        self.output_csv = output_csv
        self.output_frames_folder = output_frames_folder

        # Ensure the output frames folder exists
        os.makedirs(self.output_frames_folder, exist_ok=True)

    @staticmethod
    def _natural_key(name):
        """Return a sort key that compares digit runs numerically (``v2`` before ``v10``)."""
        import re  # local import: the surrounding cell does not import ``re``

        # re.split with a capturing group alternates text / digits, so odd
        # positions are always digit runs and the key stays type-consistent.
        parts = re.split(r"(\d+)", name)
        return [int(part) if pos % 2 else part for pos, part in enumerate(parts)]

    def combine_and_rename(self, skip_missing_frames=True):
        """Combine all CSV files into one and copy their frames under new numbers.

        Args:
            skip_missing_frames (bool): When True (default), a row whose source
                image does not exist is dropped, so every row of the combined
                CSV refers to an image that was actually copied. Pass False to
                keep such rows (they still consume a frame number).

        Raises:
            KeyError: If a CSV file with at least one row has no ``Frame`` column.
        """
        records = []
        next_frame_number = 0

        # Natural ordering keeps v2 ahead of v10; a plain sort would not.
        csv_files = sorted(
            (f for f in os.listdir(self.csv_folder) if f.startswith("v") and f.endswith(".csv")),
            key=self._natural_key,
        )

        for csv_file in csv_files:
            # Strip only the trailing extension to get the frames sub-folder name.
            folder_name = csv_file[: -len(".csv")]
            frames_folder = os.path.join(self.base_frames_folder, folder_name)

            if not os.path.isdir(frames_folder):
                print(f"Frames folder {frames_folder} does not exist. Skipping {csv_file}.")
                continue

            df = pd.read_csv(os.path.join(self.csv_folder, csv_file))

            # to_dict("records") keeps each column's own type; iterrows() would
            # upcast all-numeric rows to float and turn frame 3 into 3.0.
            for index, record in zip(df.index, df.to_dict("records")):
                old_frame_number = record["Frame"]

                if pd.isna(old_frame_number):
                    print(f"Skipping row {index} in {csv_file}: 'frame' is NaN.")
                    continue

                try:
                    old_frame_number = int(old_frame_number)
                except (ValueError, TypeError):
                    print(f"Skipping row {index} in {csv_file}: Invalid frame number {old_frame_number}.")
                    continue

                old_frame_path = os.path.join(frames_folder, f"frame_{old_frame_number:04d}.jpg")
                new_frame_path = os.path.join(
                    self.output_frames_folder, f"frame_{next_frame_number:04d}.jpg"
                )

                if os.path.isfile(old_frame_path):
                    shutil.copy(old_frame_path, new_frame_path)
                    print(f"Copied: {old_frame_path} -> {new_frame_path}")
                elif skip_missing_frames:
                    # Without this, the CSV would reference an image that was never written.
                    print(f"Skipping row {index} in {csv_file}: frame file {old_frame_path} not found.")
                    continue

                # Update frame number in the row
                record["Frame"] = next_frame_number
                records.append(record)
                next_frame_number += 1

        # Make sure the destination folder of the CSV exists before writing.
        output_dir = os.path.dirname(self.output_csv)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        # Save the combined data to a new CSV
        combined_df = pd.DataFrame(records)
        combined_df.to_csv(self.output_csv, index=False)

# Example usage
if __name__ == "__main__":
    # Inputs: the folder with the CSV files and the base folder holding the frame subfolders.
    csv_folder = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output/new_dataset"
    base_frames_folder = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output"

    # Outputs: the combined CSV and the folder receiving the renumbered frames.
    # NOTE(review): "frmaes" looks like a misspelling of "frames"; kept as-is because
    # existing outputs may already live under this name.
    output_csv = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/combined.csv"
    output_frames_folder = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/frmaes"

    preprocessor = DatasetPreprocessor(
        csv_folder=csv_folder,
        base_frames_folder=base_frames_folder,
        output_csv=output_csv,
        output_frames_folder=output_frames_folder,
    )
    preprocessor.combine_and_rename()

    # Report where the results were written (one line per artifact).
    print(
        f"Combined CSV saved to {output_csv}",
        f"Renamed frames saved to {output_frames_folder}",
        sep="\n",
    )


Copied: C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output\v1\frame_0000.jpg -> C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/frmaes\frame_0000.jpg
Copied: C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output\v1\frame_0001.jpg -> C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/frmaes\frame_0001.jpg
Copied: C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output\v1\frame_0002.jpg -> C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/frmaes\frame_0002.jpg
Copied: C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output\v1\frame_0003.jpg -> C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/frmaes\frame_0003.jpg
Copied: C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output\v1\frame_0004.jpg -> C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hybrid/frmaes\frame_0004.jpg
Copied: C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frame

In [28]:
# Dataset preparation of screenshots

import os
import shutil
import pandas as pd

# --- Input locations ---
# Directory containing the CSV files
csv_dir = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/handraisedataset/generalize"
# The two folders that are searched for frame images
frame_dir1 = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hrdataset_yolo/train/handraise"
frame_dir2 = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/hrdataset_yolo/train/Not_handraise"

# --- Output locations ---
# Folder that receives the copied frames
output_frame_dir = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output/cropped_output/frames"
# Destination of the combined CSV
output_csv_path = "C:/OsamaEjaz/Qiyas_Gaze_Estimation/Wajahat_Yolo_keypoint/frames_output/cropped_output/ssdata.csv"

# Make sure the destination folder for the frames is present
os.makedirs(output_frame_dir, exist_ok=True)

# Rows collected for the combined CSV (one dict per copied frame)
combined_data = list()

# Function to find a frame in both directories
def find_frame(frame_name, directories=None):
    """Return the path of the first existing file called ``frame_name``.

    Args:
        frame_name (str): File name to look for (no directory part).
        directories (iterable of str, optional): Folders to search, in priority
            order. Defaults to ``frame_dir1`` followed by ``frame_dir2``.

    Returns:
        str or None: Full path of the first match, or None when no folder
        contains a file with that name.
    """
    if directories is None:
        directories = (frame_dir1, frame_dir2)

    for directory in directories:
        candidate = os.path.join(directory, frame_name)
        # isfile rather than exists: a directory with the same name is not a
        # frame and could not be copied by the caller.
        if os.path.isfile(candidate):
            return candidate
    return None

# Process each CSV file. The list is sorted because os.listdir returns entries in
# arbitrary order, which would make the per-person numbering differ between runs.
csv_files = sorted(f for f in os.listdir(csv_dir) if f.endswith(".csv"))
person_counters = {}  # Number of frames copied so far for each person
missing_frames = 0  # Rows whose image was not found by find_frame

for csv_file in csv_files:
    csv_path = os.path.join(csv_dir, csv_file)
    df = pd.read_csv(csv_path)

    # Extract the prefix (e.g., v1, v2) from the CSV filename
    prefix = os.path.splitext(csv_file)[0]

    for _, row in df.iterrows():
        frame_number = row["frame"]  # Adjust column name if needed
        person = row["person"]  # Adjust column name if needed

        # Rows without a frame number or person label cannot be mapped to a file name.
        if pd.isna(frame_number) or pd.isna(person):
            print(f"Skipping row in {csv_file}: missing frame or person value.")
            continue

        person = str(person)

        # Convert person to crop suffix (e.g., Person1 -> crop1). All trailing
        # digits are used so that Person10 maps to crop10 rather than crop0;
        # labels without trailing digits fall back to their last character.
        person_id = person[len(person.rstrip("0123456789")):] or person[-1:]
        crop_suffix = f"crop{person_id}"
        # int() keeps the :04d format valid when pandas delivers the number as a float.
        frame_name = f"{prefix}_frame_{int(frame_number):04d}_{crop_suffix}.jpg"

        print(f"Searching for frame: {frame_name}")

        # Find and copy the frame
        frame_path = find_frame(frame_name)
        if not frame_path:
            missing_frames += 1
            continue

        # Update person counter
        person_counters[person] = person_counters.get(person, 0) + 1

        # New frame name; the source extension is reused exactly once
        # (appending ".jpg" on top of it produced "name.jpg.jpg").
        new_frame_name = f"{person.lower()}_{person_counters[person]}"
        extension = os.path.splitext(frame_path)[1]
        new_frame_path = os.path.join(output_frame_dir, new_frame_name + extension)

        # Copy the frame
        shutil.copy(frame_path, new_frame_path)

        # Append to combined data; "frame" now holds the new name without extension
        row_data = row.to_dict()
        row_data["frame"] = new_frame_name
        row_data["person"] = person
        combined_data.append(row_data)

# Save the combined CSV (index=True also writes the row index as the first column)
combined_df = pd.DataFrame(combined_data)
combined_df.to_csv(output_csv_path, index=True)

print("Processing complete. Frames copied and combined CSV created.")
if missing_frames:
    print(f"{missing_frames} row(s) skipped because the frame image was not found.")


Searching for frame: v1_frame_0000_crop1.jpg
Searching for frame: v1_frame_0001_crop1.jpg
Searching for frame: v1_frame_0002_crop1.jpg
Searching for frame: v1_frame_0003_crop1.jpg
Searching for frame: v1_frame_0004_crop1.jpg
Searching for frame: v1_frame_0005_crop1.jpg
Searching for frame: v1_frame_0006_crop1.jpg
Searching for frame: v1_frame_0007_crop1.jpg
Searching for frame: v1_frame_0008_crop1.jpg
Searching for frame: v1_frame_0009_crop1.jpg
Searching for frame: v1_frame_0010_crop1.jpg
Searching for frame: v1_frame_0011_crop1.jpg
Searching for frame: v1_frame_0012_crop1.jpg
Searching for frame: v1_frame_0013_crop1.jpg
Searching for frame: v1_frame_0014_crop1.jpg
Searching for frame: v1_frame_0015_crop1.jpg
Searching for frame: v1_frame_0016_crop1.jpg
Searching for frame: v1_frame_0017_crop1.jpg
Searching for frame: v1_frame_0018_crop1.jpg
Searching for frame: v1_frame_0019_crop1.jpg
Searching for frame: v1_frame_0020_crop1.jpg
Searching for frame: v1_frame_0021_crop1.jpg
Searching 