### Annotation
Annotate each video and save data in the csv and rename the videos also,
Take input for each person hand in pocket and can also automate the annotation for one person if press a 

In [None]:
""" The script is used to annotate hand in pocket videos and generating a csv file with the keypoints and distances values. 
It take camera number and video number as input and name the csv file to that number with camera number and video number as cN_vN."""

from ultralytics import YOLO
import os
import cv2
import numpy as np
import csv
import json
import sys
import shutil

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

roi_data_list = []
frame_count = 0
saved_TP_frames = set()


def draw_lines(frame, keypoints, connections):
    for start_idx, end_idx in connections:
        if start_idx < len(keypoints) and end_idx < len(keypoints):
            x1, y1, conf1 = keypoints[start_idx]
            x2, y2, conf2 = keypoints[end_idx]
            if conf1 > 0.5 and conf2 > 0.5:
                cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 255), 2)

def assign_roi_index(x):
    for roi in roi_data_list:
        if roi["xmin"] <= x < roi["xmax"]:
            return roi["desk"]
    return -1

if __name__ == "__main__":
    model = YOLO("C:/wajahat/hand_in_pocket/bestv8-1.pt")
    input_dir = "C:/Users/LT/Downloads/TP_S2/TP_S2"
    # video_name = "c2_v4"
    output_dir = "C:/wajahat/hand_in_pocket/dataset/training3/tp_s2_w1"
    # json_path = "qiyas_multicam.camera_final.json"
    json_path = "qiyas_multicam_2.camera.json"

    os.makedirs(output_dir, exist_ok=True)

    video_files = [f for f in os.listdir(input_dir) if f.endswith(".mp4")]
    if not video_files:
        print("No video files found in the input directory.")
        exit()

    for video_file in video_files:
        video_name = os.path.splitext(video_file)[0]
        video_path = os.path.join(input_dir, video_file)
        print(f"Processing {video_name}...")
    
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print("Error: Could not open video.")
            exit()

        frame_width = 1280
        frame_height = 720
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

        ret, frame = cap.read()
        if not ret:
            print("Error reading first frame.")
            exit()

        frame = cv2.resize(frame, (1280, 720))
        cv2.imshow("Select Camera View", frame)
        cv2.waitKey(1)

        with open(json_path, "r") as f:
            camera_config = json.load(f)

        skip_video = False
        while True:
            cam_id = input("Enter camera ID: ")
            if cam_id.lower() == 's':
                with open(f"{output_dir}/video_skip.csv", "a", newline='') as f:
                    f.write(f"skipped the video: {video_file} \n")
                skip_video = True
                cap.release()
                cv2.destroyWindow("Select Camera View")
                break
            camera_id_input = cam_id
            video_num = input("Enter video num:")
            camera_id = f"camera_{camera_id_input}"
            camera_data = next((cam for cam in camera_config if cam["_id"] == camera_id), None)
            if camera_data:
                break
            print(f"Invalid camera ID: {camera_id}. Please try again.")

        if skip_video:
            continue

        cv2.destroyWindow("Select Camera View")

        roi_data_list = list(camera_data["data"].values())
        roi_lookup = {roi["desk"]: roi for roi in roi_data_list}

        connections = [
            (0, 1), (0, 2), (0, 3),
            (1, 4), (1, 7),
            (4, 5), (5, 6),
            (7, 8), (8, 9)
        ]
        video_name = f"c{camera_id_input}_v{video_num}"
        csv_filename = os.path.join(output_dir, video_name + ".csv")

        keypoint_headers = [f"kp_{i}_x" for i in range(10)] + [f"kp_{i}_y" for i in range(10)] + [f"kp_{i}_conf" for i in range(10)]
        headers = ["frame", "person_idx", "position", "desk_no"] + keypoint_headers + ["hand_in_pocket"]

        csv_file = open(csv_filename, "w", newline="")
        csv_writer = csv.DictWriter(csv_file, fieldnames=headers)
        csv_writer.writeheader()

        all_frames_data = []
        frame_count = 0
        processed_videos= []

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (1280, 720))
            results = model(frame, verbose=False)
            person_info_list = []

            for result in results:
                keypoints = result.keypoints
                if keypoints is not None:
                    keypoints_data = keypoints.data

                    temp_person_info = []

                    for person_keypoints in keypoints_data:
                        keypoint_list = []
                        row_data = {"frame": frame_count}

                        for kp_idx, kp in enumerate(person_keypoints):
                            x, y, conf = kp[0].item(), kp[1].item(), kp[2].item()
                            if conf < 0.5:
                                x, y = 0, 0
                            keypoint_list.append((x, y, conf))
                            row_data[f"kp_{kp_idx}_x"] = x
                            row_data[f"kp_{kp_idx}_y"] = y
                            row_data[f"kp_{kp_idx}_conf"] = conf
                            if conf > 0.5:
                                cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 0), -1)

                                draw_lines(frame, keypoint_list, connections)

                        if not keypoint_list:
                            continue

                        roi_x = keypoint_list[0][0]
                        roi_idx = assign_roi_index(roi_x)
                        roi_data = roi_lookup.get(roi_idx)

                        if roi_data is None:
                            print(f"⚠️ No ROI config for roi_idx {roi_idx}, skipping.")
                            continue

                        cv2.putText(frame, f"ROI: {roi_idx}", (int(roi_x), 50 + 30 * roi_idx), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
                        
                        row_data["desk_no"] = roi_idx
                        row_data["position"] = roi_data["position"]

                        temp_person_info.append((roi_idx, row_data))


                    # Remap person_idx based on sorted roi
                    temp_person_info.sort(key=lambda x: x[0])
                    for new_idx, (_, row) in enumerate(temp_person_info):
                        row["person_idx"] = new_idx
                        person_info_list.append(row)

            all_frames_data.append((frame.copy(), person_info_list))

        cap.release()

        # Rearranged annotation loop by person across all frames
        max_persons = max(len(info) for _, info in all_frames_data)
        print(max_persons)

        frame_hand_labels = {}
        auto_labels = {}
        for person_idx in range(max_persons):
            print(f"\n\u25ba Now annotating for Person #{roi_idx} of video {video_name}across all frames.")

            for frame_num, (frame, person_list) in enumerate(all_frames_data):
                if person_idx >= len(person_list):
                    continue
                row_data = person_list[person_idx]
                frame_to_show = frame.copy()
                save_frame = frame_to_show.copy()

                cv2.imshow("frame", frame_to_show)
                cv2.waitKey(1)

                roi_idx = row_data["desk_no"]
                position = row_data["position"]
                prompt = f"Frame {frame_num} | ROI {roi_idx} (Position: {position}): Enter hand_in_pocket (0 or 1) [Default: 0]: "

                if roi_idx in auto_labels:
                    hand_in_pocket = auto_labels[roi_idx]
                    print(f"Auto label applird: ROI {roi_idx} -> {hand_in_pocket}")
                
                else:
                    while True:
                        hand_in_pocket = input(prompt).strip()
                        if hand_in_pocket.lower() == "a":
                            value = input(f"Enter value for ROI {roi_idx} (0 or 1): ").strip()
                            if value not in ["0", "1"]:
                                print("❌ Invalid value. Please enter 0 or 1.")
                                continue
                            auto_labels[roi_idx] = value
                            hand_in_pocket = value
                            print(f"Auto label set: ROI {roi_idx} -> {hand_in_pocket}")
                            break

                        elif hand_in_pocket in ["","0","1"]:
                            hand_in_pocket = hand_in_pocket or "0"
                            break

                        else:
                            print("❌ Invalid input. Please enter 0 or 1 or press Enter for default 0.")

                row_data["hand_in_pocket"] = hand_in_pocket
                csv_writer.writerow(row_data)
            
        new_video_path = os.path.join(input_dir, video_name + ".mp4")

        try:
            if os.path.exists(new_video_path):
                os.remove(new_video_path)

            shutil.copy2(video_path, new_video_path)
            print(f"Copied processed video to: {new_video_path}")

            try:
                if os.path.getsize(video_path) != os.path.getsize(new_video_path):
                    print("⚠️ Warning: copied file size differs from source.")
            except Exception:
                pass

            if os.path.exists(video_path):
                os.remove(video_path)
                print(f"Deleted original video: {video_path}")

        except Exception as e:
            print(f"Video copy/overwrite/delete failed: {e}")

    csv_file.close()
    cv2.destroyAllWindows()




### FP Annotation
Annotation of FP videos to assign automatically all data to 0, just need to enter the video num and it will create csv and rename the video

In [None]:
from ultralytics import YOLO
import os
import cv2
import numpy as np
import csv
import json
import shutil

os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

# Global variable to hold loaded ROI data
roi_data_list = []
frame_count = 0
saved_TP_frames = set()


def draw_lines(frame, keypoints, connections):
    for start_idx, end_idx in connections:
        if start_idx < len(keypoints) and end_idx < len(keypoints):
            x1, y1, conf1 = keypoints[start_idx]
            x2, y2, conf2 = keypoints[end_idx]
            if conf1 > 0.5 and conf2 > 0.5:
                cv2.line(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 255), 2)

def assign_roi_index(x):
    for roi in roi_data_list:
        if roi["xmin"] <= x < roi["xmax"]:
            return roi["desk"]
    return -1

if __name__ == "__main__":
    model = YOLO("C:/wajahat/hand_in_pocket/bestv8-1.pt")
    input_dir = "C:/Users/LT/Downloads/fp/fp"
    # video_name = "c2_v4"
    output_dir = "C:/wajahat/hand_in_pocket/dataset/training3/fp_s2_w1"
    # json_path = "qiyas_multicam.camera_final.json"
    json_path = "qiyas_multicam_2.camera.json"

    os.makedirs(output_dir, exist_ok=True)

    video_files = [f for f in os.listdir(input_dir) if f.endswith(".mp4")]
    if not video_files:
        print("No video files found in the input directory.")
        exit()

    for video_file in video_files:
        video_name = os.path.splitext(video_file)[0]
        video_path = os.path.join(input_dir, video_file)
        print(f"Processing {video_name}...")
    
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print("Error: Could not open video.")
            exit()

        frame_width = 1280
        frame_height = 720
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, frame_width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, frame_height)

        ret, frame = cap.read()
        if not ret:
            print("Error reading first frame.")
            exit()

        frame = cv2.resize(frame, (1280, 720))
        cv2.imshow("Select Camera View", frame)
        cv2.waitKey(1)

        with open(json_path, "r") as f:
            camera_config = json.load(f)

        while True:
            camera_id_input = input("Enter camera ID for this video (e.g., camera_1): ")
            video_num = input("Enter video name:")
            camera_id = f"camera_{camera_id_input}"
            camera_data = next((cam for cam in camera_config if cam["_id"] == camera_id), None)
            if camera_data:
                break
            print(f"Invalid camera ID: {camera_id}. Please try again.")

        cv2.destroyWindow("Select Camera View")

        roi_data_list = list(camera_data["data"].values())
        roi_lookup = {roi["desk"]: roi for roi in roi_data_list}

        connections = [
            (0, 1), (0, 2), (0, 3),
            (1, 4), (1, 7),
            (4, 5), (5, 6),
            (7, 8), (8, 9)
        ]
        video_name = f"c{camera_id_input}_v{video_num}"
        csv_filename = os.path.join(output_dir, video_name + ".csv")

        keypoint_headers = [f"kp_{i}_x" for i in range(10)] + [f"kp_{i}_y" for i in range(10)] + [f"kp_{i}_conf" for i in range(10)]
        headers = ["frame", "person_idx", "position", "desk_no"] + keypoint_headers + ["hand_in_pocket"]

        csv_file = open(csv_filename, "w", newline="")
        csv_writer = csv.DictWriter(csv_file, fieldnames=headers)
        csv_writer.writeheader()

        all_frames_data = []
        frame_count = 0

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame = cv2.resize(frame, (1280, 720))
            save_frame = frame.copy()
            results = model(frame, verbose=False)
            person_info_list = []

            for result in results:
                keypoints = result.keypoints
                if keypoints is not None:
                    keypoints_data = keypoints.data

                    temp_person_info = []

                    for person_keypoints in keypoints_data:
                        keypoint_list = []
                        row_data = {"frame": frame_count}

                        for kp_idx, kp in enumerate(person_keypoints):
                            x, y, conf = kp[0].item(), kp[1].item(), kp[2].item()
                            if conf < 0.5:
                                x, y = 0, 0
                            keypoint_list.append((x, y, conf))
                            row_data[f"kp_{kp_idx}_x"] = x
                            row_data[f"kp_{kp_idx}_y"] = y
                            row_data[f"kp_{kp_idx}_conf"] = conf
                            cv2.circle(frame, (int(x), int(y)), 5, (0, 255, 0), -1)

                        draw_lines(frame, keypoint_list, connections)

                        if not keypoint_list:
                            continue

                        roi_x = keypoint_list[0][0]
                        roi_idx = assign_roi_index(roi_x)
                        roi_data = roi_lookup.get(roi_idx)

                        if roi_data is None:
                            print(f"⚠️ No ROI config for roi_idx {roi_idx}, skipping.")
                            continue

                        cv2.putText(frame, f"ROI: {roi_idx}", (int(roi_x), 50 + 30 * roi_idx), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)
                        
                        row_data["desk_no"] = roi_idx
                        row_data["position"] = roi_data["position"]
                        temp_person_info.append((roi_idx, row_data))

                    temp_person_info.sort(key=lambda x: x[0])
                    for new_idx, (_, row) in enumerate(temp_person_info):
                        row["person_idx"] = new_idx
                        person_info_list.append(row)

            all_frames_data.append((frame.copy(), person_info_list))

        cap.release()

        max_persons = max(len(info) for _, info in all_frames_data)
        print(max_persons)

        frame_hand_labels = {}
        auto_labels = {}
        for person_idx in range(max_persons):
            roi_idx = row_data["desk_no"]
            print(f"\n\u25ba Now annotating for Person #{roi_idx} of video {video_name}across all frames.")

            for frame_num, (frame, person_list) in enumerate(all_frames_data):
                if person_idx >= len(person_list):
                    continue
                row_data = person_list[person_idx]
                frame_to_show = frame.copy()
                save_frame = frame_to_show.copy()

                cv2.imshow("frame", frame_to_show)
                cv2.waitKey(1)

                # ---- CHANGED: auto-assign hand_in_pocket = "0" (no prompt) ----
                hand_in_pocket = "0"

                row_data["hand_in_pocket"] = hand_in_pocket
                csv_writer.writerow(row_data)

        print(f"Annotation completed and saved {video_name} CSV.")

        new_video_path = os.path.join(input_dir, video_name + ".mp4")

        try:
            # Ensure the destination name doesn't block us
            if os.path.exists(new_video_path):
                os.remove(new_video_path)

            # Copy the processed source video (video_path) to the new name
            shutil.copy2(video_path, new_video_path)
            print(f"Copied processed video to: {new_video_path}")

            # Optional quick sanity check: same size after copy
            try:
                if os.path.getsize(video_path) != os.path.getsize(new_video_path):
                    print("⚠️ Warning: copied file size differs from source.")
            except Exception:
                pass

            # Now delete the original file
            if os.path.exists(video_path):
                os.remove(video_path)
                print(f"Deleted original video: {video_path}")

        except Exception as e:
            print(f"Video copy/overwrite/delete failed: {e}")

    csv_file.close()
    cv2.destroyAllWindows()


### Balanced TP csv

In [None]:
import os
import pandas as pd
from tkinter import Tk, filedialog, Frame
from pandastable import Table

# Paths
input_folder = "C:/wajahat/hand_in_pocket/dataset/split_keypoint"   
output_folder = "C:/wajahat/hand_in_pocket/dataset/training2/balanced/old_hp" 
os.makedirs(output_folder, exist_ok=True)

csv_files = [f for f in os.listdir(input_folder) if f.endswith(".csv")]

for csv_file in csv_files:
    file_path = os.path.join(input_folder, csv_file)
    print(f"\nOpening {csv_file}...\n")

    df = pd.read_csv(file_path)

    root = Tk()
    root.title(f"Editing {csv_file} - Close window when done")

    frame = Frame(root)
    frame.pack(fill="both", expand=True)

    table = Table(frame, dataframe=df, showtoolbar=True, showstatusbar=True)
    table.show()

    root.mainloop()

    updated_df = table.model.df

    save_path = os.path.join(output_folder, csv_file)
    updated_df.to_csv(save_path, index=False)
    print(f"Saved updated file to {save_path}")


### Combined the single separate csvs into one csv
set the window size, stride rate, and proportion of TP and TN combination

In [4]:
import pandas as pd
import os
import glob

csv= "fp_s1_t4"
input_folder = "C:/Users/LT/Downloads/Final_balanced/FP_csv_room1"
output_dir = "C:/wajahat/hand_in_pocket/dataset/training4/"
os.makedirs(output_dir, exist_ok=True)  # Create output directory if it doesn't exist
output_file = f'{csv}_combine.csv'
output_file = os.path.join(output_dir, output_file)

window_size = 5  # Size of the rolling window
stride = 1
target_column = 'hand_in_pocket'

columns_to_drop = ['person_idx']
meta_columns = ['frame','desk_no']
special_column = 'position'


all_temporal_rows= []
feature_cols = None

csv_files = glob.glob(os.path.join(input_folder, '*.csv'))

for file in csv_files:
    
    df = pd.read_csv(file)

    if df.empty or len(df) < window_size:
        print(f"Skipping empty or too short file: {file}")
        continue

    drop_cols = [col for col in df.columns if '_conf' in col or col.startswith("distance(")]
    df.drop(columns=drop_cols + columns_to_drop, inplace=True, errors="ignore")

    if feature_cols is None:
        feature_cols = [col for col in df.columns 
                        if col not in meta_columns + [special_column, target_column]]
        assert len(feature_cols) * window_size > 0 

    file_name = os.path.basename(file)

    for desk_no, desk_group in df.groupby("desk_no"):
        desk_group = desk_group.reset_index(drop=True)

        if len(desk_group) < window_size:
            print(f"skipping desk {desk_no} in {file_name} due to less rows")
            continue

        for i in range(0, len(desk_group) - window_size + 1, stride):
            window = desk_group.iloc[i:i + window_size]

            if len(window) < window_size:
                continue

            feature = window[feature_cols].values.flatten()

            frame_val = window[meta_columns[0]].iloc[0]
            desk_val = window[meta_columns[1]].iloc[0]

            position_val = window[special_column].iloc[0] if special_column in window.columns else None

            labels_counts = window[target_column].value_counts()
            label = 1 if labels_counts.get(1,0) >= 1 else 0  # for the logic if there is 1 in the window, leabel is 1

            all_temporal_rows.append([file_name, frame_val, desk_val] + feature.tolist() + [position_val, label])

temporal_features_cols = [
    f"{col}_t{t}" for t in range(window_size) for col in feature_cols
]

output_columns = ['source_file'] + meta_columns + temporal_features_cols + [special_column, target_column]

temporal_df = pd.DataFrame(all_temporal_rows, columns=output_columns)
temporal_df.to_csv(output_file, index=False)

print("✅ Temporal features CSV saved")

✅ Temporal features CSV saved


Change the position values according to the json stored values
update the position length accordingly

In [13]:
import pandas as pd
import json
import re

# === File paths ===
csv = "tp_s2_t4"
# input_csv = f"C:/wajahat/hand_in_pocket/dataset/training2/window4/seq2/{csv}.csv"
input_csv = f"C:/wajahat/hand_in_pocket/dataset/training4/{csv}_combine.csv"
output_dir = "C:/wajahat/hand_in_pocket/dataset/training4/"
csv_name = f'{csv}_pos.csv'
output_csv = f"{output_dir}/{csv_name}"
# json_file = "C:/wajahat/hand_in_pocket/qiyas_multicam.camera_final.json"
json_file = "qiyas_multicam_2.camera.json"

position_length = 1

# === Load data ===
df = pd.read_csv(input_csv)
with open(json_file, 'r') as f:
    camera_data = json.load(f)

# === Build camera map from JSON ===
camera_map = {}
for cam in camera_data:
    if '_id' in cam:
        match = re.search(r'camera_(\d+)', cam['_id'])
        if match:
            cam_id = int(match.group(1))
            camera_map[cam_id] = cam

if position_length == 4:
    # === Process rows for temporal feature ===
    processed_rows = []

    for idx, row in df.iterrows():
        try:
            # Extract and validate position values
            pos_vals = [row[f'position_t{i}'] for i in range(5)]
            if len(set(pos_vals)) != 1:
                # print(f"Row {idx} skipped — Position values not uniform: {pos_vals}")
                continue  # Skip if position values differ

            position_val = pos_vals[0]

            # Extract camera number from source_file
            source_file = row['source_file']
            match = re.search(r'c(\d+)_v\d+', source_file)
            if not match:
                continue  # Skip if camera number not found

            cam_id = int(match.group(1))
            cam_info = camera_map.get(cam_id)
            if not cam_info:
                continue

            # Look for matching position in camera JSON data
            matched_entry = None
            for entry in cam_info['data'].values():
                if entry.get('position') == position_val:
                    matched_entry = entry
                    break

            if not matched_entry or 'position_list' not in matched_entry:
                continue

            position_list = matched_entry['position_list']
            if len(position_list) != 4:
                continue  # Skip if not exactly 4 values

            # Build new row with replaced columns
            new_row = row.drop(labels=[f'position_t{i}' for i in range(5)]).to_dict()
            new_row['position_a'], new_row['position_b'], new_row['position_c'], new_row['position_d'] = position_list
            processed_rows.append(new_row)

        except Exception as e:
            print(f"⚠️ Skipping row {idx} due to error: {e}")


    # === processing rows for single position ===

elif position_length == 1:
    processed_rows = []

    for idx, row in df.iterrows():
        try:
            position_val = row['position']

            source_file = row['source_file']
            match = re.search(r'c(\d+)_v\d+', source_file)
            # print(f"match {match}")
            if not match:
                # print(f"match not found {match}")
                continue

            cam_id = int(match.group(1))
            cam_info = camera_map.get(cam_id)
            # print("cam_info", cam_info['data'].values())
            if not cam_info:
                # print(f"cam info not found {cam_id}")
                continue

            matched_entry = None
            for entry in cam_info['data'].values():
                if entry.get('position') == position_val:
                    matched_entry = entry
                    break

            if not matched_entry or 'position_list' not in matched_entry:
                # print(f"matched entry not found {matched_entry}")
                continue

            position_list = matched_entry['position_list']
            if len(position_list) != 4:
                # print(f"position list not found {position_list}")
                continue

            new_row = row.drop(labels=['position']).to_dict()
            new_row['position_a'], new_row['position_b'], new_row['position_c'], new_row['position_d'] = position_list
            processed_rows.append(new_row)
            # print("processed_rows: ",processed_rows)

        except Exception as e:
            print(f"⚠️ Skipping row {idx} due to error: {e}")

    # === Final DataFrame and column ordering ===

else:
    print("Undefined position length")
    
if processed_rows:
    new_df = pd.DataFrame(processed_rows)

    new_df["camera"] = new_df["source_file"].str.extract(r'^(c\d+)_')
    new_df["video"] = new_df["source_file"].str.extract(r'_(v\d+)')

    # Drop source_file
    if "source_file" in new_df.columns:
        new_df = new_df.drop(columns=["source_file"])

    col_order = ["camera", "video"] + [c for c in new_df.columns if c not in ["camera", "video"]]
    new_df = new_df[col_order]

    # Reorder: insert position_a-d before hand_in_pocket
    if 'hand_in_pocket' in new_df.columns:
        cols = list(new_df.columns)
        insert_at = cols.index('hand_in_pocket')
        for col in ['position_a', 'position_b', 'position_c', 'position_d']:
            if col in cols:
                cols.remove(col)
        cols = cols[:insert_at] + ['position_a', 'position_b', 'position_c', 'position_d'] + cols[insert_at:]
        new_df = new_df[cols]

    new_df.to_csv(output_csv, index=False)
    print(f"\n✅ Output saved to: {output_csv}")
else:
    print("⚠️ No valid rows processed.")



✅ Output saved to: C:/wajahat/hand_in_pocket/dataset/training4//tp_s2_t4_pos.csv


### FInal CSV
Rearrange the columns and combine all csvs into one for the training 

In [14]:
import pandas as pd
import os 

csv1 = "C:/wajahat/hand_in_pocket/dataset/training3/old_hp_combine_pos.csv"
csv2 = "C:/wajahat/hand_in_pocket/dataset/training3/fp_combine_pos.csv"
csv3 = "C:/wajahat/hand_in_pocket/dataset/training3/moiz_fp_combine_pos.csv"
csv4 = "C:/wajahat/hand_in_pocket/dataset/training3/tp_combine_pos.csv"
csv5 = "C:/wajahat/hand_in_pocket/dataset/training3/fn_combine_pos.csv"
csv6 = "C:/wajahat/hand_in_pocket/dataset/training3/mudassir_hp_combine_pos.csv"
csv7 = "C:/wajahat/hand_in_pocket/dataset/training3/fp_s1_w1_pos.csv"
csv8 = "C:/wajahat/hand_in_pocket/dataset/training3/fp_s1_w2_pos.csv"
csv9 = "C:/wajahat/hand_in_pocket/dataset/training3/fp_s2_w1_pos.csv"
csv10 = "C:/wajahat/hand_in_pocket/dataset/training3/fp_s2_w2_pos.csv"
csv11 = "C:/wajahat/hand_in_pocket/dataset/training3/missing_s1_pos.csv"
csv12 = "C:/wajahat/hand_in_pocket/dataset/training3/missing_s2_pos.csv"
csv13 = "C:/wajahat/hand_in_pocket/dataset/training3/tp_s1_w1_pos.csv"
csv14 = "C:/wajahat/hand_in_pocket/dataset/training3/tp_s1_w2_pos.csv"
csv15 = "C:/wajahat/hand_in_pocket/dataset/training3/tp_s2_w1_pos.csv"
csv16 = "C:/wajahat/hand_in_pocket/dataset/training3/tp_s2_w2_pos.csv"
csv17 = "C:/wajahat/hand_in_pocket/dataset/training4/tp_s1_t4_pos.csv"
csv18 = "C:/wajahat/hand_in_pocket/dataset/training4/fp_s1_t4_pos.csv"
csv19 = "C:/wajahat/hand_in_pocket/dataset/training4/tp_s2_t4_pos.csv"
csv20 = "C:/wajahat/hand_in_pocket/dataset/training4/fp_s2_t4_pos.csv"

# csv2 = "C:/wajahat/hand_in_pocket/dataset/new_dataset/new_combined_sorted_balanced2.csv"
output_csv = "C:/wajahat/hand_in_pocket/dataset/training4/itteration4_temp_norm_balanced.csv"

df1 = pd.read_csv(csv1)
df2 = pd.read_csv(csv2)
df3 = pd.read_csv(csv3)
df4 = pd.read_csv(csv4)
df5 = pd.read_csv(csv5)
df6 = pd.read_csv(csv6)
df7 = pd.read_csv(csv7)
df8 = pd.read_csv(csv8)
df9 = pd.read_csv(csv9)
df10 = pd.read_csv(csv10)
df11 = pd.read_csv(csv11)
df12 = pd.read_csv(csv12)
df13 = pd.read_csv(csv13)
df14 = pd.read_csv(csv14)
df15 = pd.read_csv(csv15)
df16 = pd.read_csv(csv16)
df17 = pd.read_csv(csv17)
df18 = pd.read_csv(csv18)
df19 = pd.read_csv(csv19)
df20 = pd.read_csv(csv20)

combined_df = pd.concat([df1, df2, df3, df4, df5, df6, df7, df8, df9, df10, df11, df12, df13, df14, df15, df16, df17, df18, df19, df20], ignore_index=True)

new_columns_order = ["camera",	"video",	"frame",	"desk_no",	"kp_0_x_t0",	"kp_0_x_t1",	"kp_0_x_t2",	"kp_0_x_t3",	"kp_0_x_t4",	"kp_0_y_t0",	"kp_0_y_t1",
                     	"kp_0_y_t2",	"kp_0_y_t3",	"kp_0_y_t4",	"kp_1_x_t0",	"kp_1_x_t1",	"kp_1_x_t2",	"kp_1_x_t3",	"kp_1_x_t4",
                        	"kp_1_y_t0",	"kp_1_y_t1",	"kp_1_y_t2",	"kp_1_y_t3",	"kp_1_y_t4",	"kp_2_x_t0",	"kp_2_x_t1",	"kp_2_x_t2",
                            	"kp_2_x_t3",	"kp_2_x_t4",	"kp_2_y_t0",	"kp_2_y_t1",	"kp_2_y_t2",	"kp_2_y_t3",	"kp_2_y_t4",	"kp_3_x_t0",
                                "kp_3_x_t1",	"kp_3_x_t2",	"kp_3_x_t3",	"kp_3_x_t4",	"kp_3_y_t0",	"kp_3_y_t1",	"kp_3_y_t2",	"kp_3_y_t3",
                            "kp_3_y_t4",	"kp_4_x_t0",	"kp_4_x_t1",	"kp_4_x_t2",	"kp_4_x_t3",	"kp_4_x_t4",	"kp_4_y_t0",	"kp_4_y_t1",	
                        "kp_4_y_t2",	"kp_4_y_t3",	"kp_4_y_t4",	"kp_5_x_t0",	"kp_5_x_t1",	"kp_5_x_t2",	"kp_5_x_t3",	"kp_5_x_t4",	
                    "kp_5_y_t0",	"kp_5_y_t1",	"kp_5_y_t2",	"kp_5_y_t3",	"kp_5_y_t4",	"kp_6_x_t0",	"kp_6_x_t1",	"kp_6_x_t2",	
                "kp_6_x_t3",	"kp_6_x_t4",	"kp_6_y_t0",	"kp_6_y_t1",	"kp_6_y_t2",	"kp_6_y_t3",	"kp_6_y_t4",	"kp_7_x_t0",	"kp_7_x_t1",
            	"kp_7_x_t2",	"kp_7_x_t3",	"kp_7_x_t4",	"kp_7_y_t0",	"kp_7_y_t1",	"kp_7_y_t2",	"kp_7_y_t3",	"kp_7_y_t4",	"kp_8_x_t0",
                "kp_8_x_t1",	"kp_8_x_t2",	"kp_8_x_t3",	"kp_8_x_t4",	"kp_8_y_t0",	"kp_8_y_t1",	"kp_8_y_t2",	"kp_8_y_t3",	"kp_8_y_t4",	
                "kp_9_x_t0",	"kp_9_x_t1",	"kp_9_x_t2",	"kp_9_x_t3",	"kp_9_x_t4",	"kp_9_y_t0",	"kp_9_y_t1",	"kp_9_y_t2",	"kp_9_y_t3",
                    	"kp_9_y_t4",	"position_a",	"position_b",	"position_c",	"position_d",	"hand_in_pocket"]

filtered_columns = [col for col in new_columns_order if col in combined_df.columns]
combined_df = combined_df[filtered_columns]
# combined_df = combined_df.astype(int)

for col in combined_df.columns:
    if 'x' in col.lower():
        combined_df[col] = combined_df[col].astype(int) #for not normalized keypoint to convert them into numbers like 271,542
        combined_df[col] = pd.to_numeric((combined_df[col] / 1280), errors='coerce')
        combined_df[col] = combined_df[col].apply(lambda x: -1 if x == 0 else x)
        combined_df[col] = combined_df[col].round(3) # for notmalized keypoints to convert them till 3 decimal places 
    elif 'y' in col.lower():
        combined_df[col] = combined_df[col].astype(int) #for not normalized keypoint to convert them into numbers like 271,542
        combined_df[col] = pd.to_numeric((combined_df[col] / 720), errors='coerce')
        combined_df[col] = combined_df[col].apply(lambda x: -1 if x == 0 else x)
        combined_df[col] = combined_df[col].round(3) # for notmalized keypoints to convert them till 3 decimal places 
    

combined_df.to_csv(output_csv, index=False)
# combined_df.to_csv(df, index=False)
print(f"✅ Combined CSV saved to: {output_csv}")

✅ Combined CSV saved to: C:/wajahat/hand_in_pocket/dataset/training4/itteration4_temp_norm_balanced.csv


### Training Pipeline

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import os

# -------- Configuration --------
INPUT_SIZE = 64
# HIDDEN_SIZE = 128
BATCH_SIZE = 32
EPOCHS = 500
PATIENCE = 25
model_name = "mlp_v4-c0"
label_column = 'hand_in_pocket'

# -------- Load Dataset --------
df = pd.read_csv("C:/wajahat/hand_in_pocket/dataset/training4/itteration4_temp_norm_balanced.csv")
df = df.drop(columns=['camera', 'video', 'frame', 'desk_no',
                      'kp_0_x_t1', 'kp_0_x_t3', 'kp_0_y_t1', 'kp_0_y_t3',
                        'kp_1_x_t1', 'kp_1_x_t3', 'kp_1_y_t1', 'kp_1_y_t3',
                        'kp_2_x_t1', 'kp_2_x_t3', 'kp_2_y_t1', 'kp_2_y_t3',
                        'kp_3_x_t1', 'kp_3_x_t3', 'kp_3_y_t1', 'kp_3_y_t3',
                        'kp_4_x_t1', 'kp_4_x_t3', 'kp_4_y_t1', 'kp_4_y_t3',
                        'kp_5_x_t1', 'kp_5_x_t3', 'kp_5_y_t1', 'kp_5_y_t3',
                        'kp_6_x_t1', 'kp_6_x_t3', 'kp_6_y_t1', 'kp_6_y_t3',
                        'kp_7_x_t1', 'kp_7_x_t3', 'kp_7_y_t1', 'kp_7_y_t3',
                        'kp_8_x_t1', 'kp_8_x_t3', 'kp_8_y_t1', 'kp_8_y_t3',
                        'kp_9_x_t1', 'kp_9_x_t3', 'kp_9_y_t1', 'kp_9_y_t3'])

df.replace(r'^\s*$', np.nan, regex=True, inplace=True)
df = df.apply(pd.to_numeric)
df.fillna(-1, inplace=True)

X = df.drop(columns=[label_column]).values.astype(np.float32)
y = df[label_column].values.astype(np.float32)

unique_vals = np.unique(y)
print("Unique labels in dataset:", unique_vals)

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
train_dataset = TensorDataset(torch.tensor(X_train), torch.tensor(y_train))
val_dataset = TensorDataset(torch.tensor(X_val), torch.tensor(y_val))
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

# -------- MLP Model --------
# Classifier
class MLP(nn.Module):
    def __init__(self, input_size):
        super(MLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.net(x)

model = MLP(INPUT_SIZE)

# -------- Training Setup --------
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
criterion = nn.BCEWithLogitsLoss() # classificiation loss funvtion
# criterion = nn.MSELoss() # regression loss function
optimizer = optim.AdamW(model.parameters(), lr=0.001, weight_decay=1e-4)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=10)

# Create save directory
os.makedirs("rf_models", exist_ok=True)

# -------- Early Stopping --------
best_loss = float('inf')
epochs_no_improve = 0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for batch_X, batch_y in train_loader:
        batch_X, batch_y = batch_X.to(device), batch_y.to(device).unsqueeze(1)
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()
        running_loss += loss.item()
    avg_train_loss = running_loss / len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    val_accuracy = 0.0
    with torch.no_grad():
        for val_X, val_y in val_loader:
            val_X, val_y = val_X.to(device), val_y.to(device).unsqueeze(1)
            outputs = model(val_X)
            loss = criterion(outputs, val_y)
            val_loss += loss.item()
            predictions = (torch.sigmoid(outputs) >= 0.5).float()
            val_accuracy += (predictions == val_y).float().mean().item()
    avg_val_loss = val_loss / len(val_loader)
    avg_val_accuracy = val_accuracy / len(val_loader)

    print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {avg_train_loss:.4f} - Val Loss: {avg_val_loss:.4f} - Val Accuracy: {avg_val_accuracy:.4f}")

    # Early stopping and model saving
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        torch.save({
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'best_loss': best_loss
        }, f"rf_models/{model_name}.pt")
        epochs_no_improve = 0
    else:
        epochs_no_improve += 1
        if epochs_no_improve >= PATIENCE:
            print("Early stopping triggered.")
            break

    scheduler.step(avg_val_loss)

print(f"Training complete. Best model saved to rf_models/{model_name}.pt")

Unique labels in dataset: [0. 1.]
Epoch 1/500 - Train Loss: 0.2541 - Val Loss: 0.2183 - Val Accuracy: 0.9195
Epoch 2/500 - Train Loss: 0.2194 - Val Loss: 0.1883 - Val Accuracy: 0.9296
Epoch 3/500 - Train Loss: 0.1981 - Val Loss: 0.1758 - Val Accuracy: 0.9319
Epoch 4/500 - Train Loss: 0.1871 - Val Loss: 0.1676 - Val Accuracy: 0.9384
Epoch 5/500 - Train Loss: 0.1805 - Val Loss: 0.1582 - Val Accuracy: 0.9401
Epoch 6/500 - Train Loss: 0.1778 - Val Loss: 0.1591 - Val Accuracy: 0.9408
Epoch 7/500 - Train Loss: 0.1746 - Val Loss: 0.1525 - Val Accuracy: 0.9403
Epoch 8/500 - Train Loss: 0.1712 - Val Loss: 0.1484 - Val Accuracy: 0.9460
Epoch 9/500 - Train Loss: 0.1701 - Val Loss: 0.1518 - Val Accuracy: 0.9451
Epoch 10/500 - Train Loss: 0.1678 - Val Loss: 0.1489 - Val Accuracy: 0.9441
Epoch 11/500 - Train Loss: 0.1645 - Val Loss: 0.1456 - Val Accuracy: 0.9469
Epoch 12/500 - Train Loss: 0.1632 - Val Loss: 0.1465 - Val Accuracy: 0.9432
Epoch 13/500 - Train Loss: 0.1629 - Val Loss: 0.1438 - Val Accu