In [1]:
%load_ext autoreload
%autoreload 2
import os
import numpy as np
import glob
import pandas as pd
from tqdm import tqdm
from ageself.annotate_videos_functions import VideoDataset
import cv2


In [2]:
# This notebook can be used to improve model training by creating new face-crop images that can be classified downstream for the next training iteration.
# In general, I would rather suggest applying the classification model (age, gender) directly at the detection stage, avoiding this two-step approach where we first cut out the faces and then classify them,
# and instead doing all of it in one step.

base_path_videos = "/usr/users/vhassle/datasets/Wortschatzinsel/all_videos"


def video_path_for_annotation(annotation_path, videos_dir=base_path_videos):
    """Return the video path that corresponds to one annotation file.

    The annotation's base name is turned into a video name by swapping the
    ``.txt`` extension for ``.mp4`` and dropping the ``_r002`` suffix; the
    result is joined onto ``videos_dir``.

    Args:
        annotation_path: Path of the annotation ``.txt`` file.
        videos_dir: Directory that holds the videos.

    Returns:
        The path of the matching ``.mp4`` file inside ``videos_dir``.
    """
    video_name = os.path.basename(annotation_path).replace(".txt", ".mp4").replace("_r002", "")
    return os.path.join(videos_dir, video_name)


annotation_pahts = sorted(glob.glob("/usr/users/vhassle/model_outputs/outputs_AgeSelf/age_gender_classification_model_final/*002.txt"))
# Derive the videos in annotation order and do NOT sort them again: both lists
# are consumed pairwise with zip(), and sorting the derived names independently
# can reorder them relative to their annotations (e.g. "a_b_r002.txt" sorts
# before "a_r002.txt", but "a.mp4" sorts before "a_b.mp4").
video_paths = [video_path_for_annotation(annotation_paht) for annotation_paht in annotation_pahts]

In [None]:
# Read every per-video annotation file and stack the results into one table,
# tagging each row with the video it belongs to.
annotation_tables = []
for annotation_path, video_path in zip(annotation_pahts, video_paths):
    try:
        annotation_table = pd.read_csv(annotation_path, sep=",", header=None)
    except pd.errors.EmptyDataError:
        # read_csv raises this for files without any content; skip them.
        print("Error reading", annotation_path, "probably empty")
        continue
    except (OSError, pd.errors.ParserError) as read_error:
        # Stay best-effort for unreadable/malformed files, but report the real
        # cause instead of hiding it behind a bare ``except``.
        print("Error reading", annotation_path, "-", read_error)
        continue
    annotation_table["video_path"] = video_path
    annotation_tables.append(annotation_table)

# Concatenate once (growing a frame inside the loop is quadratic) and give the
# rows a unique running index instead of repeating each file's 0..n-1 labels.
annotation_table_all = (
    pd.concat(annotation_tables, ignore_index=True) if annotation_tables else pd.DataFrame()
)


In [None]:
# Name the 12 columns read from the annotation files plus the appended
# "video_path". Four columns are left unnamed ("") because they are not used
# here; do not select by "" since that label is duplicated.
annotation_table_all.columns = ["frame", "face_nr", "x", "y", "w", "h","","","","","age","gender","video_path"]

# Keep only boxes whose area (w * h) exceeds this threshold; 4900 == 70 * 70.
MIN_FACE_AREA = 4900
# Upper bound on the number of face crops to draw.
MAX_FACE_SAMPLES = 1000

face_area = annotation_table_all["w"] * annotation_table_all["h"]
subset_annotation_table = annotation_table_all[face_area > MIN_FACE_AREA]
print(subset_annotation_table.shape)

# DataFrame.sample raises ValueError when n exceeds the number of rows, so cap
# the request at what is actually available.
n_face_samples = min(MAX_FACE_SAMPLES, len(subset_annotation_table))
anotation_subsample = subset_annotation_table.sample(n=n_face_samples, random_state=42)
print(anotation_subsample.shape)

In [None]:
# Cut every sampled face box out of its video frame, save it as a JPEG in
# ``output_dir`` and remember (relative image path, age, gender) for each crop
# that was actually written.
sampled_video_paths = anotation_subsample["video_path"].unique()
annotations_image_crops = []

output_dir = "/usr/users/vhassle/datasets/Wortschatzinsel/face_crops"
os.makedirs(output_dir, exist_ok=True)

for sampled_video_path in tqdm(sampled_video_paths):
    video_dataset = VideoDataset(sampled_video_path)
    anotation_subsamples_video = anotation_subsample[anotation_subsample["video_path"] == sampled_video_path]
    # File name of the video up to its first dot; used as part of every crop name.
    video_stem = sampled_video_path.split('/')[-1].split('.')[0]

    # Process each annotation in the video
    for _, annotation_row in anotation_subsamples_video.iterrows():
        frame = annotation_row["frame"]
        x = annotation_row["x"]
        y = annotation_row["y"]
        w = annotation_row["w"]
        h = annotation_row["h"]
        age = annotation_row["age"]
        gender = annotation_row["gender"]

        # Clamp the box origin to the frame: a negative start index would wrap
        # around in the slice and yield an empty or wrong crop.
        top, left = max(y, 0), max(x, 0)
        bottom, right = y + h, x + w
        if bottom <= top or right <= left:
            tqdm.write(f"Skipping box outside frame: {sampled_video_path} frame {frame} x{x} y{y} w{w} h{h}")
            continue

        # Cut out face from the frame
        face = video_dataset[frame][top:bottom, left:right]
        if face.size == 0:
            # Box lies completely beyond the far frame edges.
            tqdm.write(f"Skipping empty crop: {sampled_video_path} frame {frame} x{x} y{y} w{w} h{h}")
            continue

        # Save face image with a unique name
        image_name = f"face_{video_stem}_frame{frame}_x{x}_y{y}_w{w}_h{h}.jpg"
        image_path = os.path.join("Wortschatzinsel/face_crops", image_name)
        save_image_path = os.path.join(output_dir, image_name)
        # cv2.imwrite expects BGR channel order. Presumably VideoDataset yields
        # RGB frames -- TODO confirm. COLOR_RGB2BGR is the same channel swap as
        # COLOR_BGR2RGB, just named for the direction that is meant here.
        face = cv2.cvtColor(face, cv2.COLOR_RGB2BGR)
        if not cv2.imwrite(save_image_path, face):
            # imwrite reports failure through its return value; do not record
            # an annotation for an image that does not exist.
            tqdm.write(f"Could not write {save_image_path}")
            continue

        # Append annotation with image name and folder, age, and gender
        annotations_image_crops.append({
            "image_path": image_path,
            "age": age,
            "gender": gender
        })

# Write one "image_path,age,gender" line per recorded crop into a text file
# placed next to the saved images.
annotations_file_path = os.path.join(output_dir, "wortschatz_faces.txt")
with open(annotations_file_path, "w") as annotations_file:
    annotations_file.writelines(
        f"{crop['image_path']},{crop['age']},{crop['gender']}\n"
        for crop in annotations_image_crops
    )
