In [3]:
# not useful
import os
import json
import cv2
from tqdm import tqdm
from collections import defaultdict

# Define paths to your dataset
# base_dir is the dataset root the conversion loop reads from; output_dir
# receives the cropped player images and the per-split annotation .txt files.
base_dir = '/Users/kai/GSR/data/SoccerNetGS'
output_dir = '/Users/kai/GSR/data/SoccerNetGS/converted'

# Splits to convert in this run (only 'valid' is listed here).
# NOTE(review): the saved cell output shows "Processing train set...", so this
# list appears to have been edited after the last execution -- confirm.
splits = ['valid']

# Ensure output directories exist
for split in splits:
    os.makedirs(os.path.join(output_dir, split), exist_ok=True)

class TrackIDManager:
    """
    Hands out sequential integer IDs (starting at 1) for (track_id, jersey_number) pairs.

    The same pair always maps to the same ID; a pair that has not been seen
    before receives the next unused ID.
    """

    def __init__(self):
        # (original_track_id, jersey_number) -> assigned unique ID
        self.track_id_registry = {}
        # ID that the next unseen pair will receive
        self.next_unique_id = 1

    def get_or_create_unique_track_id(self, original_track_id, jersey_number):
        """
        Look up, or allocate, the unique ID for a track/jersey combination.

        Because the lookup key is the pair, a track_id that reappears with a
        different jersey number is treated as a new identity and gets a new ID.

        :param original_track_id: Original track ID from the annotation
        :param jersey_number: Jersey number of the player
        :return: The unique integer ID associated with the pair
        """
        pair = (original_track_id, jersey_number)

        try:
            return self.track_id_registry[pair]
        except KeyError:
            pass

        # Unseen pair: consume the counter and remember the assignment
        assigned = self.next_unique_id
        self.track_id_registry[pair] = assigned
        self.next_unique_id = assigned + 1
        return assigned

def extract_player_images(image_path, annotations, tracklet_output_path, track_id_manager):
    """
    Crop every jersey-annotated bounding box out of one frame and save each crop as a JPEG.

    Annotations that lack 'bbox_image', 'attributes' or attributes['jersey'] are
    skipped. Each crop is written to
    ``<tracklet_output_path>/<frame name>_player_<unique track id>.jpg``.

    :param image_path: Path to the source image
    :param annotations: List of annotation dicts for the image
    :param tracklet_output_path: Directory the crops are written into (not created here)
    :param track_id_manager: TrackIDManager instance used to map (track_id, jersey) to a unique ID
    :return: List of (crop path, jersey number as str, unique track id) tuples,
             one per crop that was actually written; empty if the image cannot be read
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Could not load image: {image_path}")
        return []

    img_h, img_w = image.shape[:2]
    base_image_name = os.path.splitext(os.path.basename(image_path))[0]

    saved_files = []
    for i, ann in enumerate(tqdm(annotations, desc=f"Extracting player images from {os.path.basename(image_path)}", leave=False)):
        if 'bbox_image' not in ann or 'attributes' not in ann or 'jersey' not in ann['attributes']:
            continue

        # Extract bounding box and jersey number
        bbox = ann['bbox_image']
        jersey_number = ann['attributes']['jersey']

        # Get the track ID from the annotation (if available).
        # NOTE(review): the fallback is the annotation's index within this frame,
        # which is not stable across frames -- confirm this is acceptable.
        track_id = ann.get('track_id', i)

        # Get a unique track ID using our custom manager
        unique_track_id = track_id_manager.get_or_create_unique_track_id(track_id, jersey_number)

        # Compute both corners BEFORE clamping. Clamping only the origin (and then
        # adding the untouched width/height) would stretch boxes that start
        # off-image past their true right/bottom edge.
        left = int(bbox['x'])
        top = int(bbox['y'])
        right = left + int(bbox['w'])
        bottom = top + int(bbox['h'])

        left, top = max(0, left), max(0, top)
        right, bottom = min(img_w, right), min(img_h, bottom)

        # A box that lies entirely outside the frame (or has no area) ends up with
        # right <= left or bottom <= top; slicing it could wrap around with
        # negative indices, so reject it explicitly.
        if right <= left or bottom <= top:
            print(f"Empty crop for image: {image_path}, bbox: {bbox}")
            continue

        # Crop the player image
        player_img = image[top:bottom, left:right]

        if player_img.size == 0:
            print(f"Empty crop for image: {image_path}, bbox: {bbox}")
            continue

        # Generate a unique name for the player image
        player_filename = f"{base_image_name}_player_{unique_track_id}.jpg"
        player_path = os.path.join(tracklet_output_path, player_filename)

        # Save the cropped image; only record it if the write succeeded
        if not cv2.imwrite(player_path, player_img):
            print(f"Warning: Could not write crop: {player_path}")
            continue
        saved_files.append((player_path, str(jersey_number), unique_track_id))

    return saved_files

# Main script to convert dataset.
# For every sequence folder of every split: read Labels-GameState.json, crop the
# annotated players out of each labeled frame, and collect one
# "<relative crop path> <jersey number> <unique track id>" line per saved crop.
all_annotations = {split: [] for split in splits}

for split in splits:
    print(f"Processing {split} set...")

    split_input_path = os.path.join(base_dir, split)
    split_output_path = os.path.join(output_dir, split)
    os.makedirs(split_output_path, exist_ok=True)

    # Walk through each sequence folder in the split.
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # the generated annotation file reproducible between runs.
    for tracklet_folder in tqdm(sorted(os.listdir(split_input_path)), desc=f"Processing tracklets in {split}"):
        tracklet_path = os.path.join(split_input_path, tracklet_folder)
        if not os.path.isdir(tracklet_path):
            continue

        # Create subfolder for the current sequence under the output path
        tracklet_output_path = os.path.join(split_output_path, tracklet_folder)
        os.makedirs(tracklet_output_path, exist_ok=True)

        # Fresh track ID manager per sequence: unique IDs restart at 1 in every folder
        track_id_manager = TrackIDManager()

        # Check that the Labels-GameState.json annotation file exists
        metadata_path = os.path.join(tracklet_path, 'Labels-GameState.json')
        if not os.path.exists(metadata_path):
            print(f"Metadata not found for {tracklet_path}")
            continue

        # Load the annotation file
        with open(metadata_path, 'r') as f:
            metadata = json.load(f)

        if 'images' not in metadata or 'annotations' not in metadata:
            print(f"Invalid metadata format for {tracklet_path}")
            continue

        images = metadata['images']
        annotations = metadata['annotations']

        # Group annotations by the image they belong to
        image_to_annotations = defaultdict(list)
        for ann in annotations:
            image_to_annotations[ann['image_id']].append(ann)

        # Path to the img1 folder holding the frames
        img1_path = os.path.join(tracklet_path, 'img1')

        # Process each image in the metadata
        for img in tqdm(images, desc=f"Processing images in {tracklet_folder}", leave=False):
            # .get(): an entry without 'is_labeled' is treated as unlabeled
            # instead of aborting the whole conversion with a KeyError
            if not img.get('is_labeled') or 'file_name' not in img:
                continue

            image_id = img['image_id']
            image_filename = img['file_name']
            image_path = os.path.join(img1_path, image_filename)

            # Get all annotations corresponding to this image
            # (.get does not insert missing keys into the defaultdict)
            annotations_for_image = image_to_annotations.get(image_id, [])

            if len(annotations_for_image) == 0:
                continue

            # Extract player images from the frame
            player_images = extract_player_images(image_path, annotations_for_image, tracklet_output_path, track_id_manager)

            # Add player images to the annotation file
            for player_img_path, jersey_number, unique_track_id in player_images:
                relative_path = os.path.relpath(player_img_path, output_dir)
                all_annotations[split].append(f"{relative_path} {jersey_number} {unique_track_id}")

# Write one "<split>.txt" annotation file per processed split
for split in splits:
    annotation_file_path = os.path.join(output_dir, f'{split}.txt')
    with open(annotation_file_path, 'w') as f:
        f.writelines(f"{line}\n" for line in all_annotations[split])

print(f"Dataset conversion completed. Annotations are stored in {output_dir}.")

Processing train set...


Processing tracklets in train:   0%|          | 0/58 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[

Dataset conversion completed. Annotations are stored in /Users/kai/GSR/data/SoccerNetGS/converted.


In [12]:
import os
import shutil
from collections import defaultdict

def organize_player_images(source_folder, target_player_id):
    """
    Copy all crops of one player into a dedicated sub-folder, renumbered by frame order.

    Only ``.jpg`` files directly inside source_folder are considered. A filename
    is split on ``_``: the first part is read as the frame number and the third
    part (up to its first ``.``) as the player ID. Files with fewer than three
    parts, or whose frame number is not an integer, are reported and skipped.
    Matching files are copied (metadata preserved) to
    ``<source_folder>/<target_player_id>/<target_player_id>_<k>.jpg`` where k
    counts from 0 in ascending frame order. The sub-folder is created even if
    nothing matches.

    :param source_folder: Path to the source folder containing images
    :param target_player_id: Player ID to organize
    """
    wanted_id = str(target_player_id)

    # frame number (as written in the filename) -> filename of the target player's crop
    frame_to_file = {}

    # First pass: collect the target player's images
    for filename in os.listdir(source_folder):
        if not filename.endswith('.jpg'):
            continue

        parts = filename.split('_')
        try:
            player_id = parts[2].split('.')[0]
        except IndexError:
            print(f"Skipping malformed filename: {filename}")
            continue

        if player_id != wanted_id:
            continue

        # Validate the frame number now; otherwise one stray file would make the
        # int-keyed sort below raise ValueError and abort the whole call.
        try:
            int(parts[0])
        except ValueError:
            print(f"Skipping malformed filename: {filename}")
            continue

        frame_to_file[parts[0]] = filename

    # Create target folder for the player
    target_folder = os.path.join(source_folder, wanted_id)
    os.makedirs(target_folder, exist_ok=True)

    # Sort frames numerically to ensure correct temporal order
    sorted_frames = sorted(frame_to_file, key=int)

    # Copy and rename images: <player>_0.jpg, <player>_1.jpg, ...
    for new_index, frame in enumerate(sorted_frames):
        new_filename = f"{target_player_id}_{new_index}.jpg"
        src_path = os.path.join(source_folder, frame_to_file[frame])
        dst_path = os.path.join(target_folder, new_filename)
        shutil.copy2(src_path, dst_path)

    print(f"Organized {len(sorted_frames)} images for player {target_player_id}")

# Example usage: gather every crop of player 11 found in the SNGS-060 output folder
source_folder = '/Users/kai/GSR/data/SoccerNetGS/converted/train/SNGS-060/'
target_player_id = 11

organize_player_images(source_folder=source_folder, target_player_id=target_player_id)

Organized 548 images for player 11


ybys

In [74]:
import os
import pandas as pd
import re

# Paths to the CSV files
bbox_csv_path = '/Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection.csv'
role_csv_path = '/Users/kai/GSR/soccernet/model_detections/PRTReId_detection.csv'

# Matches signed decimals, bare integers ("50", "50.") and exponent notation
# ("1.2e+02"). The simpler pattern "[-+]?\d*\.\d+|\d+" drops the sign of
# integer-valued negatives and splits exponent notation into two numbers.
_BBOX_NUMBER_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def _normalize_bbox(raw):
    """Return the numbers found in `raw`, rounded to 2 decimals, as a list string like "[1.0, 2.5]".

    Both CSVs are passed through this so that the same box produces the same
    string on each side and can be used as a merge key.
    """
    return str([round(float(n), 2) for n in _BBOX_NUMBER_RE.findall(raw)])


# **Step 1: Load the PRTReId_detection.csv**
role_df = pd.read_csv(role_csv_path)
role_df = role_df.loc[:, ~role_df.columns.str.contains('^Unnamed')]  # Remove "Unnamed" columns

# Round bbox values to avoid floating point mismatches
role_df['bbox_ltwh'] = role_df['bbox_ltwh'].apply(_normalize_bbox)

# Filter non-players (where role_detection is NOT "player")
non_player_bboxes = role_df[role_df['role_detection'] != 'player'][['image_id', 'bbox_ltwh']]

print(f"Identified {len(non_player_bboxes)} non-player bboxes to be removed.")

# **Step 2: Load the BPBReIDStrongSORT_detection.csv**
bbox_df = pd.read_csv(bbox_csv_path)
bbox_df = bbox_df.loc[:, ~bbox_df.columns.str.contains('^Unnamed')]  # Remove "Unnamed" columns

# Round bbox values in the same way as role_df.
# Note that the normalized (rounded) strings are what gets saved in Step 5.
bbox_df['bbox_ltwh'] = bbox_df['bbox_ltwh'].apply(_normalize_bbox)

# **Step 3: Identify track_ids to remove**
# Merge non-player bboxes with BPBReIDStrongSORT to identify track_ids to remove
merged_df = pd.merge(
    bbox_df[['image_id', 'bbox_ltwh', 'track_id']],
    non_player_bboxes,
    on=['image_id', 'bbox_ltwh'],
    how='inner'
)

# Get the list of track_ids to remove.
# dropna(): a detection without a track has track_id NaN, and isin() matches NaN
# against NaN -- without this, one untracked non-player box would delete every
# untracked detection in the file.
track_ids_to_remove = merged_df['track_id'].dropna().unique()
print(f"Removing {len(track_ids_to_remove)} track_ids: {track_ids_to_remove}")

# **Step 4: Remove all track_ids from BPBReIDStrongSORT**
# A whole track is dropped as soon as any one of its boxes was labelled non-player.
cleaned_bbox_df = bbox_df[~bbox_df['track_id'].isin(track_ids_to_remove)]

# **Step 5: Save the cleaned DataFrame**
# ("Unnamed" columns were already removed from bbox_df in Step 2.)
cleaned_csv_path = '/Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv'
cleaned_bbox_df.to_csv(cleaned_csv_path, index=False)
print(f"Cleaned file saved to: {cleaned_csv_path}")


Identified 20 non-player bboxes to be removed.
Removing 1 track_ids: [4.]
Cleaned file saved to: /Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv


In [83]:
import os
import cv2
import pandas as pd
from tqdm import tqdm
from collections import defaultdict
import re

# Fixed path to the CSV file with bbox and track_id information
csv_path = '/Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv'

# Define paths to your dataset
base_dir = '/Users/kai/GSR/data/SoccerNetGS'
output_dir = '/Users/kai/GSR/data/SoccerNetGS/converted'

# Directories for validation set.
# Both the frame folder and the output folder are hard-coded to the single
# sequence SNGS-021; change both together to process another sequence.
split = 'valid'
img1_path = os.path.join(base_dir, split, 'SNGS-021', 'img1')
split_output_path = os.path.join(output_dir, split, 'SNGS-021')

# Ensure output directory exists
os.makedirs(split_output_path, exist_ok=True)


class TrackIDManager:
    """Maps each original track ID to a sequential integer ID, starting at 1, in first-seen order."""

    def __init__(self):
        # original track ID -> assigned unique ID
        self.track_id_registry = {}
        # ID that the next unseen track will receive
        self.next_unique_id = 1

    def get_or_create_unique_track_id(self, original_track_id):
        """Return the unique ID for original_track_id, allocating the next free one on first sight."""
        registry = self.track_id_registry
        if original_track_id in registry:
            return registry[original_track_id]

        fresh_id = self.next_unique_id
        registry[original_track_id] = fresh_id
        self.next_unique_id = fresh_id + 1
        return fresh_id


# Matches signed decimals, bare integers ("50", "50.") and exponent notation
# ("1.2e+02"). The simpler pattern "[-+]?\d*\.\d+|\d+" drops the sign of
# integer-valued negatives and splits exponent notation into two numbers, which
# would shift the crop coordinates.
_LTWH_NUMBER_RE = re.compile(r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?")


def extract_player_images(image_path, frame_annotations, tracklet_output_path, track_id_manager):
    """
    Crop every detection of one frame and save it under a per-track folder.

    Each row of frame_annotations must provide 'bbox_ltwh' (a string holding
    left, top, width, height) and 'track_id'. A crop is written to
    ``<tracklet_output_path>/<int(track_id)>/<int(frame name) - 1>.jpg``, so the
    frame file name (without extension) must be an integer. Rows with an
    unparsable box, a missing track_id, or a box that does not intersect the
    frame are reported and skipped.

    :param image_path: Path to the frame image
    :param frame_annotations: DataFrame with the detections of this frame
    :param tracklet_output_path: Root folder for the per-track sub-folders
    :param track_id_manager: TrackIDManager mapping original track IDs to sequential IDs
    :return: List of (crop path, unique track id) tuples, one per crop actually
             written; empty if the image cannot be read
    """
    image = cv2.imread(image_path)
    if image is None:
        print(f"Warning: Could not load image: {image_path}")
        return []

    img_h, img_w = image.shape[:2]
    base_image_name = os.path.splitext(os.path.basename(image_path))[0]

    saved_files = []
    for _, ann in frame_annotations.iterrows():
        # Parse the bbox (left, top, width, height) from its string form
        bbox = [float(val) for val in _LTWH_NUMBER_RE.findall(ann['bbox_ltwh'])]
        if len(bbox) < 4:
            print(f"Skipping malformed bbox for image: {image_path}, bbox: {ann['bbox_ltwh']}")
            continue

        track_id = ann['track_id']
        # An untracked detection has no folder to go to (int(NaN) would raise)
        if pd.isna(track_id):
            print(f"Skipping detection without track_id for image: {image_path}")
            continue

        x1 = int(bbox[0])
        y1 = int(bbox[1])
        x2 = x1 + int(bbox[2])  # x + width
        y2 = y1 + int(bbox[3])  # y + height

        # Get a unique track ID using our custom manager
        unique_track_id = track_id_manager.get_or_create_unique_track_id(track_id)

        # Ensure bounding box is within image dimensions
        x1 = max(0, x1)
        y1 = max(0, y1)
        x2 = min(img_w, x2)
        y2 = min(img_h, y2)

        # A box entirely left of / above the frame leaves x2 or y2 negative, and a
        # negative slice end would wrap around and return a wrong, non-empty crop.
        if x2 <= x1 or y2 <= y1:
            print(f"Empty crop for image: {image_path}, bbox: ({x1}, {y1}, {x2}, {y2})")
            continue

        # Crop the player image
        player_img = image[y1:y2, x1:x2]

        if player_img.size == 0:
            print(f"Empty crop for image: {image_path}, bbox: ({x1}, {y1}, {x2}, {y2})")
            continue

        # One folder per ORIGINAL track id; the file is named by zero-based frame index
        player_output_folder = os.path.join(tracklet_output_path, str(int(track_id)))
        os.makedirs(player_output_folder, exist_ok=True)

        player_filename = f"{int(base_image_name)-1}.jpg"
        player_path = os.path.join(player_output_folder, player_filename)

        # Save the cropped image; only record it if the write succeeded
        if not cv2.imwrite(player_path, player_img):
            print(f"Warning: Could not write crop: {player_path}")
            continue
        saved_files.append((player_path, unique_track_id))

    return saved_files


# Main script to convert dataset.
# For every frame in img1_path that has detections in the CSV, crop the detected
# players and collect one "<relative crop path> <unique track id>" line per crop.
all_annotations = []

# Initialize track ID manager
track_id_manager = TrackIDManager()

# Load the CSV file with bbox and track_id information
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"CSV file not found at {csv_path}")

# Load CSV file
df = pd.read_csv(csv_path)
print(f"Loaded {len(df)} annotations from {csv_path}")
# Keep the last six characters of image_id so it can be compared with the frame
# file names (e.g. "000001").
# NOTE(review): assumes image_id is read as an integer column; a float column
# would stringify with a trailing ".0" and never match -- confirm.
df['image_id'] = df['image_id'].astype(str).str[-6:]

# Group the annotations by image_id
grouped_annotations = df.groupby('image_id')
# Mapping of group key -> row labels; gives O(1) membership tests in the loop
annotated_image_ids = grouped_annotations.groups

# Process each image in the img1 folder.
# os.listdir returns entries in arbitrary order; sorting makes the order in which
# track IDs are first seen (and therefore the unique IDs) reproducible.
for img_filename in tqdm(sorted(os.listdir(img1_path)), desc=f"Processing images in {split}", leave=False):
    if not img_filename.endswith(('.jpg', '.png')):
        continue

    # Extract frame id from the image filename (assumes filenames like 000001.jpg)
    image_id = os.path.splitext(img_filename)[0]
    image_path = os.path.join(img1_path, img_filename)

    # Skip frames without any detection in the CSV
    if image_id not in annotated_image_ids:
        continue

    frame_annotations = grouped_annotations.get_group(image_id)

    # Extract player images from the frame
    player_images = extract_player_images(image_path, frame_annotations, split_output_path, track_id_manager)

    # Add player images to the annotation file
    for player_img_path, unique_track_id in player_images:
        relative_path = os.path.relpath(player_img_path, output_dir)
        all_annotations.append(f"{relative_path} {unique_track_id}")

# Write annotation file.
# NOTE(review): this overwrites <output_dir>/valid.txt with two-column lines; the
# JSON-based conversion cell writes a three-column file to the same path.
annotation_file_path = os.path.join(output_dir, 'valid.txt')
with open(annotation_file_path, 'w') as f:
    f.writelines(f"{line}\n" for line in all_annotations)

print(f"Dataset conversion completed. Annotations are stored in {output_dir}.")

Loaded 196 annotations from /Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv


                                                                               

Dataset conversion completed. Annotations are stored in /Users/kai/GSR/data/SoccerNetGS/converted.




In [76]:
import os
import pandas as pd
import json

# Input detections, jersey-number predictions, and where the merged CSV is written
cleaned_csv_path = '/Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv'
jersey_number_json_path = '/Users/kai/GSR/soccernet/jersey-number-pipeline/out/SoccerNetResults/challenge_final_results.json'
output_csv_path = '/Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned_with_jersey.csv'

# **Step 1: Read the cleaned detections and discard any "Unnamed..." columns**
print(f"Loading cleaned CSV from: {cleaned_csv_path}")
df = pd.read_csv(cleaned_csv_path)
df = df.loc[:, ~df.columns.str.startswith('Unnamed')]
print(f"Loaded DataFrame with {df.shape[0]} rows and {df.shape[1]} columns.")

# **Step 2: Read the per-tracklet jersey number predictions**
print(f"Loading jersey number results from: {jersey_number_json_path}")
with open(jersey_number_json_path, 'r') as f:
    jersey_number_results = json.load(f)

# The "imgs" entry is not a tracklet; discard it if present
jersey_number_results.pop('imgs', None)

# Tracklet keys become ints; predictions become strings ('-1' when the value is -1)
jersey_number_results = {
    int(tracklet_key): ('-1' if prediction == -1 else str(prediction))
    for tracklet_key, prediction in jersey_number_results.items()
}
print(f"Loaded jersey number results for {len(jersey_number_results)} tracklets.")

# **Step 3: Attach the prediction to every detection as "jn_tracklet"**
# Tracklet index = track_id - 1; "tracklet" is only a scratch column for the lookup
df['tracklet'] = df['track_id'].sub(1)
df['jn_tracklet'] = df['tracklet'].map(jersey_number_results)
df = df.drop(columns='tracklet')

# **Step 4: Write the result**
df.to_csv(output_csv_path, index=False)
print(f"Saved updated CSV with jersey numbers to: {output_csv_path}")


Loading cleaned CSV from: /Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv
Loaded DataFrame with 196 rows and 19 columns.
Loading jersey number results from: /Users/kai/GSR/soccernet/jersey-number-pipeline/out/SoccerNetResults/challenge_final_results.json
Loaded jersey number results for 11 tracklets.
Saved updated CSV with jersey numbers to: /Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned_with_jersey.csv


In [120]:
import math
# NOTE(review): neither of these two paths is used by the rest of this cell. The
# DataFrame that used to be read from output_csv_path was overwritten on the
# next statement, so that dead read has been removed.
output_csv_path = '/Users/kai/GSR/soccernet/model_detections/NBJW_Calib_detection_cleaned.csv'
csv = '/Users/kai/GSR/soccernet/model_detections/MMOCR_detection.csv'

# cleaned_csv_path and jersey_number_json_path are not assigned in this cell;
# they must already exist in the kernel from an earlier cell.
print(f"Loading cleaned CSV from: {cleaned_csv_path}")
df = pd.read_csv(cleaned_csv_path)
df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
print(f"Loading jersey number results from: {jersey_number_json_path}")
with open(jersey_number_json_path, 'r') as f:
    jersey_number_results = json.load(f)
# "imgs" is not a tracklet entry
if 'imgs' in jersey_number_results:
    del jersey_number_results['imgs']

# tracklet index (int) -> predicted jersey number (int, -1 = no number)
jersey_number_results = {
    int(k): int(v) if v != -1 else -1
    for k, v in jersey_number_results.items()
}

# Tracklet index = track_id - 1; "tracklet" is only a scratch column for the lookup
df['tracklet'] = df['track_id'] - 1
df['jersey_number_detection'] = df['tracklet'].map(jersey_number_results)

# A detection has a usable number when its tracklet was found AND the prediction
# is not the -1 placeholder. Vectorised, so it does not depend on the index being
# 0..n-1 and it creates the confidence column even for an empty frame.
has_number = df['jersey_number_detection'].notna() & (df['jersey_number_detection'] != -1)
df['jersey_number_detection'] = df['jersey_number_detection'].where(has_number)  # -1 -> NaN
df['jersey_number_confidence'] = has_number.astype(float)  # 1.0 if a number is present, else 0.0

df = df.drop(columns=['tracklet'])

df.to_csv("/Users/kai/GSR/soccernet/test.csv", index=False)

Loading cleaned CSV from: /Users/kai/GSR/soccernet/model_detections/BPBReIDStrongSORT_detection_cleaned.csv
Loading jersey number results from: /Users/kai/GSR/soccernet/jersey-number-pipeline/out/SoccerNetResults/challenge_final_results.json


In [151]:
import numpy as np

detections = pd.read_csv("/Users/kai/GSR/soccernet/model_detections/PRTReId_detection.csv")

# The recorded run of this cell stopped with a bare KeyError: 'track_id', i.e.
# the file had no such column. Fail early with a message that shows what IS
# available instead.
if "track_id" not in detections.columns:
    raise KeyError(
        "'track_id' column not found in PRTReId_detection.csv; "
        f"available columns: {list(detections.columns)}"
    )

# One entry per track: {'track_id': ..., 'embeddings': mean embedding vector}
embeddings_list = []
# groupby() leaves out rows whose key is NaN by default, so no explicit NaN check
# on track_id is needed here.
for track_id, group in detections.groupby("track_id"):
    try:
        # NOTE(review): embeddings loaded from a CSV are presumably strings and
        # would need parsing into numeric arrays before averaging -- confirm.
        embeddings = np.mean(np.vstack(group.embeddings.values), axis=0)
    except (TypeError, ValueError) as e:
        # Report which track failed and why, then carry on with the others
        print(f"Could not average embeddings for track {track_id}: {e}")
        continue
    embeddings_list.append({'track_id': track_id, 'embeddings': embeddings})

print(f"Averaged embeddings for {len(embeddings_list)} tracks")


KeyError: 'track_id'

In [None]:
FOR each tracklet1 IN tracklets:
    FOR each tracklet2 IN tracklets:
        IF tracklet1 == tracklet2 THEN CONTINUE
        iou = compute_iou(tracklet1, tracklet2)
        IF iou > IOU_THRESHOLD:
            MERGE tracklet1 and tracklet2
