This script converts a game event log into a structured JSON file suitable for training Video-LLaMA2: for a given video, each logged event becomes a question-answer pair built from the event's video timestamp and a textual description of the event.

In [3]:
import pandas as pd
import json
import re

# Lookup table: event name (as it appears in the 'trial_type' column of the
# event log) -> sentence used as the answer describing that event.
event_descriptions = {
    # Actions
    "HIT": (
        "The character is attacking an enemy "
        "(e.g., sword lunge, air kick, shuriken throw)."
    ),
    "JUMP": (
        "The character is jumping "
        "(can lead to a double-jump if the character is already in the air)."
    ),
    "Kill": "An enemy is defeated by the character's attack.",
    # Directions
    "DOWN": (
        "The character is crouching (if on the ground) "
        "or moving down (if in the air)."
    ),
    "RIGHT": "The character is moving to the right.",
    "LEFT": "The character is moving to the left.",
    "UP": "The character is moving up.",
    # Health changes
    "HealthGain": "The character is regaining health bars.",
    "HealthLoss": "The character is losing health bars following an injury.",
    # TODO add contextual details (e.g., completing a mission, defeating a boss, etc.)
}

# Load the cleaned event log
df = pd.read_csv('../data/datasets/combined_events_level-1_clean_outset_vid.csv')     # <-- Change this to the path of your file

# Drop the rows whose 'trial_type' is 'frame' so that only event rows
# (e.g., 'HIT', 'JUMP', etc.) remain
df_event = df.loc[df.trial_type != 'frame']

# Video to annotate; its file name carries the session, run and repetition tags
video_file = '../output/videos/videos_full/sub-01_ses-003_task-shinobi_run-01_level-1_rep-01.mp4'      # <-- Change this to the path of your file

# Capture the numeric part of the session, run and repetition tags
pattern = r'ses-(\d+)_task-[a-zA-Z0-9]+_run-(\d+)_level-\d+_rep-(\d+)'
match = re.search(pattern, video_file)

# Fail early when the file name does not follow the expected naming scheme
if match is None:
    raise ValueError("Filename does not match expected pattern")

# Re-attach each tag prefix to its captured number, e.g. '003' -> 'ses-003'
session, run, repetition = (
    f"{tag}-{number}" for tag, number in zip(("ses", "run", "rep"), match.groups())
)

# Keep only the events whose session, run and repetition match the video
df_filtered = df_event[
    df_event['session'].eq(session)
    & df_event['run'].eq(run)
    & df_event['repetition'].eq(repetition)
]

# Function to create conversations based on events
def create_conversations(df_group, descriptions=None):
    """Build alternating human/gpt conversation turns, two per event row.

    Args:
        df_group: DataFrame with one row per event. The ``trial_type`` column
            holds the event name; the ``onset_vid`` value is inserted between
            square brackets in the question (presumably the event onset in
            the video -- units are not visible here).
        descriptions: Optional mapping from event name to the answer text.
            Defaults to the module-level ``event_descriptions``.

    Returns:
        A list of ``{"from": ..., "value": ...}`` dicts: for every row, a
        "human" question followed by a "gpt" answer. Events missing from the
        mapping are answered with "No description available.". A frame with
        no rows yields an empty list.

    Raises:
        KeyError: If a frame with at least one row lacks the ``trial_type``
            or ``onset_vid`` column.
    """
    # A DataFrame has no unambiguous truth value, hence the explicit length
    # check; returning here keeps row-less input from touching any column.
    if len(df_group) == 0:
        return []

    if descriptions is None:
        descriptions = event_descriptions

    # NOTE(review): the "<video>" placeholder is emitted in every human turn;
    # confirm that the training data loader accepts several placeholders for
    # a single video.
    conversations = []
    # Walking the two needed columns avoids building a Series per row, which
    # is what iterrows() does.
    for event, onset in zip(df_group["trial_type"], df_group["onset_vid"]):
        conversations.append({
            "from": "human",
            "value": f"<video> [{onset}] What is happening in the video at that time?"
        })
        conversations.append({
            "from": "gpt",
            "value": descriptions.get(event, "No description available.")
        })
    return conversations

# Turn the filtered events into conversation turns and wrap them, together
# with the video path, into the single entry of the dataset
conversations = create_conversations(df_filtered)
entry = {
    "id": 0,
    "video": str(video_file),
    "conversations": conversations,
}
json_data = [entry]

# Serialize the dataset with a 4-space indent and save it to disk
output_file = '../data/json_files/custom.json'
with open(output_file, 'w') as json_file:
    json_file.write(json.dumps(json_data, indent=4))