In [1]:
import json
import os
import pandas as pd
import cv2

### csv

In [None]:
# Only the first line of the file is parsed (presumably the whole annotation
# list is stored on a single line -- confirm against the dataset).
# JSON is UTF-8 by specification, so the encoding is pinned instead of
# depending on the platform default; readline() avoids loading every line
# just to use the first one.
with open("data/star/STAR_test.json", encoding="utf-8") as f:
    data = json.loads(f.readline())

In [4]:
data[1]
# Field mapping from one STAR JSON sample (first line) to the CSV row produced
# by get_info (second line). Empty slots in the first line are CSV columns that
# are not read directly from the JSON: frame_count/width/height come from the
# video file and type is derived from question_id.
# NOTE(review): the second line lists a4, but get_info only returns a0..a3.
# video_id,,,,question,answer,question_id,,choices
# video_id,frame_count,width,height,question,answer,qid,type,a0,a1,a2,a3,a4

{'question_id': 'Interaction_T1_1',
 'question': 'Which object was taken by the person?',
 'video_id': 'YSKX3',
 'start': 11.8,
 'end': 17.3,
 'choices': [{'choice_id': 0, 'choice': 'The shoe.'},
  {'choice_id': 1, 'choice': 'The cup/glass/bottle.'},
  {'choice_id': 2, 'choice': 'The pillow.'},
  {'choice_id': 3, 'choice': 'The dish.'}]}

In [None]:
def get_video_properties(video_path):
    """Read the frame count and frame size of a video with OpenCV.

    Args:
        video_path: Path of the video file to probe.

    Returns:
        A ``(total_frames, width, height)`` tuple of ints, or ``None`` when
        the video cannot be opened. Callers that unpack the result must
        handle the ``None`` case themselves.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            # Name the offending file so a failure inside a long batch loop
            # can be traced back to a specific video.
            print(f"Error: Could not open video: {video_path}")
            return None

        total_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        return total_frames, width, height
    finally:
        # Release the capture on every path, including the failure path.
        video.release()

In [None]:
def get_info(sample):
    """Flatten one STAR sample into the tuple used as a CSV row.

    Args:
        sample: Mapping with the keys ``video_id``, ``question_id``,
            ``question``, ``choices`` (at least four entries, each with a
            ``choice`` key) and optionally ``answer``.

    Returns:
        ``(video_id, frame_count, width, height, question, answer, qid,
        type, a0, a1, a2, a3)``. ``answer`` is ``""`` when the sample has
        no ``answer`` key.
    """
    video_id = sample["video_id"]
    qid = sample["question_id"]
    # The type is the second "_"-separated token of the question id,
    # e.g. "Interaction_T1_1" -> "T1".
    qtype = qid.split("_")[1]
    question = sample["question"]
    answer = sample.get("answer", "")

    # Only the first four choices are used.
    options = sample["choices"]
    a0, a1, a2, a3 = (options[k]["choice"] for k in range(4))

    # Frame count and size are probed from the video file itself.
    video_file = os.path.join("data/star/Charades_v1_480", f"{video_id}.mp4")
    frame_count, width, height = get_video_properties(video_file)

    return (
        video_id, frame_count, width, height,
        question, answer, qid, qtype,
        a0, a1, a2, a3,
    )

In [11]:
# Column labels are passed as a flat list: wrapping them in a second list
# makes pandas build MultiIndex columns instead of plain string labels.
CSV_COLUMNS = [
    "video_id", "frame_count", "width", "height",
    "question", "answer", "qid", "type",
    "a0", "a1", "a2", "a3",
]

# Collect every row first and build the frame once; assigning df.loc[i] in a
# loop re-allocates the frame on each insertion.
_records = [get_info(_sample) for _sample in data]
df = pd.DataFrame(_records, columns=CSV_COLUMNS)

In [None]:
# Persist the table. The row index is written as well (to_csv default); it
# shows up as the "Unnamed: 0" column when the file is read back later.
df.to_csv("data/star/test.csv")

# frame2time and gsub

In [None]:
import pandas as pd
# Input locations: the source videos, the CSV written earlier in this
# notebook, and the per-question segment table.
video_dirs = "data/star/Charades_v1_480"
csv_file = "data/star/test.csv"
segments_file = "data/star/Video_Segments.csv"
# Note: this rebinds `df` to the table reloaded from disk.
df = pd.read_csv(csv_file)
# Segment table; the preview further down shows the columns question_id,
# video_id, start and end.
segments = pd.read_csv(segments_file)

In [14]:
# Sanity check: show the first row of the reloaded CSV.
df.loc[0]

Unnamed: 0                                            0
video_id                                          YSKX3
frame_count                                         278
width                                               270
height                                              480
question       Which object was put down by the person?
answer                                              NaN
qid                                    Interaction_T1_0
type                                                 T1
a0                                            The food.
a1                                          The laptop.
a2                                            The book.
a3                                          The pillow.
Name: 0, dtype: object

In [15]:
# Sanity check: show the segment row(s) recorded for one question id.
segments[segments["question_id"] == "Interaction_T1_13"]

Unnamed: 0,question_id,video_id,start,end
12,Interaction_T1_13,6H78U,11.1,19.6


In [17]:
# Collect, per video: its duration, its fps, and a "location" dict that maps
# each question id to its [[start, end]] segment.
gsub = {}

for _, sample in df.iterrows():
    video_id = sample["video_id"]
    qid = sample["qid"]

    # Look the segment up once; it is needed whether or not the video has
    # been seen before.
    _segments = segments[segments["question_id"] == qid]
    if _segments.empty:
        raise KeyError(f"No segment found for question_id {qid!r}")
    # Cast to built-in floats so json.dump never receives NumPy scalar types
    # (NumPy integers, for example, are not JSON serializable).
    _location = [[float(_segments["start"].values[0]), float(_segments["end"].values[0])]]

    if video_id not in gsub:
        video_path = f"{video_dirs}/{video_id}.mp4"
        # NOTE(review): get_video_properties_fps is not defined in any cell
        # visible in this notebook; it must be defined or imported before this
        # cell runs. It is expected to return (fps, duration) -- confirm.
        _fps, _duration = get_video_properties_fps(video_path)
        gsub[video_id] = {"duration": _duration, "location": {}, "fps": _fps}

    gsub[video_id]["location"][qid] = _location

In [None]:
# Write the per-video duration / fps / segment dictionary to JSON.
with open('data/star/gsub_test.json', 'w') as json_file:
    json.dump(gsub, json_file)

In [19]:
import numpy as np
def sample_clips(total_frames, num_clips, num_frames_per_clip):
    """Pick ``num_clips`` evenly spaced frame indices from a video.

    Clip centres are the interior points of
    ``np.linspace(0, total_frames, num_clips + 2)`` truncated to integers.
    Around each centre a window of ``int(num_frames_per_clip / 2)`` frames on
    either side is taken and clamped to ``[0, total_frames]``. A window that
    touches a boundary and is shorter than ``num_frames_per_clip`` is padded
    by repeating the first (or last) frame, and the frame at the middle
    position of the resulting window is returned for that clip.

    Args:
        total_frames: Number of frames in the video.
        num_clips: Number of frame indices to return.
        num_frames_per_clip: Nominal window length around each centre.

    Returns:
        A list of ``num_clips`` frame indices (Python ints). An empty list is
        returned when ``total_frames`` is zero or negative, since there is no
        frame to sample (previously this raised ``IndexError``).
    """
    if total_frames <= 0:
        return []

    half_window = int(num_frames_per_clip / 2)
    frames = list(range(total_frames))
    # Drop the two end points of the linspace so centres lie inside the video.
    centers = np.linspace(0, total_frames, num_clips + 2, dtype=np.int32)[1: num_clips + 1]

    clips = []
    for center in centers:
        center = int(center)
        clip_start = max(center - half_window, 0)
        clip_end = min(center + half_window, total_frames)
        clip = frames[clip_start:clip_end]
        shortage = num_frames_per_clip - (clip_end - clip_start)

        # Window clipped at the beginning: repeat the first frame in front.
        if clip_start == 0 and len(clip) < num_frames_per_clip:
            clip = [frames[clip_start]] * shortage + clip
        # Window clipped at the end (and still short): repeat the last frame.
        if clip_end == total_frames and len(clip) < num_frames_per_clip:
            clip = clip + [frames[clip_end - 1]] * shortage

        # Keep only the frame in the middle of the (possibly padded) window.
        clips.append(clip[len(clip) // 2])
    return clips

In [None]:
def get_frames_by_timestamps(video_path, timestamps):
    """Decode the frames of a video located at the given timestamps.

    Args:
        video_path: Path of the video file to read.
        timestamps: Iterable of positions in seconds.

    Returns:
        A list with the frames that could be read, in the order of
        ``timestamps``. Timestamps whose frame cannot be read are skipped
        with a warning, so the list may be shorter than ``timestamps``.
        An empty list is returned when the video cannot be opened.
    """
    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            # Name the offending file so a failure inside a long batch loop
            # can be traced back to a specific video.
            print(f"Error: Could not open video: {video_path}")
            return []

        fps = video.get(cv2.CAP_PROP_FPS)

        frames = []
        for timestamp in timestamps:
            # Seconds -> frame index, truncated towards zero.
            frame_index = int(timestamp * fps)
            video.set(cv2.CAP_PROP_POS_FRAMES, frame_index)

            ret, frame = video.read()
            if ret:
                frames.append(frame)
            else:
                print(f"Warning: Could not read frame at timestamp {timestamp}s.")

        return frames
    finally:
        # Release the capture on every path, including errors raised mid-loop.
        video.release()

In [21]:
def save_frames_as_images(frames, output_prefix='frame'):
    """Write frames as ``<output_prefix>/<i>.png`` files.

    Args:
        frames: Sequence of images accepted by ``cv2.imwrite``.
        output_prefix: Directory that receives the images. It is created,
            together with any missing parent directories, if it does not
            exist yet.
    """
    # makedirs also creates missing parents (os.mkdir fails on them), and
    # exist_ok avoids the check-then-create race.
    os.makedirs(output_prefix, exist_ok=True)

    for i, frame in enumerate(frames):
        filename = f"{output_prefix}/{i}.png"
        # cv2.imwrite reports failure through its return value, not an
        # exception, so surface it instead of dropping the frame silently.
        if not cv2.imwrite(filename, frame):
            print(f"Warning: Could not write image {filename}.")

In [None]:
# Map every video id to the timestamps (in seconds) of its sampled frames and
# dump those frames as PNG files under data/star/frames/<video_id>/.
frame2time = {}

# Sampling settings passed to sample_clips.
NUM_CLIPS = 32
FRAMES_PER_CLIP = 4

# Resolve each video's frame count once (first row per video, as before)
# instead of filtering the whole table on every iteration.
_frame_counts = df.drop_duplicates(subset="video_id").set_index("video_id")["frame_count"]

for vid in gsub.keys():
    _frame_count = int(_frame_counts.loc[vid])
    _fps = gsub[vid]["fps"]
    _clips = sample_clips(_frame_count, NUM_CLIPS, FRAMES_PER_CLIP)
    # Frame index -> seconds, rounded to two decimals.
    _time_clips = [round(x / _fps, 2) for x in _clips]
    v_path = f"{video_dirs}/{vid}.mp4"
    _frames = get_frames_by_timestamps(v_path, _time_clips)
    save_frames_as_images(_frames, f"data/star/frames/{vid}")
    frame2time[vid] = _time_clips
    # Single-line progress indicator: processed videos --- total videos.
    print("\r",len(frame2time), " --- ",len(gsub), end='')

 955  ---  955

In [None]:
# Write the per-video frame timestamps to JSON.
# NOTE(review): every other path in this notebook lives under "data/star/",
# but this one starts at "star/" -- confirm the missing "data/" prefix is
# intended; as written the file lands in a different directory (or the open
# fails if "star/" does not exist).
with open('star/frame2time_test.json', 'w') as json_file:
    json.dump(frame2time, json_file)