In [1]:
%%capture
!pip install -q \
  numpy==1.26.4 \
  scipy==1.11.4 \
  scikit-learn==1.3.2 \
  pandas==2.1.4 \
  matplotlib==3.8.2 \
  opencv-python==4.8.1.78 \
  imageio


!pip install -q torch accelerate==0.33.0
!pip install imageio-ffmpeg


!pip install -q git+https://github.com/huggingface/diffusers.git

!pip install -q \
  transformers==4.44.2 \
  sentencepiece



!git clone https://github.com/google-deepmind/physics-IQ-benchmark.git

!pip install -q --no-deps -r physics-IQ-benchmark/requirements.txt


In [2]:
import torch
from diffusers import CogVideoXImageToVideoPipeline,DiffusionPipeline
from diffusers.utils import export_to_video
from PIL import Image
from tqdm import tqdm
import pandas as pd
import glob
import os
import gc

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
os.chdir("/teamspace/studios/this_studio/physics-IQ-benchmark")
!python code/download_physics_iq_data.py
os.chdir("/teamspace/studios/this_studio")

Syncing gs://physics-iq-benchmark/full-videos/take-1/30FPS → ./physics-IQ-benchmark/full-videos/take-1/30FPS using gsutil rsync...
Building synchronization state...
Starting synchronization...
Copying gs://physics-iq-benchmark/full-videos/take-1/30FPS/0008_full-videos_30FPS_perspective-center_take-1_trimmed-ball-hits-duck.mp4...
Copying gs://physics-iq-benchmark/full-videos/take-1/30FPS/0004_full-videos_30FPS_perspective-left_take-1_trimmed-ball-behind-rotating-paper.mp4...
Copying gs://physics-iq-benchmark/full-videos/take-1/30FPS/0009_full-videos_30FPS_perspective-right_take-1_trimmed-ball-hits-duck.mp4...
Copying gs://physics-iq-benchmark/full-videos/take-1/30FPS/0010_full-videos_30FPS_perspective-left_take-1_trimmed-ball-hits-nothing.mp4...
Copying gs://physics-iq-benchmark/full-videos/take-1/30FPS/0014_full-videos_30FPS_perspective-center_take-1_trimmed-ball-in-basket.mp4...
Copying gs://physics-iq-benchmark/full-videos/take-1/30FPS/0016_full-videos_30FPS_perspective-left_take-1_t

In [4]:
gc.collect()
torch.cuda.empty_cache()

model_id = "THUDM/CogVideoX-5b-I2V"
print("Loading model... this may take a few minutes.")
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16
).to("cuda")

pipe.enable_model_cpu_offload()
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_tiling()

print("Model loaded successfully!")

Loading model... this may take a few minutes.


Fetching 17 files: 100%|██████████| 17/17 [00:16<00:00,  1.04it/s]
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00, 59.22it/s]it/s]
Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  6.83it/s]it/s]
Loading pipeline components...: 100%|██████████| 5/5 [00:01<00:00,  3.87it/s]


Model loaded successfully!


In [5]:
import torch

def count_params(module):
    return sum(p.numel() for p in module.parameters())

def count_trainable_params(module):
    return sum(p.numel() for p in module.parameters() if p.requires_grad)

total = 0
trainable = 0

for name, component in pipe.components.items():
    if hasattr(component, "parameters"):
        p = count_params(component)
        tp = count_trainable_params(component)
        total += p
        trainable += tp
        print(f"{name:15s} | params: {p/1e6:.2f}M | trainable: {tp/1e6:.2f}M")

print("-" * 60)
print(f"TOTAL params     : {total/1e6:.2f}M")
print(f"Trainable params : {trainable/1e6:.2f}M")


text_encoder    | params: 4762.31M | trainable: 4762.31M
vae             | params: 215.58M | trainable: 215.58M
transformer     | params: 5570.48M | trainable: 5570.48M
------------------------------------------------------------
TOTAL params     : 10548.37M
Trainable params : 10548.37M


In [6]:
df = pd.read_csv('/teamspace/studios/this_studio/physics-IQ-benchmark/descriptions/descriptions.csv')
df.columns

Index(['scenario', 'description', 'category', 'generated_video_name'], dtype='object')

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 396 entries, 0 to 395
Data columns (total 4 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   scenario              396 non-null    object
 1   description           396 non-null    object
 2   category              396 non-null    object
 3   generated_video_name  396 non-null    object
dtypes: object(4)
memory usage: 12.5+ KB


In [8]:
# switch_frames_dir = "/teamspace/studios/this_studio/physics-IQ-benchmark/physics-IQ-benchmark/switch-frames"
# output_dir = "/teamspace/studios/this_studio/evaluated_videos"
# os.makedirs(output_dir, exist_ok=True)

# switch_frames = sorted(glob.glob(os.path.join(switch_frames_dir, "*.jpg")))
# print(f"Found {len(switch_frames)} test cases.")


# system_prompt = """Generate a physically accurate video continuation that follows real-world physics laws. 
# Ensure proper object motion, gravity effects, collision dynamics, and momentum conservation. 
# The following description provides the physical scenario and expected behavior: """

# i = 0
# for frame_path in tqdm(switch_frames):
#     filename = os.path.basename(frame_path)
    
#     video_id = filename.split("_")[0]
    
#     output_filename = filename.replace("switch-frames_anyFPS", "generated").replace(".jpg", ".mp4")
#     output_path = os.path.join(output_dir, output_filename)
    
#     if os.path.exists(output_path):
#         continue
    
#     image = Image.open(frame_path).convert("RGB")
    
#     desc = df['description'][i]
#     prompt = system_prompt + " " + desc
    
#     frames = pipe(
#         prompt=prompt,
#         image=image,                      
#         num_videos_per_prompt=1,
#         num_inference_steps=60,                        
#         num_frames=49,    
#         guidance_scale=6.0,                  
#         generator=torch.Generator("cuda").manual_seed(42),              
#     ).frames[0]
    
#     export_to_video(frames, output_path, fps=8)
#     i += 1

# print(f"Generation complete. Videos saved to {output_dir}")

In [10]:
import subprocess
import os

input_dir = "/teamspace/studios/this_studio/evaluated_videos"
output_dir = "/teamspace/studios/this_studio/5"
os.makedirs(output_dir, exist_ok=True)

for f in os.listdir(input_dir):
    if f.endswith(".mp4"):
        subprocess.run([
            "ffmpeg", "-y",
            "-i", os.path.join(input_dir, f),
            "-t", "5",
            "-c", "copy",
            os.path.join(output_dir, f)
        ])

ffmpeg version 7.1 Copyright (c) 2000-2024 the FFmpeg developers
  built with gcc 13.3.0 (conda-forge gcc 13.3.0-1)
  configuration: --prefix=/home/zeus/miniconda3/envs/cloudspace --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1732155191655/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1732155191655/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1732155191655/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1732155191655/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --enable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libharfbuzz --enable-libfontconfig --enable-libopenh264 --enable-libdav1d --disable-gnutls --enable-libmp3lame --enable-libvpx --enable-libass --enable-pthreads --enable-vaapi --enable-libopenvino --enable-gpl --enable-libx264 --enable-libx265 --enable-libaom --enable-

In [12]:
generated_videos_dir = "/teamspace/studios/this_studio/evaluated_videos"
output_score_dir = "/teamspace/studios/this_studio/scores"
descriptions_path = "/teamspace/studios/this_studio/physics-IQ-benchmark/descriptions/descriptions.csv"

!python3 /teamspace/studios/this_studio/physics-IQ-benchmark/code/run_physics_iq.py \
    --input_folders {generated_videos_dir} \
    --output_folder {output_score_dir} \
    --descriptions_file {descriptions_path}

import glob
csv_files = glob.glob(os.path.join(output_score_dir, "*.csv"))
for f in csv_files:
    print(f"Results in: {f}")
    with open(f, 'r') as file:
         print(file.read())


Processing folder: /teamspace/studios/this_studio/evaluated_videos
All videos in /teamspace/studios/this_studio/evaluated_videos have FPS: 8.0
Validating real videos at FPS 8...
Validation successful for /teamspace/studios/this_studio/physics-IQ-benchmark/physics-IQ-benchmark/split-videos/testing/30FPS.
Real videos for FPS 8 folder is valid with all required files.
Real videos at FPS 8 are complete and ready at /teamspace/studios/this_studio/physics-IQ-benchmark/physics-IQ-benchmark/split-videos/testing/8FPS.
Validation successful for /teamspace/studios/this_studio/physics-IQ-benchmark/physics-IQ-benchmark/split-videos/testing/8FPS.
Binary masks for real videos at FPS 8 folder is valid with all required files.
Binary masks for real videos are ready at ./physics-IQ-benchmark/physics-IQ-benchmark/video-masks/real/8FPS.
Validation successful for /teamspace/studios/this_studio/evaluated_videos.
Binary masks for generated videos do not exist. Creating...
Generating mask for: 0133_perspecti