In [None]:
import sys
from pathlib import Path

import numpy as np
import torch

from torch.utils.data import DataLoader
from torchvision.transforms import Compose
from einops.layers.torch import Rearrange

In [None]:
# Pick the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Downlaod a sample video
! wget https://github.com/yt-dlp/yt-dlp/releases/latest/download/yt-dlp
! chmod +x yt-dlp

! ./yt-dlp https://www.youtube.com/watch?v=rEq1Z0bjdwc -f "best[height<=720]" -o video.mp4 --downloader-args "ffmpeg:-filter:v fps=25 -vcodec h264"

In [None]:
# Clone relevant repositories
# The target directory names are load-bearing: later cells import from the
# clones as `ig65m.ig65m...` and `s3d.s3dg`, and the IG65M extraction cell
# changes into `ig65m/` before invoking the CLI.
! git clone https://github.com/moabitcoin/ig65m-pytorch.git ig65m
! git clone https://github.com/antoine77340/S3D_HowTo100M.git s3d

In [None]:
# Download model weights
! wget https://www.rocq.inria.fr/cluster-willow/amiech/howto100m/s3d_howto100m.pth
! wget https://www.rocq.inria.fr/cluster-willow/amiech/howto100m/s3d_dict.npy

In [None]:
# Extract R(2+1)D-IG65M features and load them into python
! cd ig65m/ && python -m ig65m.cli extract ../video.mp4 ../video.npy --frame-size 112

if65m_features = np.load("video.npy")

In [None]:
# Extract S3D_HowTo100M features
# This import resolves against the `s3d/` clone in the working directory, so it
# can only run after the clone cell.
from s3d.s3dg import S3D
# Instantiate the model from the downloaded dictionary file.
# (512 is presumably the embedding size; it matches the shape check later on.)
s3d_net = S3D('s3d_dict.npy', 512)
# Load the model weights. map_location="cpu" deserializes every tensor onto the
# CPU first, so the checkpoint loads on machines without a GPU regardless of
# the device it was saved from; the model is moved to `device` below.
s3d_net.load_state_dict(torch.load('s3d_howto100m.pth', map_location="cpu"))
# Evaluation mode: the network is only used for feature extraction here.
s3d_net.eval()
s3d_net = s3d_net.to(device)

In [None]:
# Define video processing pipeline
from ig65m.ig65m.datasets import VideoDataset
from ig65m.ig65m.transforms import ToTensor, Resize, Normalize

# Preprocessing applied to every clip before it reaches S3D: convert to a
# tensor, move the channel axis to the front ("t h w c" -> "c t h w"), then
# resize with a [224, 224] target.
preprocessing_steps = [
    ToTensor(),
    Rearrange("t h w c -> c t h w"),
    Resize([224, 224]),
]
transform_how100 = Compose(preprocessing_steps)

# Clips are requested with clip=32 and served one at a time, in order.
video_clips = VideoDataset(Path("video.mp4"), clip=32, transform=transform_how100)
vd = DataLoader(video_clips, shuffle=False, num_workers=1, batch_size=1)

In [None]:
# Embed every clip with S3D and collect one flat vector per clip.
features = []
# Inference only: no_grad stops autograd from recording each forward pass,
# which would otherwise hold on to activation memory for no benefit.
with torch.no_grad():
    for _input in vd:
        clip_embedding = s3d_net(_input.to(device))["video_embedding"]
        # Bring the result back to host memory and flatten away the batch axis
        # (the loader yields batches of size 1).
        features.append(clip_embedding.cpu().numpy().flatten())
# Stack the per-clip vectors into a single 2-D array, one row per clip.
s3d_features = np.stack(features)

In [None]:
# Sanity check: both feature arrays must have the same, known shape for this
# sample video (13 rows of 512 values each).
expected_shape = (13, 512)
assert if65m_features.shape == expected_shape
assert s3d_features.shape == expected_shape

In [None]:
# Remove the downloaded files and repos
# Deletes everything the earlier cells fetched or produced in the working
# directory: the video and its extracted features, the yt-dlp binary, the S3D
# weights and dictionary, and both cloned repositories.
! rm -rf video.mp4 video.npy yt-dlp s3d_howto100m.pth s3d_dict.npy ig65m s3d