# In [None]:
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from scipy.linalg import sqrtm
import os
from glob import glob

# TensorFlow Hub handle of the I3D network (Kinetics-400 variant) that is used
# as the video feature extractor below.
_I3D_HANDLE = "https://tfhub.dev/deepmind/i3d-kinetics-400/1"

# Fetch the module once at import time and reuse it for every clip.
i3d_model = hub.load(_I3D_HANDLE)

def calculate_fvd(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Return the Frechet distance between two Gaussians.

    Computes ``||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2))``
    for the Gaussians ``N(mu1, sigma1)`` and ``N(mu2, sigma2)``.

    Args:
        mu1: Mean feature vector of the first set.
        sigma1: Covariance of the first set. A scalar (0-d) covariance, which
            is what ``np.cov`` returns for single-dimension features, is
            accepted and treated as a 1x1 matrix.
        mu2: Mean feature vector of the second set.
        sigma2: Covariance of the second set (same conventions as ``sigma1``).
        eps: Ridge added to the diagonal of both covariances, only when the
            matrix square root of their product contains non-finite values.

    Returns:
        The distance as a NumPy floating-point scalar.
    """
    mu1 = np.atleast_1d(np.asarray(mu1, dtype=np.float64))
    mu2 = np.atleast_1d(np.asarray(mu2, dtype=np.float64))
    # ``@`` rejects 0-d operands, so promote scalar covariances to 1x1.
    sigma1 = np.atleast_2d(np.asarray(sigma1, dtype=np.float64))
    sigma2 = np.atleast_2d(np.asarray(sigma2, dtype=np.float64))

    diff = mu1 - mu2

    # Called without ``disp``: that argument is deprecated in recent SciPy and
    # the default form returns just the square-root matrix.
    covmean = sqrtm(sigma1 @ sigma2)
    if not np.isfinite(covmean).all():
        # A (near-)singular product can yield NaN/inf; retry with a small
        # ridge on both covariances instead of returning a non-finite score.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = sqrtm((sigma1 + offset) @ (sigma2 + offset))

    # Numerical error can leave a small imaginary component; keep the real part.
    if np.iscomplexobj(covmean):
        covmean = covmean.real

    return np.sum(diff ** 2) + np.trace(sigma1 + sigma2 - 2 * covmean)

def load_images_as_video(folder_path, num_frames=16):
    """Load the leading JPEG frames of a folder as a single batched clip.

    Files matching ``*.jpg`` are ordered by plain string sort of their paths
    (so frame numbers should be zero-padded), truncated to ``num_frames``,
    decoded as RGB, resized to 224x224 and rescaled from [0, 255] to [-1, 1].

    Args:
        folder_path: Directory containing the frame images.
        num_frames: Maximum number of frames to keep from the start of the
            sorted file list.

    Returns:
        A float32 tensor of shape ``(1, T, 224, 224, 3)``, where ``T`` is the
        number of frames actually loaded (at most ``num_frames``).

    Raises:
        ValueError: If no ``*.jpg`` frame is selected from ``folder_path``.
    """
    img_paths = sorted(glob(os.path.join(folder_path, '*.jpg')))[:num_frames]
    if not img_paths:
        # Fail with a readable message instead of an opaque tf.stack error.
        raise ValueError(f"No .jpg frames found in {folder_path!r}")

    frames = []
    for img_path in img_paths:
        img = tf.image.decode_jpeg(tf.io.read_file(img_path), channels=3)
        # resize() returns float32 but keeps the 0..255 value range.
        img = tf.image.resize(img, (224, 224))
        # Rescale to [-1, 1] as done by the reference FVD preprocessing.
        # NOTE(review): confirm this range against the I3D module's documented
        # input convention before comparing scores with other implementations.
        frames.append(img / 127.5 - 1.0)

    video = tf.stack(frames)  # (T, 224, 224, 3)
    return tf.expand_dims(video, axis=0)  # prepend the batch dimension

def _run_i3d(video):
    """Run the module-level I3D model on one batched clip and return its output tensor."""
    if callable(i3d_model):
        return i3d_model(video)
    # A TF1-format Hub module returned by hub.load() is not callable; its
    # inference function is exposed as the 'default' serving signature, which
    # returns a dict keyed by output name.
    return i3d_model.signatures['default'](video)['default']


def extract_features(folder, num_frames=16):
    """Compute one I3D feature vector per sub-directory of ``folder``.

    Each sub-directory is treated as one video whose frames are loaded with
    ``load_images_as_video``. Entries of ``folder`` that are not directories
    (stray files) are skipped.

    Args:
        folder: Directory whose sub-directories each hold the frames of a video.
        num_frames: Maximum number of frames loaded per video.

    Returns:
        A NumPy array with one flattened feature vector per processed
        sub-directory, in sorted sub-directory name order. The array is empty
        when no sub-directory is found.
    """
    features = []
    for patient_folder in sorted(os.listdir(folder)):
        patient_path = os.path.join(folder, patient_folder)
        if not os.path.isdir(patient_path):
            continue  # only directories hold frame sequences
        video = load_images_as_video(patient_path, num_frames=num_frames)
        feature_vector = _run_i3d(video)
        features.append(feature_vector.numpy().flatten())
    return np.array(features)

# Run the feature extractor over both video collections (grounding first).
grounding_features = extract_features('path/to/grounding')
generation_features = extract_features('path/to/generation')

# Fit a Gaussian to each feature set: mean over samples and covariance between
# feature dimensions (rowvar=False: rows are samples, columns are variables).
mu_grounding = np.mean(grounding_features, axis=0)
sigma_grounding = np.cov(grounding_features, rowvar=False)
mu_generation = np.mean(generation_features, axis=0)
sigma_generation = np.cov(generation_features, rowvar=False)

# Frechet distance between the two fitted Gaussians.
fvd = calculate_fvd(
    mu1=mu_grounding,
    sigma1=sigma_grounding,
    mu2=mu_generation,
    sigma2=sigma_generation,
)
print(f"Frechet Video Distance: {fvd}")
