In [None]:
# Mount Google Drive at /content/drive so that later cells can read the input
# video and write the extracted features under that path (Colab-only API).
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import torch
import torch.nn as nn
from torchvision import transforms
from PIL import Image
import cv2
import numpy as np


# 1. Load the DINO Model
def load_dino_model(model_name='dino_vitb16'):
    """Load a pretrained DINO model from torch.hub and switch it to eval mode.

    Args:
        model_name: Entrypoint name passed to ``torch.hub.load`` for the
            ``facebookresearch/dino:main`` repository. Defaults to
            ``'dino_vitb16'``, the value that used to be hard-coded.

    Returns:
        The model object returned by ``torch.hub.load`` after ``eval()`` has
        been called on it. No device placement is performed here.
    """
    print("Loading DINO model...")
    model = torch.hub.load('facebookresearch/dino:main', model_name)
    # Inference only: eval() disables training-time behaviour such as dropout.
    model.eval()
    return model


# 2. Frame Preprocessing Function
def preprocess_frame(frame, size=(224, 224), mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)):
    """Turn one image into a normalised, batched tensor for the model.

    Args:
        frame: Image accepted by the torchvision transforms used below (the
            caller in this notebook passes a ``PIL.Image``).
        size: ``(height, width)`` handed to ``transforms.Resize``.
        mean: Per-channel mean handed to ``transforms.Normalize``.
        std: Per-channel standard deviation handed to ``transforms.Normalize``.

    Returns:
        The transformed tensor with a leading batch dimension of size 1 added
        by ``unsqueeze(0)``.

    NOTE(review): the DINO reference evaluation code normalises with ImageNet
    statistics (mean 0.485/0.456/0.406, std 0.229/0.224/0.225). The 0.5
    defaults are kept so existing outputs do not change; confirm which
    statistics are intended before reusing the features.
    """
    preprocess = transforms.Compose([
        transforms.Resize(tuple(size)),
        # Collapse to luminance and replicate it over 3 channels, so colour and
        # grayscale inputs both yield the 3-channel layout the model expects.
        transforms.Grayscale(num_output_channels=3),
        transforms.ToTensor(),
        transforms.Normalize(mean=list(mean), std=list(std)),
    ])
    return preprocess(frame).unsqueeze(0)

# 3. Process Video and Extract Features
def _model_device(model):
    """Return the device of the model's first parameter, or None if unknown."""
    parameters = getattr(model, "parameters", None)
    if parameters is None:
        return None
    first_param = next(iter(parameters()), None)
    return None if first_param is None else first_param.device


def extract_dino_features(video_path, model, target_frames=1000):
    """Sample frames evenly from a video and stack the model output per frame.

    ``target_frames`` indices are spread uniformly over the whole video with
    ``np.linspace``. Each sampled frame is converted to grayscale, preprocessed
    with ``preprocess_frame`` and passed through ``model`` under
    ``torch.no_grad()``.

    Args:
        video_path: Path of the video file opened with ``cv2.VideoCapture``.
        model: Callable producing the features for a batched input tensor.
            Presumably returns shape ``(1, D)`` so that ``squeeze(0)`` yields a
            1-D vector; verify for models other than DINO ViT.
        target_frames: Number of frame indices to sample (must be >= 1).

    Returns:
        ``np.ndarray`` with one row per frame that was read successfully.

    Raises:
        ValueError: If ``target_frames`` is smaller than 1, the video cannot be
            opened, it reports no frames, or no sampled frame could be read.
    """
    if target_frames < 1:
        raise ValueError(f"target_frames must be >= 1, got {target_frames}")

    print(f"Processing video: {video_path}")
    cap = cv2.VideoCapture(video_path)
    features_list = []

    # try/finally guarantees the capture handle is released even when decoding
    # or the forward pass raises part-way through the video.
    try:
        if not cap.isOpened():
            raise ValueError(f"Could not open video: {video_path}")

        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        if frame_count <= 0:
            raise ValueError(f"Video reports no frames: {video_path}")

        frame_indices = np.linspace(0, frame_count - 1, target_frames).astype(int)

        # Feed inputs on whatever device the model lives on, so a model moved
        # to GPU by the caller works without further changes.
        device = _model_device(model)

        for idx in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, int(idx))
            ret, frame = cap.read()
            if not ret:
                print(f"Failed to read frame at index {idx}")
                continue

            # Drop colour, then expand back to 3 identical channels for PIL.
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            gray_frame_rgb = cv2.cvtColor(gray_frame, cv2.COLOR_GRAY2RGB)
            pil_frame = Image.fromarray(gray_frame_rgb)

            # Preprocess the frame and extract features
            input_tensor = preprocess_frame(pil_frame)
            if device is not None:
                input_tensor = input_tensor.to(device)
            with torch.no_grad():
                features = model(input_tensor).squeeze(0)
            features_list.append(features.cpu().numpy())
    finally:
        cap.release()

    if not features_list:
        # Fail with a clear message instead of np.vstack's generic error.
        raise ValueError(f"No frames could be read from video: {video_path}")

    print("Feature extraction complete.")
    return np.vstack(features_list)

# 4. Main Function
def main(
    video_path="/content/drive/My Drive/EC523_Project/natural_movie_3.mp4",
    output_path="/content/drive/My Drive/EC523_Project/dino_features.npy",
    target_frames=3600,
):
    """Run the full pipeline: load the model, extract features, save them.

    Args:
        video_path: Video file to process. Defaults to the project movie on
            the mounted Drive.
        output_path: Destination ``.npy`` file written with ``np.save``.
        target_frames: Number of evenly spaced frames to sample, forwarded to
            ``extract_dino_features``.
    """
    # Load the DINO model
    dino_model = load_dino_model()

    # Extract features
    dino_features = extract_dino_features(
        video_path, dino_model, target_frames=target_frames
    )

    # Save features to a file
    np.save(output_path, dino_features)
    print(f"Features saved to {output_path}")
    print(f"Feature shape: {dino_features.shape}")

# Run the pipeline when this cell/script is executed directly; the guard keeps
# main() from running if the code is imported as a module instead.
if __name__ == "__main__":
    main()

Loading DINO model...


Downloading: "https://github.com/facebookresearch/dino/zipball/main" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/dino/dino_vitbase16_pretrain/dino_vitbase16_pretrain.pth" to /root/.cache/torch/hub/checkpoints/dino_vitbase16_pretrain.pth
100%|██████████| 327M/327M [00:01<00:00, 265MB/s]


Processing video: /content/drive/My Drive/EC523_Project/natural_movie_3.mp4
Feature extraction complete.
Features saved to /content/drive/My Drive/EC523_Project/dino_features.npy
Feature shape: (3600, 768)


In [None]:
import numpy as np

# Sanity check: load the saved feature file back and report its layout.
npy_file_path = "/content/drive/My Drive/EC523_Project/dino_features.npy"

data = np.load(npy_file_path)

print(f"Data shape: {data.shape}")
print(f"Data type: {data.dtype}")
# Show only the leading values of the first entry; printing the whole vector
# floods the cell output without adding information.
print(f"Sample data (first 8 values of entry 0): {np.ravel(data[0])[:8]}")


Data shape: (3600, 768)
Data type: float32
Sample data: [-2.2534041e+00 -8.5639745e-02  3.9164035e+00 -1.9651811e-01
  1.1714014e+00  1.8200840e+00 -7.0511997e-02 -1.6928266e+00
 -2.3439450e+00  1.3587298e+00 -3.6901531e+00 -9.1349804e-01
  7.6360273e-01  1.3172982e+00 -3.3061785e-01 -4.1366258e-01
 -1.1506133e-01  1.1859964e+00  7.1120776e-02  3.1176443e+00
 -5.4618087e-02 -9.5164132e-01 -1.8744853e+00 -2.4323210e-01
 -1.5190521e+00 -2.2272897e+00 -4.8131204e-01  1.0230111e+00
 -4.0389863e-01  5.3099010e-02 -2.6250968e+00 -6.2344283e-01
  5.3645247e-01  2.1444085e+00 -8.5055751e-01 -3.1921270e+00
  2.4972594e+00 -2.5746558e+00  2.4504135e+00  5.0046492e-01
 -1.1034878e+00 -7.4689232e-02  1.2618390e+00 -1.1819612e+00
  2.8242409e+00 -9.4111099e+00 -4.0019474e+00  1.8020989e+00
  3.4761333e-01 -1.5810339e-01 -3.5584220e-01  5.5530244e-01
  7.6176529e+00 -1.2404474e+00 -8.0582228e+00 -3.4060853e+00
 -1.7385694e-01  1.8521179e+00  2.1435680e+00  1.0796664e+00
 -3.2981431e-01 -3.9026842e+0