In [2]:
import numpy as np
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

In [3]:

# --- Inference configuration -------------------------------------------------
SEQ_LEN = 16                  # number of frames sampled from each video
IMG_SIZE = (224, 224)         # spatial size every sampled frame is resized to
MODEL_FILE = "final_action_model.keras"

# Class names, positioned so that list index == model output index
# (predict_video_action maps argmax of the model output through IDX2LABEL).
# NOTE(review): "Diving" is capitalised unlike every other label -- confirm it
# matches the label used at training time before normalising it.
LABELS = [
    "basketball", "biking", "Diving", "golf_swing",
    "horse_riding", "soccer_juggling", "swing", "tennis_swing",
    "trampoline_jumping", "volleyball_spiking", "walking",
]

# Integer class index -> human-readable label.
IDX2LABEL = dict(enumerate(LABELS))

In [4]:
# Load the saved Keras model from MODEL_FILE; predict_video_action feeds it the
# per-frame features produced by the MobileNetV2 feature extractor.
model = load_model(MODEL_FILE)

  saveable.load_own_variables(weights_store.get(inner_path))


In [5]:
# Headless MobileNetV2 (no classification top) with global average pooling, so
# each input frame is reduced to a single feature vector.
feature_extractor = MobileNetV2(
    input_shape=IMG_SIZE + (3,),
    include_top=False,
    weights='imagenet',
    pooling='avg',
)

In [6]:
def sample_frames(video_path, seq_len=SEQ_LEN, img_size=IMG_SIZE):
    """Sample ``seq_len`` evenly spaced RGB frames from a video file.

    Target frame indices are spread uniformly over the frame count reported by
    the container. The video is decoded sequentially and only frames at the
    target indices are kept, converted from BGR to RGB and resized.

    If fewer than ``seq_len`` frames are collected (short video, duplicate
    target indices, or failed reads), the last collected frame is repeated
    until the sequence is full.

    Args:
        video_path: Path of the video file, passed to ``cv2.VideoCapture``.
        seq_len: Number of frames to return.
        img_size: Size passed to ``cv2.resize`` as ``dsize``. OpenCV reads
            this as (width, height); the notebook default is square, so the
            order does not matter there.

    Returns:
        ``np.ndarray`` stacking ``seq_len`` frames, each with 3 colour
        channels in RGB order (typically ``uint8`` as decoded by OpenCV).

    Raises:
        ValueError: If the video cannot be opened, reports no frames, or no
            sampled frame could be decoded.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        if not cap.isOpened():
            raise ValueError("Video has no frames or cannot be read")

        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        # Some backends report 0 or a negative count for unreadable streams.
        if total_frames <= 0:
            raise ValueError("Video has no frames or cannot be read")

        frame_idxs = np.linspace(0, total_frames - 1, seq_len, dtype=int)
        # A set gives O(1) membership tests; duplicates (when the video has
        # fewer frames than seq_len) collapse here and are made up by padding.
        idx_set = set(frame_idxs.tolist())

        for i in range(total_frames):
            ret, frame = cap.read()
            if not ret:
                # The reported frame count can exceed what is decodable;
                # skip the failed read but keep the index counter aligned.
                continue
            if i in idx_set:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frame = cv2.resize(frame, img_size)
                frames.append(frame)
    finally:
        # Always free the decoder handle, including on the error paths above.
        cap.release()

    if len(frames) < seq_len:
        if not frames:
            # Nothing decoded: fail clearly instead of IndexError on frames[-1].
            raise ValueError("Video has no frames or cannot be read")
        # Pad by repeating the last frame.
        while len(frames) < seq_len:
            frames.append(frames[-1])
    return np.array(frames)

In [7]:
def predict_video_action(video_path):
    """Classify the action shown in a video file.

    Frames are sampled with ``sample_frames``, normalised with the MobileNetV2
    ``preprocess_input``, turned into per-frame features by
    ``feature_extractor``, and the feature sequence is scored by ``model``.

    Args:
        video_path: Path of the video to classify.

    Returns:
        Tuple ``(label, probs)``: the label looked up in ``IDX2LABEL`` for the
        highest-scoring class, and the model's output vector for the video.
    """
    clip = preprocess_input(sample_frames(video_path))
    per_frame_features = feature_extractor.predict(clip, verbose=0)

    # The model expects a batch; wrap the single sequence as a batch of one.
    batched = per_frame_features[np.newaxis, ...]
    class_probs = model.predict(batched, verbose=0)[0]

    best_idx = np.argmax(class_probs)
    return IDX2LABEL[best_idx], class_probs

In [None]:
def test_video_in_notebook(video_file):
    """Run the classifier on ``video_file``, print the result, and return it.

    Args:
        video_file: Path of the video to classify.

    Returns:
        Tuple ``(action, probs)`` exactly as produced by
        ``predict_video_action``.
    """
    action, probs = predict_video_action(video_file)
    print(f"Action: {action}", f"Probabilities: {probs}", sep="\n")
    return action, probs

# Example usage: classify a sample clip. The returned (label, probabilities)
# tuple is the cell's last expression, so the notebook displays it as well.
test_video_in_notebook("vids/3.mp4")

Action: golf_swing
Probabilities: [0.05594726 0.0015833  0.00995736 0.5273444  0.01395207 0.06210929
 0.12369637 0.00971623 0.16844793 0.00119065 0.02605511]


('golf_swing',
 array([0.05594726, 0.0015833 , 0.00995736, 0.5273444 , 0.01395207,
        0.06210929, 0.12369637, 0.00971623, 0.16844793, 0.00119065,
        0.02605511], dtype=float32))