## Installation of Gradio

In [None]:
!pip install gradio



## Importing Libraries

In [None]:
import base64
import os
import re
from io import BytesIO

import gradio as gr
import requests
from moviepy.video.io.VideoFileClip import VideoFileClip
from PIL import Image

## API Configuration

In [None]:

# Endpoint the detection requests are POSTed to.
API_LINK = "https://ai.api.nvidia.com/v1/vlm/nvidia/neva-22b"
# Prefer a key supplied through the NVIDIA_API_KEY environment variable so the
# secret never has to be written into the notebook; the original placeholder
# is kept as a fallback when the variable is unset or empty.
KEY = os.environ.get("NVIDIA_API_KEY") or "API Key for neva"

## Video frames

In [None]:
def get_video_frames(video_path, frames_count=16):
    """Sample evenly spaced frames from a video and return them as PIL images.

    Frames are taken at times ``i * duration / frames_count`` for
    ``i = 0 .. frames_count - 1``, so the first sample is at t=0 and the
    end of the clip itself is never sampled.

    Args:
        video_path: Path of the video file to open.
        frames_count: Number of frames to sample.

    Returns:
        A list with one ``PIL.Image`` per sampled frame.
    """
    clip = VideoFileClip(video_path)
    try:
        time_length = clip.duration
        frames = [
            clip.get_frame(i * time_length / frames_count)
            for i in range(frames_count)
        ]
    finally:
        # Always release the underlying reader, even if decoding a frame fails.
        clip.close()
    return [Image.fromarray(frame) for frame in frames]

In [None]:
def image_to_base64(image_frame):
    """Serialize an image to PNG and return the bytes as a base64 string.

    Args:
        image_frame: Object exposing ``save(file_obj, format=...)``, such as
            the PIL images produced elsewhere in this notebook.

    Returns:
        The base64 text of the PNG-encoded image.
    """
    with BytesIO() as png_stream:
        image_frame.save(png_stream, format="PNG")
        png_bytes = png_stream.getvalue()
    encoded = base64.b64encode(png_bytes)
    return encoded.decode()

## Detecting Actions Using NEVA-22B

In [None]:
def detect_action(image_b64, activity, timeout=60):
    """Ask the NEVA endpoint whether ``activity`` is visible in one image.

    Args:
        image_b64: Base64-encoded PNG data, embedded in the prompt as a data URI.
        activity: Name of the action to look for; inserted into the prompt.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        True if the model's reply contains the standalone word "yes"
        (case-insensitive), otherwise False.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or a timeout.
    """
    headers = {
        "Authorization": f"Bearer {KEY}",
        "Accept": "application/json",
    }
    info = {
        "messages": [
            {
                "role": "user",
                "content": f'🤔 Do you see someone performing "{activity}" in this image? 🖼️ <img src="data:image/png;base64,{image_b64}" />',
            }
        ],
        "max_tokens": 1024,
        "temperature": 0.20,
        "top_p": 0.70,
        "seed": 0,
        "stream": False,
    }
    # A timeout keeps a stalled connection from hanging the whole analysis.
    response = requests.post(API_LINK, headers=headers, json=info, timeout=timeout)
    # Surface API failures (bad key, rate limit, ...) instead of silently
    # counting them as "action not detected".
    response.raise_for_status()
    result = response.json()

    # Tolerate a missing or empty "choices" list and a null message/content.
    choices = result.get("choices") or [{}]
    content = (choices[0].get("message") or {}).get("content") or ""

    # Match "yes" as a whole word so replies such as "eyes" do not count.
    return re.search(r"\byes\b", content.lower()) is not None

## Calculating Accuracy

In [None]:
def calculate_accuracy(video_frames, activity):
    """Return the percentage of frames in which ``activity`` was detected.

    Each frame is base64-encoded with ``image_to_base64`` and passed to
    ``detect_action``; the result is the share of frames for which that
    call returned a truthy value.

    Args:
        video_frames: Sized collection of frames to check.
        activity: Name of the action to look for.

    Returns:
        Detection rate from 0 to 100. An empty collection yields 0.0 rather
        than a division-by-zero error.
    """
    total = len(video_frames)
    if total == 0:
        return 0.0
    detections = sum(
        1 for image in video_frames if detect_action(image_to_base64(image), activity)
    )
    return (detections / total) * 100

## Prototype Interface Using Gradio

In [None]:
def analyze_videos(video1, video2, activity):
    """Compute the detection rate of ``activity`` for two videos.

    Args:
        video1: Path of the first video.
        video2: Path of the second video.
        activity: Name of the action to look for; surrounding whitespace
            is ignored.

    Returns:
        A 2-tuple of strings, one result line per video. If anything fails,
        including a missing video or a blank activity, the first element is
        an error message and the second is None.
    """
    try:
        # Validate up front so the user gets a clear message instead of a
        # cryptic decoder error or a wasted round of API calls.
        if not video1 or not video2:
            raise ValueError("please upload both videos before running the analysis")
        activity = (activity or "").strip()
        if not activity:
            raise ValueError("please specify the action to look for")

        frames_video1 = get_video_frames(video1)
        frames_video2 = get_video_frames(video2)

        result1 = calculate_accuracy(frames_video1, activity)
        result2 = calculate_accuracy(frames_video2, activity)

        return (
            f"Video 1 '{activity}' Detection Rate: {result1:.2f}% ✅",
            f"Video 2 '{activity}' Detection Rate: {result2:.2f}% ✅",
        )
    except Exception as error:
        # This is the UI boundary: report any failure in the first output box.
        return f"Oops! Something went wrong: {str(error)}", None

# Input widgets, in the order analyze_videos receives its arguments.
_input_components = [
    gr.Video(label="🎥 Upload Synthetic Video"),
    gr.Video(label="🎥 Upload Real Video"),
    gr.Textbox(label="💡 Specify Action (e.g., running, jumping)"),
]

# Output widgets, one per element of the tuple analyze_videos returns.
_output_components = [
    gr.Textbox(label="📊 Synthetic Video Analysis"),
    gr.Textbox(label="📊 Real Video Analysis"),
]

# Wire the analysis function to the Gradio UI.
app = gr.Interface(
    title="🎬 Action Detection using NVIDIA NEVA",
    description="✨ Upload videos and specify an action to analyze. This tool calculates the success rate for each video.",
    fn=analyze_videos,
    inputs=_input_components,
    outputs=_output_components,
)

# Start the web UI only when this code runs as the main module.
if __name__ == "__main__":
    app.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0c5d2dfe7933ebdda9.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
