# **VLOG ANALYSIS (WITH NO AUDIO VOICEOVER)**

In [12]:
import os
import warnings

import cv2
import google.generativeai as genai
import torch
from google.generativeai import GenerativeModel, GenerationConfig
from transformers import BlipProcessor, BlipForConditionalGeneration

# Configure Gemini API Key
# Prefer the GOOGLE_API_KEY environment variable so the secret does not have
# to be stored in the notebook; the literal placeholder is kept as the
# fallback, so behaviour is unchanged when the variable is not set.
genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", "API KEY"))

# Function to Extract Scene Descriptions
def extract_scene_descriptions(video_path, frame_interval=30):
    """
    Caption sampled frames of a video with the BLIP image-captioning model.

    Every ``frame_interval``-th frame, starting with the first one, is
    converted from BGR to RGB, captioned with
    ``Salesforce/blip-image-captioning-base`` and the decoded caption is
    appended to the result.

    :param video_path: Path to the video file.
    :param frame_interval: Caption one frame out of every ``frame_interval``
        frames; must be greater than zero.
    :return: List of caption strings in frame order. The list is empty (and a
        ``RuntimeWarning`` is issued) when the video cannot be opened.
    :raises ValueError: If ``frame_interval`` is not positive.
    """
    # Validate before doing any expensive work; a zero interval would
    # otherwise only surface as ZeroDivisionError after the model is loaded.
    if frame_interval <= 0:
        raise ValueError(
            f"frame_interval must be a positive number, got {frame_interval!r}"
        )

    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            # Keep the historical "no frames -> empty list" result, but make
            # the failure visible and skip loading the model for nothing.
            warnings.warn(
                f"Could not open video: {video_path!r}",
                RuntimeWarning,
                stacklevel=2,
            )
            return []

        processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
        device = "cuda" if torch.cuda.is_available() else "cpu"
        model.to(device)

        scene_descriptions = []
        frame_count = 0
        success, frame = video.read()

        while success:
            if frame_count % frame_interval == 0:
                # Frames are decoded as BGR; convert once to RGB for BLIP.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                inputs = processor(rgb_frame, return_tensors="pt").to(device)
                caption = model.generate(**inputs)
                description = processor.decode(caption[0], skip_special_tokens=True)
                scene_descriptions.append(description)

            frame_count += 1
            success, frame = video.read()

        return scene_descriptions
    finally:
        # Release the capture handle even if captioning raises midway.
        video.release()

# Caption the sample video and show the raw list of generated captions
video_path = "/kaggle/input/video-data/Japan Cinematic Travel Video _ Grand Escape Weathering With You Music _ Discover Japan in 4 K.mp4"
print("Extracting scene descriptions...")
scenes = extract_scene_descriptions(video_path)
# One print call: heading and caption list separated by a newline
print("\nScene Descriptions:", scenes, sep="\n")

# Gemini 1.5 Pro setup: sampling parameters first, then the model handle
generation_config = GenerationConfig(
    candidate_count=1,
    max_output_tokens=8192,
    temperature=0.7,
    top_k=30,
    top_p=0.9,
)

MODEL_ID = "gemini-1.5-pro-002"
model_bot = GenerativeModel(
    model_name=MODEL_ID,
    system_instruction=[
        "You are an insightful assistant analyzing video scenes.",
        "Your mission is to extract meaningful insights from video scene descriptions.",
    ],
)

# Prompt sent to Gemini: a single-element list holding one f-string.
# `scenes` (the caption list produced above) is interpolated with its default
# string formatting, so the model receives the Python list representation,
# including any repeated captions from consecutive sampled frames.
contents = [
    f"""
You are a travel expert analyzing scene descriptions from a video.  Provide a structured summary of the travel destinations and experiences depicted. For each place/activity, include:

* **Name:** The name of the place or activity (if identifiable; otherwise, a descriptive label).
* **Description:** A brief description (2-3 sentences) based on the visual scenes.
* **Type:** Categorize as City, Hotel, Experience, Activity, Landmark,  or Other.
* **Relevant Information:** Based on the "Type," provide the following:
    * **City:** Best time to visit, type of traveler who would enjoy it, potential local cuisine.
    * **Hotel:**  Potential amenities (inferred from visuals), target audience (e.g., families, couples), overall vibe.
    * **Experience/Activity:** Best time to participate, who might enjoy it.
    * **Landmark:** Historical or cultural significance, key features.
    * **Other:** Any other relevant observations.

If information is not clearly depicted in the scenes, use "Not specified" or make reasonable assumptions based on the visuals.  Focus on creating a concise and informative travel guide based on the video content.


Scene Descriptions:
{scenes}
"""
]

# Generate Analysis Using Gemini
print("\nAnalyzing scenes with Gemini...")
response = model_bot.generate_content(
    contents=contents,
    generation_config=generation_config,
)

# Display Analysis Results
if not response.candidates:
    print("No candidates found in the response.")
else:
    candidate = response.candidates[0]
    parts = candidate.content.parts
    if parts:
        analysis_text = parts[0].text
        print("\nAnalysis Results:")
        print(analysis_text)
    else:
        # A candidate can come back without any content parts (e.g. when
        # generation was stopped early); indexing parts[0] would then raise
        # IndexError, so report the finish reason instead.
        print(
            "The first candidate contains no content parts "
            f"(finish reason: {candidate.finish_reason})."
        )


Extracting scene descriptions...





Scene Descriptions:
['a black background with a white and red flower', 'a view of a pagoda tower in the middle of a city', 'a pagoda tower in the middle of a city', 'a pagoda tower in the middle of a city', 'a pagoda tower in the middle of a city', 'a pagoda tower with a mountain in the background', 'a green field with a blue sky in the background', 'a green field with a blue sky in the background', 'a green field with a blue sky in the background', 'a tree with pink flowers in the sky', 'a tree with red flowers in the sun', 'a tree with red flowers in the sun', 'a group of colorful flags hanging from a pole', 'a bunch of balloons hanging from a pole', 'a group of people are holding colorful balloons', 'a bunch of colorful flags hanging from a building', 'a man standing in front of a red gate', 'a man sitting in front of a red gate', 'a man standing in front of a red gate', 'a man standing in front of a red gate', 'a lake with a boat in it', 'a lake with a boat in it', 'a lake with a 