# ChromaWhisper APP

## Importing Required Libraries

In [1]:
import os
import json
import cv2
import ffmpeg
import yt_dlp
import whisper
import chromadb
from pathlib import Path
from typing import Optional, List, Dict
from webvtt import WebVTT, Caption
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
from sentence_transformers import SentenceTransformer
from chromadb.config import Settings

## **Video Processing Pipeline**  
Our pipeline efficiently processes videos by downloading, transcribing, and extracting key visual information using state-of-the-art tools. It is implemented through the following modular components:  

1. **VideoDownloader**  
   - Utilizes `yt-dlp` to download videos along with available subtitles.  
   - Ensures optimal resolution selection for better processing.  
   - Handles multiple video formats and subtitle extraction seamlessly.  

2. **TranscriptProcessor**  
   - Uses `Whisper`, an advanced speech-to-text model, to generate transcripts when subtitles are unavailable.  
   - Supports multiple languages and provides accurate transcription even in noisy environments.  
   - Converts transcripts into structured WebVTT format for downstream processing.  

3. **FrameProcessor**  
   - Extracts keyframes using intelligent scene detection techniques.  
   - Generates frame descriptions using `BLIP`, a powerful vision-language model.  
   - Helps in creating concise and meaningful summaries for video content analysis.  


In [None]:
# Class to download videos and subtitles from YouTube
class VideoDownloader:
    def __init__(self, base_dir: str = "./input_data/Videos"):
        # Initialize the base directory where videos will be stored
        self.base_dir = Path(base_dir)
        self.base_dir.mkdir(parents=True, exist_ok=True)  # Create directory if it doesn't exist

    def download_video(self, url: str, video_id: str) -> Optional[Dict]:
        """
        Downloads a video from the given URL and extracts metadata.

        Args:
            url (str): The URL of the video to download.
            video_id (str): The unique identifier for the video.

        Returns:
            Optional[Dict]: A dictionary containing the video path, title, description, and URL,
                            or None if the download fails.
        """
        output_dir = self.base_dir / video_id  # Directory to store the video
        output_dir.mkdir(exist_ok=True)  # Create directory if it doesn't exist
        ydl_opts = {
            'format': 'best',  # Download the best available format
            'outtmpl': str(output_dir / '%(title)s.%(ext)s'),  # Output file naming
            'quiet': True,  # Suppress console output
            'ignoreerrors': True,  # Ignore download errors
            'writedescription': True,  # Fetch video description
        }
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            try:
                info = ydl.extract_info(url, download=True)  # Extract video info and download
            except Exception as e:
                print(f"Error during video download/extraction: {e}")
                return None

            if not info:
                return None
            video_path = Path(ydl.prepare_filename(info))  # Path to the downloaded video file
            if not video_path.exists():
                return None
            return {
                "video_path": video_path,
                "title": info.get("title", ""),
                "description": info.get("description", ""),
                "url": url,
            }

    def download_subtitles(self, url: str, video_id: str) -> Optional[Path]:
        """
        Downloads subtitles for the given video URL.

        Args:
            url (str): The URL of the video.
            video_id (str): The unique identifier for the video.

        Returns:
            Optional[Path]: The path to the downloaded subtitle file, or None if download fails.
        """
        output_dir = self.base_dir / video_id  # Directory to store the subtitles
        output_dir.mkdir(exist_ok=True)  # Create directory if it doesn't exist
        ydl_opts = {
            'skip_download': True,  # Skip video download
            'writesubtitles': True,  # Write subtitles if available
            'writeautomaticsub': True,  # Write automatic subtitles if available
            'subtitleslangs': ['en'],  # Download English subtitles
            'subtitlesformat': 'vtt',  # Subtitle format
            'outtmpl': str(output_dir / '%(title)s.%(ext)s'),  # Output file naming
            'quiet': True,  # Suppress console output
            'ignoreerrors': True  # Ignore download errors
        }
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)  # Extract video info and download subtitles
        except Exception as e:
            print(f"Error during subtitle download/extraction: {e}")
            return None

        if not info:
            return None

        requested_subtitles = info.get('requested_subtitles')
        if requested_subtitles:
            subs = requested_subtitles.get('en', {})
            if subs and (sub_path := Path(subs.get('filepath', ''))).exists():
                return sub_path

        # Check for existing vtt files in the output directory
        for f in output_dir.glob("*.en.vtt"):
            return f

        # If no subtitles were downloaded, return None
        return None

# Class to process video transcripts using Whisper
class TranscriptProcessor:
    def __init__(self):
        # Load the Whisper model
        self.model = whisper.load_model("base")

    def process_subtitles(self, video_path: Path, sub_path: Optional[Path]) -> Path:
        """
        Processes subtitles for the given video, either by using provided subtitles or generating new ones.

        Args:
            video_path (Path): The path to the video file.
            sub_path (Optional[Path]): The path to existing subtitle file (optional).

        Returns:
            Path: The path to the processed subtitle file.
        """
        video_dir = video_path.parent
        if sub_path and sub_path.exists() and sub_path.stat().st_size > 0:
            return sub_path  # Use existing subtitles if they exist and are not empty
        return self._generate_subtitles(video_path, video_dir)  # Generate subtitles if no existing ones

    def _generate_subtitles(self, video_path: Path, output_dir: Path) -> Path:
        """
        Generates subtitles for the given video using the Whisper model.

        Args:
            video_path (Path): The path to the video file.
            output_dir (Path): The directory to save the generated subtitles.

        Returns:
            Path: The path to the generated subtitle file.
        """
        audio_path = output_dir / "temp_audio.wav"  # Path for temporary audio file
        vtt_path = output_dir / "generated_subtitles.vtt"  # Path for generated subtitles
        (
            ffmpeg.input(str(video_path))
            .output(str(audio_path), acodec='pcm_s16le', ar=16000, ac=1)  # Extract audio from video
            .run(quiet=True)
        )
        result = self.model.transcribe(str(audio_path), task="translate", language="en", fp16=False)  # Transcribe audio to subtitles
        self._create_vtt(result["segments"], vtt_path)  # Create VTT file from transcription segments
        if audio_path.exists():
            audio_path.unlink()  # Remove temporary audio file
        return vtt_path

    def _create_vtt(self, segments: List[Dict], output_path: Path):
        """
        Creates a VTT file from the given transcription segments.

        Args:
            segments (List[Dict]): A list of transcription segments.
            output_path (Path): The path to save the VTT file.
        """
        vtt = WebVTT()
        for seg in segments:
            caption = Caption(
                self._format_time(seg['start']),
                self._format_time(seg['end']),
                seg['text'].strip()
            )
            vtt.captions.append(caption)
        vtt.save(str(output_path))

    @staticmethod
    def _format_time(seconds: float) -> str:
        """
        Formats time in seconds to a string format (HH:MM:SS.milliseconds).

        Args:
            seconds (float): The time in seconds.

        Returns:
            str: The formatted time string.
        """
        hours = int(seconds // 3600)
        mins = int((seconds % 3600) // 60)
        secs = seconds % 60
        return f"{hours:02}:{mins:02}:{secs:06.3f}"

# Class to process video frames and generate descriptions
class FrameProcessor:
    def __init__(self):
        from transformers import BlipProcessor, BlipForConditionalGeneration
        from PIL import Image
        import cv2
        # Load the BLIP processor and model for image captioning
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
        self.model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

    def process_video(self, video_path: Path, vtt_path: Path) -> List[Dict]:
        """
        Processes a video to extract frames, generate descriptions, and associate subtitles.

        Args:
            video_path (Path): The path to the video file.
            vtt_path (Path): The path to the subtitle file.

        Returns:
            List[Dict]: A list of dictionaries containing frame metadata.
        """
        video_dir = video_path.parent
        frames_dir = video_dir / "frames"  # Directory to store extracted frames
        frames_dir.mkdir(exist_ok=True)  # Create directory if it doesn't exist
        subtitles = self._parse_vtt(vtt_path)  # Parse subtitles from VTT file
        import cv2
        cap = cv2.VideoCapture(str(video_path))  # Open video file
        fps = cap.get(cv2.CAP_PROP_FPS)  # Get frames per second
        frame_interval = int(round(fps))  # Interval to extract frames (every second)
        metadata = []  # List to store frame metadata
        frame_count = 0
        while cap.isOpened():
            ret, frame = cap.read()  # Read a frame from the video
            if not ret:
                break  # Break if no frame is read
            if frame_count % frame_interval == 0:
                timestamp = frame_count / fps  # Calculate timestamp
                frame_description = self._generate_frame_description(frame)  # Generate description for the frame
                self._process_frame(frame, frame_count, timestamp, frames_dir, subtitles, metadata, frame_description)  # Process the frame
            frame_count += 1
        cap.release()  # Release video file
        return metadata

    def _generate_frame_description(self, frame) -> str:
        """
        Generates a description for the given frame using the BLIP model.

        Args:
            frame: The video frame.

        Returns:
            str: The generated frame description.
        """
        from PIL import Image
        pil_image = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))  # Convert frame to PIL image
        inputs = self.processor(pil_image, return_tensors="pt")  # Prepare input for the model
        out = self.model.generate(**inputs)  # Generate description
        return self.processor.decode(out[0], skip_special_tokens=True)  # Decode the output

    def _process_frame(self, frame, count: int, timestamp: float, frames_dir: Path, subtitles: List[Dict], metadata: List, frame_description: str):
        """
        Processes a single frame, saves it, and adds metadata.

        Args:
            frame: The video frame.
            count (int): The frame count.
            timestamp (float): The timestamp of the frame.
            frames_dir (Path): The directory to save the frame.
            subtitles (List[Dict]): A list of subtitles.
            metadata (List): The list to store frame metadata.
            frame_description (str): The description of the frame.
        """
        frame_path = frames_dir / f"frame_{count}_time_{timestamp:.2f}.jpg"  # Path to save the frame
        cv2.imwrite(str(frame_path), frame, [cv2.IMWRITE_JPEG_QUALITY, 85])  # Save the frame
        metadata.append({
            "frame_path": str(frame_path.relative_to(frames_dir.parent)),  # Relative path to the frame
            "timestamp": round(timestamp, 2),  # Timestamp of the frame
            "subtitles": self._get_matching_subtitles(subtitles, timestamp),  # Matching subtitles for the frame
            "frame_description": frame_description  # Description of the frame
        })

    def _parse_vtt(self, path: Path) -> List[Dict]:
        """
        Parses a VTT file and returns a list of subtitle dictionaries.

        Args:
            path (Path): The path to the VTT file.

        Returns:
            List[Dict]: A list of subtitle dictionaries.
        """
        return [{
            "start": self._time_to_sec(caption.start),  # Start time of the subtitle
            "end": self._time_to_sec(caption.end),  # End time of the subtitle
            "text": caption.text.strip()  # Subtitle text
        } for caption in WebVTT().read(str(path)).captions]

    def _get_matching_subtitles(self, subtitles: List[Dict], timestamp: float) -> List[str]:
        """
        Gets the subtitles that match the given timestamp.

        Args:
            subtitles (List[Dict]): A list of subtitle dictionaries.
            timestamp (float): The timestamp to match.

        Returns:
            List[str]: A list of matching subtitles.
        """
        return [sub['text'] for sub in subtitles if sub['start'] <= timestamp <= sub['end']]

    @staticmethod
    def _time_to_sec(time_str: str) -> float:
        """
        Converts a time string to seconds.

        Args:
            time_str (str): The time string.

        Returns:
            float: The time in seconds.
        """
        parts = list(map(float, time_str.replace(',', '.').split(':')))
        if len(parts) == 3:
            return parts[0] * 3600 + parts[1] * 60 + parts[2]
        elif len(parts) == 2:
            return parts[0] * 60 + parts[1]
        return 0.0

# Main function to process the video pipeline
def process_video_pipeline(url: str, video_id: str):
    """
    Processes a video from the given URL, including downloading, generating subtitles,
    extracting frames, and generating descriptions.

    Args:
        url (str): The URL of the video.
        video_id (str): The unique identifier for the video.
    """
    downloader = VideoDownloader()  # Create a VideoDownloader instance
    transcript_processor = TranscriptProcessor()  # Create a TranscriptProcessor instance
    frame_processor = FrameProcessor()  # Create a FrameProcessor instance

    # Step 1: Download video and extract metadata
    video_info = downloader.download_video(url, video_id)
    if not video_info or not video_info["video_path"].exists():
        print("Video download failed.")
        return

    # Step 2: Download or generate subtitles
    sub_path = downloader.download_subtitles(url, video_id)
    vtt_path = transcript_processor.process_subtitles(video_info["video_path"], sub_path)

    # Step 3: Process frames and generate descriptions
    frame_metadata = frame_processor.process_video(video_info["video_path"], vtt_path)

    # Step 4: Prepare final metadata
    metadata = {
        "title": video_info["title"],
        "description": video_info["description"],
        "transcript": [{
            "start_time": seg["start"],
            "end_time": seg["end"],
            "text": seg["text"]
        } for seg in frame_processor._parse_vtt(vtt_path)],
        "frames": frame_metadata,
        "video_uri": video_info["url"],
    }

    # Step 5: Save metadata
    metadata_path = video_info["video_path"].parent / "metadata.json"
    import json
    with open(metadata_path, 'w') as f:
        json.dump(metadata, f, indent=2)

    print(f"Metadata saved to {metadata_path}")

# Main execution block
if __name__ == "__main__":
    video_urls = [
        "https://www.youtube.com/watch?v=ftDsSB3F5kg",
        "https://www.youtube.com/watch?v=kKFrbhZGNNI",
        "https://www.youtube.com/watch?v=6qUxwZcTXHY",
        "https://www.youtube.com/watch?v=MspNdsh0QcM",
        "https://www.youtube.com/watch?v=Kf57KGwKa0w",
    ]
    for idx, url in enumerate(video_urls, 1):
        video_id = f"video_{idx}"
        process_video_pipeline(url, video_id)  # Process each video URL


Metadata saved to input_data/Videos/video_1/metadata.json
Metadata saved to input_data/Videos/video_2/metadata.json
Metadata saved to input_data/Videos/video_3/metadata.json
Metadata saved to input_data/Videos/video_4/metadata.json
                                                        

## **Storing and Retrieval Pipeline**  
Our pipeline efficiently processes and stores video metadata, enabling efficient retrieval of relevant content using vector embeddings. It consists of the following modular components:



1. **MetadataChunker**  
- Segments video metadata into meaningful chunks for structured storage.  
- Each chunk includes title, description, transcript segments, and associated frame descriptions.  
- Ensures each transcript segment is linked with relevant frame descriptions for better retrieval.  


2. **EmbeddingGenerator**  
- Uses `SentenceTransformer (all-MiniLM-L6-v2)` to generate high-quality vector embeddings.  
- Prepares textual input by combining title, description, transcript, and frame descriptions.  
- Converts each chunk into a numerical representation, enabling efficient similarity searches.  


3. **VectorDB**  
- Implements `ChromaDB`, a high-performance vector database, for metadata storage.  
- Stores chunk embeddings along with structured metadata (timestamps, video URI, etc.)**.  
- Supports fast and scalable retrieval of stored video chunks.  


4. **Retriever**  
- Queries the vector database using sentence embeddings for high-precision retrieval.  
- Uses semantic similarity search to find the most relevant transcript segments.  
- Returns the most relevant video segments, including start time, text, and video URI.  


In [4]:
class MetadataChunker:
    def __init__(self, metadata: Dict):
        self.metadata = metadata

    def chunk_metadata(self) -> List[Dict]:
        """
        Chunk the metadata into smaller, meaningful pieces.
        Each chunk will include:
        - Title
        - Description
        - Transcript segment (start_time, end_time, text)
        - Frame description (if available for the segment)
        - Video URI
        """
        chunks = []

        # Chunk based on transcript segments
        for transcript_segment in self.metadata["transcript"]:
            chunk = {
                "title": self.metadata["title"],
                "description": self.metadata["description"],
                "start_time": transcript_segment["start_time"],
                "end_time": transcript_segment["end_time"],
                "text": transcript_segment["text"],
                "video_uri": self.metadata["video_uri"],
            }

            # Add frame descriptions if available for the segment
            frame_descriptions = []
            for frame in self.metadata["frames"]:
                if transcript_segment["start_time"] <= frame["timestamp"] <= transcript_segment["end_time"]:
                    frame_descriptions.append(frame["frame_description"])
            chunk["frame_descriptions"] = frame_descriptions

            chunks.append(chunk)

        return chunks

class EmbeddingGenerator:
    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def generate_embeddings(self, chunks: List[Dict]) -> List[List[float]]:
        """
        Generate embeddings for each chunk's text.
        """
        texts = [self._prepare_text(chunk) for chunk in chunks]
        return self.model.encode(texts).tolist()

    @staticmethod
    def _prepare_text(chunk: Dict) -> str:
        """
        Prepare the text for embedding by combining relevant fields.
        """
        text = f"Title: {chunk['title']}\nDescription: {chunk['description']}\nTranscript: {chunk['text']}"
        if chunk["frame_descriptions"]:
            text += f"\nFrame Descriptions: {' '.join(chunk['frame_descriptions'])}"
        return text

class VectorDB:
    def __init__(self, db_path: str = "./chroma_db"):
        self.client = chromadb.Client(Settings(persist_directory=db_path, is_persistent=True))
        self.collection = self.client.get_or_create_collection(name="video_metadata")

    def store_chunks(self, chunks: List[Dict], embeddings: List[List[float]]):
        """
        Store chunks and their embeddings in the vector database.
        """
        ids = [str(i) for i in range(len(chunks))]
        documents = [self._prepare_document(chunk) for chunk in chunks]
        metadatas = [self._prepare_metadata(chunk) for chunk in chunks]

        self.collection.add(
            ids=ids,
            embeddings=embeddings,
            documents=documents,
            metadatas=metadatas,
        )

    @staticmethod
    def _prepare_document(chunk: Dict) -> str:
        """
        Prepare the document text for storage.
        """
        return f"Title: {chunk['title']}\nDescription: {chunk['description']}\nTranscript: {chunk['text']}"

    @staticmethod
    def _prepare_metadata(chunk: Dict) -> Dict:
        """
        Prepare metadata for storage.
        """
        return {
            "start_time": chunk["start_time"],
            "end_time": chunk["end_time"],
            "video_uri": chunk["video_uri"],
        }

class Retriever:
    def __init__(self, db_path: str = "./chroma_db"):
        self.client = chromadb.Client(Settings(persist_directory=db_path, is_persistent=True))
        self.collection = self.client.get_collection(name="video_metadata")
        self.embedding_generator = EmbeddingGenerator()

    def retrieve(self, query: str, top_k: int = 3) -> List[Dict]:
        """
        Retrieve the most relevant chunks based on the user's query.
        """
        query_embedding = self.embedding_generator.generate_embeddings([{"title": "", "description": "", "text": query, "frame_descriptions": []}])[0]
        results = self.collection.query(
            query_embeddings=[query_embedding],
            n_results=top_k,
        )

        # Format the results
        retrieved_chunks = []
        for i in range(len(results["ids"][0])):
            retrieved_chunks.append({
                "video_uri": results["metadatas"][0][i]["video_uri"],
                "start_time": results["metadatas"][0][i]["start_time"],
                "text": results["documents"][0][i],
            })

        return retrieved_chunks
def process_all_metadata_for_vectordb(metadata_dir: str):
    """
    Process all metadata files in the directory and store them in the vector database.
    """
    metadata_dir = Path(metadata_dir)
    vectordb = VectorDB()
    embedding_generator = EmbeddingGenerator()

    # Iterate through all metadata files
    for metadata_file in metadata_dir.glob("**/metadata.json"):
        print(f"Processing {metadata_file}...")

        # Load metadata
        with open(metadata_file, "r") as f:
            metadata = json.load(f)

        # Chunk metadata
        chunker = MetadataChunker(metadata)
        chunks = chunker.chunk_metadata()

        # Generate embeddings
        embeddings = embedding_generator.generate_embeddings(chunks)

        # Store in VectorDB
        vectordb.store_chunks(chunks, embeddings)

    print(f"All metadata processed and stored in VectorDB.")

def query_vectordb(query: str):
    """
    Query the vector database and retrieve relevant results.
    """
    retriever = Retriever()
    results = retriever.retrieve(query)

    print("Retrieved Results:")
    for result in results:
        print(f"Video URI: {result['video_uri']}")
        print(f"Start Time: {result['start_time']}")
        print(f"Text: {result['text']}")
        print("-" * 50)


if __name__ == "__main__":
    # Step 1: Process all metadata files and store in VectorDB
    metadata_dir = "./input_data/Videos"
    process_all_metadata_for_vectordb(metadata_dir)

    # Step 2: Query the VectorDB
    query = "What is the difference between story and script?"
    query_vectordb(query)

Processing input_data/Videos/video_2/metadata.json...


Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

Processing input_data/Videos/video_5/metadata.json...


Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

Processing input_data/Videos/video_4/metadata.json...


Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

Processing input_data/Videos/video_3/metadata.json...


Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

Processing input_data/Videos/video_1/metadata.json...


Add of existing embedding ID: 0
Add of existing embedding ID: 1
Add of existing embedding ID: 2
Add of existing embedding ID: 3
Add of existing embedding ID: 4
Add of existing embedding ID: 5
Add of existing embedding ID: 6
Add of existing embedding ID: 7
Add of existing embedding ID: 8
Add of existing embedding ID: 9
Add of existing embedding ID: 10
Add of existing embedding ID: 11
Add of existing embedding ID: 12
Add of existing embedding ID: 13
Add of existing embedding ID: 14
Add of existing embedding ID: 15
Add of existing embedding ID: 16
Add of existing embedding ID: 17
Add of existing embedding ID: 18
Add of existing embedding ID: 19
Add of existing embedding ID: 20
Add of existing embedding ID: 21
Add of existing embedding ID: 22
Add of existing embedding ID: 23
Add of existing embedding ID: 24
Add of existing embedding ID: 25
Add of existing embedding ID: 26
Add of existing embedding ID: 27
Add of existing embedding ID: 28
Add of existing embedding ID: 29
Add of existing embe

All metadata processed and stored in VectorDB.
Retrieved Results:
Video URI: https://www.youtube.com/watch?v=MspNdsh0QcM
Start Time: 249.55
Text: Title: स्टोरीबोर्ड का निर्माण भाग - 1
Description: For more information and related videos visit us on http://www.digitalgreen.org/
Transcript: how to convert a story into a script and story board
--------------------------------------------------
Video URI: https://www.youtube.com/watch?v=MspNdsh0QcM
Start Time: 245.84
Text: Title: स्टोरीबोर्ड का निर्माण भाग - 1
Description: For more information and related videos visit us on http://www.digitalgreen.org/
Transcript: what a story board is and
how to convert a story into a script and story board
--------------------------------------------------
Video URI: https://www.youtube.com/watch?v=MspNdsh0QcM
Start Time: 249.56
Text: Title: स्टोरीबोर्ड का निर्माण भाग - 1
Description: For more information and related videos visit us on http://www.digitalgreen.org/
Transcript: how to convert a story into 