# Database Connection

In [None]:
from pymongo import MongoClient

# MongoDB connection: client for the server at localhost:27017.
# Later cells derive the database and collection handles from this object.
client = MongoClient('mongodb://localhost:27017/')

# Create Database and Collection

In [None]:
# Handle to the 'VideoKeyframeDB' database on the connected server.
# NOTE(review): MongoDB presumably creates the database lazily on first write — confirm.
db = client['VideoKeyframeDB']

# Handle to the 'videos' collection, which holds one metadata document per video.
collection = db['videos']

# Prepare and insert Data

This script processes video data: it reads the shot boundaries and the already-extracted keyframe images of each video, stores the keyframe images in GridFS, and inserts the resulting video metadata into a MongoDB database. The main steps and functionalities include:

1. **Configuration Parameters**:
   - **Directories**: Defines base paths for video files, shot boundaries, and keyframes.
   - **Database Setup**: Uses GridFS for storing keyframe images and MongoDB for storing video metadata.

2. **Parsing and Extraction Functions**:
   - **Parse Shot Boundaries**: Reads shot boundaries from a specified file and returns them as a list of (start, end) pairs, one per line of the file.
   - **Extract Framerate**: Retrieves the framerate of a given video file.
   - **Read Keyframe Image**: Reads a keyframe image from a specified file path.

3. **Data Preparation**:
   - **Prepare Video Data**: Constructs a dictionary containing video metadata, including shot boundaries and keyframes. It reads and stores keyframe images in GridFS and references these images in the video metadata.

4. **Database Insertion**:
   - **Insert All Videos**: Iterates over a specified range of video IDs, prepares the video data, and inserts it into the MongoDB collection if the necessary shot boundaries and keyframes exist.


In [None]:
import os
import cv2
import gridfs

# GridFS handle on the database; prepare_video_data uses it to store keyframe image bytes.
fs = gridfs.GridFS(db)

def parse_shot_boundaries(file_path):
    """
    Parse the shot boundaries from a file.

    Every non-blank line is split on ``', '`` and becomes one entry. Blank
    (or whitespace-only) lines are skipped, so a trailing newline at the end
    of the file no longer yields an empty entry that callers fail to unpack
    into ``(start, end)``.

    Args:
        file_path (str): Path to the shot boundaries file.

    Returns:
        list: List of tuples containing start and end times for each shot.
            The values are kept as the strings found in the file.
    """
    shots = []
    with open(file_path, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Skip empty lines instead of emitting a one-element entry.
                continue
            shots.append(tuple(stripped.split(', ')))
    return shots

def extract_framerate(video_path):
    """
    Extract the framerate of a video.

    The capture handle is released even if querying the property raises.

    NOTE(review): OpenCV does not raise for a missing or unreadable file here;
    it presumably reports 0.0 in that case — confirm, and treat 0 as "unknown".

    Args:
        video_path (str): Path to the video file.

    Returns:
        float: The framerate of the video as reported by OpenCV.
    """
    video = cv2.VideoCapture(video_path)
    try:
        return video.get(cv2.CAP_PROP_FPS)
    finally:
        # Always free the underlying capture resources.
        video.release()

def read_keyframe_image(file_path):
    """
    Load the raw bytes of a keyframe image.

    The file is opened in binary mode and read in full; no image decoding
    takes place.

    Args:
        file_path (str): Path to the keyframe image file.

    Returns:
        bytes: The complete content of the file.
    """
    with open(file_path, mode='rb') as image_file:
        image_bytes = image_file.read()
    return image_bytes

def prepare_video_data(video_id, collection_name, video_path, shot_boundaries_path, keyframes_path):
    """
    Build the metadata document for one video.

    Shots are numbered from 1 in file order. For each shot the keyframe file
    ``<video_id>_Scene-<n>_.jpg`` is looked up in ``keyframes_path``; when it
    exists its bytes are stored through the module-level GridFS handle ``fs``
    and a frame entry referencing the stored file is added. Shots without a
    keyframe file are reported on stdout and left out of the result.

    Args:
        video_id (str): The ID of the video.
        collection_name (str): The name of the collection.
        video_path (str): Path to the video file.
        shot_boundaries_path (str): Path to the shot boundaries file.
        keyframes_path (str): Path to the directory containing keyframes.

    Returns:
        dict: The prepared video data (VideoID, CollectionName,
            OriginalFramerate, EditedFramerate and the list of Frames).
    """
    source_fps = extract_framerate(video_path)
    shot_list = parse_shot_boundaries(shot_boundaries_path)

    frame_docs = []
    for scene_no, (shot_start, shot_end) in enumerate(shot_list, start=1):
        scene_id = f"{video_id}_Scene-{scene_no}"
        image_path = os.path.join(keyframes_path, f"{scene_id}_.jpg")
        print(image_path)

        # Guard clause: without a keyframe image there is nothing to store.
        if not os.path.exists(image_path):
            print(f"Missing keyframe image: {image_path}")
            continue

        image_bytes = read_keyframe_image(image_path)
        stored_file_id = fs.put(image_bytes, filename=f"{scene_id}.jpg")
        frame_docs.append({
            "FrameID": scene_id,
            "Starttime": shot_start,
            "Endtime": shot_end,
            "KeyframeImageID": stored_file_id,
            "Objects": []
        })

    return {
        "VideoID": video_id,
        "CollectionName": collection_name,
        "OriginalFramerate": source_fps,
        "EditedFramerate": 24,
        "Frames": frame_docs
    }

def insert_all_videos(base_video_path, base_shot_boundaries_path, base_keyframes_path, collection_name,
                      start=100, stop=200):
    """
    Insert all video data into the database.

    For every video number in ``range(start, stop)`` the zero-padded
    five-digit ID is built and the expected paths are derived:
    ``<base_video_path>/<id>/<id>.mp4``,
    ``<base_shot_boundaries_path>/<id>_shots.txt`` and
    ``<base_keyframes_path>/<id>``. Videos whose shot boundaries file or
    keyframe directory is missing are reported on stdout and skipped.

    Args:
        base_video_path (str): Base path to the video files.
        base_shot_boundaries_path (str): Base path to the shot boundaries files.
        base_keyframes_path (str): Base path to the keyframe directories.
        collection_name (str): The name of the collection.
        start (int): First video number to process (inclusive). Defaults to
            100, i.e. video 00100.
        stop (int): Video number to stop at (exclusive). Defaults to 200,
            i.e. the last processed video is 00199.
    """
    for video_num in range(start, stop):
        video_id = f"{video_num:05d}"
        video_path = os.path.join(base_video_path, video_id, f"{video_id}.mp4")
        shot_boundaries_path = os.path.join(base_shot_boundaries_path, f"{video_id}_shots.txt")
        keyframes_path = os.path.join(base_keyframes_path, video_id)

        if not (os.path.exists(shot_boundaries_path) and os.path.exists(keyframes_path)):
            print(f"Missing video data for video {video_id}: {shot_boundaries_path} or {keyframes_path}")
            continue

        if not os.path.exists(video_path):
            # The document is still inserted (best effort), but make the
            # problem visible: the stored framerate will not be meaningful.
            print(f"Warning: video file not found, OriginalFramerate may be invalid: {video_path}")

        video_data = prepare_video_data(video_id, collection_name, video_path, shot_boundaries_path, keyframes_path)
        collection.insert_one(video_data)

# Change according to config
# Videos are expected at <base_video_path>/<id>/<id>.mp4
base_video_path = 'V3C1-100'
# Shot boundary files are expected at <base_shot_boundaries_path>/<id>_shots.txt
base_shot_boundaries_path = 'shot_boundaries_converted'
# Keyframe images are expected in one sub-directory per video: <base_keyframes_path>/<id>/
base_keyframes_path = 'keyframes'
# Stored as "CollectionName" in every inserted video document
collection_name = 'V3C100'

# Runs the import immediately when this cell is executed.
insert_all_videos(base_video_path, base_shot_boundaries_path, base_keyframes_path, collection_name)


# Update Framerates (Do not execute if OriginalFramerate and EditedFramerate were already set during the initial insert)

This script connects to a MongoDB database and updates the framerate information for all videos stored in the database. The primary steps and functionalities include:

1. **Database Connection**:
   - **MongoDB Setup**: Connects to a MongoDB instance and accesses the `VideoKeyframeDB` database and the `videos` collection.

2. **Extract Framerate**:
   - **Framerate Extraction**: Utilizes OpenCV to extract the framerate from a given video file.

3. **Update Video Framerates**:
   - **Database Query**: Retrieves all video records from the MongoDB collection.
   - **Path Construction**: Constructs the file path for each video based on its ID and the base video directory.
   - **Framerate Update**: Updates each video record in the database with the original framerate and a fixed edited framerate of 24 FPS. If the video file does not exist, it logs an error message.


In [None]:
import os
import cv2
import pymongo

# Connect to MongoDB at localhost:27017 and select the 'videos' collection
# of the 'VideoKeyframeDB' database; the functions below use `collection` as a global.
client = pymongo.MongoClient('mongodb://localhost:27017/')
db = client['VideoKeyframeDB']
collection = db['videos']

def extract_framerate(video_path):
    """
    Extract the framerate of a video.

    The capture handle is released even if querying the property raises.

    NOTE(review): OpenCV does not raise for a missing or unreadable file here;
    it presumably reports 0.0 in that case — confirm, and treat 0 as "unknown".

    Args:
        video_path (str): Path to the video file.

    Returns:
        float: The framerate of the video as reported by OpenCV.
    """
    video = cv2.VideoCapture(video_path)
    try:
        return video.get(cv2.CAP_PROP_FPS)
    finally:
        # Always free the underlying capture resources.
        video.release()

def update_video_framerates(base_video_path):
    """
    Refresh the framerate fields of every video document in the collection.

    Each document's video file is expected at
    ``<base_video_path>/<VideoID>/<VideoID>.mp4``. When the file exists, its
    framerate is written to ``OriginalFramerate`` and ``EditedFramerate`` is
    set to 24; otherwise a message is printed and the document is left
    untouched.

    Args:
        base_video_path (str): Base path to the video files.
    """
    for record in collection.find({}):
        vid = record['VideoID']
        file_path = os.path.join(base_video_path, vid, f"{vid}.mp4")

        # Guard clause: nothing to measure without the video file.
        if not os.path.exists(file_path):
            print(f"Video file not found: {file_path}")
            continue

        fps = extract_framerate(file_path)
        selector = {"VideoID": vid}
        changes = {"$set": {
            "OriginalFramerate": fps,
            "EditedFramerate": 24
        }}
        collection.update_one(selector, changes)
        print(f"Updated VideoID: {vid} with OriginalFramerate: {fps} and EditedFramerate: 24")

# Change according to your config
# Videos are expected at <base_video_path>/<VideoID>/<VideoID>.mp4
base_video_path = 'V3C1-100'

# Runs the update immediately when this cell is executed.
update_video_framerates(base_video_path)


# Update Database with Converted Shot Boundaries (Do not execute if the converted shot boundaries were already used for the initial insert)
This script updates the shot boundaries for videos stored in a MongoDB database using converted shot boundaries files. The primary steps and functionalities include:

1. **Database Connection**:
   - **MongoDB Setup**: Connects to a MongoDB instance and accesses the `VideoKeyframeDB` database and the `videos` collection.

2. **Parsing Shot Boundaries**:
   - **Parse Shot Boundaries**: Reads shot boundaries from a specified file and returns them as a list of (start, end) pairs, one per line of the file.

3. **Update Shot Boundaries**:
   - **Update Individual Video**: Updates the shot boundaries for a specific video in the database.
   - **Update All Videos**: Iterates over a specified range of video IDs, parses the shot boundaries, and updates the corresponding video records in the database.

### Example Usage
- **Converted Shot Boundaries Path**: `shot_boundaries_converted` (directory containing the converted shot boundaries files).

### Main Script Execution
- **Update All Videos**: Calls the function to update the shot boundaries for all videos in the specified range.


In [None]:
from pymongo import MongoClient
import os

# Configuration: connect to MongoDB at localhost:27017 and select the 'videos'
# collection of the 'VideoKeyframeDB' database; the functions below use `collection` as a global.
client = MongoClient('mongodb://localhost:27017/')
db = client['VideoKeyframeDB']
collection = db['videos']

def parse_shot_boundaries(file_path):
    """
    Parse the shot boundaries from a file.

    Every non-blank line is split on ``', '`` and becomes one entry. Blank
    (or whitespace-only) lines are skipped, so a trailing newline at the end
    of the file no longer yields an empty entry that callers fail to unpack
    into ``(start, end)``.

    Args:
        file_path (str): Path to the shot boundaries file.

    Returns:
        list: List of tuples containing start and end times for each shot.
            The values are kept as the strings found in the file.
    """
    shots = []
    with open(file_path, 'r') as file:
        for line in file:
            stripped = line.strip()
            if not stripped:
                # Skip empty lines instead of emitting a one-element entry.
                continue
            shots.append(tuple(stripped.split(', ')))
    return shots

def update_shot_boundaries(video_id, converted_shot_boundaries_file):
    """
    Overwrite the start and end times of a video's frames in the database.

    Shots are numbered from 1 in file order. Shot ``n`` is applied to the
    frame whose ``FrameID`` is ``<video_id>_Scene-<n>``, with one update
    request issued per shot. A confirmation line is printed once all
    requests have been sent.

    Args:
        video_id (str): The ID of the video.
        converted_shot_boundaries_file (str): Path to the converted shot boundaries file.
    """
    shot_list = parse_shot_boundaries(converted_shot_boundaries_file)
    for scene_no, (shot_start, shot_end) in enumerate(shot_list, start=1):
        # The positional operator `$` in the update refers to the array
        # element matched by the "Frames.FrameID" condition of the filter.
        selector = {"VideoID": video_id, "Frames.FrameID": f"{video_id}_Scene-{scene_no}"}
        new_times = {"$set": {
            "Frames.$.Starttime": shot_start,
            "Frames.$.Endtime": shot_end
        }}
        collection.update_one(selector, new_times)
    print(f"Updated shot boundaries for video {video_id}")

def update_all_videos(converted_shot_boundaries_path, start=100, stop=200):
    """
    Update the shot boundaries for all videos in a specified range.

    For every video number in ``range(start, stop)`` the zero-padded
    five-digit ID is built and the file
    ``<converted_shot_boundaries_path>/<id>_shots.txt`` is looked up. Existing
    files are applied to the database; missing ones are reported on stdout.

    Args:
        converted_shot_boundaries_path (str): Base path to the converted shot boundaries files.
        start (int): First video number to process (inclusive). Defaults to
            100, i.e. video 00100.
        stop (int): Video number to stop at (exclusive). Defaults to 200,
            i.e. the last processed video is 00199.
    """
    for video_num in range(start, stop):
        video_id = f"{video_num:05d}"
        converted_shot_boundaries_file = os.path.join(converted_shot_boundaries_path, f"{video_id}_shots.txt")

        if os.path.exists(converted_shot_boundaries_file):
            print(f"Updating shot boundaries for video: {video_id}")
            update_shot_boundaries(video_id, converted_shot_boundaries_file)
        else:
            print(f"Missing converted shot boundaries file: {converted_shot_boundaries_file}")

# Entry point: only runs when executed as the main module (e.g. as a notebook cell or script).
if __name__ == "__main__":
    # Change according to your configuration
    # Directory holding the converted files named <id>_shots.txt
    converted_shot_boundaries_path = 'shot_boundaries_converted'
    
    update_all_videos(converted_shot_boundaries_path)
