# Video Pre-processing System

This code preprocesses video files by resizing them, adjusting the frame rate, and optionally converting them to grayscale. Additionally, it has the capability to convert videos to MP4 format and assess the quality of video frames. The processed videos are saved in a specified output directory with a specified format.

### Features:
- **Resizing**: Videos are resized to specified dimensions.
- **Frame Rate Adjustment**: Videos are processed to have a target frame rate.
- **Grayscale Conversion**: Optionally convert videos to grayscale.
- **Multi-threading**: Utilizes threading to process multiple videos concurrently.
- **Format Conversion**: Converts videos to MP4 format if needed.
- **Quality Assessment**: Assesses the brightness and contrast of video frames.
- **Adaptive Histogram Equalization**: Optionally applies CLAHE based on video quality (this step is currently disabled in `preprocess_video`).

The processed videos are stored in the specified output directory with the desired format.


In [None]:
import os
import cv2
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

# Configuration parameters.
# These are module-level globals: process_videos() reads input_dir,
# output_format and max_workers directly instead of taking them as arguments.
input_dir = 'V3C1-100/'  # Directory containing input videos (searched recursively)
output_dir = 'preprocessed_videos_test/'  # Directory to save preprocessed videos
output_format = 'mp4'  # Extension given to every output file
resize_width = 640  # Width to resize video frames, in pixels
resize_height = 480  # Height to resize video frames, in pixels
convert_to_grayscale = False  # Flag to convert video frames to grayscale
frame_rate = 24  # Target frame rate for output videos
max_workers = 4  # Number of worker threads for parallel processing

# Ensure the output directory exists (no error if it is already there)
os.makedirs(output_dir, exist_ok=True)

def assess_quality(frame):
    """
    Assess the brightness and contrast of a video frame.

    Args:
        frame (np.ndarray): Input video frame. Either a colour image in
            OpenCV's BGR channel order or an already single-channel
            (2-D) grayscale image.

    Returns:
        tuple: ``(brightness, contrast)`` where brightness is the mean and
        contrast is the standard deviation of the grayscale intensities.
    """
    # Convert once and reuse the result for both statistics; a 2-D frame is
    # already grayscale and would make cvtColor(BGR2GRAY) raise.
    gray = frame if frame.ndim == 2 else cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    return np.mean(gray), np.std(gray)

def convert_to_mp4(input_path, output_path):
    """
    Convert a video to MP4 format.

    Frames are copied unchanged (same size and frame rate as the source) and
    re-encoded with the 'mp4v' codec. Failures are reported by printing a
    message; no exception propagates to the caller.

    Args:
        input_path (str): Path to the input video file.
        output_path (str): Path to save the converted video file.
    """
    cap = None
    out = None
    converted = False
    try:
        print(f"Converting {input_path} to MP4 format.")
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f"Failed to open video file: {input_path}")

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        # A non-positive (or NaN) rate cannot be used to time the output.
        if not fps > 0:
            raise ValueError(f"Failed to get frame rate for video file: {input_path}")

        out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))
        if not out.isOpened():
            raise ValueError(f"Failed to open video writer for: {output_path}")

        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(frame)
        converted = True
    except Exception as e:
        print(f"Error converting {input_path}: {e}")
    finally:
        # Release on every path so file handles are not leaked when an
        # error interrupts the conversion.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()

    # Report success only after the writer has been released.
    if converted:
        print(f"Successfully converted {input_path} to {output_path}")

def preprocess_video(input_path, output_path, resize_dim, grayscale, frame_rate, clip_limit=2.0, tile_grid_size=(8, 8)):
    """
    Preprocess a video by resizing and changing the frame rate.

    Source frames are resampled by timestamp so the output keeps the source
    duration: frames are dropped when the source is faster than the target
    rate and repeated when it is slower. Failures are reported by printing a
    message; no exception propagates to the caller.

    Args:
        input_path (str): Path to the input video file.
        output_path (str): Path to save the preprocessed video file.
        resize_dim (tuple): (width, height) to resize the video frames to.
        grayscale (bool): Flag to convert video frames to grayscale.
        frame_rate (int): Target frame rate for the output video.
        clip_limit (float): CLAHE clip limit for adaptive histogram equalization.
            Currently unused: the CLAHE step is disabled.
        tile_grid_size (tuple): Grid size for CLAHE. Currently unused.
    """
    cap = None
    out = None
    succeeded = False
    try:
        print(f"Processing video: {input_path}")
        cap = cv2.VideoCapture(input_path)
        if not cap.isOpened():
            raise ValueError(f"Failed to open video file: {input_path}")

        original_frame_rate = cap.get(cv2.CAP_PROP_FPS)
        # "not x > 0" also rejects NaN, which "x == 0" would let through.
        if not original_frame_rate > 0:
            raise ValueError(f"Failed to get frame rate for video file: {input_path}")
        if not frame_rate > 0:
            raise ValueError(f"Invalid target frame rate: {frame_rate}")

        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, frame_rate, resize_dim, not grayscale)
        if not out.isOpened():
            raise ValueError(f"Failed to open video writer for: {output_path}")

        frames_read = 0
        frames_written = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frames_read += 1

            # Output frame k is shown at k / frame_rate seconds; the frames
            # read so far cover frames_read / original_frame_rate seconds.
            # Skip this source frame if no output timestamp falls inside it.
            if frames_written * original_frame_rate >= frames_read * frame_rate:
                continue

            # Resize frame
            frame = cv2.resize(frame, resize_dim, interpolation=cv2.INTER_AREA)

            # Convert to grayscale if specified
            if grayscale:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            # Write the frame once per output timestamp it covers (more than
            # once only when the source is slower than the target rate).
            while frames_written * original_frame_rate < frames_read * frame_rate:
                out.write(frame)
                frames_written += 1
        succeeded = True
    except Exception as e:
        print(f"Error processing {input_path}: {e}")
    finally:
        # Release on every path so handles are not leaked on failure.
        if cap is not None:
            cap.release()
        if out is not None:
            out.release()

    # Report success only after the writer has been released.
    if succeeded:
        print(f"Successfully processed video: {input_path}")

def get_video_files(input_directory):
    """
    Collect the paths of all video files below a directory.

    The directory tree is walked recursively and a file is kept when its
    name ends with one of '.mp4', '.avi', '.mov' or '.mkv' (the comparison
    is case-sensitive).

    Args:
        input_directory (str): Path to the directory containing video files.

    Returns:
        list: List of paths to video files.
    """
    extensions = ('.mp4', '.avi', '.mov', '.mkv')
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(input_directory)
        for name in names
        if name.endswith(extensions)
    ]

def is_valid_video(file_path):
    """
    Check if a video file is valid and can be opened.

    A file counts as valid when OpenCV can open it and read its first frame.

    Args:
        file_path (str): Path to the video file.

    Returns:
        bool: True if the video file is valid, False otherwise.
    """
    try:
        cap = cv2.VideoCapture(file_path)
    except Exception:
        return False

    try:
        if not cap.isOpened():
            return False
        ret, _ = cap.read()
        return bool(ret)
    except Exception:
        # Any decoding failure means the file is not usable; unlike a bare
        # "except:", this does not swallow KeyboardInterrupt/SystemExit.
        return False
    finally:
        # Release on every path, including when read() raises.
        cap.release()

def process_videos(video_files, output_directory, resize_dim, grayscale, frame_rate, clip_limit=2.0, tile_grid_size=(8, 8)):
    """
    Process a list of video files by resizing and changing the frame rate.

    Each input is mapped to an output path that mirrors its location
    relative to the module-level ``input_dir``, with the module-level
    ``output_format`` as extension. Inputs whose output already exists and is
    a valid video are skipped. Non-MP4 inputs are first converted to an MP4
    file next to the source. Preprocessing runs on a thread pool sized by the
    module-level ``max_workers``.

    Args:
        video_files (list): List of paths to video files to be processed.
        output_directory (str): Directory to save the processed videos.
        resize_dim (tuple): Dimensions to resize the video frames.
        grayscale (bool): Flag to convert video frames to grayscale.
        frame_rate (int): Target frame rate for the output videos.
        clip_limit (float): CLAHE clip limit for adaptive histogram equalization.
        tile_grid_size (tuple): Grid size for CLAHE.
    """
    # Outputs already handed to a worker. Two inputs that differ only by
    # extension map to the same output; without this guard they would be
    # written concurrently, or deleted mid-write by the validity check.
    scheduled_outputs = set()

    # The progress bar is the outer context so that it is closed only after
    # the executor has shut down, i.e. after every done-callback has run.
    with tqdm(total=len(video_files), desc="Processing Videos", unit="video") as pbar, \
            ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = []
        for video_file in video_files:
            relative_path = os.path.relpath(video_file, input_dir)
            output_file = os.path.join(output_directory, os.path.splitext(relative_path)[0] + '.' + output_format)

            if output_file in scheduled_outputs:
                print(f"Output file {output_file} is already being produced from another input. Skipping {video_file}.")
                pbar.update()
                continue

            os.makedirs(os.path.dirname(output_file), exist_ok=True)

            # Check if the output file already exists and is a valid video
            if os.path.exists(output_file):
                if is_valid_video(output_file):
                    print(f"Output file {output_file} already exists and is valid. Skipping processing.")
                    pbar.update()
                    continue
                else:
                    print(f"Output file {output_file} exists but is invalid. Deleting and reprocessing.")
                    os.remove(output_file)

            # If the video is not in MP4 format, convert it first. The check
            # ignores case so that e.g. 'X.MP4' is not converted onto 'X.mp4'.
            if not video_file.lower().endswith('.mp4'):
                intermediate_output = os.path.splitext(video_file)[0] + '.mp4'
                if os.path.exists(intermediate_output) and is_valid_video(intermediate_output):
                    # Never overwrite a usable file in the input tree.
                    print(f"Intermediate file {intermediate_output} already exists and is valid. Reusing it.")
                else:
                    convert_to_mp4(video_file, intermediate_output)
                video_file = intermediate_output

            scheduled_outputs.add(output_file)
            future = executor.submit(preprocess_video, video_file, output_file, resize_dim, grayscale, frame_rate, clip_limit, tile_grid_size)
            # Advance the bar as soon as each video finishes.
            future.add_done_callback(lambda _done: pbar.update())
            futures.append(future)

        for future in futures:
            future.result()  # Wait for all threads to complete

# Main script execution: discover the inputs, then preprocess them in parallel.
print("Starting video pre-processing...")
video_files = get_video_files(input_dir)
print(f"Found {len(video_files)} video files.")
resize_dim = (resize_width, resize_height)  # (width, height)
process_videos(video_files, output_dir, resize_dim, convert_to_grayscale, frame_rate)
# preprocess_video reports failures by printing and does not raise, so this
# line is reached even when some files failed; do not claim blanket success.
print("Video pre-processing finished. Check the messages above for any per-file errors.")


# Video Integrity Checker

This script checks the integrity of video files within a specified root folder and its subfolders. It utilizes `ffmpeg` and OpenCV to perform a thorough analysis, including:

- **FFmpeg-based Error Detection**: Uses `ffmpeg` to decode video files and detect errors during decoding.
- **Frame Analysis with OpenCV**: Samples roughly one frame per second from each video and analyzes the samples for pixelation and lag by calculating the PSNR (Peak Signal-to-Noise Ratio) between successive sampled frames.
- **Error Logging**: Logs and filters relevant errors found during the `ffmpeg` analysis and frame extraction process.

The script processes videos with extensions `.mp4`, `.avi`, `.mov`, and `.mkv`, and outputs a detailed error log for any video files with detected issues.


In [None]:
import os
import ffmpeg
import cv2
import numpy as np

def check_video_integrity(video_path):
    """
    Check the integrity of a video file using ffmpeg-python and frame analysis.

    Two checks are combined into one log:
      1. The file is decoded with ffmpeg into the 'null' muxer. If ffmpeg
         exits with a non-zero code, the stderr lines containing "damaged"
         or "Error" are kept, or a generic message if none match.
      2. Roughly one frame per second is read with OpenCV. Unreadable frames
         are reported, as are pairs of successive sampled frames whose PSNR
         is below 30.

    NOTE(review): the compared frames are about one second apart, so ordinary
    scene motion may push the PSNR below 30. Confirm that this threshold
    gives useful results on real footage.

    Args:
        video_path (str): Path to the video file to be checked.

    Returns:
        str: Error log if there are errors, otherwise None. If the check
        itself raises, the exception text is returned as the log.
    """
    cap = None
    try:
        # Run ffmpeg to decode the video file and check for errors
        process = (
            ffmpeg
            .input(video_path)
            .output('null', format='null')
            .run_async(pipe_stderr=True, quiet=True)
        )
        _, err = process.communicate()
        error_log = ""

        if process.returncode != 0:
            # stderr may not be captured at all (None) and may hold bytes
            # that are not valid UTF-8, so decode defensively.
            stderr_text = (err or b"").decode('utf-8', errors='replace')
            # Filter out common but less relevant ffmpeg errors
            filtered_errors = [
                line for line in stderr_text.split('\n')
                if "damaged" in line or "Error" in line
            ]
            if not filtered_errors:
                # A failed decode must never be reported as "okay".
                filtered_errors.append(f"ffmpeg exited with return code {process.returncode}.")
            error_log = "\n".join(filtered_errors)

        # Extract and analyze frames for pixelation and lag
        frame_errors = []
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            frame_errors.append("Video could not be opened for frame analysis.\n")
        else:
            frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
            fps = cap.get(cv2.CAP_PROP_FPS)
            # One frame per second; int(fps) is 0 for 0 < fps < 1, and a
            # step of 0 would make range() raise.
            interval = max(1, int(fps)) if fps > 0 else 1

            prev_frame = None
            prev_index = None
            for i in range(0, frame_count, interval):
                cap.set(cv2.CAP_PROP_POS_FRAMES, i)
                ret, frame = cap.read()
                if not ret:
                    frame_errors.append(f"Frame {i} could not be read.\n")
                    continue

                if prev_frame is not None:
                    # Calculate the PSNR between successive sampled frames
                    psnr = cv2.PSNR(prev_frame, frame)
                    if psnr < 30:  # Threshold for detecting significant quality loss
                        # prev_index, not i - interval: after an unreadable
                        # sample the previous good frame is further back.
                        frame_errors.append(
                            f"Significant quality loss detected between frames {prev_index} and {i} (PSNR: {psnr}).\n"
                        )

                prev_frame = frame
                prev_index = i

        if frame_errors:
            error_log += "\n" + "".join(frame_errors)

        return error_log if error_log else None
    except Exception as e:
        # Fall back to repr() so an exception with an empty message is not
        # mistaken for "no errors" by the caller's truthiness test.
        return str(e) or repr(e)
    finally:
        if cap is not None:
            cap.release()

def find_videos_in_subfolders(root_folder):
    """
    Collect every video file below the given root folder.

    The tree is walked recursively; a file is kept when its lower-cased name
    ends with '.mp4', '.avi', '.mov' or '.mkv'.

    Args:
        root_folder (str): Path to the root folder to search for video files.

    Returns:
        list: List of paths to video files found in the root folder and its subfolders.
    """
    suffixes = ('.mp4', '.avi', '.mov', '.mkv')
    matches = []
    for folder, _dirs, names in os.walk(root_folder):
        matches.extend(
            os.path.join(folder, name)
            for name in names
            if name.lower().endswith(suffixes)
        )
    return matches

def main(root_folder):
    """
    Check every video under a root folder and print a report.

    Prints a message and returns early when the folder does not exist or
    contains no video files. Otherwise each file is checked in turn and a
    summary of the files with errors (or a no-errors message) is printed.

    Args:
        root_folder (str): Path to the root folder containing video files.
    """
    if not os.path.exists(root_folder):
        print(f"Root folder '{root_folder}' does not exist.")
        return

    candidates = find_videos_in_subfolders(root_folder)
    if not candidates:
        print("No video files found in the specified directory.")
        return

    # Maps each failing path to its error log, in the order checked.
    failures = {}
    for path in candidates:
        print(f"Checking {path}...")
        log = check_video_integrity(path)
        if not log:
            print(f"{path} is okay.")
            continue
        failures[path] = log

    if not failures:
        print("No errors found in any video files.")
        return

    print("\nFiles with errors:")
    for path, log in failures.items():
        print(f"\n{path}")
        print(log)


# Folder whose videos are checked; main() searches it recursively.
# NOTE(review): the preprocessing configuration earlier in this file writes to
# 'preprocessed_videos_test/', not 'preprocessed_videos/' — confirm this is
# the folder that is meant to be checked.
root_folder = 'preprocessed_videos/'
main(root_folder)
