In [1]:
!pip install python-dotenv flask pyngrok

Collecting python-dotenv
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Downloading python_dotenv-1.1.1-py3-none-any.whl (20 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Installing collected packages: python-dotenv, pyngrok
Successfully installed pyngrok-7.2.11 python-dotenv-1.1.1


In [None]:
import cv2
import torch
from PIL import Image
from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer

class PretrainedModel:
    """Per-second frame captioner backed by ``nlpconnect/vit-gpt2-image-captioning``.

    One frame is sampled for every second of video and each sampled frame is
    captioned independently with top-k / nucleus sampling.
    """

    MODEL_NAME = "nlpconnect/vit-gpt2-image-captioning"
    # Sampling step used when the container does not report a usable frame rate.
    DEFAULT_FPS = 30
    # Frames sent through the model per generate() call, to bound memory use.
    BATCH_SIZE = 16

    def __init__(self):
        self.model = None
        self.feature_extractor = None
        self.tokenizer = None
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.max_length = 16
        # Kept for backward compatibility; generation below uses sampling, not beam search.
        self.num_beams = 4
        self.load_model()

    def load_model(self):
        """Load model, image processor and tokenizer.

        Returns:
            True on success, False if anything failed (the error is printed).
        """
        try:
            self.model = VisionEncoderDecoderModel.from_pretrained(
                self.MODEL_NAME
            ).to(self.device)
            self.feature_extractor = ViTImageProcessor.from_pretrained(
                self.MODEL_NAME
            )
            self.tokenizer = AutoTokenizer.from_pretrained(
                self.MODEL_NAME
            )
            print("Pretrained image captioning model loaded successfully")
            return True
        except Exception as e:
            print(f"Error loading pretrained model: {str(e)}")
            return False

    @staticmethod
    def _frames_per_sample(fps, default=30):
        """Turn a reported frame rate into a positive integer sampling step.

        Args:
            fps: Value reported by the video backend; may be 0, NaN or None
                when the container carries no frame-rate metadata.
            default: Step to use when ``fps`` is unusable.

        Returns:
            ``round(fps)`` clamped to at least 1, or ``default`` for
            non-positive / non-finite / non-numeric input.
        """
        try:
            fps = float(fps)
            step = int(round(fps))
        except (TypeError, ValueError, OverflowError):
            return default
        if fps <= 0:
            return default
        return max(1, step)

    def generate_captions(self, video_path: str):
        """Caption one frame per second of the video at ``video_path``.

        Returns:
            A list of ``{"second": int, "caption": str}`` dicts in playback
            order; empty when no frame could be read.

        Raises:
            RuntimeError: if the model failed to load.
        """
        if self.model is None or self.feature_extractor is None or self.tokenizer is None:
            raise RuntimeError("Pretrained model is not loaded")

        cap = cv2.VideoCapture(video_path)
        images = []
        timestamps = []
        try:
            # A raw int(fps) can be 0, which made the modulo below divide by zero.
            step = self._frames_per_sample(cap.get(cv2.CAP_PROP_FPS), self.DEFAULT_FPS)

            frame_count = 0
            while cap.isOpened():
                ret, frame = cap.read()
                if not ret:
                    break

                if frame_count % step == 0:
                    rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    images.append(Image.fromarray(rgb))
                    timestamps.append(frame_count // step)

                frame_count += 1
        finally:
            # Release the capture handle even if decoding raised.
            cap.release()

        if not images:
            return []

        captions = []
        # Run the model in fixed-size batches so long videos do not exhaust memory.
        for start in range(0, len(images), self.BATCH_SIZE):
            batch = images[start:start + self.BATCH_SIZE]
            pixel_values = self.feature_extractor(
                images=batch,
                return_tensors="pt"
            ).pixel_values.to(self.device)

            output_ids = self.model.generate(
                pixel_values,
                max_length=self.max_length,
                do_sample=True,         # enable sampling
                top_k=50,               # use top-k sampling
                top_p=0.95,             # or nucleus sampling
                num_return_sequences=1  # one caption per frame
            )

            captions.extend(
                self.tokenizer.batch_decode(output_ids, skip_special_tokens=True)
            )

        return [
            {"second": ts, "caption": text.strip()}
            for ts, text in zip(timestamps, captions)
        ]

# Build the frame captioner once for the whole notebook; the constructor calls load_model().
pretrained_model = PretrainedModel()

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/982M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/982M [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/228 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/241 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/120 [00:00<?, ?B/s]

Pretrained image captioning model loaded successfully


In [None]:
import os
import numpy as np
import joblib
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input as resnet_preprocess
from tensorflow.keras.layers import Attention
import cv2

class CustomVideoModel40FrameResNet:
    """Whole-video captioner: ResNet50 frame features fed to a saved encoder/decoder pair.

    Artifacts (``encoder_model.h5``, ``decoder_inference_model.h5``,
    ``tokenizer1500``) are looked up in ``model_dir`` first and fall back to
    ``/content`` when they are not found there.
    """

    # Location the artifacts were originally hard-coded to.
    DEFAULT_ARTIFACT_DIR = "/content"

    def __init__(self, model_dir):
        self.model_dir = model_dir
        self.encoder = None
        self.decoder = None
        self.tokenizer = None
        self.feature_extractor = None
        self.num_encoder_tokens = 2048  # ResNet50 feature dimension
        self.latent_dim = 512
        self.num_decoder_tokens = 1500
        self.time_steps_encoder = 40
        self.time_steps_decoder = 10
        self.load_model()

    def _artifact_path(self, filename):
        """Return ``model_dir/filename`` if that file exists, else the ``/content`` path."""
        if self.model_dir:
            candidate = os.path.join(self.model_dir, filename)
            if os.path.exists(candidate):
                return candidate
        return f"{self.DEFAULT_ARTIFACT_DIR}/{filename}"

    def load_model(self):
        """Load encoder, decoder, tokenizer and the ResNet50 feature extractor.

        Returns:
            True on success, False if anything failed (the error is printed).
        """
        try:
            # Load models with custom attention layer
            custom_objects = {'Attention': Attention}
            self.encoder = load_model(
                self._artifact_path("encoder_model.h5"),
                custom_objects=custom_objects
            )
            self.decoder = load_model(
                self._artifact_path("decoder_inference_model.h5"),
                custom_objects=custom_objects
            )

            # Load tokenizer.
            # NOTE(review): joblib.load unpickles the file and can execute arbitrary
            # code; only load tokenizer files from a trusted source.
            tokenizer_path = self._artifact_path("tokenizer1500")
            with open(tokenizer_path, 'rb') as f:
                self.tokenizer = joblib.load(f)

            # Create ResNet50 feature extractor
            base_model = ResNet50(
                weights="imagenet",
                include_top=False,
                pooling='avg'
            )
            self.feature_extractor = Model(
                inputs=base_model.input,
                outputs=base_model.output
            )

            print("ResNet50 video captioning model loaded successfully")
            return True
        except Exception as e:
            print(f"Error loading custom model: {str(e)}")
            return False

    def extract_features(self, video_path: str):
        """Run ResNet50 over the first ``time_steps_encoder`` frames of the video.

        Returns:
            The feature extractor's prediction for the stacked frames, or an
            all-zero ``(1, time_steps_encoder, num_encoder_tokens)`` array when
            no frame could be read.
        """
        cap = cv2.VideoCapture(video_path)
        features = []

        try:
            while cap.isOpened() and len(features) < self.time_steps_encoder:
                ret, frame = cap.read()
                if not ret:
                    break

                # Preprocess for ResNet50
                frame = cv2.resize(frame, (224, 224))
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert to RGB
                frame = resnet_preprocess(frame)  # ResNet-specific preprocessing
                features.append(frame)
        finally:
            # Release the capture handle even if preprocessing raised.
            cap.release()

        if not features:
            # Unreadable/empty video: fall back to zero features.
            return np.zeros((1, self.time_steps_encoder, self.num_encoder_tokens))

        video_frames = np.array(features)
        return self.feature_extractor.predict(video_frames, verbose=0)

    def prepare_encoder_input(self, features):
        """Pad or truncate features to ``time_steps_encoder`` rows and add a batch axis.

        Accepts both ``(n, num_encoder_tokens)`` features and the already-batched
        ``(1, n, num_encoder_tokens)`` fallback from ``extract_features``; the
        latter used to crash ``np.vstack`` with a dimension mismatch.

        Returns:
            Array of shape ``(1, time_steps_encoder, num_encoder_tokens)``.
        """
        features = np.asarray(features).reshape(-1, self.num_encoder_tokens)
        num_features = features.shape[0]
        if num_features < self.time_steps_encoder:
            padding = np.zeros((self.time_steps_encoder - num_features, self.num_encoder_tokens))
            features = np.vstack([features, padding])
        elif num_features > self.time_steps_encoder:
            features = features[:self.time_steps_encoder]
        return np.expand_dims(features, axis=0)

    def generate_caption(self, video_features):
        """Greedy-decode a caption from prepared encoder input.

        Args:
            video_features: Output of ``prepare_encoder_input``.

        Returns:
            The decoded words joined by spaces with ``start``/``end``/unknown
            tokens removed, or an error string when the tokenizer has no
            ``start`` token.

        Raises:
            RuntimeError: if the model artifacts failed to load.
        """
        if self.encoder is None or self.decoder is None or self.tokenizer is None:
            raise RuntimeError("Custom video model is not loaded")

        # Encoder returns [encoder_output, state_h, state_c]
        encoder_outputs = self.encoder.predict(video_features, verbose=0)
        encoder_output = encoder_outputs[0]  # Full sequence output for attention
        h, c = encoder_outputs[1:]  # [state_h, state_c]

        # Initialize target sequence with a one-hot 'start' token
        target_seq = np.zeros((1, 1, self.num_decoder_tokens))
        start_idx = self.tokenizer.word_index.get('start')
        if start_idx is None:
            return "Error: 'start' token not found in vocabulary"
        target_seq[0, 0, start_idx] = 1

        caption = []
        for _ in range(self.time_steps_decoder):
            # Inputs: [target_seq, encoder_output, state_h, state_c]
            inputs = [target_seq, encoder_output, h, c]
            output = self.decoder.predict(inputs, verbose=0)

            # Output: [output_tokens, state_h, state_c]
            output_tokens = output[0]
            h = output[1]
            c = output[2]

            # Get the predicted token (greedy: highest score)
            sampled_token_index = np.argmax(output_tokens[0, -1, :])
            word = self.tokenizer.index_word.get(sampled_token_index, "<unknown>")
            caption.append(word)

            # Stop if we predict the end token
            if word == 'end':
                break

            # Feed the predicted token back in as the next decoder input
            target_seq = np.zeros((1, 1, self.num_decoder_tokens))
            target_seq[0, 0, sampled_token_index] = 1

        # Filter special tokens and return caption
        return " ".join([w for w in caption if w not in ['start', 'end', '<unknown>']])

# Build the whole-video captioner once for the notebook; the constructor calls load_model().
custom_model = CustomVideoModel40FrameResNet("model")



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
ResNet50 video captioning model loaded successfully


In [None]:
from flask import Flask, request, jsonify, render_template_string, url_for
from pyngrok import ngrok, conf
import threading
import os

# Create the Flask app; the route handlers defined in later cells register on this object.
app = Flask(__name__)


In [None]:
html_page = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Video Caption Generator | AI-Powered Tool</title>
    <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
    <link href="https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&display=swap" rel="stylesheet">
    <style>
        :root {
            --primary: #4361ee;
            --primary-dark: #3a56d4;
            --secondary: #4cc9f0;
            --light: #f8f9fa;
            --dark: #212529;
            --success: #4CAF50;
            --gray: #6c757d;
            --light-gray: #e9ecef;
            --border-radius: 12px;
            --box-shadow: 0 8px 30px rgba(0, 0, 0, 0.08);
            --transition: all 0.3s ease;
        }

        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: 'Poppins', sans-serif;
            background: linear-gradient(135deg, #f5f7fa 0%, #e4e7f1 100%);
            min-height: 100vh;
            padding: 40px 20px;
            color: var(--dark);
            line-height: 1.6;
        }

        .container {
            max-width: 900px;
            margin: 0 auto;
            background: white;
            border-radius: var(--border-radius);
            box-shadow: var(--box-shadow);
            overflow: hidden;
        }

        header {
            background: linear-gradient(120deg, var(--primary), var(--secondary));
            color: white;
            padding: 30px;
            text-align: center;
            position: relative;
        }

        header::before {
            content: "";
            position: absolute;
            top: 0;
            left: 0;
            right: 0;
            height: 4px;
            background: linear-gradient(90deg, #ff9a9e 0%, #fad0c4 100%);
        }

        h1 {
            font-size: 2.5rem;
            font-weight: 700;
            margin-bottom: 10px;
            display: flex;
            align-items: center;
            justify-content: center;
            gap: 15px;
        }

        .subtitle {
            font-weight: 300;
            font-size: 1.1rem;
            max-width: 600px;
            margin: 0 auto;
            opacity: 0.9;
        }

        .content {
            padding: 30px;
        }

        .card {
            background: white;
            border-radius: var(--border-radius);
            box-shadow: 0 4px 15px rgba(0, 0, 0, 0.05);
            padding: 25px;
            margin-bottom: 25px;
        }

        .card-title {
            font-size: 1.3rem;
            font-weight: 600;
            margin-bottom: 20px;
            color: var(--primary);
            display: flex;
            align-items: center;
            gap: 10px;
        }

        .model-selector {
            display: flex;
            flex-wrap: wrap;
            gap: 20px;
            margin-bottom: 15px;
        }

        .model-option {
            flex: 1;
            min-width: 250px;
            border: 2px solid var(--light-gray);
            border-radius: var(--border-radius);
            padding: 20px;
            cursor: pointer;
            transition: var(--transition);
        }

        .model-option:hover {
            border-color: var(--primary);
            transform: translateY(-3px);
            box-shadow: 0 5px 15px rgba(67, 97, 238, 0.1);
        }

        .model-option.selected {
            border-color: var(--primary);
            background-color: rgba(67, 97, 238, 0.03);
        }

        .model-option input {
            display: none;
        }

        .model-name {
            font-weight: 600;
            margin-bottom: 8px;
            display: flex;
            align-items: center;
            gap: 10px;
        }

        .model-description {
            font-size: 0.9rem;
            color: var(--gray);
        }

        .upload-container {
            border: 2px dashed var(--light-gray);
            border-radius: var(--border-radius);
            padding: 40px 20px;
            text-align: center;
            transition: var(--transition);
            margin: 20px 0;
            background-color: #fafbff;
        }

        .upload-container.active {
            border-color: var(--primary);
            background-color: rgba(67, 97, 238, 0.03);
        }

        .upload-icon {
            font-size: 3.5rem;
            color: var(--primary);
            margin-bottom: 15px;
        }

        .upload-text {
            margin-bottom: 20px;
        }

        .file-input {
            display: none;
        }

        .btn {
            background: linear-gradient(120deg, var(--primary), var(--primary-dark));
            color: white;
            border: none;
            padding: 12px 28px;
            cursor: pointer;
            border-radius: 50px;
            font-weight: 500;
            font-size: 1rem;
            transition: var(--transition);
            display: inline-flex;
            align-items: center;
            gap: 8px;
            box-shadow: 0 4px 10px rgba(67, 97, 238, 0.3);
        }

        .btn:hover {
            transform: translateY(-2px);
            box-shadow: 0 6px 15px rgba(67, 97, 238, 0.4);
        }

        .btn:active {
            transform: translateY(0);
        }

        .btn-outline {
            background: transparent;
            border: 2px solid var(--primary);
            color: var(--primary);
            box-shadow: none;
        }

        .btn-outline:hover {
            background: rgba(67, 97, 238, 0.05);
        }

        #video-preview {
            width: 100%;
            max-height: 400px;
            border-radius: var(--border-radius);
            margin-top: 25px;
            display: none;
            background: black;
        }

        #result-container {
            margin-top: 30px;
        }

        .result-box {
            padding: 25px;
            background: var(--light);
            border-radius: var(--border-radius);
            margin-bottom: 20px;
            border-left: 4px solid var(--success);
            position: relative;
            overflow: hidden;
        }

        .result-box::before {
            content: "";
            position: absolute;
            top: 0;
            left: 0;
            width: 100%;
            height: 4px;
            background: linear-gradient(90deg, var(--success), #8bc34a);
        }

        .result-title {
            font-size: 1.2rem;
            font-weight: 600;
            margin-bottom: 15px;
            color: var(--success);
        }

        .caption-text {
            font-size: 1.1rem;
            font-style: italic;
            background: white;
            padding: 15px;
            border-radius: 8px;
            margin: 10px 0;
            border-left: 3px solid var(--primary);
        }

        .timestamp {
            font-weight: 600;
            color: var(--primary);
            margin-right: 8px;
        }

        .caption-item {
            padding: 12px;
            background: white;
            border-radius: 8px;
            margin: 10px 0;
            border-left: 3px solid var(--secondary);
        }

        .video-info {
            margin-top: 15px;
            font-size: 0.9rem;
            color: var(--gray);
            display: flex;
            align-items: center;
            gap: 8px;
        }

        .loading {
            display: flex;
            flex-direction: column;
            align-items: center;
            gap: 20px;
            padding: 30px;
        }

        .spinner {
            width: 50px;
            height: 50px;
            border: 5px solid rgba(67, 97, 238, 0.2);
            border-top: 5px solid var(--primary);
            border-radius: 50%;
            animation: spin 1s linear infinite;
        }

        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }

        .loading-text {
            font-size: 1.1rem;
            font-weight: 500;
            color: var(--primary);
        }

        .error-box {
            background: #ffebee;
            border-left: 4px solid #f44336;
        }

        .error-box .result-title {
            color: #f44336;
        }

        .features {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
            gap: 20px;
            margin-top: 20px;
        }

        .feature {
            display: flex;
            align-items: flex-start;
            gap: 12px;
        }

        .feature i {
            color: var(--primary);
            font-size: 1.2rem;
            margin-top: 3px;
        }

        footer {
            text-align: center;
            padding: 20px;
            color: var(--gray);
            font-size: 0.9rem;
            border-top: 1px solid var(--light-gray);
        }

        @media (max-width: 768px) {
            .model-selector {
                flex-direction: column;
            }

            h1 {
                font-size: 2rem;
            }

            .content {
                padding: 20px;
            }
        }
    </style>
</head>
<body>
    <div class="container">
        <header>
            <h1>
                <i class="fas fa-closed-captioning"></i>
                AI Video Caption Generator
            </h1>
            <p class="subtitle">Upload any video and get accurate captions generated by our advanced AI models</p>
        </header>

        <div class="content">
            <div class="card">
                <h2 class="card-title"><i class="fas fa-brain"></i> Select AI Model</h2>

                <div class="model-selector">
                    <div class="model-option selected" onclick="selectModel(this, 'custom')">
                        <div class="model-name">
                            <i class="fas fa-cogs"></i> Custom Video Model
                        </div>
                        <div class="model-description">
                            Generates a comprehensive caption describing the entire video content.
                            Best for summarizing videos under 5 minutes.
                        </div>
                    </div>

                    <div class="model-option" onclick="selectModel(this, 'pretrained')">
                        <div class="model-name">
                            <i class="fas fa-layer-group"></i> Frame Analysis Model
                        </div>
                        <div class="model-description">
                            Analyzes key frames and generates captions for each segment.
                            Ideal for detailed scene-by-scene understanding.
                        </div>
                    </div>
                </div>
            </div>

            <div class="card">
                <h2 class="card-title"><i class="fas fa-upload"></i> Upload Video</h2>

                <div class="upload-container" id="upload-area">
                    <div class="upload-icon">
                        <i class="fas fa-cloud-upload-alt"></i>
                    </div>
                    <p class="upload-text">Drag & drop your video file here or click to browse</p>
                    <input type="file" id="video-upload" class="file-input" accept="video/*">
                    <button class="btn" onclick="document.getElementById('video-upload').click()">
                        <i class="fas fa-folder-open"></i> Choose Video File
                    </button>
                    <p style="margin-top: 15px; color: var(--gray); font-size: 0.9rem;">
                        Supported formats: MP4, MOV, AVI, WEBM (Max 500MB)
                    </p>
                </div>

                <div style="text-align: center; margin-top: 20px;">
                    <button class="btn" onclick="uploadVideo()">
                        <i class="fas fa-magic"></i> Generate Captions
                    </button>
                </div>

                <video id="video-preview" controls></video>
            </div>

            <div id="result-container"></div>

            <div class="features">
                <div class="feature">
                    <i class="fas fa-bolt"></i>
                    <div>Fast processing with GPU acceleration</div>
                </div>
                <div class="feature">
                    <i class="fas fa-shield-alt"></i>
                    <div>Secure processing - files deleted after analysis</div>
                </div>
                <div class="feature">
                    <i class="fas fa-language"></i>
                    <div>Supports multiple languages</div>
                </div>
                <div class="feature">
                    <i class="fas fa-highlighter"></i>
                    <div>Accurate contextual understanding</div>
                </div>
            </div>
        </div>

        <footer>
            <p>AI Video Caption Generator &copy; 2023 | Powered by Advanced Deep Learning Models</p>
        </footer>
    </div>

    <script>
        // DOM nodes shared by the handlers below.
        const videoUpload = document.getElementById('video-upload');
        const videoPreview = document.getElementById('video-preview');
        const resultContainer = document.getElementById('result-container');
        const uploadArea = document.getElementById('upload-area');

        // Drag and drop: highlight the drop zone while a file is dragged over it.
        uploadArea.addEventListener('dragover', (e) => {
            // preventDefault marks the area as a valid drop target.
            e.preventDefault();
            uploadArea.classList.add('active');
        });

        uploadArea.addEventListener('dragleave', () => {
            uploadArea.classList.remove('active');
        });

        uploadArea.addEventListener('drop', (e) => {
            // Stop the browser from navigating to the dropped file.
            e.preventDefault();
            uploadArea.classList.remove('active');

            if (e.dataTransfer.files.length) {
                // Route dropped files through the hidden file input so both paths share one handler.
                videoUpload.files = e.dataTransfer.files;
                handleFileSelect();
            }
        });

        // Preview the video whenever a file is picked through the file dialog.
        videoUpload.addEventListener('change', handleFileSelect);

        // Validate the chosen file and show it in the preview player.
        function handleFileSelect() {
            const file = videoUpload.files[0];
            if (!file) {
                return;
            }

            // Free the blob URL of any previous preview before replacing or hiding it.
            if (videoPreview.src && videoPreview.src.startsWith('blob:')) {
                URL.revokeObjectURL(videoPreview.src);
            }

            if (file.size > 500 * 1024 * 1024) {
                alert('File size exceeds 500MB limit. Please choose a smaller video.');
                // Clear the input so the rejected file cannot still be submitted by uploadVideo().
                videoUpload.value = '';
                videoPreview.removeAttribute('src');
                videoPreview.style.display = 'none';
                return;
            }

            videoPreview.src = URL.createObjectURL(file);
            videoPreview.style.display = 'block';
            resultContainer.innerHTML = '';
        }

        // Model selection
        // Move the "selected" highlight to the clicked model card.
        // modelValue is supplied by the inline onclick handlers but is not read here.
        function selectModel(element, modelValue) {
            const options = document.querySelectorAll('.model-option');
            for (const option of options) {
                option.classList.remove('selected');
            }
            element.classList.add('selected');
        }

        // Upload and process video
        // POST the selected video and model choice to the caption endpoint and render the reply.
        async function uploadVideo() {
            const file = videoUpload.files[0];
            if (!file) {
                alert('Please select a video file first!');
                return;
            }

            // Escape server- and user-supplied text (file name, captions, errors)
            // before it is spliced into innerHTML, so it can never run as markup.
            const escapeHtml = (value) => {
                const holder = document.createElement('div');
                holder.textContent = String(value);
                return holder.innerHTML;
            };

            // Render a red error card with the given icon, title, message and hint.
            const showError = (icon, title, message, hint) => {
                resultContainer.innerHTML = `
                    <div class="result-box error-box">
                        <h3 class="result-title"><i class="fas ${icon}"></i> ${title}</h3>
                        <div class="caption-text">${escapeHtml(message)}</div>
                        <p>${hint}</p>
                    </div>
                `;
            };

            // Get selected model
            const modelType = document.querySelector('.model-option.selected').getAttribute('onclick').includes('custom') ? 'custom' : 'pretrained';

            resultContainer.innerHTML = `
                <div class="loading">
                    <div class="spinner"></div>
                    <div class="loading-text">Processing your video with AI...</div>
                    <p>This may take a moment depending on video length</p>
                </div>
            `;

            const formData = new FormData();
            formData.append('file', file);
            formData.append('model_type', modelType);

            try {
                // Calculate endpoint based on current page's URL
                let basePath = window.location.pathname;
                if (!basePath.endsWith('/')) {
                    basePath = basePath.substring(0, basePath.lastIndexOf('/') + 1);
                }
                const endpoint = window.location.origin + basePath + 'caption';

                const response = await fetch(endpoint, {
                    method: 'POST',
                    body: formData
                });

                // A proxy or server failure may answer with HTML instead of JSON.
                let result;
                try {
                    result = await response.json();
                } catch (parseError) {
                    throw new Error(`Server returned an unexpected response (HTTP ${response.status})`);
                }

                if (result.error) {
                    showError('fa-exclamation-circle', 'Processing Error', result.error,
                        'Please try again with a different video or model.');
                    return;
                }

                const fileName = escapeHtml(result.filename);

                if (result.model === "custom") {
                    resultContainer.innerHTML = `
                        <div class="result-box">
                            <h3 class="result-title"><i class="fas fa-check-circle"></i> Generated Caption</h3>
                            <div class="caption-text">"${escapeHtml(result.caption)}"</div>
                            <div class="video-info">
                                <i class="fas fa-video"></i>
                                Video: ${fileName} | Model: Custom Video Captioning
                            </div>
                        </div>
                    `;
                } else if (result.model === "pretrained") {
                    const captions = Array.isArray(result.captions) ? result.captions : [];
                    let captionsHtml = '<div class="result-box">';
                    captionsHtml += '<h3 class="result-title"><i class="fas fa-list-alt"></i> Captions by Time Segment</h3>';

                    if (captions.length === 0) {
                        captionsHtml += '<div class="caption-item">No captions could be generated for this video.</div>';
                    }

                    captions.forEach(item => {
                        captionsHtml += `
                            <div class="caption-item">
                                <span class="timestamp">${escapeHtml(item.second)}s:</span> ${escapeHtml(item.caption)}
                            </div>
                        `;
                    });

                    captionsHtml += `<div class="video-info">
                        <i class="fas fa-video"></i>
                        Video: ${fileName} | Model: Frame Analysis Model
                    </div>`;
                    captionsHtml += '</div>';

                    resultContainer.innerHTML = captionsHtml;
                } else {
                    // Unknown reply shape: replace the spinner instead of leaving it running forever.
                    showError('fa-exclamation-circle', 'Processing Error',
                        'The server returned an unrecognized result.',
                        'Please try again with a different video or model.');
                }
            } catch (error) {
                showError('fa-exclamation-triangle', 'Network Error', error.message,
                    'Please check your connection and try again.');
            }
        }
    </script>
</body>
</html>
"""

@app.route("/")
def index():
    """Serve the single-page upload UI stored in ``html_page``."""
    rendered_page = render_template_string(html_page)
    return rendered_page

In [None]:
import uuid

@app.route("/caption", methods=["POST"])
def caption_video():
    """Caption an uploaded video with the model named in the ``model_type`` form field.

    Expects a multipart POST with a ``file`` part and an optional
    ``model_type`` of ``"custom"`` (default) or ``"pretrained"``.

    Returns:
        JSON with ``model``, ``filename`` and either ``caption`` (custom) or
        ``captions`` (pretrained). On failure, JSON with an ``error`` key and
        status 400 (bad request) or 500 (processing error).
    """
    uploaded_file = request.files.get('file')
    # Browsers submit an empty filename when the form is sent without a chosen file.
    if uploaded_file is None or not uploaded_file.filename:
        return jsonify({"error": "No video file uploaded"}), 400

    # Reject unknown models before anything is written to disk.
    model_type = request.form.get('model_type', 'custom')
    if model_type not in ("custom", "pretrained"):
        return jsonify({"error": "Invalid model type specified"}), 400

    # Keep the client-supplied extension only when it is short and plain ASCII
    # alphanumeric; the rest of the name is replaced by a random UUID.
    file_ext = os.path.splitext(uploaded_file.filename)[1]
    suffix = file_ext[1:]
    if not (suffix.isascii() and suffix.isalnum() and len(suffix) <= 10):
        file_ext = ""
    temp_filename = f"{uuid.uuid4()}{file_ext}"
    temp_path = os.path.join("temp_videos", temp_filename)

    try:
        # Saving happens inside the try so a failed or partial write is still
        # reported as JSON and cleaned up by the finally block.
        os.makedirs(os.path.dirname(temp_path), exist_ok=True)
        uploaded_file.save(temp_path)

        if model_type == "custom":
            # Process with custom model
            features = custom_model.extract_features(temp_path)
            encoder_input = custom_model.prepare_encoder_input(features)
            caption = custom_model.generate_caption(encoder_input)

            # Return in expected format
            return jsonify({
                "model": "custom",
                "caption": caption,
                "filename": uploaded_file.filename
            })

        # model_type == "pretrained": process with pretrained model
        captions = pretrained_model.generate_captions(temp_path)

        # Return in expected format
        return jsonify({
            "model": "pretrained",
            "captions": captions,
            "filename": uploaded_file.filename
        })

    except Exception as e:
        return jsonify({
            "error": f"Processing error: {str(e)}"
        }), 500

    finally:
        # Clean up temporary file
        if os.path.exists(temp_path):
            os.remove(temp_path)

In [None]:
# Start Flask in a background thread
def run_app():
    """Serve the Flask app on local port 5000; blocks until the server stops."""
    app.run(port=5000)

import dotenv
dotenv.load_dotenv()
authtoken = os.getenv("NGROK_AUTH")
if authtoken:
    conf.get_default().auth_token = authtoken
else:
    # Do not overwrite any already-configured token with None; just make the cause visible.
    print("Warning: NGROK_AUTH is not set; ngrok may refuse to open the tunnel.")

# Start the Flask server first so the tunnel never points at a port nobody is listening on
thread = threading.Thread(target=run_app)
thread.start()

# Open ngrok tunnel
public_url = ngrok.connect(5000)
print(f"🌍 Public URL: {public_url}")

🌍 Public URL: NgrokTunnel: "https://bdf4-34-169-239-168.ngrok-free.app" -> "http://localhost:5000"
