In [7]:
# Vivi 2.0  – chat + smart video clipping  
# ----------------------------------------------------------  
# Imports  
# ---------------------------------------------------------- 

import os
import subprocess
import threading
import tkinter as tk
from tkinter import filedialog, scrolledtext, ttk
import whisper
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM
import tempfile
import uuid
import time

In [5]:
def _parse_srt_time(t):
    """Convert a timestamp into a number of seconds.

    The formats are tried in this order:
      * ``HH:MM:SS,mmm`` (SRT style) or ``HH:MM:SS.fff``
      * ``MM:SS`` / ``MM:SS.fff``
      * a plain number of seconds (``"12.5"`` or a numeric value)

    Args:
        t: The timestamp, normally a string.

    Returns:
        The timestamp expressed in seconds.

    Raises:
        ValueError: If ``t`` matches none of the formats above.
    """
    # Only genuine parsing failures mean "try the next format". A bare
    # ``except:`` would also swallow KeyboardInterrupt and SystemExit.
    # AttributeError/TypeError cover inputs that are not ``str`` (no usable
    # ``split``), which then fall through to the plain ``float`` attempt.
    parse_errors = (ValueError, AttributeError, TypeError)

    try:
        h, m, s_ms = t.split(":")
        if "," in s_ms:
            s, ms = s_ms.split(",")
            return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000
        return int(h) * 3600 + int(m) * 60 + float(s_ms)
    except parse_errors:
        pass

    try:
        m, s = t.split(":")
        return int(m) * 60 + float(s)
    except parse_errors:
        pass

    try:
        return float(t)
    except (ValueError, TypeError):
        raise ValueError(f"Unrecognized timestamp format: {t}") from None

def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields. Rounding only the seconds field (``f"{s:06.3f}"``) lets a value
    such as 59.9996 become ``60,000`` without carrying into the minutes,
    which yields an invalid timestamp like ``00:00:60,000``.

    Args:
        seconds: Duration in seconds (int or float), expected to be >= 0.

    Returns:
        The timestamp string, e.g. ``"00:01:02,500"``.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

# ------------------ FFmpeg Functions ------------------

def concatenate_videos(video_paths, output_filepath):
    """Join video clips into a single file with FFmpeg's concat demuxer.

    Args:
        video_paths: Paths of the clips to join, in playback order.
        output_filepath: Where the concatenated video is written.

    Returns:
        A ``(message, path)`` tuple. ``path`` is ``output_filepath`` on
        success and ``None`` on any failure; ``message`` is a status line
        meant for display. Failures are reported through the tuple rather
        than raised.
    """
    import shutil  # local import: only needed to locate the ffmpeg binary

    list_file_path = None
    try:
        if not video_paths:
            return "❌ Error: No video clips provided.", None

        # Keep honouring the original hard-coded install location, but fall
        # back to the ``ffmpeg`` found on PATH when it is not there.
        ffmpeg_path = "C:\\ffmpeg\\ffmpeg.exe"
        if not os.path.isfile(ffmpeg_path):
            ffmpeg_path = shutil.which("ffmpeg") or "ffmpeg"

        # Create unique list file in temp dir
        temp_dir = tempfile.gettempdir()
        list_file_path = os.path.join(temp_dir, f"concat_list_{uuid.uuid4().hex[:8]}.txt")

        # Write the list of files to concatenate
        with open(list_file_path, "w", encoding="utf-8") as f:
            for path in video_paths:
                # Done outside the f-string: a backslash inside an f-string
                # expression is a SyntaxError before Python 3.12.
                normalized = path.replace("\\", "/")
                # Inside a single-quoted concat entry a literal quote has to
                # be written as '\'' (close quote, escaped quote, reopen).
                escaped = normalized.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        # Run ffmpeg concat
        command = [
            ffmpeg_path, "-y",
            "-f", "concat", "-safe", "0",
            "-i", list_file_path,
            "-c", "copy", output_filepath
        ]
        result = subprocess.run(command, capture_output=True, text=True)

        if result.returncode != 0:
            return f"❌ FFmpeg error:\n{result.stderr}", None

        return f"✅ Concatenated video saved to: {output_filepath}", output_filepath

    except Exception as e:
        return f"❌ Exception during concatenation: {str(e)}", None

    finally:
        # Remove the list file on every path, including when ffmpeg could not
        # be started; cleanup problems must not mask the real result.
        if list_file_path and os.path.exists(list_file_path):
            try:
                os.remove(list_file_path)
            except OSError:
                pass


#def concatenate_videos(video_paths, output_path):
 #   ffmpeg_path = 'C:\\ffmpeg'
  #  try:
   #     if not video_paths:
    #        return "Error: No video clips provided.", None

     #   list_file = "concat_list.txt"
      #  with open(list_file, "w") as f:
       #     for path in video_paths:
        #        f.write(f"file '{path}'\n")

#        command = [
 #           ffmpeg_path, "-y", "-f", "concat", "-safe", "0",
  #          "-i", list_file, "-c", "copy", output_path
   #     ]

    #    result = subprocess.run(command, capture_output=True, text=True)
     #   os.remove(list_file)
      #  if result.returncode != 0:
       #     return f"Error concatenating: {result.stderr}", None
        #return f"✅ Concatenated video saved to {output_path}", output_path

#    except Exception as e:
 #       return f"Error concatenating videos: {str(e)}", None

# ------------------ LLM Setup ------------------
# Single OllamaLLM instance (model tag 'gemma3:1B') shared by both chains below.
llm = OllamaLLM(model='gemma3:1B')

# Prompt for free-form chat.
# Placeholders filled at invoke time: {context}, {transcript}, {question}.
general_template = """
You are Vivi, an expert and friendly video assistant chatbot.
You are having an ongoing conversation with the user. You have access to a full transcript of a video. If the user’s question is about the video, answer helpfully and refer to timestamps if relevant.
If the question is general and not related to the video, just respond helpfully like a normal assistant.
---
Conversation History:
{context}
---
Full Transcript of the Video:
{transcript}
---
User:
{question}
---
Vivi:
"""

# Prompt that asks the model for the time ranges relevant to a query.
# Placeholders filled at invoke time: {query}, {transcript}.
# ViviChatbot.send_message scans the reply for lines starting with "- Range: ".
# NOTE(review): the format example shows two "- Range:" lines but only one
# "Relevance:" line - confirm this is the intended output shape.
clipping_template = """
You are an expert video analysis assistant. Given a user query and the transcript of a video with timestamps, identify all timestamp ranges where the video content is relevant to the query. Return the result as a list of timestamp ranges (start and end times in seconds) and a brief explanation of why each range is relevant.

Return timestamps as plain numbers with two decimal places (e.g., 31.00, 45.00) without brackets or other characters. Ensure the format is consistent. Ensure end_time is greater than start_time and both are non-negative.

Query: {query}
Transcript: {transcript}

Return the result in the following format and each range and its explanation must be on separate lines:
- Range: start_time - end_time
- Range: start_time - end_time
  Relevance: [Brief explanation of why this range is relevant to the query]
"""

# Wrap each template in a ChatPromptTemplate and pipe it into the shared model,
# giving one chain for general chat and one for clip selection.
prompt_general = ChatPromptTemplate.from_template(general_template)
prompt_clipping = ChatPromptTemplate.from_template(clipping_template)
chain_general = prompt_general | llm
chain_clipping = prompt_clipping | llm

# ------------------ Vivi GUI Class ------------------
class ViviChatbot:
    def __init__(self):
        """Initialise the chatbot state and build the Tk window.

        Widgets are packed top to bottom in this order: chat log, entry box,
        Send button, the Upload/Preview button row and the progress bar.
        """
        # --- Conversation / video state ---
        self.video_path = ""              # path of the currently loaded video
        self.context = ""                 # running "User:/AI:" chat history
        self.transcript_segments = []     # dicts with "start", "end", "text"
        self.full_transcript_text = ""    # timestamped transcript as one string
        # NOTE(review): the next four attributes are not read by any method
        # visible in this class; kept in case other code relies on them.
        self.cap = None
        self.playing = False
        self.current_frame = 0
        self.seek_scale = None
        self.audio_process = None         # ffplay subprocess started by play_video

        # --- Window ---
        self.root = tk.Tk()
        self.root.title("Vivi Video Chatbot")

        self.chat_display = scrolledtext.ScrolledText(self.root, wrap=tk.WORD, width=80, height=30, font=("Arial", 12))
        self.chat_display.pack(padx=10, pady=10)

        self.user_entry = tk.Entry(self.root, font=("Arial", 12))
        self.user_entry.pack(fill=tk.X, padx=10, pady=(0, 10))
        # Pressing Enter in the entry box behaves like clicking Send.
        self.user_entry.bind("<Return>", lambda e: self.send_message())

        self.send_btn = tk.Button(self.root, text="Send", font=("Arial", 12), command=self.send_message)
        self.send_btn.pack(pady=(0, 10))

        btn_frame = tk.Frame(self.root)
        btn_frame.pack()

        self.browse_btn = tk.Button(btn_frame, text="📂 Upload Video", command=self.browse_video)
        self.browse_btn.pack(side=tk.LEFT, padx=5)

        self.preview_btn = tk.Button(btn_frame, text="▶️ Preview Video", command=self.play_video)
        self.preview_btn.pack(side=tk.LEFT, padx=5)

        self.progress_var = tk.DoubleVar()
        self.progress_bar = ttk.Progressbar(self.root, orient="horizontal", mode="determinate", variable=self.progress_var)
        self.progress_bar.pack(fill=tk.X, padx=10, pady=5)

        # The trailing newline matters: send_message appends "You: ..." with no
        # leading newline, so without it the first user message would be glued
        # onto the end of the welcome text.
        self.chat_display.insert(tk.END, " Welcome to the Vivi Video Chatbot! Type 'exit' to quit.\n For getting your video clipped, enter your query in the format:\n Video clipping:<query>\n")
        self.chat_display.yview(tk.END)

    
    
    def format_time(self, seconds):
        h = int(seconds // 3600)
        m = int((seconds % 3600) // 60)
        s = seconds % 60
        return f"{h:02}:{m:02}:{s:06.3f}".replace(".", ",")

    def clip_video(self, start_time, end_time):
        """Cut the range ``start_time``..``end_time`` out of ``self.video_path``.

        FFmpeg is run with ``-c copy`` and writes the clip to a uniquely named
        ``clip_<hex>.mp4`` in the system temp directory. Problems are written
        to the chat log.

        Args:
            start_time: Clip start, in seconds.
            end_time: Clip end, in seconds; must be greater than ``start_time``.

        Returns:
            A ``(message, path)`` tuple in every case. ``path`` is the clip
            location on success and ``None`` when the range is invalid or the
            clip could not be produced, so callers can always unpack it.
        """
        try:
            # Validate time range
            if start_time >= end_time:
                self.chat_display.insert(tk.END, f"\n⚠ Invalid clip range: start ({start_time}) >= end ({end_time})\n")
                return " Invalid time range", None

            # Use a unique filename to avoid collisions
            clip_filename = f"clip_{uuid.uuid4().hex[:8]}.mp4"
            output_path = os.path.join(tempfile.gettempdir(), clip_filename)

            if os.path.exists(output_path):
                os.remove(output_path)

            subprocess.run([
                "ffmpeg", "-y",
                "-ss", str(start_time),
                "-to", str(end_time),
                "-i", self.video_path,
                "-c", "copy",
                output_path
            ], check=True)

            self.chat_display.yview(tk.END)
            return f"🎬 Video clip saved to {output_path}", output_path

        except subprocess.CalledProcessError as e:
            self.chat_display.insert(tk.END, f"\n❌ FFmpeg failed: {e}\n")
        except PermissionError as e:
            self.chat_display.insert(tk.END, f"\n❌ Permission denied: {e}\n")
        except Exception as e:
            self.chat_display.insert(tk.END, f"\n❌ Error clipping video: {e}\n")

        # Reached only after a handled error. Returning a tuple (instead of
        # falling off the end with None) keeps `msg, path = clip_video(...)`
        # in the caller from raising TypeError.
        return " Clip could not be created", None
    
    def transcribe_video(self, video_path):
        """Return the timestamped transcript and segment list for a video.

        Results are cached next to the video as ``<name>.txt`` (one
        ``[start - end] text`` line per segment) and ``<name>.srt``. When both
        files exist they are loaded; otherwise Whisper's "medium" model is run
        and both files are written.

        Args:
            video_path: Path of the video to transcribe.

        Returns:
            ``(timed_transcript, segments)`` where ``timed_transcript`` is a
            string and ``segments`` is a list of dicts with the keys
            ``"start"``, ``"end"`` (seconds) and ``"text"``.
        """
        base = os.path.splitext(os.path.basename(video_path))[0]
        dir_ = os.path.dirname(video_path)
        txt_path = os.path.join(dir_, f"{base}.txt")
        srt_path = os.path.join(dir_, f"{base}.srt")

        if os.path.exists(txt_path) and os.path.exists(srt_path):
            self.chat_display.insert(tk.END, "✅ Transcript and subtitles found.\n")
            with open(srt_path, "r", encoding="utf-8") as f:
                blocks = f.read().strip().split("\n\n")
            with open(txt_path, "r", encoding="utf-8") as f:
                timed_transcript = f.read().strip()
            segments = []
            for block in blocks:
                # An SRT block is: index line, "start --> end" line, text line(s).
                lines = block.split("\n")
                if len(lines) < 3:
                    continue
                times = lines[1].split(" --> ")
                if len(times) != 2:
                    # Malformed timing line: skip the block instead of
                    # aborting the whole load with an IndexError.
                    continue
                start = _parse_srt_time(times[0])
                end = _parse_srt_time(times[1])
                text = " ".join(lines[2:])
                segments.append({"start": start, "end": end, "text": text})
            return timed_transcript, segments

        self.chat_display.insert(tk.END, "🔍 Running Whisper transcription...\n")
        model = whisper.load_model("medium")
        result = model.transcribe(video_path, verbose=True)
        segments = []
        transcript_lines = []
        for seg in result['segments']:
            segments.append({"start": seg['start'], "end": seg['end'], "text": seg['text']})
            # This line previously used the undefined name `segment`, which
            # raised NameError on every fresh transcription.
            transcript_lines.append(f"[{seg['start']:.2f} - {seg['end']:.2f}] {seg['text']}\n")
        timed_transcript = "".join(transcript_lines)

        with open(srt_path, "w", encoding="utf-8") as f:
            for i, seg in enumerate(segments):
                f.write(f"{i+1}\n{format_time(seg['start'])} --> {format_time(seg['end'])}\n{seg['text']}\n\n")

        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(timed_transcript)

        return timed_transcript, segments

    def send_message(self):
        user_input = self.user_entry.get()
        if user_input.strip().lower() == "exit":
            self.root.destroy()
            return

        self.chat_display.insert(tk.END, f"You: {user_input}\n")
        self.user_entry.delete(0, tk.END)
        self.user_entry.config(state="disabled")
        self.send_btn.config(state="disabled")

        def run_bot():
            if user_input.lower().startswith("video clipping:"):
                query = user_input[len("video clipping:"):].strip()
                response = chain_clipping.invoke({"query": query, "transcript": self.transcript_segments})
                self.chat_display.insert(tk.END, f"\nVivi:")
                video_clips = []
                for line in response.strip().split("\n"):
                    if line.startswith("- Range: "):
                        parts = line[len("- Range: "):].split(" - ")
                        if len(parts) == 2:
                            try:
                                start = float(parts[0])
                                end = float(parts[1])
                            except ValueError:
                                try:
                                    start = _parse_srt_time(parts[0])
                                    end = _parse_srt_time(parts[1])
                                except Exception as e:
                                    self.chat_display.insert(tk.END, f"\n⚠️ Failed to parse timestamps: {parts}")
                                    continue
                            msg, clip_path = self.clip_video(start, end)
                            self.chat_display.insert(tk.END, f"\n{msg}")
                            if clip_path:
                                video_clips.append(clip_path)
                if video_clips:
                    msg, out = concatenate_videos(video_clips, output_filepath=os.path.join(os.path.dirname(self.video_path), "final_output.mp4"))
                    self.chat_display.insert(tk.END, f"\n{msg}\n")
                
                    if out:
                        for clip in video_clips:
                            try:
                                os.remove(clip)
                            except Exception as e:
                                self.chat_display.insert(tk.END, f"⚠️ Failed to delete {clip}: {e}\n")
                        self.chat_display.insert(tk.END, f"🧹 Deleted all temporary clips\n")

            else:
                response = chain_general.invoke({
                    "context": self.context,
                    "question": user_input,
                    "transcript": self.transcript_segments
                })
                self.chat_display.insert(tk.END, "Vivi: ")
                for word in response.split():
                    self.chat_display.insert(tk.END, word + " ")
                    self.chat_display.yview(tk.END)
                    time.sleep(0.04)
                self.chat_display.insert(tk.END, "\n\n")
                self.context += f"\nUser: {user_input}\nAI: {response}\n"
            self.user_entry.config(state="normal")
            self.send_btn.config(state="normal")
            self.user_entry.focus()

        threading.Thread(target=run_bot).start()

    def browse_video(self):
        """Let the user pick a video and transcribe it on a worker thread.

        Cancelling the dialog leaves the currently loaded video untouched.
        Transcription failures are written to the chat log instead of ending
        the worker thread silently.
        """
        selected = filedialog.askopenfilename(filetypes=[("Video Files", "*.mp4 *.mov *.avi")])
        if not selected:
            # Dialog cancelled. Assigning the empty result to self.video_path
            # (as before) would discard the video that is already loaded.
            return
        self.video_path = selected

        self.chat_display.insert(tk.END, f"\n📁 Selected video: {os.path.basename(selected)}\n")

        def process_video():
            try:
                # Use the captured path so a later selection cannot change
                # which file this worker transcribes.
                transcript, segments = self.transcribe_video(selected)
            except Exception as e:
                self.chat_display.insert(tk.END, f"❌ Transcription failed: {e}\n\n")
                return
            self.full_transcript_text, self.transcript_segments = transcript, segments
            self.chat_display.insert(tk.END, "✅ Transcription completed!\n\n")
            self.progress_var.set(0)

        threading.Thread(target=process_video).start()

    def play_video(self):
        """Play the clipped result if one exists, otherwise the uploaded video.

        ``final_output.mp4`` in the source video's directory takes priority.
        Playback is delegated to an ``ffplay`` subprocess stored in
        ``self.audio_process``; a player that is still running is terminated
        first.
        """
        clipped_path = os.path.join(os.path.dirname(self.video_path), "final_output.mp4")

        # Pick what to play and the matching chat notice.
        if os.path.exists(clipped_path):
            target = clipped_path
            notice = "\n🎞️ Playing final_output.mp4\n"
        elif self.video_path:
            target = self.video_path
            notice = "\n🎞️ Previewing original uploaded video\n"
        else:
            self.chat_display.insert(tk.END, "\n⚠️ No video loaded yet. Upload a video first.\n")
            return
        self.chat_display.insert(tk.END, notice)

        # Stop an earlier ffplay instance that has not exited yet.
        previous = self.audio_process
        if previous and previous.poll() is None:
            previous.terminate()

        # ffplay renders both the video and the audio track.
        player_cmd = ["ffplay", "-autoexit", "-loglevel", "quiet", target]
        try:
            self.audio_process = subprocess.Popen(player_cmd)
        except Exception as e:
            self.chat_display.insert(tk.END, f"\n❌ Failed to play video with ffplay: {e}\n")


    def run(self) -> None:
        """Start the chatbot by entering the Tk main loop of ``self.root``."""
        self.root.mainloop()

# Script entry point: build the chatbot and hand control to run(), which
# calls the Tk main loop. Skipped when the module is imported.
if __name__ == "__main__":
    app = ViviChatbot()
    app.run()

In [1]:
import os
import subprocess
import threading
import tempfile
import uuid
import time
import whisper
import customtkinter as ctk
import tkinter as tk
from tkinter import filedialog, ttk
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import OllamaLLM
from groq import Groq
from PIL import Image, ImageTk
from customtkinter import CTkImage

In [None]:
# ------------------ Utility Functions ------------------
def _parse_srt_time(t):
    """Convert a timestamp into a number of seconds.

    The formats are tried in this order:
      * ``HH:MM:SS,mmm`` (SRT style) or ``HH:MM:SS.fff``
      * ``MM:SS`` / ``MM:SS.fff``
      * a plain number of seconds (``"12.5"`` or a numeric value)

    Args:
        t: The timestamp, normally a string.

    Returns:
        The timestamp expressed in seconds.

    Raises:
        ValueError: If ``t`` matches none of the formats above.
    """
    # Only genuine parsing failures mean "try the next format". A bare
    # ``except:`` would also swallow KeyboardInterrupt and SystemExit.
    # AttributeError/TypeError cover inputs that are not ``str`` (no usable
    # ``split``), which then fall through to the plain ``float`` attempt.
    parse_errors = (ValueError, AttributeError, TypeError)

    try:
        h, m, s_ms = t.split(":")
        if "," in s_ms:
            s, ms = s_ms.split(",")
            return int(h) * 3600 + int(m) * 60 + int(s) + int(ms) / 1000
        return int(h) * 3600 + int(m) * 60 + float(s_ms)
    except parse_errors:
        pass
    try:
        m, s = t.split(":")
        return int(m) * 60 + float(s)
    except parse_errors:
        pass
    try:
        return float(t)
    except (ValueError, TypeError):
        raise ValueError(f"Unrecognized timestamp format: {t}") from None

def format_time(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds before it is split into
    fields. Rounding only the seconds field (``f"{s:06.3f}"``) lets a value
    such as 59.9996 become ``60,000`` without carrying into the minutes,
    which yields an invalid timestamp like ``00:00:60,000``.

    Args:
        seconds: Duration in seconds (int or float), expected to be >= 0.

    Returns:
        The timestamp string, e.g. ``"00:01:02,500"``.
    """
    total_ms = int(round(seconds * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{secs:02},{millis:03}"

# ------------------ FFmpeg Functions ------------------
def concatenate_videos(video_paths, output_filepath):
    """Join video clips into a single file with FFmpeg's concat demuxer.

    Args:
        video_paths: Paths of the clips to join, in playback order.
        output_filepath: Where the concatenated video is written.

    Returns:
        A ``(message, path)`` tuple. ``path`` is ``output_filepath`` on
        success and ``None`` on any failure; ``message`` is a status line
        meant for display. Failures are reported through the tuple rather
        than raised.
    """
    import shutil  # local import: only needed to locate the ffmpeg binary

    list_file_path = None
    try:
        if not video_paths:
            return "❌ Error: No video clips provided.", None

        # Keep honouring the original hard-coded install location, but fall
        # back to the ``ffmpeg`` found on PATH when it is not there.
        ffmpeg_path = "C:\\ffmpeg\\ffmpeg.exe"
        if not os.path.isfile(ffmpeg_path):
            ffmpeg_path = shutil.which("ffmpeg") or "ffmpeg"

        temp_dir = tempfile.gettempdir()
        list_file_path = os.path.join(temp_dir, f"concat_list_{uuid.uuid4().hex[:8]}.txt")
        with open(list_file_path, "w", encoding="utf-8") as f:
            for path in video_paths:
                # Done outside the f-string: a backslash inside an f-string
                # expression is a SyntaxError before Python 3.12.
                normalized = path.replace("\\", "/")
                # Inside a single-quoted concat entry a literal quote has to
                # be written as '\'' (close quote, escaped quote, reopen).
                escaped = normalized.replace("'", "'\\''")
                # The entry needs both its opening and its closing quote; the
                # closing one was missing, leaving every entry unterminated.
                f.write(f"file '{escaped}'\n")

        command = [
            ffmpeg_path, "-y", "-f", "concat", "-safe", "0",
            "-i", list_file_path, "-c", "copy", output_filepath
        ]
        result = subprocess.run(command, capture_output=True, text=True)
        if result.returncode != 0:
            return f"❌ FFmpeg error:\n{result.stderr}", None
        return f"✅ Concatenated video saved to: {output_filepath}", output_filepath
    except Exception as e:
        return f"❌ Exception during concatenation: {str(e)}", None
    finally:
        # Remove the list file on every path, including when ffmpeg could not
        # be started; cleanup problems must not mask the real result.
        if list_file_path and os.path.exists(list_file_path):
            try:
                os.remove(list_file_path)
            except OSError:
                pass

# ------------------ LLM Setup ------------------
# Single OllamaLLM instance (model tag 'gemma3:1B') shared by both chains below.
llm = OllamaLLM(model='gemma3:1B')

# Prompt for free-form chat.
# Placeholders filled at invoke time: {context}, {transcript}, {question}.
general_template = """
You are Vivi, an expert and friendly video assistant chatbot.
You are having an ongoing conversation with the user. You have access to a full transcript of a video. If the user’s question is about the video, answer helpfully and refer to timestamps if relevant.
If the question is general and not related to the video, just respond helpfully like a normal assistant.
---
Conversation History:
{context}
---
Full Transcript of the Video:
{transcript}
---
User:
{question}
---
Vivi:
"""

# Prompt that asks the model for the time ranges relevant to a query.
# Placeholders filled at invoke time: {query}, {transcript}.
# ViviChatbot.send_message scans the reply for lines starting with "- Range: ".
# NOTE(review): the format example shows two "- Range:" lines but only one
# "Relevance:" line - confirm this is the intended output shape.
clipping_template = """
You are an expert video analysis assistant. Given a user query and the transcript of a video with timestamps, identify all timestamp ranges where the video content is relevant to the query. Return the result as a list of timestamp ranges (start and end times in seconds) and a brief explanation of why each range is relevant.

Return timestamps as plain numbers with two decimal places (e.g., 31.00, 45.00) without brackets or other characters. Ensure the format is consistent. Ensure end_time is greater than start_time and both are non-negative.

Query: {query}
Transcript: {transcript}

Return the result in the following format and each range and its explanation must be on separate lines:
- Range: start_time - end_time
- Range: start_time - end_time
  Relevance: [Brief explanation of why this range is relevant to the query]
"""

# Wrap each template in a ChatPromptTemplate and pipe it into the shared model,
# giving one chain for general chat and one for clip selection.
prompt_general = ChatPromptTemplate.from_template(general_template)
prompt_clipping = ChatPromptTemplate.from_template(clipping_template)
chain_general = prompt_general | llm
chain_clipping = prompt_clipping | llm

# ------------------ Vivi GUI Class ------------------
class ViviChatbot:
    def __init__(self):
        """Initialise the chatbot state and build the customtkinter window.

        Widgets are packed top to bottom in this order: scrollable chat area,
        entry row (text box plus Send), the three action buttons and the
        progress bar. A welcome bubble is shown last.
        """
        white = "#FFFFFF"
        navy = "#1a2238"

        # --- Conversation / video state ---
        self.video_path = ""
        self.context = ""
        self.transcript_segments = []
        self.full_transcript_text = ""
        self.audio_process = None

        # --- Global look: light mode with the blue theme ---
        ctk.set_appearance_mode("light")
        ctk.set_default_color_theme("blue")  # or "light-blue" if available

        # --- Main window on a white background ---
        window = ctk.CTk()
        window.configure(fg_color=white)
        window.title("ClipQuery- Video Chatbot")
        window.geometry("900x650")
        self.root = window

        # --- Chat area; scrollable_frame is a second name for the same widget ---
        chat_area = ctk.CTkScrollableFrame(window, width=850, height=450, fg_color=white)
        chat_area.pack(padx=10, pady=(20, 10), fill="both", expand=True)
        self.chat_frame = chat_area
        self.scrollable_frame = chat_area

        # --- Entry row: text box on the left, Send on the right ---
        self.entry_frame = ctk.CTkFrame(window, fg_color=white)
        self.entry_frame.pack(pady=(0, 10), fill="x", padx=20)

        self.user_entry = ctk.CTkEntry(self.entry_frame, placeholder_text="Ask Vivi...", width=600)
        self.user_entry.pack(side="left", fill="x", expand=True, padx=(0, 10))
        # Pressing Enter behaves like clicking Send.
        self.user_entry.bind("<Return>", lambda _event: self.send_message())

        self.send_btn = ctk.CTkButton(
            self.entry_frame,
            text="Send",
            command=self.send_message,
            width=80,
            fg_color=navy,
        )
        self.send_btn.pack(side="right")

        # --- Action buttons: upload, play input, play final ---
        self.btn_frame = ctk.CTkFrame(window, fg_color=white)
        self.btn_frame.pack(pady=(0, 10))

        action_style = dict(fg_color=navy, width=160, height=40, font=("Arial", 16))

        self.upload_btn = ctk.CTkButton(
            self.btn_frame, text="📂 Upload Video", command=self.browse_video, **action_style
        )
        self.upload_btn.pack(side="left", padx=5)

        self.input_video_btn = ctk.CTkButton(
            self.btn_frame, text="▶️ Input Video", command=self.play_input_video, **action_style
        )
        self.input_video_btn.pack(side="left", padx=5)

        self.final_video_btn = ctk.CTkButton(
            self.btn_frame, text="🎬 Final Video", command=self.play_final_video, **action_style
        )
        self.final_video_btn.pack(side="left", padx=5)

        # --- Progress bar ---
        self.progress_var = tk.DoubleVar()
        self.progress_bar = ttk.Progressbar(
            window,
            orient="horizontal",
            mode="determinate",
            variable=self.progress_var,
        )
        self.progress_bar.pack(fill="x", padx=20, pady=(0, 10))

        # --- Welcome bubble ---
        self.display_message("system", "🎬 Welcome to Vivi!\nType 'exit' to quit.\nUse format `Video clipping:<your query>` for semantic editing.")

    def typewriter_effect(self, sender, message):
        """Add a chat bubble and reveal ``message`` one word at a time.

        Bubbles from "vivi" (case-insensitive) sit on the left in the plum
        colour; every other sender sits on the right in dark blue. The view
        is scrolled to the bottom after each word and the method sleeps
        0.04 s between words.

        Args:
            sender: Name shown in bold above the text.
            message: Text to reveal; it is split on whitespace.
        """
        from_vivi = sender.lower() == "vivi"
        text_colour = "#F0F0F0"

        # Full-width row that lets the bubble hug one side.
        row = ctk.CTkFrame(self.scrollable_frame, fg_color="#FFFFFF")
        row.pack(fill="x", padx=10, pady=5)

        bubble = ctk.CTkFrame(
            row,
            fg_color="#3a0e2e" if from_vivi else "#1B263B",
            corner_radius=12,
        )
        bubble.pack(anchor="w" if from_vivi else "e", padx=5)

        # Sender name above the text.
        header = ctk.CTkLabel(
            bubble,
            text=sender + ":",
            font=("Arial", 12, "bold"),
            text_color=text_colour,
        )
        header.pack(anchor="w", padx=8, pady=(6, 0))

        # Starts empty; filled in word by word below.
        body = ctk.CTkLabel(
            bubble,
            text="",
            font=("Arial", 18, "normal"),
            wraplength=500,
            justify="left",
            text_color=text_colour,
        )
        body.pack(anchor="w", padx=8, pady=(0, 8))

        def stick_to_bottom():
            # Flush pending geometry work, then jump to the end of the chat.
            self.root.update_idletasks()
            self.chat_frame._parent_canvas.yview_moveto(1.0)

        stick_to_bottom()

        shown = ""
        for token in message.split():
            shown += token + " "
            body.configure(text=shown)
            stick_to_bottom()
            time.sleep(0.04)
    
    def display_message(self, sender, message):
        """Add a complete chat bubble containing ``message``.

        Bubbles from "user" (case-insensitive) are right-aligned in navy;
        every other sender is left-aligned in plum.

        Args:
            sender: Who the message is from; only used to pick side and colour.
            message: Content of the bubble.
        """
        from_user = sender.lower() == "user"

        # Full-width row that lets the bubble hug one side.
        row = ctk.CTkFrame(self.chat_frame, fg_color="#FFFFFF")
        row.pack(fill="x", padx=10, pady=5)

        bubble = ctk.CTkFrame(
            row,
            corner_radius=15,
            fg_color="#1a2238" if from_user else "#3a0e2e",
        )
        bubble.pack(anchor="e" if from_user else "w", padx=5)

        text_label = ctk.CTkLabel(
            bubble,
            text=f"{message}",
            wraplength=700,
            font=("Arial", 18, "normal"),
            justify="left",
            text_color="#F0F0F0",
        )
        text_label.pack(anchor="w", padx=10, pady=5)


    def clip_video(self, start_time, end_time):
        try:
            if start_time >= end_time:
                self.display_message("system", f"⚠ Invalid clip range: {start_time} >= {end_time}")
                return "Invalid time range", None
            clip_filename = f"clip_{uuid.uuid4().hex[:8]}.mp4"
            output_path = os.path.join(tempfile.gettempdir(), clip_filename)
            subprocess.run([
                "ffmpeg", "-y", "-ss", str(start_time), "-to", str(end_time),
                "-i", self.video_path, "-c", "copy", output_path
            ], check=True)
            return f"🎬 Video clip saved to {output_path}", output_path
        except Exception as e:
            self.display_message("system", f"❌ Error clipping video: {e}")

    def transcribe_video(self, video_path):
        """Return the timestamped transcript and segment list for a video.

        Results are cached next to the video as ``<name>.txt`` (one
        ``[start - end] text`` line per segment) and ``<name>.srt``. When both
        files exist they are loaded; otherwise Whisper's "medium" model is run
        and both files are written.

        Args:
            video_path: Path of the video to transcribe.

        Returns:
            ``(timed_transcript, segments)`` where ``timed_transcript`` is a
            string and ``segments`` is a list of dicts with the keys
            ``"start"``, ``"end"`` (seconds) and ``"text"``.
        """
        folder, filename = os.path.split(video_path)
        stem = os.path.splitext(filename)[0]
        txt_path = os.path.join(folder, f"{stem}.txt")
        srt_path = os.path.join(folder, f"{stem}.srt")

        # ---- Cached result: rebuild the segments from the SRT file ----
        if os.path.exists(txt_path) and os.path.exists(srt_path):
            with open(srt_path, "r", encoding="utf-8") as srt_file:
                srt_blocks = srt_file.read().strip().split("\n\n")
            with open(txt_path, "r", encoding="utf-8") as txt_file:
                cached_text = txt_file.read().strip()

            cached_segments = []
            for srt_block in srt_blocks:
                # Block layout: index line, "start --> end" line, text line(s).
                rows = srt_block.split("\n")
                if len(rows) < 3:
                    continue
                stamps = rows[1].split(" --> ")
                cached_segments.append({
                    "start": _parse_srt_time(stamps[0]),
                    "end": _parse_srt_time(stamps[1]),
                    "text": " ".join(rows[2:]),
                })
            return cached_text, cached_segments

        # ---- No cache: run Whisper, then write both cache files ----
        self.display_message("system", "🔍 Running Whisper transcription...")
        result = whisper.load_model("medium").transcribe(video_path, verbose=True)

        segments = [
            {"start": piece['start'], "end": piece['end'], "text": piece['text']}
            for piece in result['segments']
        ]
        timed_transcript = "".join(
            f"[{piece['start']:.2f} - {piece['end']:.2f}] {piece['text']}\n"
            for piece in segments
        )

        with open(srt_path, "w", encoding="utf-8") as srt_file:
            for number, piece in enumerate(segments, start=1):
                srt_file.write(f"{number}\n{format_time(piece['start'])} --> {format_time(piece['end'])}\n{piece['text']}\n\n")
        with open(txt_path, "w", encoding="utf-8") as txt_file:
            txt_file.write(timed_transcript)

        return timed_transcript, segments

    def send_message(self):
        user_input = self.user_entry.get()
        if user_input.strip().lower() == "exit":
            self.root.destroy()
            return
        self.display_message("user", user_input)
        self.user_entry.delete(0, tk.END)
        self.user_entry.configure(state="disabled")
        self.send_btn.configure(state="disabled")

        def run_bot():
            if user_input.lower().startswith("video clipping:"):
                query = user_input[len("video clipping:"):].strip()
                response = chain_clipping.invoke({"query": query, "transcript": self.transcript_segments})
                video_clips = []
                for line in response.strip().split("\n"):
                    if line.startswith("- Range: "):
                        parts = line[len("- Range: "):].split(" - ")
                        try:
                            start = float(parts[0])
                            end = float(parts[1])
                            msg, clip_path = self.clip_video(start, end)
                            #self.display_message("Vivi", msg)
                            if clip_path:
                                video_clips.append(clip_path)
                        except:
                            continue
                if video_clips:
                    msg, out = concatenate_videos(video_clips, output_filepath=os.path.join(os.path.dirname(self.video_path), "final_output.mp4"))
                    self.display_message("Vivi", msg)
            else:
                response = chain_general.invoke({
                    "context": self.context,
                    "question": user_input,
                    "transcript": self.transcript_segments
                })
                self.typewriter_effect("Vivi", response)
                self.context += f"\nUser: {user_input}\nAI: {response}\n"
            self.user_entry.configure(state="normal")
            self.send_btn.configure(state="normal")
            self.user_entry.focus()

        threading.Thread(target=run_bot).start()

    def browse_video(self):
        """Let the user pick a video and transcribe it on a worker thread.

        Cancelling the dialog leaves the currently loaded video untouched.
        Transcription failures are shown as a "system" chat message instead
        of ending the worker thread silently.
        """
        selected = filedialog.askopenfilename(filetypes=[("Video Files", "*.mp4 *.mov *.avi")])
        if not selected:
            # Dialog cancelled. Assigning the empty result to self.video_path
            # (as before) would discard the video that is already loaded.
            return
        self.video_path = selected
        self.display_message("system", f"📁 Selected video: {os.path.basename(selected)}")

        def process_video():
            try:
                # Use the captured path so a later selection cannot change
                # which file this worker transcribes.
                transcript, segments = self.transcribe_video(selected)
            except Exception as e:
                self.display_message("system", f"❌ Transcription failed: {e}")
                return
            self.full_transcript_text, self.transcript_segments = transcript, segments
            self.display_message("system", "✅ Transcription completed!")
            self.progress_var.set(0)

        threading.Thread(target=process_video).start()

    def play_input_video(self):
        if not self.video_path:
            self.display_message("system", "⚠️ No video uploaded yet. Upload a video first.")
            return
        if self.audio_process and self.audio_process.poll() is None:
            self.audio_process.terminate()
        try:
            self.audio_process = subprocess.Popen(["ffplay", "-autoexit", "-loglevel", "quiet", self.video_path])
        except Exception as e:
            self.display_message("system", f"❌ Failed to play input video: {e}")

    def play_final_video(self):
        final_output = os.path.join(os.path.dirname(self.video_path), "final_output.mp4")
        if not os.path.exists(final_output):
            self.display_message("system", "⚠️ No final video available. Generate a clipped video first.")
            return
        if self.audio_process and self.audio_process.poll() is None:
            self.audio_process.terminate()
        try:
            self.audio_process = subprocess.Popen(["ffplay", "-autoexit", "-loglevel", "quiet", final_output])
        except Exception as e:
            self.display_message("system", f"❌ Failed to play final video: {e}")

    def run(self) -> None:
        """Start the chatbot by entering the main loop of ``self.root``."""
        self.root.mainloop()

# Script entry point: build the chatbot and hand control to run(), which
# calls the window's main loop. Skipped when the module is imported.
if __name__ == "__main__":
    app = ViviChatbot()
    app.run()