In [1]:
import os


In [2]:
!pip install streamlit
!pip install -q langchain langchain-openai langchain-community faiss-cpu transformers youtube-transcript-api openai


Collecting streamlit
  Downloading streamlit-1.46.1-py3-none-any.whl.metadata (9.0 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.46.1-py3-none-any.whl (10.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m40.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m27.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hI

In [3]:
# Provide the OpenAI API key WITHOUT storing it in the notebook.
# The key is taken from the existing environment when present; otherwise the
# user is prompted for it (input is not echoed and is never saved to a cell).
# SECURITY: a real key was previously hardcoded in this cell. Any key that has
# been saved in a notebook must be treated as leaked and revoked.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")

In [4]:
!pip install -q gradio youtube-transcript-api langchain transformers faiss-cpu

import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnableParallel, RunnableLambda, RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
from transformers import pipeline



def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of every document, in order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

# Models that are expensive to load are created once and reused across requests
# instead of being re-instantiated for every question.
_EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
_TRANSLATION_MODELS = {
    "Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "German": "Helsinki-NLP/opus-mt-en-de",
}
_MODEL_CACHE = {}


def _fetch_transcript_text(video_id, languages=("hi", "en")):
    """Return the video's transcript as one space-joined string."""
    if hasattr(YouTubeTranscriptApi, "fetch"):
        # youtube-transcript-api >= 1.0: instance API whose snippets expose `.text`.
        snippets = YouTubeTranscriptApi().fetch(video_id, languages=list(languages))
        return " ".join(snippet.text for snippet in snippets)
    # Older releases: static helper that returns a list of dicts.
    entries = YouTubeTranscriptApi.get_transcript(video_id, languages=list(languages))
    return " ".join(entry["text"] for entry in entries)


def _get_embeddings():
    """Return the shared embedding model, loading it on first use."""
    if "embeddings" not in _MODEL_CACHE:
        _MODEL_CACHE["embeddings"] = HuggingFaceEmbeddings(model_name=_EMBEDDING_MODEL_NAME)
    return _MODEL_CACHE["embeddings"]


def _get_translator(language):
    """Return a cached English-to-`language` pipeline, or None if no translation applies."""
    model_name = _TRANSLATION_MODELS.get(language)
    if model_name is None:
        return None
    if model_name not in _MODEL_CACHE:
        _MODEL_CACHE[model_name] = pipeline("translation", model=model_name)
    return _MODEL_CACHE[model_name]


def get_video_answer(video_id, question, language):
    """Answer a question about a YouTube video using only its transcript.

    The transcript (Hindi or English captions) is split into overlapping
    chunks, embedded into an in-memory FAISS index, and the four most similar
    chunks are passed as context to the chat model. The answer is optionally
    machine-translated.

    Args:
        video_id: YouTube video ID. Surrounding whitespace is ignored.
        question: The user's question about the video.
        language: "Hindi" or "German" to translate the answer; any other
            value (including None) returns the model's answer untranslated.

    Returns:
        The answer text, or a human-readable message describing what went
        wrong. All exceptions are caught and reported as message strings so
        the UI always receives text.
    """
    try:
        # 0. Validate input up front so blank fields get a clear message
        #    instead of an opaque downstream error.
        video_id = (video_id or "").strip()
        question = (question or "").strip()
        if not video_id:
            return "Please enter a YouTube video ID."
        if not question:
            return "Please enter a question about the video."

        # 1. Fetch transcript
        transcript = _fetch_transcript_text(video_id)
        if not transcript.strip():
            # An empty transcript would otherwise fail while building the index.
            return "The transcript for this video is empty."

        # 2. Split text
        splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        chunks = splitter.create_documents([transcript])

        # 3. Embed (the embedding model itself is cached across calls)
        vector_store = FAISS.from_documents(chunks, _get_embeddings())
        retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

        # 4. Prompt setup
        prompt = PromptTemplate(
            template="""
            You are a helpful assistant.
            Answer ONLY from the provided transcript context.
            If the context is insufficient, just say you don't know.

            {context}
            Question: {question}
            """,
            input_variables=['context', 'question']
        )

        # 5. LangChain Runnable chain: the question is sent both to the
        #    retriever (to build the context) and straight through to the prompt.
        parallel_chain = RunnableParallel({
            'context': retriever | RunnableLambda(format_docs),
            'question': RunnablePassthrough()
        })

        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)
        main_chain = parallel_chain | prompt | llm | StrOutputParser()

        # 6. Get answer
        answer = main_chain.invoke(question)

        # 7. Translate if needed
        translator = _get_translator(language)
        if translator is not None:
            answer = translator(answer)[0]['translation_text']

        return answer

    except TranscriptsDisabled:
        return "Captions are disabled for this video."
    except NoTranscriptFound as e:
        return f"No transcript found in the requested languages. Available: {e}"
    except Exception as e:
        return f"Error: {e}"

# 🎛️ Launch Gradio interface
# Build the Gradio UI: two free-text fields (video ID, question) plus an
# output-language selector, wired to get_video_answer, then start the app.
qa_inputs = [
    gr.Textbox(label="Enter YouTube Video ID (e.g. dQw4w9WgXcQ)"),
    gr.Textbox(label="Ask your question about the video"),
    gr.Dropdown(choices=["English", "Hindi", "German"], label="Select Output Language"),
]
demo = gr.Interface(
    fn=get_video_answer,
    inputs=qa_inputs,
    outputs="text",
    title="🎥 YouTube Q&A Bot",
    description="Ask questions about any YouTube video using its transcript. Translates answer if needed.",
)
demo.launch()


It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://eeca61760e9af08d48.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


