In [19]:
!pip install -q langchain langchain-core langchain-huggingface youtube-transcript-api langchain-community faiss-cpu tiktoken gradio

[0m

In [4]:
# from langchain_core.output_parsers import StrOutputParser
from langchain_huggingface import HuggingFacePipeline, HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain_core.prompts import PromptTemplate
# from langchain.retrievers.contextual_compression import ContextualCompressionRetriever
# from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel
# from langchain import LlamaCpp
import re
import gradio as gr

# Indexing -> a)document ingestion

In [5]:
def extract_video_id(url):
    """Return the 11-character YouTube video ID contained in ``url``.

    The ID is looked up after ``v=`` (``watch?v=<id>``) or after a ``/``
    (``youtu.be/<id>``, ``/embed/<id>``, ``/shorts/<id>``, ...).

    Args:
        url: The URL text to inspect. Non-string values are rejected.

    Returns:
        The video ID as a string, or ``None`` when no ID is found.
    """
    # Gradio or other callers may hand over None; treat it as "no ID" instead of
    # letting re.search raise TypeError.
    if not isinstance(url, str):
        return None

    # The negative lookahead requires the ID to be exactly 11 characters long.
    # Without it, the first 11 characters of any longer path segment
    # (e.g. "/channel/UCxxxxxxxxxxxxxxxx") would be returned as a bogus ID.
    pattern = r"(?:v=|\/)([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    match = re.search(pattern, url)
    return match.group(1) if match else None

def extract_transcript(url):
    """Fetch the English transcript of a YouTube video as one plain-text string.

    Args:
        url: A YouTube URL; the video ID is taken from it with ``extract_video_id``.

    Returns:
        The transcript segments joined with single spaces. On failure a
        human-readable message is returned instead of raising:
        ``"❌ Invalid YouTube URL."`` when no video ID can be extracted, and
        ``"No captions available for this video."`` when captions are disabled
        or no English transcript exists.
    """
    # Imported locally so this cell does not depend on editing the import cell.
    from youtube_transcript_api import NoTranscriptFound

    video_id = extract_video_id(url)
    if not video_id:
        return "❌ Invalid YouTube URL."

    try:
        # Only the English transcript is requested.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api releases: static helper returning a
            # list of {"text", "start", "duration"} dicts.
            segments = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])
        else:
            # Newer releases dropped the static helper in favour of an instance
            # API; to_raw_data() yields the same list-of-dicts shape.
            segments = YouTubeTranscriptApi().fetch(video_id, languages=["en"]).to_raw_data()
    except (TranscriptsDisabled, NoTranscriptFound):
        # NoTranscriptFound covers videos that have captions but none in English;
        # previously that case escaped as an unhandled exception.
        return "No captions available for this video."

    # Flatten the timed segments to plain text.
    return " ".join(segment["text"] for segment in segments)

#Indexing -> b)text splitting

In [6]:
# Text splitter for the indexing step: chunk_size=1000 with chunk_overlap=200.
splitter = RecursiveCharacterTextSplitter(chunk_overlap=200, chunk_size=1000)

# Indexing -> c)Embedding generation and d)storing in vector store

In [7]:
# Name of the pre-trained sentence-transformers model used for the embeddings.
modelPath = "sentence-transformers/all-MiniLM-l6-v2"

# Encoding options: 'normalize_embeddings' is set to True.
encode_kwargs = dict(normalize_embeddings=True)

# Build the HuggingFaceEmbeddings instance from the two settings above.
embeddings = HuggingFaceEmbeddings(encode_kwargs=encode_kwargs, model_name=modelPath)

In [8]:
# vector_store.index_to_docstore_id

# Retrieval

In [9]:
# base_retriever = vector_store.as_retriever(
#     search_type="mmr",
#     search_kwargs={"k": 3,"lambda_mult": 0.5}
# )

# Augmentation

In [10]:
# Text-generation LLM loaded by model id; the generation options
# (repetition penalty, new-token limit) are passed through pipeline_kwargs.
llm = HuggingFacePipeline.from_model_id(
    task="text-generation",
    model_id="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    pipeline_kwargs=dict(max_new_tokens=600, repetition_penalty=1.2),
)

Device set to use cuda:0


In [13]:
# Question-answering prompt with two placeholders: {context} and {question}.
# The template ends with "Answer:" so the model's continuation is the answer.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=(
        "You are an assistant for question-answering tasks.\n"
        "    Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know. "
        "Use four sentences maximum and keep the answer concise.\n"
        "Context: {context}\n"
        "Question: {question}\n"
        "Answer:"
    ),
)


In [14]:
def format_context(context):
    """Join the ``page_content`` of every item in ``context`` with blank lines.

    Args:
        context: An iterable of objects exposing a ``page_content`` attribute.

    Returns:
        One string with the contents separated by ``"\\n\\n"``; an empty string
        when ``context`` is empty.
    """
    passages = []
    for document in context:
        passages.append(document.page_content)
    return "\n\n".join(passages)

In [15]:
def get_answer(llmresponse):
    """Pull the answer text out of the raw LLM output string.

    Only the text after the last ``"Question:"`` marker is considered. If that
    text contains ``"Answer:"``, whatever follows the last ``"Answer:"`` is
    returned; otherwise whatever follows the last ``"?\\n"`` is returned.
    The result is stripped of surrounding whitespace.
    """
    # rpartition(...)[2] is the text after the last marker, or the whole string
    # when the marker is absent.
    after_question = llmresponse.rpartition("Question:")[2].strip()

    _, answer_marker, after_answer = after_question.rpartition("Answer:")
    if answer_marker:
        return after_answer.strip()

    # No "Answer:" label: fall back to the text after the question mark line.
    return after_question.rpartition("?\n")[2].strip()

# Generation

In [11]:
# # fetch transcript
# transcript = extract_transcript("https://www.youtube.com/watch?v=p0FERNkpyHE")

# #create chunks
# chunks = splitter.create_documents([transcript])

# # vectorize and store
# vector_store = FAISS.from_documents(chunks, embeddings)

# # retrieval
# base_retriever = vector_store.as_retriever(
#     search_type="mmr",
#     search_kwargs={"k": 3,"lambda_mult": 0.5}
# )

# # creating a chain
# parallel_chain = RunnableParallel(
#     {
#         "context": base_retriever|RunnableLambda(format_context),
#         "question": RunnablePassthrough()
#     }
# )
# main_chain = parallel_chain | prompt | llm | RunnableLambda(get_answer)

In [16]:
def generate(url, question):
    """Answer ``question`` about the YouTube video at ``url``.

    Steps: fetch the transcript, split it into chunks, index the chunks in a
    FAISS vector store, retrieve with MMR (k=3), fill the prompt, run the LLM
    and extract the answer text from its output.

    Args:
        url: YouTube video URL.
        question: The user's question about the video.

    Returns:
        The answer string, or a human-readable message when the question is
        blank or no transcript could be obtained.
    """
    if not question or not question.strip():
        return "Please enter a question."

    transcript = extract_transcript(url)

    # extract_transcript reports failures as plain strings. Without this check
    # the error message itself would be embedded, indexed and handed to the LLM
    # as if it were the video's transcript.
    transcript_errors = ("❌ Invalid YouTube URL.", "No captions available for this video.")
    if transcript in transcript_errors:
        return transcript

    chunks = splitter.create_documents([transcript])
    if not chunks:
        # An empty transcript produces no chunks, and there is nothing to index.
        return "No captions available for this video."

    vector_store = FAISS.from_documents(chunks, embeddings)
    base_retriever = vector_store.as_retriever(
        search_type="mmr",
        search_kwargs={"k": 3, "lambda_mult": 0.5}
    )

    # The same input (the question) feeds both branches: one retrieves and
    # formats the context, the other passes the question through unchanged.
    parallel_chain = RunnableParallel(
        {
            "context": base_retriever | RunnableLambda(format_context),
            "question": RunnablePassthrough()
        }
    )
    main_chain = parallel_chain | prompt | llm | RunnableLambda(get_answer)
    return main_chain.invoke(question)

In [17]:
# generate(url ="https://www.youtube.com/watch?v=p0FERNkpyHE" ,question ="gice a brief info about what is discussed in this? ")

'This is a gentic rag. The way it works compared to traditional Rag is with a set of genetic elements represented by their embeddings. These embeddings represent the content of each element. Then together they form a graph of relationships between elements, and the edges in the graph correspond to the relationships between them. Intraday volatility trading strategies, and now this graph represents three dimensions -- element type (like text), positional attributes, such as relevance, and temporal characteristics, such as time since publication and relative age. By working with these interconnected embeddings, we can learn patterns, clusters, and connections across vast amounts of data. With knowledge graphs as core components of the system, users can easily integrate and visualize knowledge from multiple sources, and the system itself helps understand and make relevant decisions.'

In [22]:
# Gradio front end: two input textboxes, one output textbox and a button that
# runs generate(url, question).
with gr.Blocks(theme=gr.themes.Citrus()) as demo:
    gr.Markdown(value="## 🎥 YouTube QA with TinyLLaMA")

    url_input = gr.Textbox(label="Enter YouTube URL")
    question_input = gr.Textbox(label="Your Question")
    output = gr.Textbox(label="Answer")
    btn = gr.Button(value="Get Answer")

    # On click: pass both textbox values to generate and show its return value.
    btn.click(generate, [url_input, question_input], output)

# share=True asks Gradio for a public share link.
demo.launch(share=True)

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://5f07d332cda19690b2.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


