In [82]:
import os
from dotenv import load_dotenv

In [83]:
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

In [96]:
from youtube_transcript_api import (
    YouTubeTranscriptApi,
    NoTranscriptFound,
    TranscriptsDisabled
)
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings , ChatOpenAI
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate


print("All imports working ✅")

All imports working ✅


In [87]:
import youtube_transcript_api
print(youtube_transcript_api.__file__)
print(dir(YouTubeTranscriptApi))

/Users/vansajrawat/Desktop/QuickTube/venv/lib/python3.14/site-packages/youtube_transcript_api/__init__.py
['__class__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__firstlineno__', '__format__', '__ge__', '__getattribute__', '__getstate__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__static_attributes__', '__str__', '__subclasshook__', '__weakref__', 'fetch', 'list']


### Step 1 INdexing (document loader)


In [88]:
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound

video_id = "fNk_zzaMoSs"
ytt_api = YouTubeTranscriptApi()

try:
    data = ytt_api.fetch(video_id, languages=["en"])
except NoTranscriptFound:
    data = ytt_api.fetch(video_id, languages=["hi"])
print(data)
transcript = " ".join(item.text for item in data)

FetchedTranscript(snippets=[FetchedTranscriptSnippet(text='[Translated by Grant Sanderson. Submit corrections at criblate.com]', start=0.0, duration=10.92), FetchedTranscriptSnippet(text='The fundamental, root-of-it-all building block for linear algebra is the vector.', start=10.92, duration=4.3), FetchedTranscriptSnippet(text="So it's worth making sure that we're all on the same page about what exactly a vector is.", start=15.72, duration=4.12), FetchedTranscriptSnippet(text='You see, broadly speaking, there are three distinct but related ideas about vectors,', start=20.38, duration=3.851), FetchedTranscriptSnippet(text="which I'll call the physics student perspective,", start=24.231, duration=2.247), FetchedTranscriptSnippet(text="the computer science student perspective, and the mathematician's perspective.", start=26.478, duration=3.622), FetchedTranscriptSnippet(text='The physics student perspective is that vectors are arrows pointing in space.', start=30.88, duration=3.52), Fetch

### text splitter (recursive character)

In [89]:
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
chunks = splitter.create_documents([transcript])

In [90]:
print(chunks[0])

page_content='[Translated by Grant Sanderson. Submit corrections at criblate.com] The fundamental, root-of-it-all building block for linear algebra is the vector. So it's worth making sure that we're all on the same page about what exactly a vector is. You see, broadly speaking, there are three distinct but related ideas about vectors, which I'll call the physics student perspective, the computer science student perspective, and the mathematician's perspective. The physics student perspective is that vectors are arrows pointing in space. What defines a given vector is its length and the direction it's pointing, but as long as those two facts are the same, you can move it all around, and it's still the same vector. Vectors that live in the flat plane are two-dimensional, and those sitting in broader space that you and I live in are three-dimensional. The computer science perspective is that vectors are ordered lists of numbers. For example, let's say you were doing some analytics about 

## embedding and storing in vector database

In [91]:
embedding = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

vectorstore = FAISS.from_documents(chunks, embedding)


In [92]:
print(vectorstore)

<langchain_community.vectorstores.faiss.FAISS object at 0x13ef21cd0>


## Reteriver

In [93]:
retriever = vectorstore.as_retriever(search_type = "similarity" , search_kwargs={"k":4})

In [94]:
retriever.invoke("what is linear algebra?")

[Document(id='8c194e1a-4ad1-446f-86ac-d396885f7979', metadata={}, page_content="and abstracted away from however you choose to represent vectors. In truth, it doesn't matter whether you think about vectors as fundamentally being arrows in space, like I'm suggesting you do, that happen to have a nice numerical representation, or fundamentally as lists of numbers that happen to have a nice geometric interpretation. The usefulness of linear algebra has less to do with either one of these views than it does with the ability to translate back and forth between them. It gives the data analyst a nice way to conceptualize many lists of numbers in a visual way, which can seriously clarify patterns in data and give a global view of what certain operations do. And on the flip side, it gives people like physicists and computer graphics programmers a language to describe space and the manipulation of space using numbers that can be crunched and run through a computer. When I do math-y animations, f

## step 3 augmentation

In [97]:
llm = ChatOpenAI(
    model="gpt-4o-mini",   # cheapest OpenAI chat model
    temperature=0
)

In [98]:
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=
        """
        Use the following context to answer the question.
        If the answer is not found in the context, say "I don't know".
        Context: {context}
        Question: {question}
        """
)

In [99]:
question = "What is linear algebra?"
retrieved_docs = retriever.invoke(question)

In [100]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

In [101]:
final_prompt = prompt.invoke({
    "context": context_text,
    "question": question
})

## Genration

In [102]:


answer = llm.invoke(final_prompt)
print(answer.content)

Linear algebra is a branch of mathematics that deals with vectors and the operations that can be performed on them, such as vector addition and scalar multiplication. It provides a framework for understanding and manipulating vectors, which can be represented as arrows in space or as ordered lists of numbers. Linear algebra is useful for data analysis, allowing for the visualization of data patterns, and it serves as a language for describing space and its manipulation in fields like physics and computer graphics. The ability to translate between different representations of vectors is a key aspect of linear algebra.


## BUilding a CHain


In [105]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [107]:
def format_docs(retrieved_docs):
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return context_text

In [108]:
parallel_chain = RunnableParallel({
    'context' : retriever | RunnableLambda(format_docs),
    'question' : RunnablePassthrough()
})

In [None]:
parallel_chain.invoke('what this video about')


and the manipulation of space using numbers that can be crunched and run through a computer. When I do math-y animations, for example, I start by thinking about what's actually going on in space, and then get the computer to represent things numerically, thereby figuring out where to place the pixels on the screen. And doing that usually relies on a lot of linear algebra understanding. So there are your vector basics, and in the next video I'll start getting into some pretty neat concepts surrounding vectors like span, bases, and linear dependence. See you then!

of adding two vectors and multiplying a vector by a number, operations that I'll talk about later on in this video. The details of this view are rather abstract, and I actually think it's healthy to ignore it until the last video of this series, favoring a more concrete setting in the interim. But the reason I bring it up here is that it hints at the fact that the ideas of vector addition and multiplication by numbers will pla

In [114]:
parser = StrOutputParser()
main_chain = parallel_chain | prompt | llm | parser

In [115]:
main_chain.invoke('what is the name of channel')

"I don't know."