## Install libraries

In [6]:
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import OllamaEmbeddings, ChatOllama
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate

In [7]:
embedding_model = OllamaEmbeddings(model="nomic-embed-text:latest")
llm = ChatOllama(model="deepseek-r1:1.5b")

## Step 1a - Indexing (Document Ingestion)

In [13]:
# video_id = "Gfr50f6ZBvo" # only the ID, not full URL
# try:
#     # If you don’t care which language, this returns the “best” one
#     transcript_list = YouTubeTranscriptApi.get_transcript(video_id, languages=["en"])

#     # Flatten it to plain text
#     transcript = " ".join(chunk["text"] for chunk in transcript_list)
#     print(transcript)

# except TranscriptsDisabled:
#     print("No captions available for this video.")

In [None]:
transcript_list

[{'text': 'Imagine you happen across a short movie script that',
  'start': 1.14,
  'duration': 2.836},
 {'text': 'describes a scene between a person and their AI assistant.',
  'start': 3.976,
  'duration': 3.164},
 {'text': "The script has what the person asks the AI, but the AI's response has been torn off.",
  'start': 7.48,
  'duration': 5.58},
 {'text': 'Suppose you also have this powerful magical machine that can take',
  'start': 13.06,
  'duration': 3.92},
 {'text': 'any text and provide a sensible prediction of what word comes next.',
  'start': 16.98,
  'duration': 3.98},
 {'text': 'You could then finish the script by feeding in what you have to the machine,',
  'start': 21.5,
  'duration': 4.006},
 {'text': "seeing what it would predict to start the AI's answer,",
  'start': 25.506,
  'duration': 2.862},
 {'text': 'and then repeating this over and over with a growing script completing the dialogue.',
  'start': 28.368,
  'duration': 4.372},
 {'text': "When you interact with

## Step 1b - Indexing (Text Splitting)

In [None]:
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
chunks = splitter.create_documents([transcript])

In [None]:
len(chunks)

168

In [None]:
chunks[100]

Document(metadata={}, page_content="and and kind of come up with descriptions of the electron clouds where they're gonna go how they're gonna interact when you put two elements together uh and what we try to do is learn a simulation uh uh learner functional that will describe more chemistry types of chemistry so um until now you know you can run expensive simulations but then you can only simulate very small uh molecules very simple molecules we would like to simulate large materials um and so uh today there's no way of doing that and we're building up towards uh building functionals that approximate schrodinger's equation and then allow you to describe uh what the electrons are doing and all materials sort of science and material properties are governed by the electrons and and how they interact so have a good summarization of the simulation through the functional um but one that is still close to what the actual simulation would come out with so what um how difficult is that to ask w

## Step 1c & 1d - Indexing (Embedding Generation and Storing in Vector Store)

In [None]:
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vector_store = FAISS.from_documents(chunks, embeddings)

In [None]:
vector_store.index_to_docstore_id

{0: '7c0f7504-08fa-4f33-8708-d29bfc601f84',
 1: '06b75197-142c-4ac0-88c8-1bf9e865763b',
 2: '6ff986db-4118-4212-a5fe-39f0cf922677',
 3: '447cb65c-cfa2-43aa-ad48-2321366e5762',
 4: '30f35db0-6d97-4a6f-b246-27c4cbcb6d13',
 5: '3fd10381-afd7-427e-958e-89fc520dc260',
 6: '40da6068-0065-4411-9417-11f839de969c',
 7: '733cdcc4-7685-40a2-9164-413b8fc4366d',
 8: '03adb4d2-e52b-4efd-911d-f7ae4caa4eaa',
 9: 'e6997d55-f5b1-4785-bc63-613d57306c68',
 10: 'ec55a375-5d7c-46b5-9d0c-73e8f8810712',
 11: '2b8a87c8-01a6-4390-a751-543668f921d9',
 12: '4a0fa4f7-0abd-4e05-9154-2d90d7f42c9e',
 13: 'f36acaa7-947f-4120-8094-5a0a3521ad9a',
 14: '87dc4c33-6bcd-4b55-b2ca-1e1d2e30e374',
 15: 'bfbfd51c-e879-4c43-8d0f-97cdea1ecf7d',
 16: '237bef6d-bc86-49b6-86a3-f300c6cd96de',
 17: '62de1f42-4306-40c4-aa52-ab93a2d43754',
 18: '22ac2726-8b22-48b7-a457-c9b81799f2a7',
 19: '92d2f60c-80d5-4c12-ad2b-cb30a98851e8',
 20: '23b0db0d-da2e-4ab9-84db-0ae8c5d15a42',
 21: '08bc0031-2f11-468c-a1dd-54449e12db06',
 22: 'eb3b8f90-9ee9-

In [None]:
vector_store.get_by_ids(['2436bdb8-3f5f-49c6-8915-0c654c888700'])

[Document(id='2436bdb8-3f5f-49c6-8915-0c654c888700', metadata={}, page_content='demas establish to support this podcast please check out our sponsors in the description and now let me leave you with some words from edskar dykstra computer science is no more about computers than astronomy is about telescopes thank you for listening and hope to see you next time')]

## Step 2 - Retrieval

In [None]:
retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})

In [None]:
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7fdba029d2d0>, search_kwargs={'k': 4})

In [None]:
retriever.invoke('What is deepmind')

[Document(id='7c0f7504-08fa-4f33-8708-d29bfc601f84', metadata={}, page_content="the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal qu

## Step 3 - Augmentation

In [None]:
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.2)

In [None]:
prompt = PromptTemplate(
    template="""
      You are a helpful assistant.
      Answer ONLY from the provided transcript context.
      If the context is insufficient, just say you don't know.

      {context}
      Question: {question}
    """,
    input_variables = ['context', 'question']
)

In [None]:
question          = "is the topic of nuclear fusion discussed in this video? if yes then what was discussed"
retrieved_docs    = retriever.invoke(question)

In [None]:
retrieved_docs

[Document(id='3c60d0d6-5d01-4dfc-99fc-5c4bb4422cb0', metadata={}, page_content="so we with this problem and we published it in a nature paper last year uh we held the fusion that we held the plasma in specific shapes so actually it's almost like carving the plasma into different shapes and control and hold it there for the record amount of time so um so that's one of the problems of of fusion sort of um solved so i have a controller that's able to no matter the shape uh contain it continue yeah contain it and hold it in structure and there's different shapes that are better for for the energy productions called droplets and and and so on so um so that was huge and now we're looking we're talking to lots of fusion startups to see what's the next problem we can tackle uh in the fusion area so another fascinating place in a paper title pushing the frontiers of density functionals by solving the fractional electron problem so you're taking on modeling and simulating the quantum mechanical 

In [None]:
context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
context_text

"so we with this problem and we published it in a nature paper last year uh we held the fusion that we held the plasma in specific shapes so actually it's almost like carving the plasma into different shapes and control and hold it there for the record amount of time so um so that's one of the problems of of fusion sort of um solved so i have a controller that's able to no matter the shape uh contain it continue yeah contain it and hold it in structure and there's different shapes that are better for for the energy productions called droplets and and and so on so um so that was huge and now we're looking we're talking to lots of fusion startups to see what's the next problem we can tackle uh in the fusion area so another fascinating place in a paper title pushing the frontiers of density functionals by solving the fractional electron problem so you're taking on modeling and simulating the quantum mechanical behavior of electrons yes um can you explain this work and can ai model and\n\n

In [None]:
final_prompt = prompt.invoke({"context": context_text, "question": question})

In [None]:
final_prompt

StringPromptValue(text="\n      You are a helpful assistant.\n      Answer ONLY from the provided transcript context.\n      If the context is insufficient, just say you don't know.\n\n      so we with this problem and we published it in a nature paper last year uh we held the fusion that we held the plasma in specific shapes so actually it's almost like carving the plasma into different shapes and control and hold it there for the record amount of time so um so that's one of the problems of of fusion sort of um solved so i have a controller that's able to no matter the shape uh contain it continue yeah contain it and hold it in structure and there's different shapes that are better for for the energy productions called droplets and and and so on so um so that was huge and now we're looking we're talking to lots of fusion startups to see what's the next problem we can tackle uh in the fusion area so another fascinating place in a paper title pushing the frontiers of density functionals

## Step 4 - Generation

In [None]:
answer = llm.invoke(final_prompt)
print(answer.content)

Yes, the topic of nuclear fusion is discussed in the video. The discussion includes the following points:

1. The speaker mentions a problem in fusion that was published in a Nature paper, where they developed a controller that can hold plasma in specific shapes for a record amount of time, which is crucial for energy production.

2. They talk about collaborating with EPFL in Switzerland, which has a test reactor that they used for their experiments. The focus is on identifying bottleneck problems in fusion and applying AI methods to address those challenges.

3. The speaker emphasizes the potential of AI to help accelerate solutions in energy and climate, specifically mentioning fusion as an area where AI can contribute.

4. They also mention their work on magnetic control of tokamak plasmas using deep reinforcement learning, indicating that they are exploring how AI can assist in controlling high-temperature plasmas for nuclear fusion.


## Building a Chain

In [None]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser

In [None]:
def format_docs(retrieved_docs):
  context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)
  return context_text

In [None]:
parallel_chain = RunnableParallel({
    'context': retriever | RunnableLambda(format_docs),
    'question': RunnablePassthrough()
})

In [None]:
parallel_chain.invoke('who is Demis')

{'context': "the following is a conversation with demus hasabis ceo and co-founder of deepmind a company that has published and builds some of the most incredible artificial intelligence systems in the history of computing including alfred zero that learned all by itself to play the game of gold better than any human in the world and alpha fold two that solved protein folding both tasks considered nearly impossible for a very long time demus is widely considered to be one of the most brilliant and impactful humans in the history of artificial intelligence and science and engineering in general this was truly an honor and a pleasure for me to finally sit down with him for this conversation and i'm sure we will talk many times again in the future this is the lex friedman podcast to support it please check out our sponsors in the description and now dear friends here's demis hassabis let's start with a bit of a personal question am i an ai program you wrote to interview people until i get

In [None]:
parser = StrOutputParser()

In [None]:
main_chain = parallel_chain | prompt | llm | parser

In [None]:
main_chain.invoke('Can you summarize the video')

'The video features a conversation with Demas, who discusses the need for deeper and simpler explanations in physics, particularly in relation to consciousness, life, and gravity. He emphasizes the limitations of the current standard model of physics and the importance of exploring more fundamental explanations. Additionally, he talks about advancements in fusion research, specifically how they have managed to hold plasma in specific shapes for extended periods, which is a significant step in fusion energy production. The conversation touches on the potential of AI in modeling quantum mechanical behavior, particularly regarding electrons. The discussion concludes with a quote about the nature of computer science.'