In [68]:
from youtube_transcript_api import YouTubeTranscriptApi


In [69]:
def get_transcript(video_id):
    """Fetch an English transcript for a YouTube video.

    Lookup order:
      1. A manually created English transcript ("en", "en-US", "en-GB").
      2. An auto-generated transcript in Hindi or English. A non-English
         transcript is translated to English before it is fetched.

    Args:
        video_id: The YouTube video ID handed to
            ``YouTubeTranscriptApi.list_transcripts``.

    Returns:
        The result of ``fetch()`` on the selected transcript, or ``None`` when
        the auto-generated fallback cannot be found, translated or fetched
        (the error is printed in that case).

    Raises:
        Whatever ``YouTubeTranscriptApi.list_transcripts`` raises; that call is
        intentionally left outside the fallback handling.
    """
    transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

    # First choice: a human-written English transcript.
    try:
        manual_transcript = transcript_list.find_manually_created_transcript(["en", "en-US", "en-GB"])
        return manual_transcript.fetch()
    except Exception:
        # Catch `Exception`, not everything: a bare `except:` would also
        # swallow KeyboardInterrupt/SystemExit and make the cell uninterruptible.
        pass

    # Fallback: an auto-generated transcript (Hindi or English).
    try:
        auto_transcript = transcript_list.find_generated_transcript(["hi", "en", "en-US", "en-GB"])

        # Translate only when the transcript is not already English. Compare the
        # primary language subtag so "en-US"/"en-GB" are recognised as English
        # and are not pushed through a pointless translate step.
        if auto_transcript.language_code.split("-")[0] != "en":
            auto_transcript = auto_transcript.translate("en")
        return auto_transcript.fetch()
    except Exception as e:
        print(f"Error: {e}")
        return None

In [70]:

# ID of the YouTube video to process; get_transcript() passes it to the transcript API.
video_id = "8mk85fyzevc"
# NOTE: get_transcript() returns None when no transcript could be fetched,
# so `transcript` may be None here.
transcript = get_transcript(video_id)


In [71]:
# get_transcript() returns None when no transcript could be fetched. Fail here
# with a clear message instead of an opaque "'NoneType' is not iterable".
if transcript is None:
    raise ValueError("No transcript available: get_transcript() returned None.")

# Flatten the transcript into one text blob, one line per snippet, keeping the
# start time and duration next to the text so they survive chunking.
# Built with a single join rather than repeated `+=` on a growing string.
transcript_data = "".join(
    f"At starting timestamp {t.start} and till the duration of {t.duration} Text = {t.text}\n"
    for t in transcript
)

print(transcript_data)

At starting timestamp 0.719 and till the duration of 4.961 Text = Yes, welcome to Chai Aur Code,
At starting timestamp 3.6 and till the duration of 3.84 Text = our unique YouTube channel where
At starting timestamp 5.68 and till the duration of 4.24 Text = we talk about coding but we
At starting timestamp 7.44 and till the duration of 4.56 Text = do it leisurely, we are not in any hurry, we
At starting timestamp 9.92 and till the duration of 4.719 Text = discuss things comfortably and in
At starting timestamp 12.0 and till the duration of 4.88 Text = today's video, I will
At starting timestamp 14.639 and till the duration of 4.161 Text = take you to that corner of Python where
At starting timestamp 16.88 and till the duration of 4.08 Text = till now you used to write Python in whatever way you were writing, whether you were
At starting timestamp 18.8 and till the duration of 3.76 Text = writing for learning, writing for data structures,
At starting timestamp 20.96 and till the duration

In [72]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [73]:
# Splitter used to cut the flattened transcript into overlapping chunks.
# chunk_size / chunk_overlap are measured with the splitter's length function
# (characters by default, per the LangChain docs); consecutive chunks share up
# to 100 units of overlap.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=800,
    chunk_overlap=100
)

In [74]:
# Split the transcript text into chunks; `split` is the list of chunk strings
# that gets embedded and stored below.
split = text_splitter.split_text(transcript_data)

In [88]:
from langchain_chroma import Chroma
from langchain_google_genai import GoogleGenerativeAIEmbeddings

In [89]:
# Embedding client used to vectorise the transcript chunks.
# NOTE(review): no API key is passed here — presumably it is read from the
# environment by the library; confirm before running on a fresh kernel.
embedder = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001"
)


In [92]:
# Embed the chunks and store them in the persistent Chroma collection "tr1"
# under ./yt_db.
#
# Explicit, deterministic ids ("<video_id>-<chunk index>") make this cell safe
# to re-run: without ids, fresh random ids are generated on every call, so each
# re-run appends another copy of every chunk to the persisted collection and
# the retriever starts returning duplicates. With stable ids the existing
# entries are overwritten instead (langchain_chroma upserts by id).
store = Chroma.from_texts(
    texts=split,
    embedding=embedder,
    ids=[f"{video_id}-{i}" for i in range(len(split))],
    collection_name="tr1",
    persist_directory="yt_db",
)


In [93]:
# Retriever over the vector store: plain similarity search returning the
# top 8 chunks per query.
retriver = store.as_retriever(
    search_type = "similarity",
    search_kwargs={
        "k": 8,
    }
)

In [94]:
from langchain.chains import create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_google_genai import ChatGoogleGenerativeAI


In [95]:
# Chat model that writes the final answer; temperature 0.5 and a 1500-token
# cap on the generated response.
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro",temperature=0.5, max_tokens=1500)

In [96]:
# System message template. The {context} placeholder is the variable the
# stuff-documents chain fills with the retrieved transcript chunks.
system_prompt = (
    "You are a helpful assistant that answers questions from the youtube video transcript. {context}"
)

In [97]:
# Two-message chat prompt: the system instructions (with {context}) followed
# by the user's question, supplied as {input} when the chain is invoked.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]

)

In [98]:
# Chain that inserts the retrieved documents into the prompt and calls the LLM.
question_answer_chain = create_stuff_documents_chain(llm, prompt) # connect llm and prompt


In [99]:
# Full RAG pipeline: the retriever fetches relevant chunks for the input,
# then the question-answer chain generates the reply from them.
retrieve_chain = create_retrieval_chain(  # connect retriever/db and chain
    retriver,
    question_answer_chain,
)

In [100]:
# Question to ask about the video; passed to the chain as its "input".
query = "What is uv in python and who is teaching it?"

In [101]:
# Run the retrieval chain on the question; the generated text is read from
# the "answer" key of the returned mapping.
response = retrieve_chain.invoke({"input": query})
print(response['answer'])

Based on the transcript, "uv" seems to be a tool or command-line utility in Python, potentially related to project management and dependency management. It's described as a "very solid foundational project" and is Rust-based, known for its speed.  The speaker is teaching how to use "uv".  Unfortunately, the transcript doesn't provide the speaker's name.
