# In [None]:
# Install the dependencies first:
#   pip install -qU langchain langchain-community langchain-core langchain-google-genai langsmith youtube-transcript-api langgraph

from youtube_transcript_api import YouTubeTranscriptApi
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain_core.vectorstores import InMemoryVectorStore
from langsmith import traceable
from langgraph.graph import StateGraph, START
from typing_extensions import TypedDict, List
import os

# Credentials and tracing configuration.
# NOTE: "****" / "***" are placeholders. Keep real keys out of source control
# and export GOOGLE_API_KEY / LANGSMITH_API_KEY in the environment instead.
# setdefault() only fills in a value when the variable is missing, so a key
# that is already configured is never overwritten by a placeholder.
os.environ.setdefault("GOOGLE_API_KEY", "****")
os.environ.setdefault("LANGSMITH_API_KEY", "***")
os.environ["LANGSMITH_TRACING"] = "true"



# Step 1: Load YouTube transcript https://www.youtube.com/watch?v=pp59n0So-XE
video_id = "pp59n0So-XE"  # Replace with your video ID
# youtube-transcript-api 1.x replaced the static get_transcript() helper with
# an instance-level fetch(); the static helper was later removed. Prefer the
# new API when it exists and fall back to the legacy call on older installs.
# to_raw_data() yields the same list-of-dicts shape the legacy call returned.
if hasattr(YouTubeTranscriptApi, "fetch"):
    transcript = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
else:
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
# Concatenate the caption snippets, one per line, into a single document.
text = "\n".join(entry["text"] for entry in transcript)
doc = Document(page_content=text)

# Step 2: Split the transcript document into overlapping chunks
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
chunks = splitter.split_documents([doc])

# Step 3: Embed the chunks and keep them in an in-memory vector store
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = InMemoryVectorStore(embedding=embeddings)
_ = vectorstore.add_documents(documents=chunks)

# Step 4: Define state
class State(TypedDict):
    """State passed between the graph nodes of the question-answering pipeline."""

    # The user's question; read by both retrieve() and generate().
    question: str
    # Documents returned by retrieve() and consumed by generate().
    context: List[Document]
    # Text of the model's reply, written by generate().
    answer: str

# Step 5: Retrieval
def retrieve(state: State):
    """Look up transcript chunks similar to the question.

    Runs a similarity search on the module-level ``vectorstore`` using
    ``state["question"]`` and returns the matches as a partial state update
    under the ``"context"`` key.
    """
    query = state["question"]
    matches = vectorstore.similarity_search(query)
    return dict(context=matches)

# Step 6: Generation
@traceable(name="generate_answer")
def generate(state: State):
    """Answer the question from the retrieved transcript chunks.

    Joins the page content of every document in ``state["context"]`` with
    blank lines, sends it together with ``state["question"]`` to the
    ``gemini-2.0-flash`` chat model, and returns the reply's content as a
    partial state update under the ``"answer"`` key.
    """
    context = "\n\n".join([chunk.page_content for chunk in state["context"]])

    system_message = (
        "system",
        "You are a helpful assistant that answers questions based on YouTube transcripts.",
    )
    human_message = (
        "human",
        f"Context:\n{context}\n\nQuestion: {state['question']}",
    )

    model = ChatGoogleGenerativeAI(model="gemini-2.0-flash")
    reply = model.invoke([system_message, human_message])
    return dict(answer=reply.content)

# Step 7: Wire the nodes into a graph: START -> retrieve -> generate
graph = StateGraph(State)
graph.add_sequence([retrieve, generate])
graph.add_edge(START, "retrieve")
app = graph.compile()

# Step 8: Ask a question and print the generated answer
question_input = {"question": "what was jealous?"}
response = app.invoke(question_input)
print(response["answer"])