In [1]:
from dataclasses import dataclass
from typing import List, TypedDict
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_anthropic import ChatAnthropic
from langchain_core.messages import BaseMessage
from langgraph.graph import StateGraph, END
from langchain_community.vectorstores import Chroma
from langchain_openai import OpenAIEmbeddings
from get_embedding_function import get_embedding_function
import os, json
import openai
from dotenv import load_dotenv
# Load variables from a local .env file (if present) into the process
# environment before they are read on the next line.
load_dotenv()
# Fails fast with KeyError when OPENAI_API_KEY is not set.
openai.api_key = os.environ['OPENAI_API_KEY']
# Directory handed to Chroma as `persist_directory` in the cells below.
CHROMA_PATH = "chroma"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Open the persisted store without a `collection_name`, so whichever default
# collection the Chroma wrapper applies is the one being inspected here.
db = Chroma(
    persist_directory=CHROMA_PATH,
    embedding_function=get_embedding_function(),
)

In [3]:
# Number of ids stored in the collection `db` currently points at.
len(db.get(include=[])['ids'])

0

In [4]:
# Re-bind `db` to the named "podcast" collection inside the same persisted store.
db = Chroma(
    collection_name="podcast",
    persist_directory=CHROMA_PATH,
    embedding_function=get_embedding_function(),
)

# Ask the retriever for k=3 documents per query.
retriever = db.as_retriever(search_kwargs={"k": 3})

# Read the prompt through a context manager so the file handle is closed
# deterministically, and pin the encoding so the template is decoded the same
# way on every platform.
with open('prompts/answer_query.md', encoding='utf-8') as prompt_file:
    prompt_template = ChatPromptTemplate.from_template(prompt_file.read())

model = ChatAnthropic(model='claude-3-5-sonnet-20240620')

# prompt -> chat model -> plain string
chain_with_prompt = prompt_template | model | StrOutputParser()

In [5]:
# Inspect the "podcast" collection with an empty `include` list (in the
# recorded output only the 'ids' field comes back populated).
db.get(include=[])

{'ids': [],
 'embeddings': None,
 'metadatas': None,
 'documents': None,
 'uris': None,
 'data': None,
 'included': []}

In [4]:
# Smoke-test the retriever on its own with a sample question.
retriever.invoke("Who is Lee Cronin?")

[]

In [21]:
@dataclass
class QueryResponse:
    """Result bundle returned by ``query_rag``."""
    # The question exactly as it was passed to ``query_rag``.
    query_text: str
    # The generated answer text.
    response_text: str
    # The ``id`` metadata value of each retrieved document.
    sources: List[str]

def query_rag(query_text: str) -> QueryResponse:
    """Answer ``query_text`` with a retrieve -> format -> generate graph.

    The pipeline is a three-node LangGraph ``StateGraph``:

    1. ``get_docs``    - fetch documents from the module-level ``retriever``.
    2. ``format_docs`` - render each document as a title/excerpt/date block.
    3. ``generate``    - run the module-level ``chain_with_prompt`` with the
       prompt variables ``question`` and ``context``.

    The graph is built and compiled on every call.

    Args:
        query_text: The user's question.

    Returns:
        A ``QueryResponse`` holding the question, the generated answer and the
        ``id`` metadata value of every retrieved document.

    Side effects:
        Prints the answer and the sources to stdout.
    """
    # Local import: only needed for the state annotation below.
    from langchain_core.documents import Document

    class AgentState(TypedDict):
        question: str
        # The retriever yields Document objects, not chat messages.
        raw_docs: list[Document]
        # A single string: all excerpts joined with a "---" separator.
        formatted_docs: str
        generation: str
        sources: list[str]

    def get_docs(state: AgentState) -> dict:
        """Retrieve documents for the question and record their source ids."""
        docs = retriever.invoke(state["question"])
        # Return only the keys this node produces; the graph merges them
        # into the shared state.
        return {
            "raw_docs": docs,
            "sources": [doc.metadata.get("id") for doc in docs],
        }

    def format_docs(state: AgentState) -> dict:
        """Render the retrieved documents into one context string."""
        rendered = []
        for doc in state["raw_docs"]:
            # Both values live in the document's `metadata` dict. Fall back to
            # a placeholder so a missing key cannot break string building.
            title = doc.metadata.get("vid_title", "unknown")
            published = doc.metadata.get("published_dt", "unknown")
            rendered.append(
                f"Talk Title:{title}"
                f"\nExcerpt:{doc.page_content}"
                f"\nPublished time:{published}"
            )
        return {"formatted_docs": "\n\n---\n\n".join(rendered)}

    def generate(state: AgentState) -> dict:
        """Produce the answer from the question and the formatted context."""
        answer = chain_with_prompt.invoke(
            {"question": state["question"], "context": state["formatted_docs"]}
        )
        return {"generation": answer}

    workflow = StateGraph(AgentState)
    workflow.add_node("get_docs", get_docs)
    workflow.add_node("format_docs", format_docs)
    workflow.add_node("generate", generate)
    workflow.set_entry_point("get_docs")
    workflow.add_edge("get_docs", "format_docs")
    workflow.add_edge("format_docs", "generate")
    workflow.add_edge("generate", END)

    rag_app = workflow.compile()

    result = rag_app.invoke({"question": query_text})

    print(f"Response: {result['generation']}\nSources: {result['sources']}")

    return QueryResponse(
        query_text=query_text,
        response_text=result['generation'],
        sources=result['sources'],
    )


In [22]:
# query_rag() already prints the response and sources itself, so the answer
# text shows up twice in this cell's output.
response = query_rag("What did Sara Walker say about assembly theory?")
print(response.response_text)

Response: I apologize, but I do not have any context or information provided about Sara Walker or assembly theory from the given prompt. There are no excerpts, episode titles, or other details given that I could use to formulate a response about this topic. Without any relevant information to draw from, I cannot accurately answer the question or elaborate on Sara Walker's views about assembly theory. If you could provide some specific context or information about this topic, I would be happy to try answering the question based on that. My apologies that I cannot be more helpful with the limited information available.
Sources: []
I apologize, but I do not have any context or information provided about Sara Walker or assembly theory from the given prompt. There are no excerpts, episode titles, or other details given that I could use to formulate a response about this topic. Without any relevant information to draw from, I cannot accurately answer the question or elaborate on Sara Walker'