In [None]:
import './../../loadenv.mjs'

# Retriever

In [2]:
import { CheerioWebBaseLoader } from '@langchain/community/document_loaders/web/cheerio'
import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters'
import { MemoryVectorStore } from 'langchain/vectorstores/memory'
import { getEmbeddings } from './../../utils.mjs'

// Blog posts that form the retrieval corpus.
const urls = [
    "https://lilianweng.github.io/posts/2023-06-23-agent/",
    "https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/",
    "https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/",
]

// Kick off one loader per URL and wait for all of them together;
// every loader resolves to its own array of documents.
const docs = await Promise.all(
    urls.map((pageUrl) => {
        const loader = new CheerioWebBaseLoader(pageUrl)
        return loader.load()
    }),
)
// Merge the per-URL arrays into one flat list.
const docsList = docs.flat()

// Split the pages with chunkSize 250 and no overlap between chunks.
const textSplitter = new RecursiveCharacterTextSplitter({ chunkSize: 250, chunkOverlap: 0 })
const docSplits = await textSplitter.splitDocuments(docsList)

// Index the chunks in an in-memory vector store and expose it as a retriever.
const vectorStore = await MemoryVectorStore.fromDocuments(docSplits, getEmbeddings())
const retriever = vectorStore.asRetriever()

# State

In [3]:
import { Annotation } from '@langchain/langgraph'
import { DocumentInterface } from '@langchain/core/documents'

/**
 * State shared by every node of the graph.
 *
 * Each channel's reducer keeps the incoming value when one is supplied and
 * otherwise falls back to the previous value.
 */
const GraphState = Annotation.Root({
    // Documents being worked on; falls back to an empty list.
    documents: Annotation<DocumentInterface[]>({
        reducer: (previous, incoming) => incoming ?? previous ?? [],
    }),
    // The user question (possibly rewritten); falls back to an empty string.
    question: Annotation<string>({
        reducer: (previous, incoming) => incoming ?? previous ?? '',
    }),
    // The generated answer; no fallback beyond the previous value.
    generation: Annotation<string>({
        reducer: (previous, incoming) => incoming ?? previous,
    }),
})

# Nodes and Edges

In [4]:
import { TavilySearchResults } from '@langchain/community/tools/tavily_search'
import { Document } from '@langchain/core/documents'
import { z } from 'zod'
import { ChatPromptTemplate } from '@langchain/core/prompts'
import { pull } from 'langchain/hub'
import { getModel } from './../../utils.mjs'
import { StringOutputParser } from '@langchain/core/output_parsers'
import { formatDocumentsAsString } from 'langchain/util/document'

// Chat model shared by all nodes; created with temperature 0.
const model = getModel({ temperature: 0 })

/**
 * Graph node: look up documents for the current question.
 *
 * @param state - Current graph state; only `question` is read.
 * @returns A state update whose `documents` holds the retriever's results.
 */
async function retrieve(
    state: typeof GraphState.State
): Promise<Partial<typeof GraphState.State>> {
    console.log('---RETRIEVE---')

    // Attach a run name to the retriever call before invoking it.
    const namedRetriever = retriever.withConfig({ runName: 'FetchRelevantDocuments'})
    const documents = await namedRetriever.invoke(state.question)

    return { documents }
}

/**
 * Graph node: produce an answer from the current documents and question.
 *
 * @param state - Current graph state; `documents` and `question` are read.
 * @returns A state update whose `generation` holds the model's answer as a string.
 */
async function generate(
    state: typeof GraphState.State
): Promise<Partial<typeof GraphState.State>> {
    console.log('---GENERATE---')

    // The RAG prompt is pulled from the hub on every invocation.
    const ragPrompt = await pull<ChatPromptTemplate>('rlm/rag-prompt')
    const context = formatDocumentsAsString(state.documents)

    const generation = await ragPrompt
        .pipe(model)
        .pipe(new StringOutputParser())
        .invoke({ context, question: state.question })

    return { generation }
}

/**
 * Graph node: keep only the documents the model grades as relevant.
 *
 * Every document in `state.documents` is graded individually against
 * `state.question` using a structured-output call that returns
 * `{ binaryScore: 'yes' | 'no' }`. Documents graded 'yes' are kept, in
 * their original order.
 *
 * @param state - Current graph state; `documents` and `question` are read.
 * @returns A state update whose `documents` holds the relevant subset
 *          (possibly empty).
 */
async function gradeDocuments(
    state: typeof GraphState.State
): Promise<Partial<typeof GraphState.State>> {
    console.log('---CHECK RELEVANCE---')

    const llmWithTool = model.withStructuredOutput(
        z.object({
            binaryScore: z.enum(['yes', 'no']).describe('Relevance score "yes" or "no"')
        }).describe("Grade the relevance of the retrieved documents to the question. Either 'yes' or 'no'."),
        {
            name: 'grade',
        }
    )

    const prompt = ChatPromptTemplate.fromTemplate(
        `You are a grader assessing relevance of a retrieved document to a user question.
  Here is the retrieved document:

  {context}

  Here is the user question: {question}

  If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant.
  Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.`
    )
    const chain = prompt.pipe(llmWithTool)

    const filteredDocs: Array<DocumentInterface> = []
    // `state.documents` is a plain array, so a regular `for...of` is the
    // correct loop; the grading calls are still awaited one at a time.
    for (const doc of state.documents) {
        const grade = await chain.invoke({
            context: doc.pageContent,
            question: state.question,
        })
        if (grade.binaryScore === 'yes') {
            console.log('---GRADE: DOCUMENT RELEVANT---')
            filteredDocs.push(doc)
        } else {
            console.log('---GRADE: DOCUMENT NOT RELEVANT---')
        }
    }

    return {
        documents: filteredDocs,
    }
}

/**
 * Graph node: ask the model to rewrite the question for better retrieval.
 *
 * @param state - Current graph state; only `question` is read.
 * @returns A state update whose `question` is the model's rewritten question.
 */
async function transformQuery(
    state: typeof GraphState.State
): Promise<Partial<typeof GraphState.State>> {
    console.log('---TRANSFORM QUERY---')

    const rewritePrompt = ChatPromptTemplate.fromTemplate(
        `You are generating a question that is well optimized for semantic search retrieval.
  Look at the input and try to reason about the underlying sematic intent / meaning.
  Here is the initial question:
  \n ------- \n
  {question} 
  \n ------- \n
  Formulate an improved question: `
    )

    // prompt -> model -> plain string
    const question = await rewritePrompt
        .pipe(model)
        .pipe(new StringOutputParser())
        .invoke({ question: state.question })

    return { question }
}

/**
 * Graph node: run a Tavily web search for the current question and append
 * the result to the documents.
 *
 * @param state - Current graph state; `question` and `documents` are read.
 * @returns A state update whose `documents` is the existing list plus one
 *          new document whose `pageContent` is the raw search tool output.
 */
async function webSearch(
    state: typeof GraphState.State
): Promise<Partial<typeof GraphState.State>> {
    console.log('---WEB SEARCH---')

    const tool = new TavilySearchResults()
    // The tool takes its query as a plain string input. An object keyed by
    // `query` is not part of the tool's input schema, so the question would
    // not reach the search call.
    const docs = await tool.invoke(state.question)
    const webResults = new Document({ pageContent: docs })
    // `concat` returns a new array, so the incoming state is not mutated.
    const newDocuments = state.documents.concat(webResults)

    return {
        documents: newDocuments,
    }
}

/**
 * Conditional edge: choose the node that follows document grading.
 *
 * @param state - Current graph state; only `documents` is read.
 * @returns 'generate' when at least one document remains, otherwise
 *          'transformQuery'.
 */
function decideToGenerate(state: typeof GraphState.State) {
    console.log('---DECIDE TO GENERATE---')

    const hasDocuments = state.documents.length !== 0
    if (hasDocuments) {
        console.log('---DECISION: GENERATE---')
        return 'generate'
    }

    // Nothing survived grading: rewrite the question instead.
    console.log('---DECISION: TRANSFORM QUERY---')
    return 'transformQuery'
}

# Build Graph

In [None]:
import { END, START, StateGraph } from '@langchain/langgraph'

// Register every node of the graph under the name used by the edges below.
const workflow = new StateGraph(GraphState)
    .addNode('retrieve', retrieve)
    .addNode('gradeDocuments', gradeDocuments)
    .addNode('generate', generate)
    .addNode('transformQuery', transformQuery)
    .addNode('webSearch', webSearch)

// retrieve -> gradeDocuments -> (generate | transformQuery -> webSearch -> generate) -> END
workflow.addEdge(START, 'retrieve')
workflow.addEdge('retrieve', 'gradeDocuments')
workflow.addConditionalEdges(
    'gradeDocuments',
    decideToGenerate,
    // Explicit path map: lists the only two targets decideToGenerate can
    // return, so the drawn graph shows just these branches instead of an
    // edge to every node. Runtime routing is unchanged.
    {
        transformQuery: 'transformQuery',
        generate: 'generate',
    },
)
workflow.addEdge('transformQuery', 'webSearch')
workflow.addEdge('webSearch', 'generate')
workflow.addEdge('generate', END)

const app = workflow.compile()

In [None]:
import { printGraph } from './../../utils.mjs'
// Pass the compiled app's graph to the project's printGraph helper
// (defined in utils.mjs; its output format is not visible from this file).
await printGraph(app.getGraph())

In [None]:
// Question fed into the graph.
const inputs = {
    question: 'Explain how the different types of agent memory work.'
}
const config = { recursionLimit: 50 }

// Holds the value emitted by the most recent node; after the stream ends
// this is the last node's output.
let finalGeneration
const outputStream = await app.stream(inputs, config)
for await (const output of outputStream) {
    // Each streamed item maps node names to the values they produced.
    Object.entries(output).forEach(([key, value]) => {
        console.log(`Node: ${key}`)
        finalGeneration = value
    })
    console.log('\n---\n')
}

console.log(JSON.stringify(finalGeneration, null, 2))