## Lib

In [None]:
import os
import logging
from typing import List, Literal, Annotated, Optional, Union, Any
from typing_extensions import TypedDict
import chromadb
from chromadb.config import Settings
from pydantic import BaseModel, Field, validator

from langchain import PromptTemplate, LLMChain
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_deepseek import ChatDeepSeek
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_core.runnables import Runnable, RunnableLambda, RunnablePassthrough, RunnableSerializable
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_chroma import Chroma
from langchain_text_splitters import TokenTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain.schema import Document
from langchain_community.retrievers import TavilySearchAPIRetriever
from langchain_huggingface import HuggingFaceEmbeddings

from langgraph.graph import START, END, MessagesState, StateGraph
from langgraph.checkpoint.memory import MemorySaver, InMemorySaver
from langgraph.graph import START, END, MessagesState, StateGraph
from langgraph.checkpoint.memory import InMemorySaver

## Init config and API key

In [None]:
#
# key
#
from dotenv import load_dotenv
# Let python-dotenv populate the process environment first, then read each
# provider's API key. A variable that is not set yields None.
load_dotenv()

deepseek_api_key = os.environ.get("DEEPSEEK_API_KEY")
silicon_api_key = os.environ.get("SILICON_API_KEY")
tavily_api_key = os.environ.get("TAVILY_API_KEY")
linkup_api_key = os.environ.get("LINKUP_API_KEY")

In [None]:
#
# config
#
import tomllib
def load_config(config_file):
    try:
        with open(config_file, 'rb') as f:
            config = tomllib.load(f)
            return config
    except Exception as e:
        print(f"Load config file error: {e}")
        return None

# load config file
# Every setting starts out as None so that later cells can reference it even
# when the config file is missing, unreadable, or omits the key.
deepseek_llm_model = None
deepseek_llm_temperature = None
deepseek_llm_max_tokens = None
silicon_base_url = None
silicon_llm_model = None
huggingface_embed_model = None

config_data = load_config("../config/config.toml")
if config_data:
    log_level = config_data.get('log_level')
    if log_level:
        logging.basicConfig(level=log_level)

    # deepseek
    deepseek_config = config_data.get('deepseek', {})
    deepseek_llm_model = deepseek_config.get('model')
    deepseek_llm_temperature = deepseek_config.get('temperature')
    deepseek_llm_max_tokens = deepseek_config.get('max_tokens')

    # silicon
    silicon_config = config_data.get('silicon', {})
    silicon_base_url = silicon_config.get('base_url')
    silicon_llm_model = silicon_config.get('model')

    # huggingface
    huggingface_embed_model = config_data.get('huggingface', {}).get('embed_model')


# Fallbacks for anything the config file did not provide.
# deepseek
deepseek_llm_model = deepseek_llm_model or "deepseek-chat"

# silicon
silicon_base_url = silicon_base_url or "https://api.siliconflow.cn/v1"
silicon_llm_model = silicon_llm_model or "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

# huggingface
huggingface_embed_model = huggingface_embed_model or "sentence-transformers/all-MiniLM-L6-v2"

## LLM

In [None]:
# Chat model reached through the ChatOpenAI client, pointed at the configured
# silicon base URL and model. Sampling settings are fixed in this cell.
silicon_llm = ChatOpenAI(
    base_url=silicon_base_url,
    openai_api_key=silicon_api_key,
    model=silicon_llm_model,
    max_tokens=2048,
    temperature=1.2,
)

In [None]:
# init deepseek LLM mod
llm_deepseek = ChatDeepSeek(
    model=deepseek_llm_model,
    # Fall back to 0.3 only when no temperature was configured. An explicit
    # `is None` test is needed because a configured temperature of 0 is falsy
    # and would otherwise be silently replaced by the default.
    temperature=0.3 if deepseek_llm_temperature is None else deepseek_llm_temperature,
    max_tokens=deepseek_llm_max_tokens,
    timeout=None,
    top_p=0.9,
    frequency_penalty=0.7,
    presence_penalty=0.5,
    max_retries=3
)

## TavilySearchAPIRetriever: langgraph node

In [None]:
# Tavily-backed retriever, constructed with k=3.
web_retriever = TavilySearchAPIRetriever(k=3, api_key=tavily_api_key)

question = "Describe the health benefits of taking vitamins?"

# invoke() hands back Document objects; dump each one's text and metadata.
docs = web_retriever.invoke(question)
for doc in docs:
    print(
        "=== web search ===",
        f"page_content: {doc.page_content}",
        f"metadata: {doc.metadata}",
        "\n",
        sep="\n",
    )

## WikipediaRetriever: ?

In [None]:
from langchain_community.retrievers import WikipediaRetriever

# Retriever created with WikipediaRetriever's default settings.
wiki_retriever = WikipediaRetriever()

question = "Describe the health benefits of taking vitamins?"
# invoke() hands back Document objects; dump each one's text and metadata.
docs = wiki_retriever.invoke(question)
for doc in docs:
    print(
        "=== Wikipedia search ===",
        f"page_content: {doc.page_content}",
        f"metadata: {doc.metadata}",
        "\n",
        sep="\n",
    )

# PubMed: biomedical literature
The information returned by PubMed queries is not well suited to a general health-quiz application.

In [None]:
!pip install xmltodict

In [None]:
from langchain_community.retrievers import PubMedRetriever

# Retriever created with PubMedRetriever's default settings.
med_retriever = PubMedRetriever()

question = "Describe the health benefits of taking vitamins?"
# invoke() hands back Document objects; the text and the metadata of each
# hit are printed as two separate paragraphs.
docs = med_retriever.invoke(question)
for doc in docs:
    print(
        "=== PubMed search ===",
        f"page_content: {doc.page_content}",
        "\n",
        f"metadata: {doc.metadata}",
        "\n",
        sep="\n",
    )

## TavilySearchResults: do not use it

In [None]:
# Web search
# The result cap of TavilySearchResults is the `max_results` field; `k` is the
# parameter name of TavilySearchAPIRetriever, not of this tool.
web_search_tool = TavilySearchResults(
    # NOTE(review): confirm this tool honors `api_key`; it may instead read
    # TAVILY_API_KEY from the environment (populated by load_dotenv()).
    api_key = tavily_api_key,
    max_results=3,
    #include_domains=["wikipedia.org"],
    #search_depth="advanced",
    include_answer=True,
    #verbose=True
)

question = "Describe the health benefits of taking vitamins?"
#docs = web_search_tool.invoke({"query": question})
docs = web_search_tool.invoke(question)
# Each hit is indexed like a dict with "url" and "content"; "answer" is
# printed only when the hit carries one.
for result in docs:
    print("=== web search ===")
    print(f"url: {result['url']}")
    print(f"content: {result['content']}")
    if "answer" in result:
        print(f"answer: {result['answer']}")
    print("\n")

# Merge the text of all hits into a single Document.
web_results = "\n".join(d["content"] for d in docs)
documents = Document(page_content=web_results)
print(f"{documents.page_content}")


## RetrievalQA: Tavily

In [None]:
from langchain.chains import RetrievalQA

# Tavily retriever (k=3) that supplies documents to the QA chain.
web_retriever = TavilySearchAPIRetriever(k=3, api_key=tavily_api_key)

# Question-answering chain: DeepSeek model over the Tavily retriever.
qa_chain = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=web_retriever,
    llm=llm_deepseek,
)

question = "Describe the health benefits of taking vitamins?"
response = qa_chain.invoke(question)
# The response is indexed like a dict; the answer text sits under "result".
print(response["result"])

## Linkup with langchain

In [None]:
!pip install -U langchain-linkup

In [None]:
from langchain_linkup import LinkupSearchRetriever

# Linkup search retriever; depth accepts "standard" or "deep".
retriever = LinkupSearchRetriever(
    linkup_api_key=linkup_api_key,
    depth="standard",
)

In [None]:
# Run one query through the Linkup retriever and dump every returned
# document's text and metadata.
documents = retriever.invoke(input="Describe the health benefits of taking vitamins?")
for doc in documents:
    print(
        "=== web search ===",
        f"page_content: {doc.page_content}",
        f"metadata: {doc.metadata}",
        "\n",
        sep="\n",
    )

In [None]:
# Question that is passed to chain.invoke() at the end of this cell.
query: str = "Describe the health benefits of taking vitamins?"

def format_retrieved_documents(docs: list[Document]) -> str:
    """Render retrieved documents as one text block.

    Each document becomes a "name (url):" header line followed by its page
    content; entries are separated by a blank line. The "name" and "url"
    values are read from each document's metadata.
    """
    sections = (
        f"{document.metadata['name']} ({document.metadata['url']}):\n{document.page_content}"
        for document in docs
    )
    return "\n\n".join(sections)

def inspect_context(state: dict[str, Any]) -> dict[str, Any]:
    """Echo the "context" entry of *state* to stdout and pass *state* through.

    The same mapping object is returned unchanged, so this can sit in the
    middle of a chain as a debugging tap.
    """
    context = state['context']
    print(f"Context: {context}\n\n")
    return state

# Prompt text with two placeholders: the retrieved context and the question.
generation_prompt_template = (
    "Answer the question based only on the following context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)
prompt = ChatPromptTemplate.from_template(generation_prompt_template)

# Pipeline: retrieve and format the context while passing the question
# through, print the context, fill the prompt, call the DeepSeek model, and
# parse the output with StrOutputParser.
chain: Runnable[Any, str] = (
    {
        "context": retriever | format_retrieved_documents,
        "question": RunnablePassthrough(),
    }
    | RunnableLambda(inspect_context)
    | prompt
    | llm_deepseek
    | StrOutputParser()
)
response = chain.invoke(query)
print(f"Response: {response}")