## Library imports

In [None]:
import os
import logging
from typing import List, Literal, Annotated
from typing_extensions import TypedDict
from pydantic import BaseModel, Field
import chromadb
from chromadb.config import Settings

from langchain import PromptTemplate, LLMChain
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_chroma import Chroma
from langchain_text_splitters import TokenTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain.schema import Document
from langchain_community.retrievers import TavilySearchAPIRetriever
from langchain_huggingface import HuggingFaceEmbeddings

from langgraph.graph import START, END, MessagesState, StateGraph
from langgraph.checkpoint.memory import MemorySaver, InMemorySaver
from langgraph.graph import START, END, MessagesState, StateGraph
from langgraph.checkpoint.memory import InMemorySaver

## Load API keys

In [None]:
from dotenv import load_dotenv
# Load environment variables through python-dotenv before reading any key.
load_dotenv()

# Provider credentials; each one is None when its variable is not set.
deepseek_api_key = os.environ.get("DEEPSEEK_API_KEY")
silicon_api_key = os.environ.get("SILICON_API_KEY")
tavily_api_key = os.environ.get("TAVILY_API_KEY")

# DeepSeek: chat model name.
deepseek_llm_model = "deepseek-chat"

# SiliconFlow: model name and API base URL.
silicon_llm_model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
silicon_base_url = "https://api.siliconflow.cn/v1"

# Hugging Face: embedding model name.
huggingface_embed_model = "sentence-transformers/all-MiniLM-L6-v2"

In [None]:
# Configure the root logger to emit INFO records and above.
logging.basicConfig(level="INFO")

## LLM Init

In [None]:
# Chat model served by SiliconFlow, reached through ChatOpenAI with a custom base URL.
llm_silicon = ChatOpenAI(
    base_url=silicon_base_url,
    openai_api_key=silicon_api_key,
    model=silicon_llm_model,
    max_tokens=2048,
    temperature=0.8,
)

In [None]:
from langchain_deepseek import ChatDeepSeek
from langchain_core.prompts import ChatPromptTemplate

# DeepSeek chat model used by the re-writer helpers below.
llm_deepseek = ChatDeepSeek(
    model=deepseek_llm_model,
    # Sampling controls.
    temperature=0.3,
    top_p=0.9,
    presence_penalty=0.5,
    frequency_penalty=0.7,
    # Request limits: no token cap, no timeout, up to three retries.
    max_tokens=None,
    timeout=None,
    max_retries=3,
)

## Deepseek Prompt

### Question Re-writer

In [None]:
################################################################################
### Deepseek Question Re-writer
################################################################################
def rewriter_question(llm: ChatDeepSeek, question: str) -> str:
    """Rewrite a user question into a form better suited to vectorstore retrieval.

    The model is instructed to reply with a JSON object that carries the
    rewritten question under the ``refined_question`` key.

    Args:
        llm: Chat model that performs the rewrite.
        question: Original user question.

    Returns:
        The rewritten question, or ``question`` unchanged when the model's
        reply is not valid JSON, is not a JSON object, or lacks the expected
        key. Errors raised by the model call itself still propagate.
    """
    # Local import: the notebook's import cell does not bring this name into scope.
    from langchain_core.exceptions import OutputParserException

    system_role = """
    Role: Question Optimization Specialist
    Task: Re-write input question to a better version

    Guidelines:
    You are a question re-writer that converts an input question to a better version that is optimized \
for vectorstore retrieval. Look at the input and try to reason about the underlying semantic intent / meaning.
    """

    system_respond = """
    Respond to the request:
    1. direct response to core content
    2. disable examples/extended descriptions
    3. use simple sentence structure
    4. Omit non-critical details
    
    Current Scenario: Rapid Response Mode
    """

    key = "refined_question"
    system_format = f"""
    **Format Requirement**:
    - Respond ONLY with a JSON object
    - Key: '{key}'
    - Value: The improved question
    - No explanations or additional text
    """

    instruction = "Please refine and improve this question."

    # The doubled braces survive the f-string as a single-brace template
    # placeholder that ChatPromptTemplate fills in at invoke time.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", f"{system_role}\n{system_respond}\n{system_format}"),
            (
                "human",
                f"Original question:\n {{question}}\n\n {instruction}"
            ),
        ]
    )

    rewriter = prompt | llm | JsonOutputParser()

    try:
        parsed = rewriter.invoke({"question": question})
        return parsed[key]
    except (OutputParserException, KeyError, TypeError) as err:
        # OutputParserException: the reply was not valid JSON.
        # KeyError: a JSON object without the expected key.
        # TypeError: valid JSON that cannot be indexed by key (list, null, ...).
        logging.error(
            "No usable %r in model response (%s: %s); keeping the original question",
            key,
            type(err).__name__,
            err,
        )
        return question

In [None]:
##
## Test Question Re-writer
##

user_input = "What are the ingredients in Alpha Hope?"

# Send the sample question through the DeepSeek-backed question re-writer.
rewrite_question = rewriter_question(question=user_input, llm=llm_deepseek)

# Banner, blank line, then the rewritten question.
print("\n=== re-write question ===\n", rewrite_question, sep="\n")

### Retrieved-document re-writer

In [None]:
################################################################################
### Deepseek Retrieval documents re-writer
################################################################################
def rewriter_document(llm: ChatDeepSeek, question: str, document: str) -> str:
    """Rewrite the part of a retrieved document that relates to a user question.

    The model is instructed to reply with a JSON object that carries the
    rewritten text under the ``refined_document`` key. The assembled prompt
    template is printed on every call.

    Args:
        llm: Chat model that performs the rewrite.
        question: User question the document was retrieved for.
        document: Retrieved document text to rewrite.

    Returns:
        The rewritten document, or ``document`` unchanged when the model's
        reply is not valid JSON, is not a JSON object, or lacks the expected
        key. Errors raised by the model call itself still propagate.
    """
    # Local import: the notebook's import cell does not bring this name into scope.
    from langchain_core.exceptions import OutputParserException

    # JSON key the model must use for the rewritten text.
    key = "refined_document"

    # Prompt sections are built from adjacent literals so that no source
    # indentation or control characters end up in the text sent to the model.
    role = (
        "Role: Documents Optimization Specialist\n"
        "Task: re-write input document to a better version\n"
        "\n"
        "Guidelines:\n"
        "You are a re-writer that converts a retrieved document to a better version that is optimized.\n"
        "Rewrite the part of the document that relates to the user question and output them."
    )

    respond_request = (
        "Respond to the request:\n"
        "1. direct response to core content\n"
        "2. disable examples/extended descriptions\n"
        "3. use simple sentence structure\n"
        "4. Omit non-critical details\n"
        "\n"
        "Current Scenario: Rapid Response Mode"
    )

    response_format = (
        "**Format Requirement**:\n"
        "- Return a JSON object.\n"
        f"- Key: {key}\n"
        "- Value: The improved document\n"
        "- No explanations or additional text"
    )

    rewriter_instruction = "Please refine and improve this document."

    # The doubled braces survive the f-string as single-brace template
    # placeholders that ChatPromptTemplate fills in at invoke time.
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", f"{role}\n\n{respond_request}\n\n{response_format}"),
            (
                "human",
                f"User question:\n {{question}}\n\n Original retrieved document:\n\n {{document}}\n\n {rewriter_instruction}"
            ),
        ]
    )

    print("=== deepseek document prompt template ===\n")
    prompt.pretty_print()

    rewriter = prompt | llm | JsonOutputParser()

    try:
        parsed = rewriter.invoke({"question": question, "document": document})
        return parsed[key]
    except (OutputParserException, KeyError, TypeError) as err:
        # OutputParserException: the reply was not valid JSON.
        # KeyError: a JSON object without the expected key.
        # TypeError: valid JSON that cannot be indexed by key (list, null, ...).
        logging.error(
            "No usable %r in model response (%s: %s); keeping the original document",
            key,
            type(err).__name__,
            err,
        )
        return document

In [None]:
##
## Test Document Re-writer
##

user_input = "What are the ingredients in Alpha Hope?"

# Sample retrieved passage. The mis-decoded UTF-8 sequences in the original
# sample are restored to the intended characters (’ and α), and the line
# continuation keeps a space after "Molecules,".
retrieval_document = """
Alpha Hope has been formulated with two powerful active ingredients, PQQ and Molecular Hydrogen.
They work synergistically to activate metabolic pathways involved in energy production and cognition.
This is particularly formulated to promote the body’s natural detox process and help the body naturally produce Hope Molecules, \
also known as PGC-1α, that fight oxidative damage.
"""

# Send the sample question and passage through the DeepSeek-backed document re-writer.
rewrite_document = rewriter_document(llm=llm_deepseek, question=user_input, document=retrieval_document)

print("\n=== re-write document ===\n")
print(rewrite_document)

## Prompt template

In [None]:
# Single-turn prompt text: a fixed instruction followed by the user's message.
template = (
    "\n"
    "You are an AI assistant. Your task is to help the user with their queries.\n"
    "\n"
    "User: {user_input}\n"
    "AI:\n"
)

# Wrap the text in a PromptTemplate with its one declared placeholder.
prompt_template = PromptTemplate(
    template=template,
    input_variables=["user_input"],
)

# Demo: fill the placeholder and show the rendered prompt.
user_input = "What is the capital of France?"
prompt = prompt_template.format(user_input=user_input)
print(prompt)

In [None]:
# Target data structure: a joke split into its setup and its punchline.
class Joke(BaseModel):
    # Question that sets up the joke.
    setup: Annotated[str, Field(description="question to set up a joke")]
    # Answer that resolves the joke.
    punchline: Annotated[str, Field(description="answer to resolve the joke")]


# A query intended to prompt a language model to populate the data structure.
joke_query = "Tell me a joke."

# Schema-less JSON parser whose format instructions are injected into the prompt.
# Alternative: JsonOutputParser(pydantic_object=Joke).
parser = JsonOutputParser()

print("\n=== format instructions ===\n", parser.get_format_instructions(), sep="\n")

# The format instructions are bound up front; only the query is supplied
# when the template is formatted.
prompt_template = PromptTemplate(
    partial_variables={"format_instructions": parser.get_format_instructions()},
    input_variables=["query"],
    template="Answer the user query.\n{format_instructions}\n{query}\n",
)

# Demo: render the prompt for a sample question.
user_input = "What is the capital of France?"
prompt = prompt_template.format(query=user_input)
print("\n=== prompt ===\n", prompt, sep="\n")