## Library imports

In [None]:
import os
import logging
from typing import List, Literal, Annotated, Optional, Union
from typing_extensions import TypedDict
import chromadb
from chromadb.config import Settings
from pydantic import BaseModel, Field, validator

from langchain import PromptTemplate, LLMChain
from langchain import hub
from langchain_openai import ChatOpenAI
from langchain_deepseek import ChatDeepSeek
from langchain_core.tools import tool
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage
from langchain_core.runnables import RunnableSerializable
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_chroma import Chroma
from langchain_text_splitters import TokenTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import DirectoryLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain.schema import Document
from langchain_community.retrievers import TavilySearchAPIRetriever
from langchain_huggingface import HuggingFaceEmbeddings

from langgraph.graph import START, END, MessagesState, StateGraph
from langgraph.checkpoint.memory import MemorySaver, InMemorySaver
from langgraph.graph import START, END, MessagesState, StateGraph
from langgraph.checkpoint.memory import InMemorySaver

## Init and API key

In [None]:
from dotenv import load_dotenv

# Load environment variables (python-dotenv reads them from a .env file)
# before any key is looked up below.
load_dotenv()

# API credentials; each one is None when its variable is not set.
deepseek_api_key = os.environ.get("DEEPSEEK_API_KEY")
silicon_api_key = os.environ.get("SILICON_API_KEY")
tavily_api_key = os.environ.get("TAVILY_API_KEY")

# DeepSeek chat model name.
deepseek_llm_model = "deepseek-chat"

# SiliconFlow endpoint and the model name to request from it.
silicon_base_url = "https://api.siliconflow.cn/v1"
silicon_llm_model = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"

# Hugging Face sentence-embedding model name.
huggingface_embed_model = "sentence-transformers/all-MiniLM-L6-v2"

In [None]:
# Base location: one level above the current working directory.
root_path = "../"

# Both data locations are derived from root_path in a single pass.
html_directory, persist_directory = (
    os.path.join(root_path, relative_part)
    for relative_part in ("downloads/website", "var/vector")
)

## Vector store

In [None]:
################################################################################
## Configure the embeddings model
################################################################################
# Built once at module level; vector_get() passes it to Chroma as the
# embedding function.
embeddings = HuggingFaceEmbeddings(
    model_name=huggingface_embed_model,
)
print(f"Created HuggingFaceEmbeddings {huggingface_embed_model}")

# Collection name that vector_get() passes to Chroma.
collection_id = "calerie-health"

def vector_get(persist_directory):
    """Build a Chroma vector store for the notebook's collection.

    Args:
        persist_directory: Path handed to Chroma as its ``persist_directory``.

    Returns:
        A ``Chroma`` instance configured with the module-level
        ``collection_id`` and ``embeddings``.
    """
    chroma_options = {
        "collection_name": collection_id,
        "embedding_function": embeddings,
        "persist_directory": persist_directory,
    }
    return Chroma(**chroma_options)


################################################################################
## Create vector store
################################################################################
# Bind both names up front. Without this, a missing persist directory leaves
# `retriever` undefined and any later `if retriever:` check raises NameError
# instead of simply being skipped.
vector_store = None
retriever = None

if os.path.exists(persist_directory):
    print(f"Get Vector store from {persist_directory}")
    vector_store = vector_get(persist_directory=persist_directory)
    # Threshold search: ask for at most k=1 document and drop results whose
    # relevance score falls below 0.3 (see LangChain's as_retriever docs).
    retriever = vector_store.as_retriever(
        search_type="similarity_score_threshold",
        search_kwargs={"score_threshold": 0.3, "k": 1},
    )
else:
    # No persisted store on disk: vector_store and retriever stay None.
    print(f"Do not get Vector store from {persist_directory}")

In [None]:
#
# Test retriever
#

# Run one sample query and print the content of every returned document.
if retriever:
    retrieve_document = retriever.invoke("What are the Alpha Hope?")
    for doc in retrieve_document:
        # One call emits the same three lines as separate prints would:
        # the banner, the page content, then a "\n" line.
        print(" === document ===", doc.page_content, "\n", sep="\n")