In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredExcelLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough

# On-disk byte store rooted at ./.cache/; used further down as the backing
# store for the cached embeddings.
cache_dir = LocalFileStore("./.cache/")

# Chat model that produces the final answer at the end of the chain.
llm = ChatOpenAI(
    temperature=0.1,
)

# Split the text on newlines, then pack consecutive lines together into
# chunks of at most 1000 characters, carrying up to 50 characters of overlap
# between neighbouring chunks. A chunk therefore usually holds several lines,
# not a single one.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=50,
    # "\n" is tried first. " " and "" are fallbacks so that a single line
    # longer than chunk_size is still broken up instead of being emitted as
    # one oversized chunk.
    separators=["\n", " ", ""],
)

# Read the workbook and cut its text into chunks with `splitter`.
loader = UnstructuredExcelLoader("./ETC(yschang-Total).xlsx")
docs = loader.load_and_split(text_splitter=splitter)

# Wrap the OpenAI embedder so computed vectors are stored in `cache_dir` and
# reused on later runs instead of being requested again.
embeddings = OpenAIEmbeddings()
cached_embeddings = CacheBackedEmbeddings.from_bytes_store(
    embeddings,
    cache_dir,
    # Key cache entries by model name so vectors produced by different
    # embedding models never collide in the shared store.
    # NOTE: entries written earlier under the empty default namespace are not
    # reused; the first run after this change re-embeds the documents once.
    namespace=embeddings.model,
)

# Embed every chunk (through the cache) and index the vectors in Chroma.
vectorstore = Chroma.from_documents(docs, cached_embeddings)

def format_docs(documents):
    """Join the text of the retrieved chunks into a single context string.

    Without this step the prompt would receive the repr of a list of
    Document objects (metadata included) rather than the plain chunk text.
    """
    return "\n\n".join(doc.page_content for doc in documents)


retriever = vectorstore.as_retriever()
# Backward-compatible alias for the original (misspelled) name, in case other
# cells still refer to it.
retriver = retriever

prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "You are a helpful English teacher. Answer questions using only the "
        "following context. If you don't know the answer just say you don't "
        "know, don't make it up:\n\n{context}",
    ),
    ("human", "{question}"),
])

# The input string is fanned out to both keys: it is sent to the retriever
# (whose hits are flattened to text by format_docs) to build {context}, and
# passed through unchanged as {question}.
chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

chain.invoke("Could you find expression for people character words?")

AIMessage(content="Sure, here are some expressions related to people's character traits:\n\n- Naïve\n- Gullible\n- Talkative\n- Cut someone up\n- Cut in\n- Cut me off\n- Thank for your two cents\n- Make it the whole world blind\n- Bean there, Done that\n- Been in your shoe\n- I have a lot on my plates\n- It didn't go as planned\n- To be economical with the truth\n- Goody-Goody\n- The other side of the coin\n- It will come back to haunt you\n- Take it with a grain of salt\n- To be all talk\n- I'm on it\n- Beef up (on) something\n\nLet me know if you need more information on any of these expressions.")

In [None]:

# The chain has no memory component, so this question is answered on its own:
# a fresh retrieval is run for it and the previous answer is not visible.
followup_question = "could you give me full sentence example using people character?"
chain.invoke(followup_question)