In [11]:
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredExcelLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, CacheBackedEmbeddings
from langchain.vectorstores import Chroma
from langchain.storage import LocalFileStore
from langchain.chains import RetrievalQA


llm = ChatOpenAI()
cache_dir = LocalFileStore("./.cache/")

# splitter = CharacterTextSplitter.from_tiktoken_encoder(
#     separator="\n",
#     chunk_size=600,
#     chunk_overlap=100,
# )
# Split by newline, treating each line as a separate chunk
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,  # Adjust based on your requirements
    chunk_overlap=50,  # Overlap between chunks
    separators=["\n"]  # Split based on newlines (you can add other separators too)
)

loader = UnstructuredExcelLoader("./ETC(yschang-Total).xlsx")

docs = loader.load_and_split(text_splitter=splitter)

embeddings = OpenAIEmbeddings()

cached_embeddings = CacheBackedEmbeddings.from_bytes_store(embeddings, cache_dir)


vectorstore = Chroma.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
chain.run("Could you find expression for human character?")

'Here are some expressions related to human character:\n\n1. who are you similar in nature to\n2. it’s not my seen\n3. turn a blind eye\n4. he let me down\n5. they went on and on\n6. neat and tidy\n7. obsessive\n8. uptight\n9. laid-back\n\nThese expressions describe various aspects of human character, behavior, and personality.'

In [13]:
import sqlite3
print(sqlite3.sqlite_version)

vectorstore = Chroma.from_documents(docs, cached_embeddings)

chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever()
)
chain.run("Could you find expression for human character?")

3.45.2


"Here are some expressions related to human character:\n\n- Close but no cigar (implied failure)\n- Moral dilemma\n- Morals and values\n- Morally wrong\n- Play the devil's advocate\n- Where do you draw the line\n- Obsessive\n- Uptight\n- Laid-back\n\nI hope these help!"

In [16]:
chain.run("Could you make 10 questions and answer from document? and make useful examples per question")

'Certainly! Here are 10 questions and answers based on the provided document:\n\n1. **Question:** What types of lines are commonly used in data visualization?\n   **Answer:** Dotted line, solid line, fluctuating line, and broken line are commonly used in data visualization. For example, a fluctuating line might represent stock prices over time.\n\n2. **Question:** How can you describe a pie chart, a bar graph, and a line graph?\n   **Answer:** A pie chart shows proportions, a bar graph compares categories, and a line graph displays trends over time. For instance, a line graph could illustrate population growth over decades.\n\n3. **Question:** What do phrases like "climbed slightly" and "dropped markedly" indicate in data analysis?\n   **Answer:** These phrases indicate the degree of change in a dataset. For example, "climbed slightly" could describe a small increase in sales figures over a month.\n\n4. **Question:** How is a situation typically described when data shows a temporary de

In [17]:
chain.run("Could you change to Korean")

"I can't take on the role 맡다\n내가 할 수 있는 것 이상을 할 수 없다.\n메이크업을 좀 줄여라\n뭔가를 완전히 멈추게 하다\n멈추게는 안하지만 방해하는 것\n쎄게 하지말고 살짝해라 (안마, 스포츠)\n살살일해라\npositive한 것에 얘기할 때\nnegative한 것에 얘기할 때\n책을 넘기다\n밸런타인 데이에 혼자인 것은 힘들다\n마음 깊은 곳에 있는 감정은 슬픔이다\n프로포즈하다\n결혼 후 권태기\n그냥 믿어줬다\n누구 말이 맞는지 모르겠다\n점수를 잘 받아야 한다\n너무 늦었다\n상황에 따라 변하는\n리더 역할을 하다\n~되도록 ~척하다\n절대 진지하지 않은 사람\n뭐든지 자기가 해서 컨트롤할 수 있어야 한다\n그냥 내가 했다\n다른 사람 도움 없이 내가 했다\n성인을 기쁘게 하는 사람\n최소한의 일만 하는 사람\n최선을 다해 시도해보겠다\n더 나은 결과를 위해 노력하다\n반씩 나눠 일을 하자\n나를 깨우는 것"