In [1]:
from langchain_ollama import OllamaEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

embeddings = OllamaEmbeddings(model="nomic-embed-text",)

# Example

In [2]:
# Create a vector store with a sample text

text = "LangChain is the framework for building context-aware reasoning applications"

vectorstore = InMemoryVectorStore.from_texts([text], embedding=embeddings)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the most similar text
retrieved_documents = retriever.invoke("What is LangChain?")

# show the retrieved document's content
retrieved_documents[0].page_content

'LangChain is the framework for building context-aware reasoning applications'

# Load Data

In [1]:
import pandas as pd

tbl_names = ["Schedules", "RecurringSchedules", "Memos"]
tbl_pk = {"Schedules": "schedule_id", "RecurringSchedules": "recurring_schedule_id", "Memos": "memo_id"}
tbls = {tbl_name: pd.read_csv(f"{tbl_name}.csv") for tbl_name in tbl_names}


# Create Vector Database

In [2]:
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_ollama import OllamaEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS

import faiss
from langchain_community.vectorstores import FAISS


embeddings = OllamaEmbeddings(model="nomic-embed-text",)

def create_documents(tbl_name, tbl):
    docs = []
    for idx, row in tbl.iterrows():
        if tbl_name=="Memos":
            doc = Document(
                # page_id=row[tbl_pk[tbl_name]],
                page_content=row["content"],
                metadata={tbl_pk[tbl_name]: row[tbl_pk[tbl_name]],"tbl_name": tbl_name}
            )
        elif tbl_name in ["Schedules", "RecurringSchedules"]:
            doc = Document(
                # page_id=row[tbl_pk[tbl_name]],
                page_title=row["title"],
                page_content=row["description"],
                metadata={tbl_pk[tbl_name]: row[tbl_pk[tbl_name]],"tbl_name": tbl_name}
            )
        docs.append(doc)
    
    return docs
    


In [3]:
chroma_db = Chroma(
    embedding_function=embeddings,
    persist_directory='chroma'
)

index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

faiss_db = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore= InMemoryDocstore(),
    index_to_docstore_id={}
)

for tbl_name in tbl_names:
    docs = create_documents(tbl_name, tbls[tbl_name])
    faiss_db.add_documents(docs)
    chroma_db.add_documents(docs)
    
faiss_db.save_local("faiss")


In [4]:
# search for similar documents

test_query = "presentation ppt"
results = chroma_db.similarity_search_with_score(
    test_query,
    k=5,
)

for res, score in results:
    print(f"* [SIM={score:2f}] {res.page_content} [{res.metadata}]")

* [SIM=0.731714] Prepare slides for the upcoming presentation next week. [{'memo_id': 1, 'tbl_name': 'Memos'}]
* [SIM=0.859489] Prepare slides for the upcoming conference. [{'schedule_id': 8, 'tbl_name': 'Schedules'}]
* [SIM=1.034088] Kickoff meeting for the new project with stakeholders. [{'schedule_id': 1, 'tbl_name': 'Schedules'}]
* [SIM=1.035572] Discuss project requirements over lunch. [{'schedule_id': 2, 'tbl_name': 'Schedules'}]
* [SIM=1.070581] Schedule a meeting with the finance team. [{'memo_id': 8, 'tbl_name': 'Memos'}]


In [5]:
# search for similar documents

test_query = "presentation ppt"
results = faiss_db.similarity_search_with_score(test_query,k=5,)

for res, score in results:
    print(f"* [SIM={score:2f}] {res.page_content} [{res.metadata}]")


* [SIM=0.731714] Prepare slides for the upcoming presentation next week. [{'memo_id': 1, 'tbl_name': 'Memos'}]
* [SIM=0.859489] Prepare slides for the upcoming conference. [{'schedule_id': 8, 'tbl_name': 'Schedules'}]
* [SIM=1.034088] Kickoff meeting for the new project with stakeholders. [{'schedule_id': 1, 'tbl_name': 'Schedules'}]
* [SIM=1.035572] Discuss project requirements over lunch. [{'schedule_id': 2, 'tbl_name': 'Schedules'}]
* [SIM=1.070581] Schedule a meeting with the finance team. [{'memo_id': 8, 'tbl_name': 'Memos'}]


In [6]:
# search for similar documents

test_query = "team discussion"
results = chroma_db.similarity_search_with_score(test_query,k=5,)

for res, score in results:
    print(f"* [SIM={score:2f}] {res.page_content} [{res.metadata}]")

* [SIM=0.743619] Casual lunch with the team. [{'recurring_schedule_id': 6, 'tbl_name': 'RecurringSchedules'}]
* [SIM=0.789572] Discuss project updates and blockers. [{'schedule_id': 5, 'tbl_name': 'Schedules'}]
* [SIM=0.830579] Schedule a meeting with the finance team. [{'memo_id': 8, 'tbl_name': 'Memos'}]
* [SIM=0.875572] Discuss project requirements over lunch. [{'schedule_id': 2, 'tbl_name': 'Schedules'}]
* [SIM=0.880702] Check in with the new team member this week. [{'memo_id': 4, 'tbl_name': 'Memos'}]


In [9]:
test_query = "team discussion"
results = faiss_db.similarity_search_with_score(test_query,k=5,)
for res, score in results:
    print(f"* [SIM={score:.3f}] {res.page_content} [{res.metadata}]")

* [SIM=0.744] Casual lunch with the team. [{'recurring_schedule_id': 6, 'tbl_name': 'RecurringSchedules'}]
* [SIM=0.790] Discuss project updates and blockers. [{'schedule_id': 5, 'tbl_name': 'Schedules'}]
* [SIM=0.831] Schedule a meeting with the finance team. [{'memo_id': 8, 'tbl_name': 'Memos'}]
* [SIM=0.876] Discuss project requirements over lunch. [{'schedule_id': 2, 'tbl_name': 'Schedules'}]
* [SIM=0.881] Check in with the new team member this week. [{'memo_id': 4, 'tbl_name': 'Memos'}]
