In [1]:
%reset -f
%pip install -qU chromadb llama-index-core llama-index-readers-file llama-index-llms-ollama llama-index-embeddings-huggingface llama-index-graph-stores-neo4j graspologic numpy==1.24.4 scipy==1.12.0

Note: you may need to restart the kernel to use updated packages.


In [2]:
import chromadb

from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings, PromptTemplate, VectorStoreIndex, StorageContext, SimpleDirectoryReader, VectorStoreIndex, get_response_synthesizer
from llama_index.core.llms import MessageRole
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.chat_engine import ContextChatEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.storage.chat_store import SimpleChatStore
from llama_index.vector_stores.chroma import ChromaVectorStore

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Configuration -----------------------------------------------------------
COLLECTION = "chatbot"
DOCS_PATH = "./data/docs"
PERSISTENT_PATH = "./data/vector_store"

# Torch device for the embedding model. None lets HuggingFaceEmbedding choose
# an available backend, so the notebook no longer requires Apple's "mps".
# Set to "mps", "cuda" or "cpu" to force a specific device.
EMBED_DEVICE = None

# Controls how documents are split into chunks when they are transformed.
sentence_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=10)

# Local LLM served by Ollama.
llm = Ollama(model="llama3.2", request_timeout=60.0)

# Embedding model used for both indexing and querying.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5", device=EMBED_DEVICE)

# Global settings picked up by LlamaIndex components created below.
Settings.llm = llm
Settings.embed_model = embed_model

# --- Vector store ------------------------------------------------------------
# Open (or create) the persistent Chroma collection and wrap it for LlamaIndex.
db = chromadb.PersistentClient(path=PERSISTENT_PATH)
collection = db.get_or_create_collection(COLLECTION)
vector_store = ChromaVectorStore(chroma_collection=collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# from_vector_store creates its own storage context around `vector_store`,
# so passing `storage_context` here is unnecessary.
index = VectorStoreIndex.from_vector_store(vector_store=vector_store)

# --- Query pipeline ----------------------------------------------------------
# Fetch the 10 most similar chunks for each query.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=10,
)

query_engine = RetrieverQueryEngine(
    retriever=retriever
)

def ingest():
    """Load the files under DOCS_PATH, index them and persist them in Chroma.

    Data loading (data ingestion) runs in three stages:
    - load: read every file in DOCS_PATH into documents
    - transform & indexing: split the documents with `sentence_splitter`
      and build a vector index over the resulting chunks
    - store: write the index into the Chroma collection COLLECTION located
      at PERSISTENT_PATH

    Indexing is an expensive operation; persisting the result in the vector
    store avoids re-indexing the documents on every run.

    Returns:
        None. The function is called for its side effect on the vector store.
    """
    # Load
    # Create documents out of every single file in the DOCS_PATH directory
    # Support Markdown, PDFs, Word, Powerpoint, Images, Audio, Video
    documents = SimpleDirectoryReader(DOCS_PATH).load_data()

    # Store
    # Open the persistent collection the index will be written into.
    client = chromadb.PersistentClient(path=PERSISTENT_PATH)
    chroma_collection = client.get_or_create_collection(COLLECTION)
    chroma_store = ChromaVectorStore(chroma_collection=chroma_collection)
    chroma_context = StorageContext.from_defaults(vector_store=chroma_store)

    # Transform & Index
    # from_documents receives a list of Document objects, parses and chunks
    # them with the given transformations, then writes them to the storage context.
    VectorStoreIndex.from_documents(
        documents,
        transformations=[sentence_splitter],
        storage_context=chroma_context,
    )

def retrieve(question):
    """Run `question` through the query engine backed by the vector store.

    Args:
        question: The query handed to the module-level `query_engine`.

    Returns:
        The response object produced by `query_engine.query(question)`.
    """
    return query_engine.query(question)

In [4]:
# Uncomment and run once to populate the vector store (indexing is expensive).
# ingest()

In [5]:
# Sample question; `question` is reassigned in the chat cell further down.
question = "Tell me about Truc Nguyen ?"

# Disabled experiment: format a QA prompt by hand, using the result of
# retrieve(question) as the context. Nothing below this line runs.
# template = (
#     "We have provided context information below. \n"
#     "--------------------------------------------\n"
#     "{context_str}"
#     "\n-------------------------------------------\n"
#     "Given this information, please answer the question {query_str}\n"
#     "Make the answer short and concise, say dont know if you dont know, do not make up the answer\n"
# )

# qa_template = PromptTemplate(template)

# context_str = retrieve(question)
# question_str = question
# prompt = qa_template.format(context_str=context_str, query_str=question_str)

In [12]:
question = "What programming languages Truc knows ?"

# Conversation history kept for the chat engine, capped at 3900 tokens.
memory = ChatMemoryBuffer.from_defaults(
    token_limit=3900,
)

# "context" chat mode over the vector index, using the local LLM.
chat_engine = index.as_chat_engine(
    chat_mode="context",
    memory=memory,
    llm=llm,
    verbose=False,
    # The first sentence previously stopped at "as well as talk", leaving the
    # model with a dangling instruction; it now names what to talk about.
    system_prompt=(
        "You are a chatbot, able to have normal interactions, as well as talk "
        "about the documents provided as context.\n"
        "Make the response in Markdown format\n"
    )
)
resp = chat_engine.stream_chat(question)

# Print each chunk as it arrives; flush so partial output shows immediately.
for chunk in resp.response_gen:
    print(chunk, end="", flush=True)
# Terminate the streamed answer with a newline.
print()

# Programming Languages Known by Truc

Truc is proficient in the following programming languages:

*   C#
*   JavaScript (ES)/Typescript
*   Dart
*   SQL

These languages are used for both backend and frontend development, as well as other applications.