#### 1- Install or Load Necessary Libraries

In [None]:
from langchain_openai import AzureChatOpenAI
from langchain.schema import SystemMessage, HumanMessage
import os
from dotenv import load_dotenv
# Read key/value pairs from a local .env file into the process environment so
# that API settings do not have to be hard-coded in the notebook.
load_dotenv()

In [None]:
import os
import numpy as np
from langchain.text_splitter import RecursiveCharacterTextSplitter
import faiss
from sentence_transformers import SentenceTransformer
# Load the sentence-embedding model once; the same `model` object is used later
# to embed both the document chunks and the user query.
model = SentenceTransformer('all-MiniLM-L6-v2') # Load the model

#### 2 - Load and Chunk Text

In [None]:
def load_and_chunk_text(file_path, chunk_size=250, chunk_overlap=50):
    """Read a text file and split its content into overlapping chunks.

    Args:
        file_path: Path of the text file to read.
        chunk_size: Maximum size of each chunk, forwarded to
            ``RecursiveCharacterTextSplitter``. No custom length function is
            supplied here, so the splitter's default measure applies
            (characters, not tokens).
        chunk_overlap: Amount of content shared between consecutive chunks,
            measured the same way as ``chunk_size``.

    Returns:
        The list of text chunks produced by the splitter.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()
    # Initialize the text splitter with the requested parameters
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # Split the document into chunks
    chunks = text_splitter.split_text(content)
    print(f"Total chunks created: {len(chunks)}")
    return chunks

In [None]:
# Load and chunk the text
# The relative path is resolved against the current working directory.
# load_and_chunk_text is called with its defaults (chunk_size=250,
# chunk_overlap=50); `chunks` is reused below for embedding and for mapping
# search hits back to their text.
file_path = "healthy_nutrition_info.txt"
chunks = load_and_chunk_text(file_path)

#### 3- Generate Embeddings

In [None]:
# Generate embeddings for each chunk
# The query step indexes `chunks` with the positions FAISS returns, so it
# relies on row i of `embeddings` corresponding to chunks[i].
embeddings = model.encode(chunks)
 
# Create a FAISS index and add embeddings
def create_faiss_index(embeddings, index_file='faiss_index.index'):
    """Build a flat L2 FAISS index from ``embeddings`` and write it to disk.

    Args:
        embeddings: 2-D array-like with one embedding vector per row.
        index_file: Path the index is written to. The default matches the
            default ``index_file`` that ``query_faiss_index`` reads.

    Returns:
        None. The index is persisted to ``index_file``.
    """
    # FAISS works on C-contiguous float32 matrices; coerce here so that
    # float64 input or plain Python lists are accepted as well.
    vectors = np.ascontiguousarray(embeddings, dtype=np.float32)
    index = faiss.IndexFlatL2(vectors.shape[1])  # Using L2 distance
    index.add(vectors)
    faiss.write_index(index, index_file)
 
# Build the index from the chunk embeddings and persist it to
# 'faiss_index.index', the file the query step loads by default.
create_faiss_index(embeddings)

#### 4- Query FAISS Index and Retrieve Chunks

In [None]:
def query_faiss_index(query, model, chunks, index_file='faiss_index.index', top_k=5):
    """Return the chunks whose embeddings are closest to ``query``.

    Args:
        query: The user's question as a string.
        model: Embedding model exposing ``encode(list_of_str)``; it should be
            the same model that produced the indexed embeddings.
        chunks: The text chunks, in the same order as the vectors stored in
            the index.
        index_file: Path of the FAISS index file to load.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunks, nearest first. Fewer are returned when the
        index holds fewer vectors; an empty list when the index is empty or
        ``top_k`` is not positive.
    """
    index = faiss.read_index(index_file)  # Load the FAISS index

    # Never ask for more neighbours than the index holds: FAISS pads missing
    # results with the id -1, and chunks[-1] would silently return the last
    # chunk as a bogus hit.
    k = min(top_k, index.ntotal)
    if k <= 0:
        return []

    # encode([query]) already yields one row per input string; FAISS expects
    # a C-contiguous float32 matrix.
    query_vectors = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
    _, neighbour_ids = index.search(query_vectors, k=k)

    # Keep the -1 filter as a safety net in case the index still reports
    # fewer than k results.
    return [chunks[i] for i in neighbour_ids[0] if i >= 0]

#### 5- Demo of a User Query and Top 5 Chunks

In [None]:
def get_similar_chunks(query, model, chunks):
    """Return the chunks most similar to ``query``.

    Convenience wrapper that delegates to ``query_faiss_index`` and leaves its
    ``index_file`` and ``top_k`` arguments at their defaults.
    """
    nearest_chunks = query_faiss_index(query=query, model=model, chunks=chunks)
    return nearest_chunks

In [None]:
# Example question; `query` is reused below when building the LLM prompt.
query = "What is healthy nutrition?"
# Retrieve the chunks closest to the query (top_k defaults to 5 in
# query_faiss_index).
top_chunks = get_similar_chunks(query, model, chunks)
# Bare expression so the notebook displays the retrieved chunks.
top_chunks

#### 6- Use ChatGPT to Create an Answer to a User's Query Based on the Top 5 Similar Chunks

In [None]:
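# TODO: Add the API settings here (API type, endpoint, key, deployment) and
# create the chat-model object as `llm`. The "Invoke the model" cell below
# calls `llm.invoke(messages)` and raises NameError if `llm` is not defined.
# Example (placeholder values; adjust to your Azure OpenAI resource):
# llm = AzureChatOpenAI(azure_deployment="<deployment-name>", api_version="<api-version>")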

In [None]:
# Create the content message from the query and the retrieved chunks.
# The chunk count in the prompt is derived from `top_chunks` so the text stays
# accurate when fewer (or more) than 5 chunks were retrieved.
chunk_lines = "".join(
    f"Chunk {position}: {chunk}\n\n"
    for position, chunk in enumerate(top_chunks, start=1)
)

content_message = (
    f"I have a query: {query}\n\n"
    f"  Here are {len(top_chunks)} chunks of related information:\n"
    f"{chunk_lines}"
    "Please provide an answer based solely on these chunks."
)
# Bare expression so the notebook displays the assembled prompt.
content_message

In [None]:
# Assemble the conversation: a system instruction that restricts the model to
# the supplied chunks, followed by the user prompt built above.
system_prompt = "You are a friendly, polite and helpful AI assistant. Answer the query based only and only on the provided chunks."
messages = [
    SystemMessage(content=system_prompt),
    HumanMessage(content=content_message),
]

In [None]:
# Invoke the model
# NOTE: `llm` is not defined in any of the visible cells; it has to be created
# in the API-settings cell above, otherwise this line raises NameError.
res = llm.invoke(messages)

In [None]:
# Show the model's answer under a heading
print("Final Answer:", res.content, sep="\n")