In [1]:
from src.vectorstore import VectorstoreHandler
from src.models import init_emb, init_llm
from src.rag_tools import build_rag_chain, handle_query, create_answer_fn


# Embedding models dictionary with unique identifiers as keys
# AVAILABLE_EMBS = {
#     # OpenAI Embeddings
#     "openai-ada-002": lambda: OpenAIEmbeddings(model="text-embedding-ada-002", openai_api_key=os.getenv("OPENAI_API_KEY")),
#     "openai-embedding-3-small": lambda: OpenAIEmbeddings(model="text-embedding-3-small", openai_api_key=os.getenv("OPENAI_API_KEY")),
#     "openai-embedding-3-large": lambda: OpenAIEmbeddings(model="text-embedding-3-large", openai_api_key=os.getenv("OPENAI_API_KEY")),
    
#     # Hugging Face Embeddings
#     "hf-mpnet-base-v2": lambda: HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2"),
#     "hf-minilm-l6-v2": lambda: HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"),
#     "hf-multiqa-minilm-l6-v1": lambda: HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-MiniLM-L6-dot-v1")
# }

# # LLM mappings
# AVAILABLE_LLMS = {
#     "ChatGPT4o": "gpt-4o",
#     "ChatGPT3.5-turbo": "gpt-3.5-turbo",
#     "Llama3.2-3b": "llama3.2:3b",
# }

# --- Book corpus: configuration ---------------------------------------------
# Root directory of all source collections, and the sub-collection used here.
sources_dir = "sources"
subfolder_name = "book"
book_dir = f"{sources_dir}/{subfolder_name}"

# Model identifiers handed to init_llm / init_emb below
# (presumably keys of the commented-out reference tables above — confirm in src.models).
book_llm_name = "ChatGPT4o"
book_emb_name = "openai-embedding-3-small"

# Passed as the last argument to the retriever initialiser
# (presumably the number of chunks retrieved per query — confirm in VectorstoreHandler).
k = 10

# --- Book corpus: models -> vectorstore -> retriever -> RAG chain ------------
book_emb = init_emb(book_emb_name)
book_llm = init_llm(book_llm_name)

# One handler is created here and reused by later cells for other collections.
handler = VectorstoreHandler(sources_dir, force_rebuild=False)
book_vs = handler.build_vectorstore(book_dir, book_emb, book_emb_name)

# NOTE(review): _init_retriever is a private-looking method; a public accessor
# on VectorstoreHandler would be preferable if one exists.
book_retriever = handler._init_retriever(book_vs, book_dir, k)
book_chain = build_rag_chain(book_retriever, book_llm)

Rebuilding vectorstore for sources/book...
Splitting text in sources/book into chunks...


Processing documents: 100%|██████████| 71/71 [00:00<00:00, 44150.57it/s]


Nr of chunks: 71
Adding 71 documents to the vectorstore...


In [None]:
# Smoke test: send one question through the book RAG chain.
test_prompt = "Explain PSO"
output = book_chain.invoke(test_prompt)

# The chain result is indexed by 'answer' and 'docs'; keep both around
# as named variables and print them one per line.
ans, docs = output['answer'], output['docs']
print(ans, docs, sep="\n")


[Document(id='028d063d-f375-475a-9b41-6e38f9abbc5d', metadata={}, page_content='23/10/2024, 14:14 Biologically I spi ed Op imiza io  Me hods: A  I oduc io  23/10/2024, 14:14 Biologically I spi ed Op imiza io  Me hods: A  I oduc io \nh ps:// 2.vle eade .com/Reade ?ea =9781845643447# 102/285 h ps:// 2.vle eade .com/Reade ?ea =9781845643447# 104/285'), Document(id='35ed3699-afd4-4337-a44d-61bbc59b86b3', metadata={}, page_content='23/10/2024, 14:14 Biologically I spi ed Op imiza io  Me hods: A  I oduc io  23/10/2024, 14:14 Biologically I spi ed Op imiza io  Me hods: A  I oduc io \nh ps:// 2.vle eade .com/Reade ?ea =9781845643447# 142/285 h ps:// 2.vle eade .com/Reade ?ea =9781845643447# 144/285'), Document(id='aaa037ac-5d4e-4d3b-8205-0f559044e250', metadata={}, page_content='23/10/2024, 14:14 Biologically I spi ed Op imiza io  Me hods: A  I oduc io  23/10/2024, 14:14 Biologically I spi ed Op imiza io  Me hods: A  I oduc io \nh ps:// 2.vle eade .com/Reade ?ea =9781845643447# 138/285 h ps://

In [None]:
# --- Lecture corpus: configuration ------------------------------------------
# Same root directory as the book cell, different sub-collection.
sources_dir = "sources"
subfolder_name = "lectures"
lect_dir = f"{sources_dir}/{subfolder_name}"

# Model identifiers handed to init_llm / init_emb below.
lect_llm_name = "ChatGPT4o"
lect_emb_name = "openai-embedding-3-small"

# Passed as the last argument to the retriever initialiser
# (presumably the number of chunks retrieved per query — confirm in VectorstoreHandler).
k = 10

# --- Lecture corpus: models -> vectorstore -> retriever -> RAG chain ---------
lect_emb = init_emb(lect_emb_name)
lect_llm = init_llm(lect_llm_name)

# Relies on `handler` created in the book cell; that cell must run first.
lect_vs = handler.build_vectorstore(lect_dir, lect_emb, lect_emb_name)
lect_retriever = handler._init_retriever(lect_vs, lect_dir, k)
lect_chain = build_rag_chain(lect_retriever, lect_llm)

Rebuilding vectorstore for sources/lectures...
Splitting text in sources/lectures into chunks...


Processing documents: 100%|██████████| 959/959 [06:39<00:00,  2.40it/s]


Nr of chunks: 1459
Adding 1459 documents to the vectorstore...


In [5]:
# Smoke test: send the same question through the lecture RAG chain.
test_prompt = "Explain PSO"
output = lect_chain.invoke(test_prompt)

# The chain result is indexed by 'answer' and 'docs'; keep both around
# as named variables and print them one per line.
ans, docs = output['answer'], output['docs']
print(ans, docs, sep="\n")


Particle Swarm Optimization (PSO) is an optimization method inspired by swarming behavior in nature, such as that observed in birds and fish. It was developed by Eberhart and Kennedy in the mid-1990s. In PSO, a group (swarm) of particles moves through the solution space of an optimization problem, adjusting their positions based on their own experience and that of their neighbors, to find the optimal solution. Each particle has a position and a velocity, which are updated iteratively. The algorithm typically focuses on minimization problems, evaluating each particle based on an objective function. PSO does not use crossover like genetic algorithms (GAs) and allows for velocity-based search, which is more directed than random changes. Suitable for various applications, particularly effective in neural network optimization, PSO is noted for its simplicity in parameter setting and usually employs a smaller population size compared to GAs.
[Document(id='531025d7-79c6-4f68-99a5-9e297cce2d5c

Particle Swarm Optimization (PSO) is an optimization method inspired by the behavior of swarms, such as groups of birds or fish. Developed by Eberhart and Kennedy in the mid-1990s, PSO is particularly used for solving optimization problems within a search space defined by an objective function. 

Key characteristics of PSO include:

1. **Particle Representation**: Individuals in the PSO are referred to as particles. Each particle represents a potential solution and defines a point in an n-dimensional search space.

2. **Velocity and Position**: Unlike genetic algorithms (GAs), PSO incorporates the notion of velocity. Particles have velocities which direct their movement through the search space. Positions and velocities are initially randomized, allowing for both positive and negative values.

3. **Objective Function Evaluation**: The position of each particle is evaluated using the objective function \( f(x_i) \), with the aim to minimize (or maximize by reversing inequalities) the fu