In [None]:
! pip install -U dspy

In [1]:
from settings import *
import dspy
import json

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Anthropic
# lm = dspy.LM('anthropic/claude-3-opus-20240229', api_key='#######')
# dspy.configure(lm=lm)

In [6]:
# Local Ollama
from litellm import completion

# Keyword options for the chat model exposed by the local Ollama endpoint.
# (http://localhost:8080 was noted here as an alternative api_base.)
llama_options = {
    "model_type": "chat",
    "temperature": TEMPRATURE,
    "api_base": "http://localhost:11434",
}
llama = dspy.LM("ollama_chat/localllama", **llama_options)

# Register this LM as the one DSPy is configured to use.
dspy.configure(lm=llama)

In [None]:
# Local LLAMA CPP (DEPRECATED: Has issues)

# REF-1: LLAMA CPP: https://dspy.ai/deep-dive/language_model_clients/lm_local_models/LlamaCpp/?h=l
# REF-2: GENERAL RAG:  https://dspy.ai/tutorials/rag/#configuring-the-dspy-environment 
# REF-3: https://github.com/stanfordnlp/dspy/blob/main/examples/migration.ipynb 

# from llama_cpp import Llama


# llm = Llama(
#     temperature=0.2,
#     model_path=MODEL_PATH,
#     n_ctx=(N_DOCS_RETREIVE+2)*CHUNK_SIZE,
#     n_gpu_layers=-1,
#     n_batch=32,  # Should be between 1 and n_ctx, consider the amount of VRAM in your GPU.
#     max_tokens=4098,
#     top_p=1,
#     verbose=False
# )

# llamalm = dspy.LlamaCpp(model="llama", llama_model=llm,  model_type="chat", temperature=TEMPRATURE)
# dspy.configure(lm=llamalm)

In [None]:
# Scratch cell: a sample chat-message list (system + user turns) in the same
# role/content shape that is passed to `llama(messages=...)` in the following
# cell. As a bare expression it is only echoed by the notebook, not sent to the LM.
[{"role":"system", "content":"You are pokemon"}, {"role":"user", "content": "Who are you ?"}]

In [7]:
# Smoke test: send a single user message straight to the configured LM and let
# the notebook display the returned completion(s). The system prompt is left
# commented out, so only the user turn is sent.
llama(messages=[
    #{"role":"system", "content":"You are pokemon pickachu who ends every conversation with PIKA PIKA!"},
    {"role":"user", "content": "What is Gen-AI?"}
])

[" Gen-AI stands for General Artificial Intelligence. It refers to the development of artificial intelligence systems that are capable of learning, reasoning, and problem-solving in a way that is indistinguishable from human intelligence.\n\nGen-AI aims to create intelligent machines that can perform tasks such as:\n\n* Reasoning: making decisions based on data and logic\n* Problem-solving: finding solutions to complex problems\n* Learning: adapting to new information and situations\n* Perception: understanding the world through sensory inputs\n\nThe development of Gen-AI has been driven by advances in machine learning, natural language processing, computer vision, and robotics. These technologies have enabled researchers and developers to create intelligent systems that can interact with humans in more natural and intuitive ways.\n\nSome potential applications of Gen-AI include:\n\n* Virtual assistants: AI-powered virtual assistants like Siri, Alexa, and Google Assistant\n* Self-drivi

In [8]:
# Loading Data
# The question file is JSON; read it as UTF-8 explicitly so the result does not
# depend on the platform's default text encoding.
with open(QUESTION_FILE, encoding="utf-8") as q_f:
    questions = json.load(q_f)

# Mapping To DSPy Mapping
# Each record's 'answer' is exposed as 'response', matching the output field of
# the 'context, question -> response' signature used by the RAG module.
questions = [{'question': q['question'], 'response': q['answer']} for q in questions]
print(f"Data {len(questions)} is loaded.")

Data 48 is loaded.


In [9]:
# Preparing Data
import random

# Wrap every record as a dspy.Example and mark 'question' as its input field.
data = [dspy.Example(**q).with_inputs('question') for q in questions]

# Split sizes: Train = 1/5 of the data, Dev = 2 * Train, Test = the remainder.
# Each size variable is named after the split it actually sizes.
n_data = len(data)
n_train = n_data // 5
n_dev = 2 * n_train
n_test = n_data - n_train - n_dev

# Fixed seed so the shuffle (and therefore the split) is reproducible.
random.Random(0).shuffle(data)
trainset = data[:n_train]
devset = data[n_train:n_train + n_dev]
testset = data[n_train + n_dev:]
print(f"Divided {n_data} to \n Train:{len(trainset)} Test: {len(testset)} Dev:{len(devset)}")

Divided 48 to 
 Train:9 Test: 21 Dev:18


In [10]:
# Loading the vector_store of Langchain
from langchain_community.vectorstores import FAISS
from utils import LlamaCppEmbeddingsFix

# Embedding model handed to FAISS for querying the saved index.
# NOTE(review): presumably this has to be the same model that built VECTOR_STORE — confirm.
embeding = LlamaCppEmbeddingsFix(
    model_path=MODEL_PATH, verbose=False,
    n_ctx=(N_DOCS_RETREIVE+2)*CHUNK_SIZE,
)

# SECURITY: allow_dangerous_deserialization=True permits unpickling the stored
# index data, which can execute arbitrary code. Only load index files that were
# produced locally or come from a trusted source.
# The store is loaded exactly once; load_local returns the loaded store.
vector_store = FAISS.load_local(
    VECTOR_STORE,
    embeddings=embeding,
    allow_dangerous_deserialization=True
)
print(f"Loaded {vector_store.index.ntotal} Documents")

# Retriever view over the store, configured from the shared settings constants.
retriever = vector_store.as_retriever(
    search_type=SEARCH_METHOD,
    search_kwargs={
        'k': N_DOCS_RETREIVE,
        'score_threshold': SCORE_THRESH,
    },
)

llama_new_context_with_model: n_ctx_per_seq (12000) < n_ctx_train (131072) -- the full capacity of the model will not be utilized


Loaded 17 Documents


In [11]:
from dspy.evaluate import SemanticF1

# RAG Based
def search(question, doc_retriever=None):
    """Retrieve passages for `question` and format them as one context string.

    Args:
        question: Query text passed to the retriever's `invoke` method.
        doc_retriever: Optional retriever to use instead of the notebook-level
            `retriever`. It needs an `invoke(query)` method returning documents
            that expose `metadata['page']` and `page_content`.

    Returns:
        One section per retrieved document, each a "Page <n>" header line
        followed by the passage text, with sections separated by a blank line.
        An empty string if nothing is retrieved.

    Raises:
        KeyError: If a retrieved document has no 'page' entry in its metadata.
    """
    source = retriever if doc_retriever is None else doc_retriever
    docs = source.invoke(question)
    # Separate sections explicitly: joining with "" fused the end of one
    # passage with the "Page <n>" header of the next.
    return "\n\n".join(f"Page {d.metadata['page']}\n{d.page_content}" for d in docs)

class RAG(dspy.Module):
    """Retrieve-then-answer pipeline: fetch context with `search`, then generate a response."""

    def __init__(self):
        # Let dspy.Module set up its own state before sub-modules are attached.
        super().__init__()
        # Chain-of-thought predictor for the signature 'context, question -> response'.
        self.respond = dspy.ChainOfThought('context, question -> response')

    def forward(self, question):
        """Answer `question` using retrieved passages as context.

        Args:
            question: The user question; it is also used as the retrieval query.

        Returns:
            Whatever `self.respond` returns for the retrieved context and the question.
        """
        context = search(question)
        return self.respond(context=context, question=question)

# Scoring function: SemanticF1 with its decompositional mode switched on.
metric = SemanticF1(decompositional=True)

# Baseline (not yet optimized) pipeline instance.
rag = RAG()

# Evaluator bound to the dev split: one thread, progress bar on,
# display_table=2 (presumably the number of result rows shown — confirm).
evaluate = dspy.Evaluate(
    devset=devset,
    metric=metric,
    num_threads=1,
    display_progress=True,
    display_table=2,
)

In [12]:
# Single Test Run
# Push the first training example through the pipeline and score the prediction.
# NOTE(review): the saved output of this cell is a ValueError listing the
# SemanticF1 fields as missing — apparently the LM reply could not be parsed
# while scoring; inspect the LM history to confirm.
example = trainset[0]
pred = rag(question=example.question)
score = metric(example, pred)

# Print each artefact under its banner, in the order example -> prediction -> score.
for banner, shown in (
    ("####### Example ######### \n", example),
    ("############ Pred ############## \n", pred),
    ("############ Score ############## \n", score),
):
    print(banner, shown)

ValueError: Expected dict_keys(['reasoning', 'ground_truth_key_ideas', 'system_response_key_ideas', 'discussion', 'recall', 'precision']) but got dict_keys([])

In [None]:
# Debugging aid: show the LM call history recorded by DSPy (useful for seeing
# what was actually sent and returned in the preceding run).
dspy.inspect_history()

In [None]:
# Raw Performance
# Baseline: run the evaluator (dev split, `metric`) on the unoptimized `rag` pipeline.
evaluate(rag)

In [None]:
# Optimize RAG
# Optimizer: MIPROv2 driven by the same metric, "medium" auto setting, one thread.
tp = dspy.MIPROv2(
    metric=metric,
    auto="medium",
    num_threads=1,
)

# Compile a fresh RAG instance against the training split, with at most two
# bootstrapped and two labeled demos, and without asking for run permission.
optimized_rag = tp.compile(
    RAG(),
    trainset=trainset,
    max_bootstrapped_demos=2,
    max_labeled_demos=2,
    requires_permission_to_run=False,
)

In [None]:
# Score the optimized pipeline with the same evaluator (dev split, `metric`)
# so the result is directly comparable to the baseline run.
evaluate(optimized_rag)