In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Chunking parameters handed to the splitter: the target chunk size and the
# overlap between neighbouring chunks, both measured with `len`.
CHUNK_SIZE = 512
CHUNK_OVERLAP = 20

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,  # chunk length is counted with the built-in len
    is_separator_regex=False,  # separators are not treated as regular expressions
)

In [None]:
from langchain_community.document_loaders import WikipediaLoader

# Build the Wikipedia loader for the query, then load the matching pages and
# split them with the `text_splitter` defined earlier in the notebook.
wikipedia_loader = WikipediaLoader(query="Leonardo Di Caprio")
docs = wikipedia_loader.load_and_split(text_splitter=text_splitter)

In [None]:
# Text of every chunk, in the same order as `docs`.
doc_contents = [chunk.page_content for chunk in docs]

# One integer id per chunk, counting up from 1 in that same order.
doc_ids = [position for position, _ in enumerate(docs, start=1)]

In [43]:
# Sanity check: display the text of the first chunk collected in `doc_contents`.
doc_contents[0]

"Leonardo Wilhelm DiCaprio (; Italian: [diˈkaːprjo]; born November 11, 1974) is an American actor and film producer. Known for his work in biographical and period films, he is the recipient of numerous accolades, including an Academy Award, a British Academy Film Award, and three Golden Globe Awards. As of 2019, his films have grossed over $7.2 billion worldwide, and he has been placed eight times in annual rankings of the world's highest-paid actors."

In [None]:
from qdrant_client import QdrantClient

# Create the Qdrant client, pointing it at the special ":memory:" location.
client = QdrantClient(":memory:")

# Add every chunk to "leo_collection", pairing each text with its integer id.
client.add(collection_name="leo_collection", documents=doc_contents, ids=doc_ids)

In [15]:
from dspy.retrieve.qdrant_rm import QdrantRM
import dspy

# Retrieval model: wraps the Qdrant client and the "leo_collection" collection, with k=10.
qdrant_retriever_model = QdrantRM("leo_collection", client, k=10)

# Generation options forwarded to the Ollama-backed language model.
ollama_generation_options = dict(
    max_tokens=350,
    temperature=0.1,
    top_p=0.8,
    frequency_penalty=1.17,
    top_k=40,
)
ollama_model = dspy.OllamaLocal(
    model="llama3",
    model_type="text",
    **ollama_generation_options,
)

# Register the language model (lm) and retrieval model (rm) in DSPy's settings.
dspy.settings.configure(lm=ollama_model, rm=qdrant_retriever_model)

In [41]:
import dspy
def get_top_passages(question, k=10):
    """Print the top-k passages retrieved for ``question``.

    Uses ``dspy.Retrieve``, i.e. whichever retrieval model is configured in
    ``dspy.settings``. Output goes to stdout only; nothing is returned.

    Args:
        question: Query text handed to the retriever.
        k: Number of passages to request. Defaults to 10, which was the
            value previously hard-coded in this function.
    """
    retrieve = dspy.Retrieve(k=k)
    # k is passed again at call time, mirroring the original explicit per-call value.
    topK_passages = retrieve(question, k=k).passages
    print(f"Top {k} passages for question: {question} \n", '-' * 30, '\n')
    # Number the passages from 1 for display.
    for idx, passage in enumerate(topK_passages, start=1):
        print(f'{idx}]', passage, '\n')

In [42]:
# Try the retriever on its own with a sample question; the helper prints the passages it gets back.
get_top_passages("Where was Leo born?")

Top 10 passages for question: Where was Leo born? 
 ------------------------------ 

1] == Early life == 

2] == Early life and acting background == 

3] == Biography ==
DiCaprio was born to George Leon DiCaprio and Olga Anne Jacobs. His father was the son of Italian immigrants, Salvatore Di Caprio and Rosina Cassella, and his mother was of German descent. 

4] Leonardo Wilhelm DiCaprio was born on November 11, 1974, in Los Angeles, California. He is the only child of Irmelin Indenbirken, a legal secretary, and George DiCaprio, an underground comix artist and distributor; they met while attending college and moved to Los Angeles after graduating. His mother is German and his father is of Italian and German descent. His maternal grandfather, Wilhelm Indenbirken, was German, and his maternal grandmother, Helene Indenbirken, was a Russian immigrant living in 

5] DiCaprio's parents named him Leonardo because his pregnant mother first felt him kick while she was looking at a Leonardo da Vi

In [35]:
# DSPy signature for the answering step: declares `context` and `question`
# as inputs and a single `answer` output.
# NOTE(review): DSPy appears to feed the class docstring and the `desc`
# strings into the LM prompt, so treat them as behavior rather than
# documentation -- confirm before rewording either.
class GenerateAnswer(dspy.Signature):
    """Answer questions with short factoid answers."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    answer = dspy.OutputField()

In [30]:
# DSPy signature for the query-writing step: declares `context` and
# `question` as inputs and a single `query` output.
# The prompt captured by `inspect_history` in this notebook opens with the
# docstring below and shows the `desc` text next to "Context:", so both
# strings are prompt content -- rewording them changes what the LM is asked.
class GenerateSearchQuery(dspy.Signature):
    """Write a simple search query that will help answer a complex question."""

    context = dspy.InputField(desc="may contain relevant facts")
    question = dspy.InputField()
    query = dspy.OutputField()

In [31]:
from dsp.utils import deduplicate

class SimplifiedBaleen(dspy.Module):
    """Multi-hop retrieve-then-answer program.

    On every hop a dedicated query generator writes a search query from the
    question and the context gathered so far; the retrieved passages are
    merged (deduplicated) into that context. A final module answers the
    question from the accumulated context.
    """

    def __init__(self, passages_per_hop=3, max_hops=2):
        """Build one query generator per hop, a retriever and an answerer.

        Args:
            passages_per_hop: ``k`` given to ``dspy.Retrieve``.
            max_hops: Number of query/retrieve rounds performed by ``forward``.
        """
        super().__init__()

        # A separate ChainOfThought instance is created for each hop.
        query_generators = []
        for _ in range(max_hops):
            query_generators.append(dspy.ChainOfThought(GenerateSearchQuery))
        self.generate_query = query_generators
        self.retrieve = dspy.Retrieve(k=passages_per_hop)
        self.generate_answer = dspy.ChainOfThought(GenerateAnswer)
        self.max_hops = max_hops

    def forward(self, question):
        """Run ``max_hops`` retrieval rounds, then answer ``question``.

        Returns:
            A ``dspy.Prediction`` with ``context`` (the deduplicated passages)
            and ``answer``.
        """
        gathered = []

        for hop_index in range(self.max_hops):
            write_query = self.generate_query[hop_index]
            search_query = write_query(context=gathered, question=question).query
            retrieved = self.retrieve(search_query).passages
            # Merge the new passages into the running context, dropping repeats.
            gathered = deduplicate(gathered + retrieved)

        prediction = self.generate_answer(context=gathered, question=question)
        return dspy.Prediction(context=gathered, answer=prediction.answer)

In [39]:
# The question put to the multi-hop program; edit it to try something else.
my_question = "Give me all the co-actors of Leonardo Di Caprio in the movie in which one of his co-stars was Robert De Niro?"

# Run the program as-is (uncompiled, i.e. zero-shot). The returned prediction
# exposes `pred.context` and `pred.answer`.
uncompiled_baleen = SimplifiedBaleen()
pred = uncompiled_baleen(my_question)

# Show the question, the answer, and the first 200 characters of each context passage.
context_previews = [passage[:200] + '...' for passage in pred.context]
print(f"Question: {my_question}")
print(f"Predicted Answer: {pred.answer}")
print(f"Retrieved Contexts (truncated): {context_previews}")

Question: Give me all the co-actors of Leonardo Di Caprio in the movie in which one of his co-stars was Robert De Niro?
Predicted Answer: Jesse Plemons, Tantoo Cardinal, John Lithgow, and Brendan Fraser.
Retrieved Contexts (truncated): ['Leonardo DiCaprio, Robert De Niro, and Lily Gladstone lead an ensemble cast, also including Jesse Plemons, Tantoo Cardinal, John Lithgow, and Brendan Fraser. It is the sixth feature film collaboration...', '== Film ==\n\n\n=== As actor ===\n\n\n=== As producer ===\n\n\n== Television ==\n\n\n== Notes ==\n\n\n== See also ==\nList of awards and nominations received by Leonardo DiCaprio\n\n\n== References ==\n\n\n== External link...', "The duo's films have been nominated for thirty-one Academy Awards, winning nine. In 2013, the duo was awarded National Board of Review Spotlight award for career collaboration. Scorsese's work with Di...", '== References ==\n\n\n== External links ==\nGeorge DiCaprio at IMDb...']


In [38]:
# Inspect the language model's recent call history (n=3) to see the prompts that were actually sent.
ollama_model.inspect_history(n=3)




Write a simple search query that will help answer a complex question.

---

Follow the following format.

Context: may contain relevant facts

Question: ${question}

Reasoning: Let's think step by step in order to ${produce the query}. We ...

Query: ${query}

---

Context: N/A

Question: Give me all the co-stars of Leonardo Di Caprio in the movie in which one of his co-stars was Robert De Niro?

Reasoning: Let's think step by step in order to[32m Here is a simple search query that can help answer this complex question:

---

Context: N/A

Question: Give me all the co-stars of Leonardo DiCaprio in the movie in which one of his co-stars was Robert De Niro?

Reasoning: Let's think step by step in order to produce the query. We need to find a movie where both Leonardo DiCaprio and Robert De Niro appeared, then extract their co-stars from that specific film.

Query: (Leonardo DiCaprio) AND (Robert De Niro) AND ("co-star" OR "cast list") -site:imdb.com

This query uses the following sea

'\n\n\nWrite a simple search query that will help answer a complex question.\n\n---\n\nFollow the following format.\n\nContext: may contain relevant facts\n\nQuestion: ${question}\n\nReasoning: Let\'s think step by step in order to ${produce the query}. We ...\n\nQuery: ${query}\n\n---\n\nContext: N/A\n\nQuestion: Give me all the co-stars of Leonardo Di Caprio in the movie in which one of his co-stars was Robert De Niro?\n\nReasoning: Let\'s think step by step in order to\x1b[32m Here is a simple search query that can help answer this complex question:\n\n---\n\nContext: N/A\n\nQuestion: Give me all the co-stars of Leonardo DiCaprio in the movie in which one of his co-stars was Robert De Niro?\n\nReasoning: Let\'s think step by step in order to produce the query. We need to find a movie where both Leonardo DiCaprio and Robert De Niro appeared, then extract their co-stars from that specific film.\n\nQuery: (Leonardo DiCaprio) AND (Robert De Niro) AND ("co-star" OR "cast list") -site:imd