In [1]:
import pandas as pd
import numpy as np
import json
import os
import re

from ollama import chat
from ollama import ChatResponse

In [2]:
MODEL_NAME = 'phi4:14b'

# The embeddings and the dataframe created and saved in Part 1
PATH_TO_EMBEDS = 'compressed_array.npz'
PATH_TO_DF = 'compressed_dataframe.csv.gz'

# Define the API clients

In [3]:
from dotenv import load_dotenv
import os

load_dotenv(os.path.expanduser("~/Desktop/dot-env-api-keys/my-api-keys.env"))

TAVILY_API_KEY = os.getenv("TAVILY_API_KEY")

In [4]:
# Tavily web search

from tavily import TavilyClient

tavily_client = TavilyClient(api_key=TAVILY_API_KEY)

# Helper functions

In [5]:
def initialize_message_history(system_message):

    message_history = [
                        {
                            "role": "system",
                            "content": system_message
                        }
                    ]

    return message_history

In [6]:
def create_message_history(system_message, user_input):

    """
    Create a message history messages list.
    Args:
        system_message (str): The system message
        user_query (str): The user input
    Returns:
        A list of dicts in OpenAi chat format
    """

    message_history = [
                        {
                            "role": "system",
                            "content": system_message
                        },
                        {
                            "role": "user",
                            "content": user_input
                        }
                    ]

    return message_history



In [7]:
def initialize_message_history(system_message):

    """
    Create a message history messages list.
    Args:
        system_message (str): The system message
        user_query (str): The user input
    Returns:
        A list of dicts in OpenAi chat format
    """

    message_history = [
                        {
                            "role": "system",
                            "content": system_message
                        }
                    ]

    return message_history



In [8]:
# Function to separate the thinking and the response

def process_response(text):
    
    text1 = text.split('</think>')[0]
    text2 = text.split('</think>')[1]
    
    thinking_text = text1 + '</think>'
    response_text = text2.strip()

    return thinking_text, response_text

## System message

In [9]:
chat_agent_system_message = f"""
You are a friendly and helpful research assistant named Serena.

Your knowledge cutoff: August 2024
Current date: August 2025

1. You provide polite answers to simple questions.
If the user's input requires only a simple answer, then output your answer as JSON.

Example session:

Question: Hello. How are you?

You output:

{{
"Answer": "I'm fine, thanks.",
"Status": "DONE"
}}

2. You can also run in a loop of Thought, Action, PAUSE, Observation.
At the end of the loop, you output an Answer.
Use Thought to describe your thoughts about the question you have been asked.
Use Action to run one of the actions available to you - then return PAUSE.
Observation will be the result of running those actions.
Output your response as a JSON string.

Your available actions are:

find_arxiv_research_papers:
e.g. find_arxiv_research_papers: [list of search keywords and phrases for a RAG search of the ArXiv database.]
Returns research papers from the ArXiv database.

run_web_search:
e.g. run_web_search: [list of search keywords and phrases for a web search]
Returns text content from search results.

You can only call one action at a time.

Example session:

Question: What are the latest techniques for detecting Pneumonia on x-rays using AI?
{{
"Thought": "I should look for relevant research papers in the ArXiv database by using find_arxiv_research_papers.",
"Action": {{"function":"find_arxiv_research_papers", "input": ["Pneumonia detection with AI", "Computer vision", "Object detection"]}},
"Status": "PAUSE"
}}

You will be called again with this:

Observation: <results>A list of research papers and their content</results>

You then output:
{{
"Answer": "Your final report.",
"Status": "DONE"
}}

ALWAYS check your responses to ensure that all required JSON keys and values have been included.
""".strip()


# Set up the LLM

In [10]:
def make_llm_api_call(message_history):

    model_name = MODEL_NAME

    response: ChatResponse = chat(model=model_name, 
                                  messages=message_history,
                                )

    output_text = response['message']['content']

    #thinking_text, response_text = process_response(output_text)

    return output_text


# Example

system_message = "Your name is Molly."
user_message = "What's your name?"

message_history = create_message_history(system_message, user_message)

response_text = make_llm_api_call(message_history)

print(response_text)

#print(response['message']['content'])

My name is Molly! How can I assist you today? 😊


## Set up the tools



### ArXiv RAG search tool

In [11]:
def run_faiss_search(query_text, top_k):
    
    # Run FAISS exhaustive search
    
    query = [query_text]

    # Vectorize the query string
    query_embedding = sent_model.encode(query)

    # Run the query
    # index_vals refers to the chunk_list index values
    scores, index_vals = faiss_index.search(query_embedding, top_k)
    
    # Get the list of index vals
    index_vals_list = index_vals[0]
    
    return index_vals_list
    

def run_rerank(index_vals_list, query_text):
    
    chunk_list = list(df_data['prepared_text'])

    # Replace the chunk index values with the corresponding strings
    pred_strings_list = [chunk_list[item] for item in index_vals_list]

    # Format the input for the cross encoder
    # The input to the cross_encoder is a list of lists
    # [[query_text, pred_text1], [query_text, pred_text2], ...]

    cross_input_list = []

    for item in pred_strings_list:

        new_list = [query_text, item]

        cross_input_list.append(new_list)


    # Put the pred text into a dataframe
    df = pd.DataFrame(cross_input_list, columns=['query_text', 'pred_text'])

    # Save the orginal index (i.e. df_data index values)
    df['original_index'] = index_vals_list

    # Now, score all retrieved passages using the cross_encoder
    cross_scores = cross_encoder.predict(cross_input_list)

    # Add the scores to the dataframe
    df['cross_scores'] = cross_scores

    # Sort the DataFrame in descending order based on the scores
    df_sorted = df.sort_values(by='cross_scores', ascending=False)
    
    # Reset the index (*This was missed previously*)
    df_sorted = df_sorted.reset_index(drop=True)

    pred_list = []

    for i in range(0,len(df_sorted)):

        text = df_sorted.loc[i, 'pred_text']

        # Get the arxiv id
        # original_index refers to the index values in df_filtered
        original_index = df_sorted.loc[i, 'original_index']
        arxiv_id = df_data.loc[original_index, 'id']
        cat_text = df_data.loc[original_index, 'cat_text']
        title = df_data.loc[original_index, 'title']

        # Crete the link to the research paper pdf
        link_to_pdf = f'https://arxiv.org/pdf/{arxiv_id}'

        item = {
            'arxiv_id': arxiv_id,
            'link_to_pdf': link_to_pdf,
            'cat_text': cat_text,
            'title': title,
            'abstract': text
        }

        pred_list.append(item)

    return pred_list


def print_search_results(pred_list, num_results_to_print):
    
    for i in range(0,num_results_to_print):
        
        pred_dict = pred_list[i]
        
        link_to_pdf = pred_dict['link_to_pdf']
        abstract = pred_dict['abstract']
        cat_text = pred_dict['cat_text']
        title = pred_dict['title']

        print('Title:',title)
        print('Categories:',cat_text)
        print('Abstract:',abstract)
        print('Link to pdf:',link_to_pdf)
        print()
    
   
def run_arxiv_search(query_text, top_k=50):
    
    # Run a faiss greedy search
    pred_index_list = run_faiss_search(query_text, top_k)

    # This returns a list of dicts with length equal to top_k
    pred_list = run_rerank(pred_index_list, query_text)
    
    # Print the results
    #print_search_results(pred_list, num_results_to_print)
    
    return pred_list
    

In [12]:
# Load the compressed array
embeddings = np.load(PATH_TO_EMBEDS)

# Access the array by the name you specified ('my_array' in this case)
embeddings = embeddings['array_data']

embeddings.shape

(2421966, 384)

In [13]:
# Load the compressed DataFrame

df_data = pd.read_csv(PATH_TO_DF, compression='gzip')

print(df_data.shape)

df_data.head()

  df_data = pd.read_csv(PATH_TO_DF, compression='gzip')


(2421966, 6)


Unnamed: 0,id,title,abstract,categories,cat_text,prepared_text
0,704.0001,Calculation of prompt diphoton production cros...,A fully differential calculation in perturbati...,hep-ph,High Energy Physics - Phenomenology,Calculation of prompt diphoton production cros...
1,704.0002,Sparsity-certifying Graph Decompositions,"We describe a new algorithm, the $(k,\ell)$-pe...",math.CO cs.CG,"Combinatorics, Computational Geometry",Sparsity-certifying Graph Decompositions {titl...
2,704.0003,The evolution of the Earth-Moon system based o...,The evolution of Earth-Moon system is describe...,physics.gen-ph,General Physics,The evolution of the Earth-Moon system based o...
3,704.0004,A determinant of Stirling cycle numbers counts...,We show that a determinant of Stirling cycle n...,math.CO,Combinatorics,A determinant of Stirling cycle numbers counts...
4,704.0005,From dyadic $\Lambda_{\alpha}$ to $\Lambda_{\a...,In this paper we show how to compute the $\Lam...,math.CA math.FA,"Classical Analysis and ODEs, Functional Analysis",From dyadic $\Lambda_{\alpha}$ to $\Lambda_{\a...


In [14]:
# Initialize FAISS

import faiss

embed_length = embeddings.shape[1]

faiss_index = faiss.IndexFlatL2(embed_length)

# Add the embeddings to the index
faiss_index.add(embeddings)

faiss_index.is_trained

True

In [15]:
# Initialize sentence_transformers

from sentence_transformers import SentenceTransformer

sent_model = SentenceTransformer("all-MiniLM-L6-v2")

In [16]:
# Initialize the cross_encoder for reranking

from sentence_transformers import CrossEncoder

# We use a cross-encoder, to re-rank the results list to improve the quality
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [17]:
# Example

query_text = """
I want to build an invisibility cloak like the one in Harry Potter.
"""


# RUN THE SEARCH
num_results_to_print = 20 # top_k = 300
pred_list = run_arxiv_search(query_text, top_k=5)

In [18]:
pred_list[0]

{'arxiv_id': 1101.0904,
 'link_to_pdf': 'https://arxiv.org/pdf/1101.0904',
 'cat_text': 'Classical Physics',
 'title': "Harry Potter's Cloak",
 'abstract': 'Harry Potter\'s Cloak {title} The magic "Harry Potter\'s cloak" has been the dream of human beings for really long time. Recently, transformation optics inspired from the advent of metamaterials offers great versatility for manipulating wave propagation at will to create amazing illusion effects. In the present work, we proposed a novel transformation recipe, in which the cloaking shell somehow behaves like a "cloaking lens", to provide almost all desired features one can expect for a real magic cloak. The most exciting feature of the current recipe is that an object with arbitrary characteristics (e.g., size, shape or material properties) can be invisibilized perfectly with positive-index materials, which significantly benefits the practical realization of a broad-band cloaking device fabricated with existing materials. Moreover, 

### Tavily web search

In [19]:
def run_tavily_search(query, num_results=10):

    """
    Uses the Tavily API to run a web search
    Args:
        query (str): The user query
        num_results (int): Num search results
    Returns:
        tav_response (json string): The search results in json format
    """

    # For basic search:
    tav_response = tavily_client.search(query=query, max_results=num_results)

    return tav_response


# Example

query = "Who is the current UK Prime Minister?"

results = run_tavily_search(query, num_results=2)

print(results)

{'query': 'Who is the current UK Prime Minister?', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://www.parliament.uk/site-information/glossary/prime-minister/', 'title': 'Prime Minister - UK Parliament', 'content': '... leader of the Government. He or she is the leader of the party that wins the most seats at a general election. The current Prime Minister is David Cameron.', 'score': 0.8808076, 'raw_content': None}, {'url': 'https://en.wikipedia.org/wiki/Prime_Minister_of_the_United_Kingdom', 'title': 'Prime Minister of the United Kingdom - Wikipedia', 'content': "Prime Minister of the United Kingdom ; Incumbent Keir Starmer. since 5 July 2024 ; Government of the United Kingdom · Prime Minister's Office", 'score': 0.87782305, 'raw_content': None}], 'response_time': 1.74, 'request_id': 'a2663d0d-41e2-4bcc-8f13-6b4e406bcc48'}


# Set up the Agents

In [20]:
def run_chat_agent(message_history):

    print("---CHAT AGENT---")

    # Prompt the llm
    response = make_llm_api_call(message_history)

    response = response.replace('```json', '')
    response = response.replace('```', '')
    response = response.strip()

    return response


# Example

user_message = "Hello"

message_history = create_message_history(chat_agent_system_message, user_message)

response_text = run_chat_agent(message_history)

print(response_text)

#print(response['message']['content'])

---CHAT AGENT---
{
  "Answer": "Hi there! I'm fine, thanks for asking. How can I assist you today?",
  "Status": "DONE"
}


In [21]:
def run_router_agent(llm_response):

    """
    Route to web search or not.
    Args:
        state (dict): The current graph state
    Returns:
        str: Next node to call
    """

    print("---ROUTER AGENT---")

    # Extract the status
    json_response = json.loads(llm_response)
    status = json_response['Status']
    
    print("Status:", status)

    if status == 'PAUSE':
        print("Route: to_research_agent")
        return "to_research_agent"

    else:
        print("Route: to_final_answer")
        return "to_final_answer"
            



# Example

user_message = "hello"

message_history = create_message_history(chat_agent_system_message, user_message)

# Prompt the chat_agent
response = run_chat_agent(message_history)

# Run router_agent
route = run_router_agent(response)

print(route)

---CHAT AGENT---
---ROUTER AGENT---
Status: DONE
Route: to_final_answer
to_final_answer


In [22]:
def run_research_agent(llm_response):

    print("---RESEARCH AGENT---")

    # Extract the status
    json_response = json.loads(llm_response)
    action_dict = json_response['Action']
    func_to_run = action_dict['function']
    func_input_list = action_dict['input']
    
    answer_list = []

    if func_to_run == "find_arxiv_research_papers":
        for search_query in func_input_list:
            answer = run_arxiv_search(search_query, top_k=5)
            answer_list.append(answer)
    else:
        for search_query in func_input_list:
            answer = run_tavily_search(search_query, num_results=5)
            answer_list.append(answer)

    print("func_to_run:", func_to_run)
    print("func_arg:", func_input_list)
    print("Output:", answer_list)

    return answer_list



# Example

user_message = "Has OpenAi released any new open source models?"

message_history = create_message_history(chat_agent_system_message, user_message)

# Prompt the chat_agent
response = run_chat_agent(message_history)

# Run router_agent
route = run_router_agent(response)


if route == "to_research_agent":
    answer = run_research_agent(response)

    # Update message history
    #message = {"role": "user", "content": f"Observation: {answer}"}
    #message_history.append(message)



---CHAT AGENT---
---ROUTER AGENT---
Status: PAUSE
Route: to_research_agent
---RESEARCH AGENT---
func_to_run: run_web_search
func_arg: ['OpenAI new open source models', 'OpenAI model releases']
Output: [{'query': 'OpenAI new open source models', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://www.seangoedecke.com/gpt-oss-is-phi-5/', 'title': "OpenAI's new open-source model is basically Phi-5", 'content': "OpenAI's new open-source model is basically Phi-5 OpenAI's new open-source model is basically Phi-5 ### Phi models and training on synthetic data The big idea behind those models was to train exclusively on synthetic data: instead of text pulled from books or the internet, text generated by other language models or hand-curated textbooks. But since you’re “teaching for the test”, you should expect to do worse than other language models who are training on broad data and end up being good at the benchmarks by accident. Why would OpenAI train Phi-s

In [23]:
def check_output_type(output):
    try:
        json.loads(output)
        return "is_json"
    except json.JSONDecodeError:
        return "is_plain_text"

In [24]:
def run_final_answer_agent(llm_response):

    # Check if the output is JSON
    output_type = check_output_type(llm_response)

    print("---FINAL ANSWER AGENT---")

    if output_type == 'is_json':

        json_response = json.loads(llm_response)
        final_answer = json_response['Answer']
    
        print("Final answer:", final_answer)

    # The model has ouput plain text that's
    # not being formatted as per the system message.
    else:
        print(llm_response)


# Run the system

In [25]:

user_input = "What is the current year?"

message_history = create_message_history(chat_agent_system_message, user_input)

for i in range(0,10):

    # Prompt the chat_agent
    llm_response = run_chat_agent(message_history)
    
    # Update message history
    message = {"role": "assistant", "content": llm_response}
    message_history.append(message)

    # Run router_agent
    route = run_router_agent(llm_response)


    if route == "to_research_agent":

        answer = run_research_agent(llm_response)
        
        user_input = f"Observation: {answer}"
        message = {"role": "user", "content": user_input}
        message_history.append(message)

    else:

        run_final_answer_agent(llm_response)

        break



---CHAT AGENT---
---ROUTER AGENT---
Status: DONE
Route: to_final_answer
---FINAL ANSWER AGENT---
Final answer: The current year is 2025.


# Run a chat loop

In [26]:
# NOTES
# 1. The model thinking is being printed out.


# Initialize the message history
message_history = initialize_message_history(chat_agent_system_message)

while True:

    print()
    print("==========")
    user_input = input("Enter something (or 'q' to quit): ")
    print("==========")

    # Update message history
    message = {"role": "user", "content": user_input}
    message_history.append(message)
    
    i = i + 1

    if user_input.lower() == 'q':
        print("Exiting the loop. Goodbye!")
        break  # Exit the loop


    for i in range(0,10):
           
        llm_response = run_chat_agent(message_history)
        
        # Update message history
        message = {"role": "assistant", "content": llm_response}
        message_history.append(message)

        # Run router_agent
        route = run_router_agent(llm_response)
            

        if route == "to_research_agent":

            answer = run_research_agent(llm_response)
            
            user_input = f"Observation: {answer}"
            message = {"role": "user", "content": user_input}
            message_history.append(message)

        else:

            run_final_answer_agent(llm_response)

            break






Enter something (or 'q' to quit):  Hi


---CHAT AGENT---
---ROUTER AGENT---
Status: DONE
Route: to_final_answer
---FINAL ANSWER AGENT---
Final answer: Hello! How can I assist you today?



Enter something (or 'q' to quit):  Has OpenAi released any new open source models recently?


---CHAT AGENT---
---ROUTER AGENT---
Status: PAUSE
Route: to_research_agent
---RESEARCH AGENT---
func_to_run: run_web_search
func_arg: ['OpenAI new open source models release']
Output: [{'query': 'OpenAI new open source models release', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'url': 'https://gaiinsights.substack.com/p/openai-releases-two-powerful-open', 'title': 'OpenAI Releases Two Powerful Open Source AI Models', 'content': '# OpenAI Releases Two Powerful Open Source AI Models: What You Need To Know OpenAI Releases Two Powerful Open Source AI Models: What You Need To Know OpenAI’s recent release of two new open source models — gpt-oss-120b and gpt-oss-20b — is a major shift in the AI landscape. Until now, OpenAI’s AI models have been closed source and expensive, especially compared to open source alternatives—particularly those from China, which offer approximately 90% of the performance at 90% lower cost. * Encourage your AI team to test OpenAI’s new o

Enter something (or 'q' to quit):  Thanks. I'd like to create an invisibility cloak similar to the one that Harry Potter uses. Can you suggest some research papers that I can consult/


---CHAT AGENT---
---ROUTER AGENT---
Status: PAUSE
Route: to_research_agent
---RESEARCH AGENT---
func_to_run: find_arxiv_research_papers
func_arg: ['invisibility cloak', 'metamaterials', 'cloaking technology', 'light manipulation']
Output: [[{'arxiv_id': 811.0458, 'link_to_pdf': 'https://arxiv.org/pdf/811.0458', 'cat_text': 'Optics', 'title': 'A complementary media invisibility cloak that can cloak objects at a   distance outside the cloaking shell', 'abstract': 'A complementary media invisibility cloak that can cloak objects at a   distance outside the cloaking shell {title} Based on the concept of complementary media, we propose an invisibility cloak operating at a finite frequency that can cloak an object with a pre-specified shape and size within a certain distance outside the shell. The cloak comprises of a dielectric core, and an "anti-object" embedded inside a negative index shell. The cloaked object is not blinded by the cloaking shell since it lies outside the cloak. Full-wave 

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [29]:
#message_history

In [30]:
print(llm_response)

The provided text appears to be a collection of summaries for various academic papers related to optics, particularly focusing on concepts like cloaking, structured light, and metasurfaces. Below is an overview of the topics covered in these summaries:

1. **Complementary Media Cloak**: This involves designing an invisibility cloak that can hide objects situated at a distance from the cloaking shell. It uses complementary media theories to achieve this effect, verified through full-wave simulations.

2. **Transmittable Nonreciprocal Cloaking**: A novel type of cloaking that allows for bidirectional control over light transmission; it acts as an omnidirectional cloak externally but can selectively transmit light outward from its center.

3. **Momentum Transformation in Metasurfaces**: This method uses momentum conservation principles to manipulate light at a distance using metasurfaces, enabling precise control over reflected and transmitted fields.

4. **Multimode Light Shaping Roadmap