In [2]:
!pip install jupyter openai minsearch requests



In [3]:
!pip install python-dotenv



In [4]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [5]:
documents[2]

{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
 'section': 'General course-related questions',
 'question': 'Course - Can I still join the course after the start date?',
 'course': 'data-engineering-zoomcamp'}

In [6]:
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x11eb620c0>

In [7]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [8]:
search('Can I still join the course?')

[{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
  'section': 'General course-related questions',
  'question': 'Course - Can I still join the course after the start date?',
  'course': 'data-engineering-zoomcamp',
  '_id': 2},
 {'text': "No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.",
  'section': 'General course-related questions',
  'question': 'Certificate - Can I follow the course in a self-paced mode and get a certificate?',
  'course': 'data-engineering-zoomcamp',
  '_id': 11},
 {'text': 'Yes, we will keep all the materials after the course finishes, so you can follow the cou

In [9]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(query, search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [10]:
question = 'Can I still join the course?'

In [11]:
search_results = search(question)

In [12]:
prompt = build_prompt(question, search_results)

In [13]:
print(prompt)

You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
Can I still join the course?
</QUESTION>

<CONTEXT>
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Certificate - Can I follow the course in a self-paced mode and get a certificate?
answer: No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.

section: General

In [14]:
from dotenv import load_dotenv
import os
load_dotenv()

True

In [15]:
from openai import OpenAI
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    return response.choices[0].message.content

def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [16]:
answer = llm(prompt)
print(answer)

Yes, you can still join the course after the start date. Even if you don't register, you're eligible to submit the homework. However, be mindful of the deadlines for turning in the final projects, so it's advisable not to leave everything for the last minute.


In [17]:
rag('How do I patch KDE under FreeBSD?')

"I'm sorry, but there is no information provided in the context regarding how to patch KDE under FreeBSD. Please provide additional details or check the relevant resources for assistance."

In [18]:
print(llm(('How do I patch KDE under FreeBSD?')))

Patching KDE or any application on FreeBSD generally involves a few steps, which can include downloading the source code, applying your patches, and rebuilding the software. Here's a general guide on how to patch KDE under FreeBSD:

### Prerequisites

1. **Install the Port Tree**: Make sure the FreeBSD ports tree is installed and up to date. You can install it using:

   ```shell
   portsnap fetch extract
   ```

   Or update it with:

   ```shell
   portsnap fetch update
   ```

2. **Install Required Packages**: Ensure you have the necessary tools to build from ports. You may need `git`, `make`, `gmake`, and others. Install them if you don't have them already:

   ```shell
   pkg install git gmake
   ```

### Steps to Patch KDE

1. **Locate the KDE Port**: Find the specific KDE port you want to patch. You can browse the ports collection in `/usr/ports/` or use the command:

   ```shell
   cd /usr/ports/x11/kde5
   ```

   Replace `x11/kde5` with the specific KDE port you want to patch

## "Agentic" RAG

In [20]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}
""".strip()

In [21]:
question = "how do I run docker on gentoo?"
context = "EMPTY"

prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
how do I run docker on gentoo?
</QUESTION>

<CONTEXT> 
EMPTY
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}

If you can answer the QUESTION using CONTEXT, use this template:

{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}

If the context doesn't contain the answer, use your own knowledge to answer the question

{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}


In [22]:
answer_json = llm(prompt)
print(answer_json)

{
"action": "ANSWER",
"answer": "To run Docker on Gentoo, you can follow these steps:\n\n1. **Install Docker**: First, ensure that you have the necessary kernel features enabled. You will need to install Docker from the Portage tree. Use the following command to emerge Docker:\n   \n   ```bash\n   sudo emerge app-containers/docker\n   ```\n\n2. **Start Docker Service**: After installation, you will need to start the Docker daemon. You can do this using OpenRC (if you use it) by running:\n   \n   ```bash\n   sudo rc-service docker start\n   ```\n\n3. **Enable Docker on Boot**: If you want Docker to start at boot time, you can enable it with:\n   \n   ```bash\n   sudo rc-update add docker default\n   ```\n\n4. **Test Docker Installation**: To verify that Docker is running correctly, you can run:\n   \n   ```bash\n   sudo docker run hello-world\n   ```\n\nThis command pulls a test image and runs it. If it runs successfully, then your Docker installation is working properly.\n\n5. **Manage

In [23]:
import json

In [24]:
answer = json.loads(answer_json)
print(answer)

{'action': 'ANSWER', 'answer': 'To run Docker on Gentoo, you can follow these steps:\n\n1. **Install Docker**: First, ensure that you have the necessary kernel features enabled. You will need to install Docker from the Portage tree. Use the following command to emerge Docker:\n   \n   ```bash\n   sudo emerge app-containers/docker\n   ```\n\n2. **Start Docker Service**: After installation, you will need to start the Docker daemon. You can do this using OpenRC (if you use it) by running:\n   \n   ```bash\n   sudo rc-service docker start\n   ```\n\n3. **Enable Docker on Boot**: If you want Docker to start at boot time, you can enable it with:\n   \n   ```bash\n   sudo rc-update add docker default\n   ```\n\n4. **Test Docker Installation**: To verify that Docker is running correctly, you can run:\n   \n   ```bash\n   sudo docker run hello-world\n   ```\n\nThis command pulls a test image and runs it. If it runs successfully, then your Docker installation is working properly.\n\n5. **Manage 

In [25]:
question = "how do I join the course?"
context = "EMPTY"

prompt = prompt_template.format(question=question, context=context)
answer = llm(prompt)
print(answer)

{
"action": "ANSWER",
"answer": "To join the course, you typically need to enroll through the course platform or the institution offering it. This usually involves signing up or registering an account, selecting the course from a list, and completing any required payment or application forms. If you need specific guidance, I recommend checking the course's official website or contacting the course administrator for detailed instructions.",
"source": "OWN_KNOWLEDGE"
}


In [26]:
def build_context(search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    return context.strip()

In [27]:
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
how do I join the course?
</QUESTION>

<CONTEXT> 
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Course - When will the course start?
answer: The purpose of this document is to capture frequently asked technical questions
The exact day and hour of the course will be 15th Jan 2024 at 17h00. The course will start with the first  “Office Hours'' live.1
Subscribe to course public Google Calendar (it works from Desktop only).
Register before the course start

In [28]:
answer_json = llm(prompt)

In [29]:
print(answer_json)

{
"action": "ANSWER",
"answer": "To join the course, you should register before the start date using the registration link provided in the course materials. The course will officially start on January 15, 2024, at 17h00. After registering, make sure to subscribe to the course's public Google Calendar for updates, join the Telegram channel for announcements, and register on DataTalks.Club's Slack to connect with other participants.",
"source": "CONTEXT"
}


In [30]:
def agentic_rag_v1(question):
    context = "EMPTY"
    prompt = prompt_template.format(question=question, context=context)
    answer_json = llm(prompt)
    answer = json.loads(answer_json)
    print(answer)

    if answer['action'] == 'SEARCH':
        print('need to perform search...')
        search_results = search(question)
        context = build_context(search_results)
        
        prompt = prompt_template.format(question=question, context=context)
        answer_json = llm(prompt)
        answer = json.loads(answer_json)
        print(answer)

    return answer

In [31]:
agentic_rag_v1('how do I join the course?')

{'action': 'SEARCH', 'reasoning': 'The CONTEXT is EMPTY and I need to find the information regarding how to join the course.'}
need to perform search...
{'action': 'ANSWER', 'answer': "To join the course, you need to register before it starts. The course will begin on January 15, 2024, at 17:00. You can register using the provided link in the course materials. Additionally, make sure to join the course’s Telegram channel for announcements and subscribe to the course's public Google Calendar. Even if you miss the registration, you can still submit homework, but be aware of the project deadlines.", 'source': 'CONTEXT'}


{'action': 'ANSWER',
 'answer': "To join the course, you need to register before it starts. The course will begin on January 15, 2024, at 17:00. You can register using the provided link in the course materials. Additionally, make sure to join the course’s Telegram channel for announcements and subscribe to the course's public Google Calendar. Even if you miss the registration, you can still submit homework, but be aware of the project deadlines.",
 'source': 'CONTEXT'}

In [32]:
agentic_rag_v1('how patch KDE under FreeBSD?')

{'action': 'ANSWER', 'answer': 'To patch KDE under FreeBSD, you can follow these steps: \n\n1. **Retrieve the source code**: First, ensure you have the KDE source code you want to patch. You can use `ports` or download the source code directly from the KDE website.\n\n2. **Create a patch file**: If you have made changes to the source code, you can create a patch file using the command:\n   ```\n   diff -u original_file modified_file > my_patch.patch\n   ```\n   Replace `original_file` and `modified_file` with the correct filenames.\n\n3. **Apply the patch**: Navigate to the directory of the KDE source code and apply your patch using the `patch` command:\n   ```\n   patch < /path/to/my_patch.patch\n   ```\n\n4. **Compile and install**: After applying the patch, compile the software using the FreeBSD ports system or the traditional `make` method, then install it. For example:\n   ```\n   make install clean\n   ```\n\n5. **Test your changes**: Run KDE to ensure that your changes work as e

{'action': 'ANSWER',
 'answer': 'To patch KDE under FreeBSD, you can follow these steps: \n\n1. **Retrieve the source code**: First, ensure you have the KDE source code you want to patch. You can use `ports` or download the source code directly from the KDE website.\n\n2. **Create a patch file**: If you have made changes to the source code, you can create a patch file using the command:\n   ```\n   diff -u original_file modified_file > my_patch.patch\n   ```\n   Replace `original_file` and `modified_file` with the correct filenames.\n\n3. **Apply the patch**: Navigate to the directory of the KDE source code and apply your patch using the `patch` command:\n   ```\n   patch < /path/to/my_patch.patch\n   ```\n\n4. **Compile and install**: After applying the patch, compile the software using the FreeBSD ports system or the traditional `make` method, then install it. For example:\n   ```\n   make install clean\n   ```\n\n5. **Test your changes**: Run KDE to ensure that your changes work as 

## Agentic Search

In [34]:
def dedup(seq):
    seen = set()
    result = []
    for el in seq:
        _id = el['_id']
        if _id in seen:
            continue
        seen.add(_id)
        result.append(el)
    return result

search_results = dedup(search_results)

In [35]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than {max_iterations} iterations for a given student question.
The current iteration number: {iteration_number}. If we exceed the allowed number 
of iterations, give the best possible answer with the provided information.

Output templates:

If you want to perform search, use this template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>",
"keywords": ["search query 1", "search query 2", ...]
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER_CONTEXT",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}

<QUESTION>
{question}
</QUESTION>

<SEARCH_QUERIES>
{search_queries}
</SEARCH_QUERIES>

<CONTEXT> 
{context}
</CONTEXT>

<PREVIOUS_ACTIONS>
{previous_actions}
</PREVIOUS_ACTIONS>
""".strip()

In [36]:
question = "how do I do well on module 1?"
max_iterations = 3
iteration_number = 0
search_queries = []
search_results = []
previous_actions = []

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [38]:
context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [39]:
answer_json = llm(prompt)

In [40]:
answer = json.loads(answer_json)

In [41]:
previous_actions.append(answer)

In [42]:
previous_actions

[{'action': 'SEARCH',
  'reasoning': 'To provide targeted strategies for succeeding in Module 1, I will search for specific advice or tips related to this module in our FAQ database.',
  'keywords': ['tips for succeeding in module 1',
   'how to do well in module 1',
   'module 1 resources']}]

In [43]:
keywords = answer['keywords']

In [44]:
for kw in keywords:
    search_queries.append(kw)
    sr = search(kw)
    search_results.extend(sr)

In [45]:
len(search_results)

15

In [46]:
search_results = dedup(search_results)

In [47]:
iteration_number = 2

context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [48]:
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is build with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current iteration number

In [49]:
answer_json = llm(prompt)

In [50]:
print(answer['answer'])

KeyError: 'answer'

### Function calling ("tool use")

In [54]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [56]:
search_tool = {
    "type": "function",
    "name": "search",
    "description": "Search the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "query": {
                "type": "string",
                "description": "Search query text to look up in the course FAQ."
            }
        },
        "required": ["query"],
        "additionalProperties": False
    }
}

In [106]:
def do_call(tool_call_response):
    function_name = tool_call_response.name
    arguments = json.loads(tool_call_response.arguments)

    f = globals()[function_name]
    result = f(**arguments)

    return {
        "type": "function_call_output",
        "call_id": tool_call_response.call_id,
        "output": json.dumps(result, indent=2),
    }

In [98]:
question = "How do I do well in module 1?"

developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.
If you look up something in FAQ, convert the student question into multiple queries.
""".strip()

tools = [search_tool]

chat_messages = [
    {"role": "developer", "content": developer_prompt},
    {"role": "user", "content": question}
]

response = client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)
response.output

[ResponseFunctionToolCall(arguments='{"query":"how to do well in module 1"}', call_id='call_6erCfcI6ST5M5NTuON7iEy2l', name='search', type='function_call', id='fc_68a5a61722808194b56c4f09e0d3d84f00423db61e7b94bb', status='completed'),
 ResponseFunctionToolCall(arguments='{"query":"module 1 study tips"}', call_id='call_H1ftCzsBa1qOWCa2LuwUQV2b', name='search', type='function_call', id='fc_68a5a617665c81949288a77d26f2bc2800423db61e7b94bb', status='completed'),
 ResponseFunctionToolCall(arguments='{"query":"module 1 assessment criteria"}', call_id='call_B4vQofmioRu4co6AKvTlipms', name='search', type='function_call', id='fc_68a5a6179ff88194bc2162174d57f87300423db61e7b94bb', status='completed')]

In [99]:
calls = response.output

In [108]:
for call in calls:
    result = do_call(call)
    chat_messages.append(call)
    chat_messages.append(result)

In [104]:
response = client.responses.create(
    model='gpt-4o-mini',
    input=chat_messages,
    tools=tools
)
response.output

[ResponseOutputMessage(id='msg_68a5a6e9fb6c8194ada70cfa037ac56d00423db61e7b94bb', content=[ResponseOutputText(annotations=[], text="To do well in Module 1, you can focus on the following areas:\n\n1. **Study Materials**: Make sure to thoroughly read and understand the course materials related to Docker and Terraform. Check the official documentation and any supplementary resources provided.\n\n2. **Practice**: Engage in hands-on practice with the tools and technologies covered in Module 1. Setting up Docker environments and running Terraform scripts are essential for solidifying your understanding.\n\n3. **Programming Exercises**: Complete all programming exercises and projects related to the module. This will not only help you grasp the concepts better but will also enhance your coding skills.\n\n4. **Collaboration**: Participate in discussions with your peers. Exploring different perspectives can deepen your understanding of the topics.\n\n5. **Ask for Help**: Don't hesitate to reach

In [96]:
print(response.output[0].content[0].text)

To do well in Module 1, here are some tips and best practices:

1. **Understand Key Concepts**: Make sure you grasp the fundamental concepts related to Docker and Terraform as these are crucial for success in later modules.

2. **Environment Setup**: Pay close attention to setting up your environment correctly. Follow the installation instructions step-by-step to avoid issues like missing modules.

3. **Practice Hands-On**: Engage in practical exercises. The best way to solidify your understanding of tools like Docker and Terraform is to use them in real projects.

4. **Solve Common Errors**: Familiarize yourself with common errors, such as `ModuleNotFoundError`. For example, if you're encountering issues with PostgreSQL, ensure you have the necessary Python packages installed (like `psycopg2`).

5. **Ask for Help**: If you encounter problems, don’t hesitate to reach out to the community or your peers for support. Engaging with others can provide valuable insights.

6. **Review Documen

In [114]:
for entry in response.output:
    chat_messages.append(entry)
    print(entry.type)

    if entry.type == 'function_call':      
        result = do_call(entry)
        chat_messages.append(result)
    elif entry.type == 'message':
        print(entry.text) 

message


AttributeError: 'ResponseOutputMessage' object has no attribute 'text'

In [116]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.
When using FAQ, perform deep topic exploration: make one request to FAQ,
and then based on the results, make more requests.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

chat_messages = [
    {"role": "developer", "content": developer_prompt},
]

In [118]:
while True: # main Q&A loop
    question = input() # How do I do my best for module 1?
    if question == 'stop':
        break

    message = {"role": "user", "content": question}
    chat_messages.append(message)

    while True: # request-response loop - query API till get a message
        response = client.responses.create(
            model='gpt-4o-mini',
            input=chat_messages,
            tools=tools
        )

        has_tool_calls = False
        
        for entry in response.output:
            chat_messages.append(entry)
        
            if entry.type == 'function_call':      
                print('function_call:', entry)
                print()
                result = do_call(entry)
                chat_messages.append(result)
                has_tool_calls = True

            elif entry.type == 'message':
                print(entry.content[0].text)
                print()

        if not has_tool_calls:
            break

 How do I do well on module 1?


function_call: ResponseFunctionToolCall(arguments='{"query":"module 1 success tips"}', call_id='call_vj0qFHfm8hzxHLHsKWAfX29y', name='search', type='function_call', id='fc_68a5a8d4c39881a1a1022d246da2d915003250917db588d1', status='completed')

To do well in Module 1 of the course, here are some key tips:

1. **Understand Your Environment**: Make sure you have set up your development environment correctly using Docker and Terraform. This foundational step is critical for smooth execution of subsequent tasks.

2. **Resolve Common Errors**: If you encounter errors like `ModuleNotFoundError: No module named 'psycopg2'`, you can resolve them by installing the required packages. Use the command:
   ```bash
   pip install psycopg2-binary
   ```
   If the issue persists, consider updating the package or check if your environment setup is correct.

3. **Using SQLAlchemy**: When working with SQLAlchemy, ensure you are using the correct connection strings. For PostgreSQL, it should look somewhat 

 Docker and Terraform


function_call: ResponseFunctionToolCall(arguments='{"query":"Docker Terraform success tips"}', call_id='call_xOYu5I2NwenGvaw4A5hWdJMp', name='search', type='function_call', id='fc_68a5a8faa5c881a1a0f9f6f290c3fee1003250917db588d1', status='completed')

To excel in Docker and Terraform during Module 1, here are some focused tips:

### Docker:
1. **Installation and Setup**: Ensure Docker is properly installed on your system. Follow the official [Docker documentation](https://docs.docker.com/get-started/) for guidance on setup for your operating system.

2. **Basic Commands**: Familiarize yourself with basic Docker commands such as `docker run`, `docker build`, and `docker-compose`. Practicing these commands can significantly help in understanding how Docker works.

3. **Networking**: Pay attention to networking in Docker. Understand how containers communicate with each other and with your host machine. 

4. **Debugging Issues**: If you encounter errors, check the logs of your containers u

 stop


### Multiple tools

In [120]:
!wget https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py

--2025-08-20 12:07:53--  https://raw.githubusercontent.com/alexeygrigorev/rag-agents-workshop/refs/heads/main/chat_assistant.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3495 (3.4K) [text/plain]
Saving to: ‘chat_assistant.py’


2025-08-20 12:07:53 (47.6 MB/s) - ‘chat_assistant.py’ saved [3495/3495]



In [131]:
def add_entry(question, answer):
    doc = {
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp'
    }
    index.append(doc)

In [133]:
add_entry_description = {
    "type": "function",
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to be added to the FAQ database",
            },
            "answer": {
                "type": "string",
                "description": "The answer to the question",
            }
        },
        "required": ["question", "answer"],
        "additionalProperties": False
    }
}

In [5]:
import chat_assistant

tools = chat_assistant.Tools()
tools.add_tool(search, search_tool)

NameError: name 'search' is not defined

In [3]:
tools.add_tool(add_entry, add_entry_description)

NameError: name 'tools' is not defined

In [139]:
tools.get_tools()

[{'type': 'function',
  'name': 'search',
  'description': 'Search the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'query': {'type': 'string',
     'description': 'Search query text to look up in the course FAQ.'}},
   'required': ['query'],
   'additionalProperties': False}},
 {'type': 'function',
  'name': 'add_entry',
  'description': 'Add an entry to the FAQ database',
  'parameters': {'type': 'object',
   'properties': {'question': {'type': 'string',
     'description': 'The question to be added to the FAQ database'},
    'answer': {'type': 'string', 'description': 'The answer to the question'}},
   'required': ['question', 'answer'],
   'additionalProperties': False}}]

In [141]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.

At the end of each response, ask the user a follow up question based on your answer.
""".strip()

chat_interface = chat_assistant.ChatInterface()

chat = chat_assistant.ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt,
    chat_interface=chat_interface,
    client=client
)

In [143]:
chat.run()

You: How do i do well in module 1?


You: stop


Chat ended.


In [145]:
index.docs[-1]

{'text': 'Problem description\nInfrastructure created in AWS with CD-Deploy Action needs to be destroyed\nSolution description\nFrom local:\nterraform init -backend-config="key=mlops-zoomcamp-prod.tfstate" --reconfigure\nterraform destroy --var-file vars/prod.tfvars\nAdded by Erick Calderin',
 'section': 'Module 6: Best practices',
 'question': 'How to destroy infrastructure created via GitHub Actions',
 'course': 'mlops-zoomcamp'}