In [1]:
import requests 

docs_url = 'https://github.com/alexeygrigorev/llm-rag-workshop/raw/main/notebooks/documents.json'
docs_response = requests.get(docs_url)
documents_raw = docs_response.json()

documents = []

for course in documents_raw:
    course_name = course['course']

    for doc in course['documents']:
        doc['course'] = course_name
        documents.append(doc)

In [2]:
documents[2]

{'text': "Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
 'section': 'General course-related questions',
 'question': 'Course - Can I still join the course after the start date?',
 'course': 'data-engineering-zoomcamp'}

In [3]:
from minsearch import AppendableIndex

index = AppendableIndex(
    text_fields=["question", "text", "section"],
    keyword_fields=["course"]
)

index.fit(documents)

<minsearch.append.AppendableIndex at 0x745b2d952780>

In [4]:
question = 'Can i still join the course?'

In [5]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [6]:
prompt_template = """
You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
Use only the facts from the CONTEXT when answering the QUESTION.

<QUESTION>
{question}
</QUESTION>

<CONTEXT>
{context}
</CONTEXT>
""".strip()

def build_prompt(query, search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

In [7]:
search_results = search(question)

In [8]:
prompt = build_prompt(question, search_results)

In [9]:
prompt

"You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.\nUse only the facts from the CONTEXT when answering the QUESTION.\n\n<QUESTION>\nCan i still join the course?\n</QUESTION>\n\n<CONTEXT>\nsection: General course-related questions\nquestion: Course - Can I still join the course after the start date?\nanswer: Yes, even if you don't register, you're still eligible to submit the homeworks.\nBe aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.\n\nsection: General course-related questions\nquestion: Certificate - Can I follow the course in a self-paced mode and get a certificate?\nanswer: No, you can only get a certificate if you finish the course with a “live” cohort. We don't award certificates for the self-paced mode. The reason is you need to peer-review capstone(s) after submitting a project. You can only peer-review projects at the time the course is running.\n

In [None]:
from groq import Groq
import os

In [None]:
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

In [13]:
model = "llama-3.1-8b-instant"

In [14]:
system_prompt = "You're a course teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database. \
Use only the facts from the CONTEXT when answering the QUESTION."

In [15]:
def llm(prompt):
    response = client.chat.completions.create(
        model=model,
        messages=[
          {"role":"user","content":prompt}
        ]
    )
    return response.choices[0].message.content

In [16]:
answer = llm(prompt)

In [17]:
print(answer)

According to the context, the course starts on 15th Jan 2024 at 17h00. Since the question is not about the start date, it seems more relevant to another question.

However, since this part of context does not clarify if course still open for registration, lets refer to the previous answers: 

You can still join the course even after the start date.


In [18]:
def rag(query):
    search_results = search(query)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)
    return answer

In [19]:
rag("How do I patch KDE under FreeBSD?")

"Unfortunately, I don't see any context or information related to FreeBSD or patching KDE in the provided text. However, I can suggest some general information. If you have any specific version of KDE, FreeBSD, or other relevant information, I'd be happy to try and assist you further.\n\nIf you're looking for a solution, please note that there is a separate package for 'ports/kde-applications' and 'ports/kde-frameworks' which includes KDE Frameworks, in the FreeBSD Ports Collection."

In [20]:
print(llm("How do I patch KDE under FreeBSD?"))

Patching KDE on FreeBSD is a multi-step process that involves cloning the KDE Git repository, updating the repository with patches, configuring and building KDE, and setting up the desktop environment. Here's a step-by-step guide:

**Prerequisites:**

1. Install the ports or packages required for building KDE, including `ports-mgmt/pkg` for package management, `devel/gmake`, `lang/perl5`, `devel/gdbm`, `devel/boost-libs`, and `x11-toolkits/qt5-kdetoools` (for Qt5).
2. You need a Qt5 base, which might be installed from ports or packages, and you'll need the version that matches the one in the port of kde4. 
3. Update your `ports` or package database.
4. Ensure that you have a working X11 server (e.g., `xfree86`).

**Step-by-Step Instructions:**

### Step 1: Clone the KDE Git repository

```bash
# Clone the KDE Git repository
git clone https://invent.kde.org/frameworks/kde.git kde-source
cd kde-source
```

### Step 2: Update the repository with FreeBSD patches

```bash
# Create a branch 

In [47]:
## Agentic RAG ####

In [21]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
{question}
</QUESTION>

<CONTEXT> 
{context}
</CONTEXT>

If CONTEXT is EMPTY, you can use our FAQ database.
In this case, use the following output template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>"
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}

ONLY SHOW THE RESULT IN A JSON FORMAT GIVEN
""".strip()

In [22]:
question = 'Can I still join the course, if the course is already started?'
context = 'EMPTY'

In [23]:
prompt = prompt_template.format(question=question, context=context)

In [24]:
import json

In [25]:
def build_context(search_results):
    context = ""

    for doc in search_results:
        context = context + f"section: {doc['section']}\nquestion: {doc['question']}\nanswer: {doc['text']}\n\n"
    
    return context.strip()

In [26]:
search_results = search(question)
context = build_context(search_results)
prompt = prompt_template.format(question=question, context=context)
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.
At the beginning the context is EMPTY.

<QUESTION>
Can I still join the course, if the course is already started?
</QUESTION>

<CONTEXT> 
section: General course-related questions
question: Course - Can I still join the course after the start date?
answer: Yes, even if you don't register, you're still eligible to submit the homeworks.
Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.

section: General course-related questions
question: Course - Can I follow the course after it finishes?
answer: Yes, we will keep all the materials after the course finishes, so you can follow the course at your own pace after it finishes.
You can also continue looking at the homeworks and continue preparing for the next cohort. I guess you can also start working on your f

In [27]:
answer_json = llm(prompt)

In [28]:
print(answer_json)

{
"action": "ANSWER",
"answer": "Yes, even if you don't register, you're still eligible to submit the homeworks. Be aware, however, that there will be deadlines for turning in the final projects. So don't leave everything for the last minute.",
"source": "CONTEXT"
}


In [29]:
def agentic_rag_v1(question):
    context = "EMPTY"
    prompt = prompt_template.format(question=question, context=context)
    answer_json = llm(prompt)
    answer = json.loads(answer_json)
    print(answer)

    if answer['action'] == 'SEARCH':
        print('need to perform search...')
        search_results = search(question)
        context = build_context(search_results)
        
        prompt = prompt_template.format(question=question, context=context)
        answer_json = llm(prompt)
        answer = json.loads(answer_json)
        print(answer)

    return answer

In [30]:
agentic_rag_v1('how do I join the course?')
agentic_rag_v1('how patch KDE under FreeBSD?')

{'action': 'SEARCH', 'reasoning': 'CONTEXT is EMPTY, using FAQ database'}
need to perform search...
{'action': 'SEARCH', 'reasoning': 'Since the CONTEXT is EMPTY, use the FAQ database to find the answer.'}
{'action': 'SEARCH', 'reasoning': 'CONTEXT is EMPTY, need to search FAQ database for information'}
need to perform search...
{'action': 'SEARCH', 'reasoning': 'Context is EMPTY, searching FAQ database'}


{'action': 'SEARCH', 'reasoning': 'Context is EMPTY, searching FAQ database'}

In [99]:
### AGENTIC SEARCH ###

In [31]:
def dedup(seq):
    seen = set()
    result = []
    for el in seq:
        _id = el['_id']
        if _id in seen:
            continue
        seen.add(_id)
        result.append(el)
    return result

search_results = dedup(search_results)

In [32]:
prompt_template = """
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is built with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than {max_iterations} iterations for a given student question.
The current iteration number: {iteration_number}. If we exceed the allowed number 
of iterations, give the best possible answer with the provided information.

Output templates:

If you want to perform search, use this template:

{{
"action": "SEARCH",
"reasoning": "<add your reasoning here>",
"keywords": ["search query 1", "search query 2", ...]
}}

If you can answer the QUESTION using CONTEXT, use this template:

{{
"action": "ANSWER_CONTEXT",
"answer": "<your answer>",
"source": "CONTEXT"
}}

If the context doesn't contain the answer, use your own knowledge to answer the question

{{
"action": "ANSWER",
"answer": "<your answer>",
"source": "OWN_KNOWLEDGE"
}}

<QUESTION>
{question}
</QUESTION>

<SEARCH_QUERIES>
{search_queries}
</SEARCH_QUERIES>

<CONTEXT> 
{context}
</CONTEXT>

<PREVIOUS_ACTIONS>
{previous_actions}
</PREVIOUS_ACTIONS>
""".strip()

In [33]:
question = 'how do i do well on module 1?'
max_iterations = 3
iteration_number = 0
search_queries = []
search_results = []
previous_actions = []

In [34]:
context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [35]:
answer_json = llm(prompt)

In [36]:
answer = json.loads(answer_json)

In [37]:
answer

{'action': 'SEARCH',
 'reasoning': "The question is about performing well on Module 1, so I'll search the FAQ database for information related to Module 1, study tips, and general advice for academic success.",
 'keywords': ['Module 1 study tips',
  'academic success strategies',
  'tips for performing well in a module']}

In [38]:
previous_actions.append(answer)

In [39]:
previous_actions

[{'action': 'SEARCH',
  'reasoning': "The question is about performing well on Module 1, so I'll search the FAQ database for information related to Module 1, study tips, and general advice for academic success.",
  'keywords': ['Module 1 study tips',
   'academic success strategies',
   'tips for performing well in a module']}]

In [40]:
keywords = answer['keywords']

In [41]:
for kw in keywords:
    search_queries.append(kw)
    sr = search(kw)
    search_results.extend(sr)

In [42]:
search_results = dedup(search_results)

In [43]:
len(search_results)

6

In [44]:
iteration_number = 1

context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [45]:
print(prompt)

You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is built with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current iteration number

In [46]:
answer_json = llm(prompt)

In [47]:
print(answer_json)

Since the CONTEXT and PREVIOUS_ACTIONS are already populated, I will first check if the CONTEXT already contains the answer to the question. If not, I will perform a search in the FAQ database to get more information to help answer the question.

However, given that the question "how do i do well on module 1?" seems to be quite general and not directly related to any specific technical issue that can be looked up in the provided CONTEXT, I need to carefully consider the keywords and sections provided in the SEARCH_QUERIES to generate the search request.

Given the keywords ["Module 1 study tips", "academic success strategies", "tips for performing well in a module"] from the previous iteration, my search request will not be based solely on the keywords used to generate the previous search. I will instead focus on generating a new search that directly addresses the question asked, while keeping in mind the potential relevance of the keywords.

Here is my first ACTION:

{
"action": "SEAR

In [48]:
iteration_number = 2

context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [58]:
answer_json = llm(prompt)

In [59]:
answer_json

'{\n  "action": "SEARCH",\n  "reasoning": "To provide a comprehensive answer, we need to gather more information about the requirements and expectations for Module 1.",\n  "keywords": ["Module 1 requirements", "Module 1 expectations", "success criteria for Module 1"]\n}'

In [60]:
answer = json.loads(answer_json)

In [62]:
answer

{'action': 'SEARCH',
 'reasoning': 'To provide a comprehensive answer, we need to gather more information about the requirements and expectations for Module 1.',
 'keywords': ['Module 1 requirements',
  'Module 1 expectations',
  'success criteria for Module 1']}

In [63]:
iteration_number = 3

context = build_context(search_results)

prompt = prompt_template.format(
    question=question,
    context=context,
    search_queries="\n".join(search_queries),
    previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
    max_iterations=max_iterations,
    iteration_number=iteration_number
)

In [64]:
answer_json = llm(prompt)

In [66]:
answer = json.loads(answer_json)

In [67]:
answer

{'action': 'SEARCH',
 'reasoning': "To provide a more informed answer, I'll start by searching for information related to 'module 1' success.",
 'keywords': ['module 1 success',
  'module 1 requirements',
  'achieving success in module 1']}

In [68]:
question = "what do I need to do to be successful at module 1?"

search_queries = []
search_results = []
previous_actions = []


iteration = 0

while True:
    print(f'ITERATION #{iteration}...')

    context = build_context(search_results)
    prompt = prompt_template.format(
        question=question,
        context=context,
        search_queries="\n".join(search_queries),
        previous_actions='\n'.join([json.dumps(a) for a in previous_actions]),
        max_iterations=3,
        iteration_number=iteration
    )

    print(prompt)

    answer_json = llm(prompt)
    answer = json.loads(answer_json)
    print(json.dumps(answer, indent=2))

    previous_actions.append(answer)

    action = answer['action']
    if action != 'SEARCH':
        break

    keywords = answer['keywords']
    search_queries = list(set(search_queries) | set(keywords))
    
    for k in keywords:
        res = search(k)
        search_results.extend(res)

    search_results = dedup(search_results)
    
    iteration = iteration + 1
    if iteration >= 4:
        break

    print()

ITERATION #0...
You're a course teaching assistant.

You're given a QUESTION from a course student and that you need to answer with your own knowledge and provided CONTEXT.

The CONTEXT is built with the documents from our FAQ database.
SEARCH_QUERIES contains the queries that were used to retrieve the documents
from FAQ to and add them to the context.
PREVIOUS_ACTIONS contains the actions you already performed.

At the beginning the CONTEXT is empty.

You can perform the following actions:

- Search in the FAQ database to get more data for the CONTEXT
- Answer the question using the CONTEXT
- Answer the question using your own knowledge

For the SEARCH action, build search requests based on the CONTEXT and the QUESTION.
Carefully analyze the CONTEXT and generate the requests to deeply explore the topic. 

Don't use search queries used at the previous iterations.

Don't repeat previously performed actions.

Don't perform more than 3 iterations for a given student question.
The current 

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [179]:
### FUNCTION CALLING (TOOL USE) ###

In [6]:
import os
import json
from groq import Groq

In [8]:
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

In [9]:
def search(query):
    boost = {'question': 3.0, 'section': 0.5}

    results = index.search(
        query=query,
        filter_dict={'course': 'data-engineering-zoomcamp'},
        boost_dict=boost,
        num_results=5,
        output_ids=True
    )

    return results

In [45]:
def add_entry(question, answer):
    doc = {
        'question': question,
        'text': answer,
        'section': 'user added',
        'course': 'data-engineering-zoomcamp'
    }
    index.append(doc)

In [74]:
add_entry_description = {
    "type": "function",
    "function": {
    "name": "add_entry",
    "description": "Add an entry to the FAQ database",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question to be added to the FAQ database",
            },
            "answer": {
                "type": "string",
                "description": "The answer to the question",
            }
        },
        "required": ["question", "answer"],
        "additionalProperties": False
    }
    }
}

In [10]:
search_tool = {
    "type": "function",
    "function": {
        "name": "search",
        "description": "Search the FAQ database",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query text to look up in the course FAQ."
                }
            },
            "required": ["query"],
            "additionalProperties": False
        }
    }
}


In [12]:
def do_call(tool_call_response):
    function_name = tool_call_response.function.name
    arguments = json.loads(tool_call_response.function.arguments)

    f = globals()[function_name]
    result = f(**arguments)

    return {
        "role": "tool",
        "type": "function_call_output",
        "call_id": tool_call_response.id,
        "output": json.dumps(result, indent=2),
    }

In [32]:
model = "meta-llama/llama-4-scout-17b-16e-instruct"

In [44]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.
When using FAQ, perform deep topic exploration: make one request to FAQ,
and then if you need more information for forming an answer, make another request.
""".strip()

tools = [search_tool]

chat_messages = [
        {"role": "system", "content": developer_prompt}
]


while True:
    question = input()
    if question == 'stop':
        break

    
    message = {"role": "user",   "content": question}
    chat_messages.append(message)

    while True:

        response = client.chat.completions.create(
            model=model,
            messages=chat_messages,
            tools=tools,
            tool_choice="auto"    # or "manual" if you want to force a tool call
        )

        fin_reason = response.choices[0].finish_reason

        has_message = False

        if fin_reason == 'tool_calls':
            tool_calls = response.choices[0].message.tool_calls
            for tool_call in tool_calls:
                print('tool_call')
                print(tool_call)
                print()
                res = do_call(tool_call)
                chat_messages.append(
                    {
                        "role": "tool",
                        "content": str(res),
                        "tool_call_id": tool_call.id,
                    }
                )
        elif fin_reason == 'stop':
            has_message = True
            print('has_message')
            print(response.choices[0].message.content)
            print()

        if has_message:
            break

 How to do well in module 1 of the course?


tool_call
ChatCompletionMessageToolCall(id='0vzcgmr9q', function=Function(arguments='{"query":"module1 course tips"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='zz6rwf6zt', function=Function(arguments='{"query":"module1"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='rxn8fykxr', function=Function(arguments='{"query":"Module 1 course tips"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='kkzva7atm', function=Function(arguments='{"query":"module1 docker terraform challenges"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='t4d7zetz5', function=Function(arguments='{"query":"Module 1 Docker and Terraform tips for success"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='rxr5229nw', function=Function(arguments='{"query":"module 1 docker terraform common issues"}', name='search'), type='function')

tool_call
ChatCompletionMessag

 how to run Docker?


has_message
To run Docker, you can follow these general steps:

1. **Install Docker**: First, ensure that Docker is installed on your system. You can download it from the official Docker website if you haven't already.

2. **Pull a Docker Image**: You can pull a Docker image from Docker Hub using the command `docker pull <image_name>`. For example, to pull the official Ubuntu image, you would use `docker pull ubuntu`.

3. **Run a Docker Container**: After pulling an image, you can run a Docker container using the command `docker run -it <image_name>`. The `-it` flag allows you to interact with the container.

4. **Docker Compose**: If your project uses Docker Compose, you can run `docker-compose up` to start all services defined in your `docker-compose.yml` file.

5. **Verify Docker Installation**: To verify that Docker is installed correctly, you can run `docker --version`.

If you encounter issues or need module-specific guidance, feel free to ask!



 stop


In [95]:
from agentic_chat_groq import Tools, ChatAssistant

In [96]:
tools = Tools()

In [97]:
tools.add_tool(search, search_tool)

In [98]:
tools.get_tools()

[<function __main__.search_tool(ctx: pydantic_ai._run_context.RunContext, query: str) -> Dict[str, str]>]

In [81]:
developer_prompt = """
You're a course teaching assistant. 
You're given a question from a course student and your task is to answer it.

Use FAQ if your own knowledge is not sufficient to answer the question.
When using FAQ, perform deep topic exploration: make one request to FAQ,
and then if you need more information for forming an answer, make another request.
""".strip()

In [82]:
chat = ChatAssistant(
    tools=tools,
    developer_prompt=developer_prompt
)

In [83]:
tools.add_tool(add_entry, add_entry_description)

In [84]:
tools.get_tools()

[{'type': 'function',
  'function': {'name': 'search',
   'description': 'Search the FAQ database',
   'parameters': {'type': 'object',
    'properties': {'query': {'type': 'string',
      'description': 'Search query text to look up in the course FAQ.'}},
    'required': ['query'],
    'additionalProperties': False}}},
 {'type': 'function',
  'function': {'name': 'add_entry',
   'description': 'Add an entry to the FAQ database',
   'parameters': {'type': 'object',
    'properties': {'question': {'type': 'string',
      'description': 'The question to be added to the FAQ database'},
     'answer': {'type': 'string',
      'description': 'The answer to the question'}},
    'required': ['question', 'answer'],
    'additionalProperties': False}}}]

In [85]:
chat.run()

 How do i do well in module 1?


tool_call
ChatCompletionMessageToolCall(id='4cyssd0pr', function=Function(arguments='{"query":"module1 success tips"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='s7wzbsqg3', function=Function(arguments='{"query":"module1"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='229v66c90', function=Function(arguments='{"query":"How to do well in Module 1"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='e1dx985we', function=Function(arguments='{"query":"Module 1 tips"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='kbcrebb32', function=Function(arguments='{"query":"Module 1 Docker and Terraform tips"}', name='search'), type='function')

tool_call
ChatCompletionMessageToolCall(id='xrgtjwswq', function=Function(arguments='{"query":"module 1 common issues"}', name='search'), type='function')

has_message
To do well in Module 1, it's essential to have a solid foundat

 Add this back to FAQ


tool_call
ChatCompletionMessageToolCall(id='jdjwxj3vh', function=Function(arguments='{"answer":"To do well in Module 1, make sure to troubleshoot issues effectively. This involves checking the error message, restarting your application or server, and searching online for solutions. Additionally, review the course materials and documentation for Module 1, which covers Docker and Terraform. Ensure you have a solid understanding of the topics and have completed any prerequisite steps.","question":"How do i do well in module1"}', name='add_entry'), type='function')

has_message
To do well in Module 1, it's essential to have a solid understanding of the fundamentals of Docker and Terraform. Here are some general tips:

1. **Familiarize yourself with the basics**: Make sure you understand the core concepts of Docker, including containers, images, and volumes. For Terraform, understand the basics of infrastructure as code, providers, and resources.

2. **Practice, practice, practice**: Hands-

 stop


In [86]:
index.docs[-1]

{'question': 'How do i do well in module1',
 'text': 'To do well in Module 1, make sure to troubleshoot issues effectively. This involves checking the error message, restarting your application or server, and searching online for solutions. Additionally, review the course materials and documentation for Module 1, which covers Docker and Terraform. Ensure you have a solid understanding of the topics and have completed any prerequisite steps.',
 'section': 'user added',
 'course': 'data-engineering-zoomcamp'}

In [87]:
index.docs[-2]

{'text': 'Problem description\nInfrastructure created in AWS with CD-Deploy Action needs to be destroyed\nSolution description\nFrom local:\nterraform init -backend-config="key=mlops-zoomcamp-prod.tfstate" --reconfigure\nterraform destroy --var-file vars/prod.tfvars\nAdded by Erick Calderin',
 'section': 'Module 6: Best practices',
 'question': 'How to destroy infrastructure created via GitHub Actions',
 'course': 'mlops-zoomcamp'}

In [88]:
#### PYDANTIC #####

In [89]:
from pydantic_ai import Agent, RunContext

In [90]:
chat_agent = Agent(  
    'groq:meta-llama/llama-4-scout-17b-16e-instruct',
    system_prompt=developer_prompt
)

In [91]:
from typing import Dict


@chat_agent.tool
def search_tool(ctx: RunContext, query: str) -> Dict[str, str]:
    """
    Search the FAQ for relevant entries matching the query.

    Parameters
    ----------
    query : str
        The search query string provided by the user.

    Returns
    -------
    list
        A list of search results (up to 5), each containing relevance information 
        and associated output IDs.
    """
    print(f"search('{query}')")
    return search(query)

In [92]:
@chat_agent.tool
def add_entry_tool(ctx: RunContext, question: str, answer: str) -> None:
    """
    Add a new question-answer entry to FAQ.

    This function creates a document with the given question and answer, 
    tagging it as user-added content.

    Parameters
    ----------
    question : str
        The question text to be added to the index.

    answer : str
        The answer or explanation corresponding to the question.

    Returns
    -------
    None
    """
    return add_entry(question, answer)

In [94]:
user_prompt = "I just discovered the course. Can I join now?"
agent_run = await chat_agent.run(user_prompt)
print(agent_run.output)

search('join course now')
You can still join the course, but be aware that there are deadlines for turning in the final projects, so don't leave everything for the last minute. It's recommended to register before the course starts and join the course Telegram channel with announcements. You can find more information on the course start date and registration link in the course materials. 

If you have any more questions or need further clarification, feel free to ask.
