In [6]:
from openai import AzureOpenAI
import json
import os
import time

from dotenv import load_dotenv
load_dotenv()

azure_deployment=os.getenv("AZURE_OPENAI_DEPLOYMENT")
azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
api_key=os.getenv("AZURE_OPENAI_KEY")

# Create an AzureOpenAI client
client = AzureOpenAI(
  azure_endpoint = azure_endpoint,
  api_key= api_key,
  api_version="2024-05-01-preview"
)
# Create a file
file = client.files.create(
  file=open("./data/bigfootsightings.csv", "rb"),
  purpose='assistants'
)

# Create an assistant
assistant = client.beta.assistants.create(
  model=azure_deployment,
  name="bigfootsightings",
  instructions="""You are an assistant answering questions about bigfootsightings dataset.""",
  tools=[{"type":"code_interpreter"}],
  tool_resources={"code_interpreter":{"file_ids":[file.id]}},
  temperature=1,
  top_p=1,
  
)

# Create a thread
thread = client.beta.threads.create()

In [7]:
PROMPT_SUFFIX = """
- **ALWAYS** before giving the Final Answer, try another method.
Then reflect on the answers of the two methods you did and ask yourself
if it answers correctly the original question.
If you are not sure, try another method.
- If the methods tried do not give the same result,reflect and
try again until you have two methods that have the same result.
- If you still cannot arrive to a consistent result, say that
you are not sure of the answer.
- If you are sure of the correct answer, create a beautiful
and thorough response using Markdown.
- **DO NOT MAKE UP AN ANSWER OR USE PRIOR KNOWLEDGE,
ONLY USE THE RESULTS OF THE CALCULATIONS YOU HAVE DONE**.
- **ALWAYS**, as part of your "Final Answer", explain how you got
to the answer on a section that starts with: "\n\nExplanation:\n".
In the explanation, mention the column names that you used to get
to the final answer and provide the python code you used.

"""

Questions:
1. If 1 row in the dataset is 1 sighthing, what's our total sightings?
2. Which state has the most sightings?
3. Which season has the most sightings?
4. What are the top 10 years with the most sightings?
5. How many sightings did we have for Washington state in the year 2000?

Answers:
1. 5021
2. Washington (601)
3. Summer
4. 2004, 2005, 2006, 2003, 2000, 2007, 2008, 2012, 2001, 2011
5. 28

In [8]:
user_question ="""If 1 row in the dataset is 1 sighting, what's our total sightings
"""

# Add a user question to the thread
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content=PROMPT_SUFFIX + user_question
)

# Run the thread
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Looping until the run completes or fails
while run.status in ['queued', 'in_progress', 'cancelling']:
  time.sleep(1)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )
if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
elif run.status == 'requires_action':
  # the assistant requires calling some functions
  # and submit the tool outputs back to the run
  pass
else:
  print(run.status)

# Print the assistant response
content_block = messages.data[0].content[0]
value = content_block.text.value
print(value)


SyncCursorPage[Message](data=[Message(id='msg_hTNTpBlEDW7OAeS5a4oxrKCG', assistant_id='asst_RwKsbjgYuiWwVY504iQbSFKQ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value='Both methods yield the same result, confirming the total number of bigfoot sightings in the dataset.\n\n### Final Answer:\n\nThe total number of bigfoot sightings in the dataset is **5021**.\n\n#### Explanation:\n- The dataset was loaded and examined to understand its structure, where each row represents a single sighting.\n- The total number of rows in the dataset was calculated using two methods:\n  1. Using `shape[0]` to get the total number of rows.\n  2. Using `len(index)` to get the total number of rows.\n\nBoth methods provided the same result of 5021 sightings.\n\n```python\n# Method 1: Using shape[0]\ntotal_sightings = bigfoot_sightings.shape[0]\n\n# Method 2: Using len(index)\ntotal_sightings_verification = len(bigfoot_sightings.index)\n```\n'), type='text')], create

In [9]:
user_question ="""Which state has the most sightings? Provide the number of sightings in that state
"""

# Add a user question to the thread
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content=PROMPT_SUFFIX + user_question
)

# Run the thread
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Looping until the run completes or fails
while run.status in ['queued', 'in_progress', 'cancelling']:
  time.sleep(1)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )

if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
elif run.status == 'requires_action':
  # the assistant requires calling some functions
  # and submit the tool outputs back to the run
  pass
else:
  print(run.status)


content_block = messages.data[0].content[0]
value = content_block.text.value
print(value)


SyncCursorPage[Message](data=[Message(id='msg_XSSp7FljwKfUJiFh3y8TlAAA', assistant_id='asst_RwKsbjgYuiWwVY504iQbSFKQ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Both methods yield the same result, confirming the state with the most sightings and the number of sightings in that state.\n\n### Final Answer:\n\nThe state with the most sightings is **Washington** with **601** sightings.\n\n#### Explanation:\n- The dataset was analyzed to count the number of sightings in each state.\n- Two methods were used to ensure consistency:\n  1. Using the `value_counts` method to get the count of sightings in each state and identify the state with the highest count.\n  2. Using the `groupby` method to group the data by state, count the number of sightings in each state, and identify the state with the highest count.\n\nBoth methods provided the same result of Washington with 601 sightings.\n\n```python\n# Method 1: Using value_counts\nstate_sightings

In [10]:
user_question ="""Which season has the most sightings?
"""

# Add a user question to the thread
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content=PROMPT_SUFFIX + user_question
)

# Run the thread
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Looping until the run completes or fails
while run.status in ['queued', 'in_progress', 'cancelling']:
  time.sleep(1)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )

if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
elif run.status == 'requires_action':
  # the assistant requires calling some functions
  # and submit the tool outputs back to the run
  pass
else:
  print(run.status)


content_block = messages.data[0].content[0]
value = content_block.text.value
print(value)


SyncCursorPage[Message](data=[Message(id='msg_HgL573vcdJTlf8ySNLvIelV4', assistant_id='asst_RwKsbjgYuiWwVY504iQbSFKQ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Both methods yield the same result, confirming the season with the most sightings and the number of sightings in that season.\n\n### Final Answer:\n\nThe season with the most sightings is **Summer** with **1867** sightings.\n\n#### Explanation:\n- The dataset was analyzed to count the number of sightings in each season.\n- Two methods were used to ensure consistency:\n  1. Using the `value_counts` method to get the count of sightings in each season and identify the season with the highest count.\n  2. Using the `groupby` method to group the data by season, count the number of sightings in each season, and identify the season with the highest count.\n\nBoth methods provided the same result of Summer with 1867 sightings.\n\n```python\n# Method 1: Using value_counts\nseason_sight

In [12]:
user_question ="""What are the top 10 years with the most sightings? Use the date column and get the year there.
"""

# Add a user question to the thread
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content=PROMPT_SUFFIX + user_question
)

# Run the thread
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Looping until the run completes or fails
while run.status in ['queued', 'in_progress', 'cancelling']:
  time.sleep(1)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )

if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
elif run.status == 'requires_action':
  # the assistant requires calling some functions
  # and submit the tool outputs back to the run
  pass
else:
  print(run.status)


content_block = messages.data[0].content[0]
value = content_block.text.value
print(value)


SyncCursorPage[Message](data=[Message(id='msg_bokKOTopjZ0xfs1lfq2b0im9', assistant_id='asst_RwKsbjgYuiWwVY504iQbSFKQ', attachments=[], completed_at=None, content=[TextContentBlock(text=Text(annotations=[], value="Both methods yield the same result, confirming the top 10 years with the most sightings and their respective counts.\n\n### Final Answer:\n\nThe top 10 years with the most sightings are as follows:\n\n| Year  | Sightings |\n|-------|-----------|\n| 2004  | 241       |\n| 2005  | 209       |\n| 2006  | 176       |\n| 2003  | 152       |\n| 2000  | 146       |\n| 2007  | 145       |\n| 2008  | 141       |\n| 2012  | 141       |\n| 2001  | 131       |\n| 2011  | 121       |\n\n#### Explanation:\n- The dataset's `date` column was converted to `datetime` format, and the year was extracted.\n- Two methods were used to ensure consistency:\n  1. Using the `value_counts` method to count the occurrences of each year and identify the top 10 years with the most sightings.\n  2. Using the 

In [5]:
user_question ="""How many sightings did we have for Washington state in the year 2000?
"""

# Add a user question to the thread
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content=PROMPT_SUFFIX + user_question
)

# Run the thread
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Looping until the run completes or fails
while run.status in ['queued', 'in_progress', 'cancelling']:
  time.sleep(1)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )

if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
elif run.status == 'requires_action':
  # the assistant requires calling some functions
  # and submit the tool outputs back to the run
  pass
else:
  print(run.status)


content_block = messages.data[0].content[0]
value = content_block.text.value
print(value)


failed
### Mt. Mitchell Sighting in the Dataset

There is one recorded Bigfoot sighting in the dataset which mentions Mt. Mitchell. Here are the details of that sighting:

- **Observed:** "In early spring 1988, some friends of mine and I were hiking around Mt. Mitchell. We noticed deep impressions in the snow..."
- **County:** Yancey County
- **State:** North Carolina
- **Season:** Spring
- **Date:** March 15, 1988
- **Classification:** Class B

### Explanation:
To find out if there are any Bigfoot sightings observed at Mt. Mitchell, I checked for the occurrence of "Mt. Mitchell" in the "observed" column of the dataset. The steps taken are as follows:

1. Loaded the data and inspected the columns to understand its structure.
2. Filtered the dataset to locate rows where the "observed" column contains the text "Mt. Mitchell".
3. Verified the count and details of the sightings mentioning Mt. Mitchell.

Here is the Python code used:

```python
import pandas as pd

# Load the dataframe from

In [None]:
user_question ="""Using the "observed" column, are there any sightings observed at Mt. Mitchell?
"""

# Add a user question to the thread
message = client.beta.threads.messages.create(
  thread_id=thread.id,
  role="user",
  content=PROMPT_SUFFIX + user_question
)

# Run the thread
run = client.beta.threads.runs.create(
  thread_id=thread.id,
  assistant_id=assistant.id
)

# Looping until the run completes or fails
while run.status in ['queued', 'in_progress', 'cancelling']:
  time.sleep(1)
  run = client.beta.threads.runs.retrieve(
    thread_id=thread.id,
    run_id=run.id
  )
if run.status == 'completed':
  messages = client.beta.threads.messages.list(
    thread_id=thread.id
  )
  print(messages)
elif run.status == 'requires_action':
  # the assistant requires calling some functions
  # and submit the tool outputs back to the run
  pass
else:
  print(run.status)

# Print the assistant response
content_block = messages.data[0].content[0]
value = content_block.text.value
print(value)


# Delete Assistant

In [7]:
response = client.beta.assistants.delete(assistant.id)