In [1]:
import pandas as pd 
from main import main

import warnings
warnings.filterwarnings("ignore")


In [2]:
# Student score table used by every question below; the path is relative to
# the directory the notebook is run from.
df = pd.read_csv("sample_score.csv")

Simple and direct questions

In [3]:
# Direct question about a single column: the largest math score.
query = "What is the highest math score?"
main(df, query)

📝 The question: What is the highest math score?

💻 The python code:
"```python
df['math score'].max()
```"

📊 The execution result:
91

✅ The concluding response:
The highest math score achieved is 91, as determined by the maximum value in the 'math score' column of the dataset.



In [4]:
# Direct question about a single column: the smallest reading score.
query = "What is the lowest reading score?"
main(df, query)

📝 The question: What is the lowest reading score?

💻 The python code:
"```python
df['reading score'].min()
```"

📊 The execution result:
26

✅ The concluding response:
The lowest reading score is 26, as determined by the minimum value in the 'reading score' column of the dataset.



In [5]:
# Asks for more than one value: the two smallest writing scores.
query = "What are the two lowest writing scores?"
main(df, query)

📝 The question: What are the two lowest writing scores?

💻 The python code:
"```python
df['writing score'].nsmallest(2)
```"

📊 The execution result:
index  writing score
0     11             22
1      9             41

✅ The concluding response:
The two lowest writing scores are 22 and 41, associated with indices 11 and 9, respectively, indicating that these are the lowest writing scores in the dataset.



In [6]:
# Counting question with a threshold on the reading score.
# The wording of the question is passed to `main` exactly as typed.
query = "How many students whoes reading score more than 80?"
main(df, query)

📝 The question: How many students whoes reading score more than 80?

💻 The python code:
"```python
df[df['reading score'] > 80].shape[0]
```"

📊 The execution result:
3

✅ The concluding response:
There are 3 students whose reading score is more than 80, as indicated by the execution result of the provided Python code.



Comprehensive questions

In [7]:
# Comparison across groups: math score by gender.
query = "Which gender has a better math score?"
main(df, query)

📝 The question: Which gender has a better math score?

💻 The python code:
"```python
df.groupby('gender')['math score'].mean()
```"

📊 The execution result:
gender  math score
0  female   60.375000
1    male   66.166667

✅ The concluding response:
The data indicates that males have a better math score, with an average of 66.17, compared to females, who have an average of 60.38.



In [8]:
# Open-ended question relating parental education to reading score.
# The wording of the question is passed to `main` exactly as typed.
query = "If parental level of education has the impact for reading score?"
main(df, query)

📝 The question: If parental level of education has the impact for reading score?

💻 The python code:
"```python
df.groupby('parental level of education')['reading score'].mean()
```"

📊 The execution result:
parental level of education  reading score
0          associate's degree      73.800000
1           bachelor's degree      59.500000
2                 high school      59.666667
3                some college      74.400000
4            some high school      55.400000

✅ The concluding response:
The data suggests that parental level of education has an impact on reading scores, with students whose parents have some college or an associate's degree tend to have higher average reading scores compared to those with other levels of parental education.



In [9]:
# "Comprehensive score" is not a column; the question leaves it to `main` to
# combine the three score columns.
# NOTE(review): the generated code echoed in the recorded output lists the
# three column names without commas, which as written would concatenate into
# one string. The result 261 equals 91 + 86 + 84, so this is presumably a
# display artifact -- verify how `main` prints the code.
query = "What's the best comprehensive score?"
main(df, query)

📝 The question: What's the best comprehensive score?

💻 The python code:
"```python
df[['reading score' 'writing score' 'math score']].sum(axis=1).max()
```"

📊 The execution result:
261

✅ The concluding response:
The best comprehensive score is 261, indicating the highest total score achieved by summing the reading, writing, and math scores.



In [10]:
# Asks for the full record of the student with the top writing score.
query = "What're the features of the student who has the best writing score?"
main(df, query)

📝 The question: What're the features of the student who has the best writing score?

💻 The python code:
"```python
df.iloc[df['writing score'].idxmax()]
```"

📊 The execution result:
index            14
0                       gender          male
1               race/ethnicity       group D
2  parental level of education  some college
3                        lunch      standard
4      test preparation course     completed
5                   math score            85
6                reading score            81
7                writing score            85

✅ The concluding response:
The student with the best writing score is a male from group D, with a parental level of education of some college, receiving a standard lunch, and having completed a test preparation course, achieving scores of 85 in writing, 85 in math, and 81 in reading.



In [12]:
# Asks for the full record of the student with the top combined score.
# NOTE(review): the execution counter jumps from 10 to 12 here, so a cell was
# run and removed (or cells were run out of order); re-check with
# Restart Kernel -> Run All.
# NOTE(review): the generated code echoed in the recorded output shows the
# column list without commas -- presumably a display artifact; verify.
query = "What're the features of the student who has the best total score?"
main(df, query)

📝 The question: What're the features of the student who has the best total score?

💻 The python code:
"```python
df.iloc[df[['reading score' 'writing score' 'math score']].sum(axis=1).idxmax()]
```"

📊 The execution result:
index                   0
0                       gender              female
1               race/ethnicity             group C
2  parental level of education  associate's degree
3                        lunch            standard
4      test preparation course                none
5                   math score                  91
6                reading score                  86
7                writing score                  84

✅ The concluding response:
The student with the best total score is a female from group C, with an associate's degree parental level of education, having standard lunch and no test preparation course, and achieving scores of 91 in math, 86 in reading, and 84 in writing.



Field approximation (the question uses a synonym or partial name instead of the exact column name)

In [13]:
# The question says "food"; no column has that name, so `main` has to pick
# the closest field on its own.
query = "If food impacts writing score?"
main(df, query)

📝 The question: If food impacts writing score?

💻 The python code:
"```python
df.groupby('lunch')['writing score'].mean()
```"

📊 The execution result:
lunch  writing score
0  free/reduced           65.7
1      standard           62.2

✅ The concluding response:
The data suggests that food may have an impact on writing score, as students receiving free or reduced lunch have a higher average writing score of 65.7 compared to those with standard lunch at 62.2.



In [14]:
# The question says "preparation" rather than giving a full column name.
query = "If students who completed preparation have a better writing score?"
main(df, query)

📝 The question: If students who completed preparation have a better writing score?

💻 The python code:
"```python
df.groupby('test preparation course')['writing score'].mean()
```"

📊 The execution result:
test preparation course  writing score
0               completed         70.625
1                    none         59.500

✅ The concluding response:
The data suggests that students who completed the preparation course have a better writing score, with an average of 70.625, compared to those who did not, who have an average of 59.500.



In [15]:
# The question says "racial" rather than giving a full column name.
query = "Which racial has the best writing score?"
main(df, query)

📝 The question: Which racial has the best writing score?

💻 The python code:
"```python
df.groupby('race/ethnicity')['writing score'].mean().idxmax()
```"

📊 The execution result:
'group C'

✅ The concluding response:
The analysis reveals that group C has achieved the highest average writing score, making it the racial group with the best writing performance.



A different DataFrame (sample_product.csv)

In [16]:
# Second dataset (product data) for the remaining questions.
# NOTE(review): `df2` is a positional name; a descriptive one would read
# better, but later cells refer to `df2`, so it is kept here.
df2 = pd.read_csv("sample_product.csv")

In [17]:
# Direct question about a single column of the product table.
query = "What's the worst average rating?"
main(df2, query)

📝 The question: What's the worst average rating?

💻 The python code:
"```python
df['average rating'].min()
```"

📊 The execution result:
1.1

✅ The concluding response:
The worst average rating is 1.1, indicating the lowest level of satisfaction or performance among the available data.



Field approximation (the question uses a synonym or partial name instead of the exact column name)

In [18]:
# The question says "satisfaction" rather than giving a full column name.
query = "What's the best satisfaction?"
main(df2, query)

📝 The question: What's the best satisfaction?

💻 The python code:
"```python
df['customer satisfaction'].max()
```"

📊 The execution result:
9.8

✅ The concluding response:
The best satisfaction is 9.8, as indicated by the maximum value in the customer satisfaction dataset.



In [19]:
# The question says "inventory"; no column has that name, so `main` has to
# pick the closest field on its own.
query = "What's the average inventory?"
main(df2, query)

📝 The question: What's the average inventory?

💻 The python code:
"```python
df['stock'].mean()
```"

📊 The execution result:
23.2

✅ The concluding response:
The average inventory is 23.2, indicating that the mean stock level across the dataset is approximately 23.2 units.



Comprehensive questions

In [20]:
# Asks for the product with the largest sales volume.
query = "Which product has the highest sales volume?"
main(df2, query)

📝 The question: Which product has the highest sales volume?

💻 The python code:
"```python
df.loc[df['sales volume'].idxmax()]['product name']
```"

📊 The execution result:
'Product9'

✅ The concluding response:
The analysis result indicates that Product9 has achieved the highest sales volume, making it the product with the highest sales performance.



In [21]:
# Counting question over the promotion flag.
query = "How many products are currently on promotion?"
main(df2, query)

📝 The question: How many products are currently on promotion?

💻 The python code:
"```python
df['on promotion'].value_counts()
```"

📊 The execution result:
on promotion  count
0           No      5
1          Yes      5

✅ The concluding response:
There are 5 products that are currently on promotion, as indicated by the 'Yes' count in the execution result.



In [22]:
# Asks for the full record of the product with the best market feedback.
# NOTE(review): the recorded run prints "Code execution error: Cannot index by
# location index with a non-integer key" and then shows the row at index 0,
# whose market feedback is 'Poor' -- so the displayed result does not answer
# the question. The echoed code applies idxmax to 'market feedback', which
# appears to hold text labels; verify how `main` handles a failed execution
# and how it should rank this column.
query = "What're the features of product which has the best market feedback?"
main(df2, query)

Code execution error: Cannot index by location index with a non-integer key
📝 The question: What're the features of product which has the best market feedback?

💻 The python code:
"```python
df.iloc[df['market feedback'].idxmax()]
```"

📊 The execution result:
index                     0
0              product id                     1
1            product name              Product1
2                category           Electronics
3                   price                326.49
4            sales volume                    10
5            review count                   243
6          average rating                   4.8
7                   stock                    17
8             launch date            2022-01-01
9            on promotion                    No
10     promotion discount                   0.3
11            return rate                  0.14
12  customer satisfaction                   7.1
13        market feedback                  Poor
14       advertising cost              