In [6]:
import pandas as pd  

# Load the ODI match-level dataset. The path is relative, so the CSV must be
# in the kernel's working directory.
df = pd.read_csv("ODI_Match_info.csv")

# Preview the first five rows. Leaving the frame as the cell's last expression
# lets Jupyter render it as a table, instead of the plain-text dump produced by
# print(), which wraps the columns across several backslash-continued chunks.
df.head()




        id   season        city        date        team1       team2  \
0  1389389  2023/24      Indore  2023/09/24        India   Australia   
1  1336129     2023  Nottingham  2023/09/23      England     Ireland   
2  1395701     2023       Dhaka  2023/09/23  New Zealand  Bangladesh   
3  1389388  2023/24  Chandigarh  2023/09/22    Australia       India   
4  1395700     2023       Dhaka  2023/09/21  New Zealand  Bangladesh   

   toss_winner toss_decision  result  dl_applied       winner  win_by_runs  \
0    Australia         field     D/L           1        India           99   
1      Ireland         field  normal           0      England           48   
2  New Zealand           bat  normal           0  New Zealand           86   
3        India         field  normal           0        India            0   
4   Bangladesh         field  normal           0          NaN            0   

   win_by_wickets player_of_match  \
0               0         SS Iyer   
1               0       

In [16]:
import getpass
import os

from langchain.chat_models import init_chat_model

# Import first so a missing langchain install fails before the user is asked
# to type a secret.

# Prompt for the Groq API key only when the environment does not already hold a
# non-empty value; getpass keeps the typed key out of the notebook output.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

# Chat model handle used by the later cells.
model = init_chat_model("llama-3.1-8b-instant", model_provider="groq")

In [17]:
# Fixed part of the prompt: role, dataset schema, sample rows, example queries
# and the JSON contract the model is asked to follow. The user query is appended
# by build_chart_spec_prompt() so new queries do not require copying this text.
#
# NOTE(review): the sample rows below omit the 'umpire2' column that the column
# list mentions. Also, the output recorded with this notebook shows the model
# wrapping its JSON in explanatory prose despite instruction 4, so any
# downstream parser should not assume the reply is bare JSON.
CHART_SPEC_INSTRUCTIONS = """
You are a data visualization assistant.
You are given an ODI cricket dataset.

Columns:
['id', 'season', 'city', 'date', 'team1', 'team2', 'toss_winner', 'toss_decision',
 'result', 'dl_applied', 'winner', 'win_by_runs', 'win_by_wickets',
 'player_of_match', 'venue', 'umpire1', 'umpire2']

Here are some sample rows:
[
 { "id": 1389389, "season": "2023/24", "city": "Indore", "date": "2023/09/24",
   "team1": "India", "team2": "Australia", "toss_winner": "Australia", "toss_decision": "field",
   "result": "D/L", "dl_applied": 1, "winner": "India", "win_by_runs": 99, "win_by_wickets": 0,
   "player_of_match": "SS Iyer", "venue": "Holkar Cricket Stadium, Indore", "umpire1": "J Madanagopal"
 },
 { "id": 1336129, "season": "2023", "city": "Nottingham", "date": "2023/09/23",
   "team1": "England", "team2": "Ireland", "toss_winner": "Ireland", "toss_decision": "field",
   "result": "normal", "dl_applied": 0, "winner": "England", "win_by_runs": 48, "win_by_wickets": 0,
   "player_of_match": "WG Jacks", "venue": "Trent Bridge, Nottingham", "umpire1": "DJ Millns"
 },
 { "id": 1395701, "season": "2023", "city": "Dhaka", "date": "2023/09/23",
   "team1": "New Zealand", "team2": "Bangladesh", "toss_winner": "New Zealand", "toss_decision": "bat",
   "result": "normal", "dl_applied": 0, "winner": "New Zealand", "win_by_runs": 86, "win_by_wickets": 0,
   "player_of_match": "IS Sodhi", "venue": "Shere Bangla National Stadium, Mirpur", "umpire1": "M Erasmus"
 }
]

The user will ask queries like:
- "Show total toss wins by team in a pie chart"
- "Show top winning teams in a bar chart"
- "Show top 10 winning teams in a bar chart"
- "Show top 10 players with most Player of the Match awards in a bar chart"

Your task:
1. Identify which dataset column(s) are needed.
2. Choose chart type (pie, bar, heatmap, line, etc.).
3. If the user asks "top N", include "limit": N in JSON.
4. Always return only valid JSON with keys: x, y, chart_type, and (optional) limit.

Example:
{"x": "toss_winner", "y": "count", "chart_type": "pie"}

Now answer for this user query:
"""


def build_chart_spec_prompt(user_query: str) -> str:
    """Return the full prompt for one natural-language chart request.

    Args:
        user_query: The request to translate into a chart spec, e.g.
            "Show top 10 winning teams in a bar chart".

    Returns:
        CHART_SPEC_INSTRUCTIONS followed by the query wrapped in double quotes
        and a trailing newline.
    """
    # The instructions contain literal JSON braces; they are interpolated as a
    # value here, so they are never interpreted as format fields.
    return f'{CHART_SPEC_INSTRUCTIONS}"{user_query}"\n'


user_query = "Show total toss wins by team in a pie chart"

# Last expression of the cell, so the returned message is displayed as output.
model.invoke(build_chart_spec_prompt(user_query))

AIMessage(content='To answer this query, we need to identify the following:\n\n1. Column(s) needed: `toss_winner` is the column we need to count the wins by team.\n2. Chart type: `pie` chart is suitable for showing the total wins by team.\n\nHere\'s the JSON output:\n\n```json\n{\n  "x": "toss_winner",\n  "y": "count",\n  "chart_type": "pie"\n}\n```\n\nHowever, since we need to count the wins by team, we also need to group by `toss_winner` and count the occurrences. This can be achieved using a simple SQL query or a data manipulation step.\n\nHere\'s the updated JSON request:\n\n```json\n{\n  "query": {\n    "aggregations": {\n      "wins_by_team": {\n        "terms": {\n          "field": "toss_winner"\n        }\n      }\n    },\n    "chart_type": "pie",\n    "x": "wins_by_team.key",\n    "y": "wins_by_team.doc_count"\n  }\n}\n```\n\nBut since we are a data visualization assistant, we can directly return the JSON with the required columns and chart type.\n\n```json\n{\n  "x": "toss_w