In [5]:
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain.tools.tavily_search import TavilySearchResults
from langchain.tools import tool
from collections import defaultdict
from dotenv import load_dotenv
from typing import List, Dict, Optional, Union
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from bs4 import BeautifulSoup
from langchain.tools import DuckDuckGoSearchRun
import difflib
import time
import requests
import re
import os


In [55]:
load_dotenv()
LLM = ChatOpenAI(model = "gpt-4.1")
search = TavilySearchResults() # will be used to search the web

search_2 = DuckDuckGoSearchRun()

In [56]:
print(search_2.invoke({"query": "how had been mumbai indians bowling this year IPL2025"}))

The disciplined bowling attack from Mumbai Indians led to regular wickets, preventing Lucknow from gaining momentum. Ultimately, Lucknow Super Giants were bowled out for 161 runs in 20 overs. Among Mumbai's bowlers, Jasprit Bumrah delivered a standout performance, taking 4 crucial wickets for just 22 runs. 2. Strong Bowling Attack with Key Wicket-Takers. Mumbai Indians' bowling unit has been effective, led by Jasprit Bumrah and Trent Boult. Bumrah's death bowling expertise and ability to pick wickets at crucial moments make him invaluable. Boult has been consistent with 19 wickets in 14 matches, providing early breakthroughs. In IPL 2025, explosive batting has been met with a strategic bowling response. ... the tournament's two most aggressive batting units — Sunrisers Hyderabad and Kolkata Knight Riders — had reached the final. Despite IPL 2025 almost matching last season's scoring rates, the better bowling attacks are dominating this year. Mumbai Indians ... The 12-run win over Delhi

In [5]:
@tool
def get_ipl_series_id_dict() -> dict[str, str]:
    """Returns a dictionary mapping IPL season names to their Cricbuzz series IDs."""
    year_to_id = {
        "2008": "2058",
        "2009": "2059",
        "2010": "2060",
        "2011": "2037",
        "2012": "2115",
        "2013": "2170",
        "2014": "2261",
        "2015": "2330",
        "2016": "2430",
        "2017": "2568",
        "2018": "2676",
        "2019": "2810",
        "2020": "3130",
        "2021": "3472",
        "2022": "4061",
        "2023": "5945",
        "2024": "7607",
        "2025": "9237"
    }

    return {f"Indian Premier League {year}": series_id for year, series_id in year_to_id.items()}


In [4]:
IPL_IDs_dict = get_ipl_series_id_dict.invoke({})

In [5]:
print(search.invoke({"query": "is virat kohli available for today's ipl match vs LSG"}))

[{'title': 'LSG vs RCB, IPL 2025 Highlights: RCB book a spot in Qualifier 1 as ...', 'url': 'https://www.firstpost.com/firstcricket/lsg-vs-rcb-ipl-today-match-live-score-lucknow-super-giants-vs-royal-challengers-bangalore-scorecard-updates-ekana-cricket-stadium-lucknow-liveblog-13892206.html', 'content': "Today IPL Match LIVE, LSG vs RCB: A much better over for LSG as Rathi concedes 8 in his first over\n\nToday IPL Match LIVE, LSG vs RCB: Kohli smashes four consecutive fours as O'Rourke begins with a 22-run over!\n\nToday IPL Match LIVE, LSG vs RCB: FOUR! What a way for Phil Salt to get RCB's chase started!\n\nToday IPL Match LIVE, LSG vs RCB: Virat Kohli and Phil Salt make their way to the centre to begin RCB's chase of the daunting 228-run target [...] Today IPL Match LIVE, LSG vs RCB: Kohli keeps RCB going in big chase after LSG strike early\n\nToday IPL Match LIVE, LSG vs RCB: Akash Deep provides breakthrough, dismisses Phil Salt\n\nToday IPL Match LIVE, LSG vs RCB: Virat Kohli com

In [74]:
def fetch_stats(url: str, headers: dict) -> dict:
    response = requests.get(url, headers=headers)
    if response.status_code != 200:
       return {}
    return response.json()

In [128]:
# TOOLS
@tool
def match_info():
    """chghmhnfgb"""
    url = "https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/matches/upcoming"
    headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM', # API KEY
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '1943a818-98e9-48ea-8d1c-1554e116ef44'
    }

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        raise Exception(f"API request failed: {response.status_code}")
    
    data = response.json()
    ipl_match_list = []

    for type_match in data.get("typeMatches", []):
        for series_match in type_match.get("seriesMatches", []):
            series = series_match.get("seriesAdWrapper", {})
            if "Indian Premier League" in series.get("seriesName", ""):
                for match in series.get("matches", []):
                    match_info = match.get("matchInfo", {})
                    match_id = match_info.get("matchId")
                    match_desc = match_info.get("matchDesc")
                    match_status = match_info.get("status")
                    team1 = match_info.get("team1", {}).get("teamName", "Team 1")
                    team2 = match_info.get("team2", {}).get("teamName", "Team 2")
                    venue = match_info.get("venueInfo", {})
                    venue_id = venue.get("id", "Unknown ID")
                    ground = venue.get("ground", "Unknown Ground")
                    city = venue.get("city", "Unknown City")

                    ipl_match_list.append({
                        "Match ID": match_id,
                        "Match Desc": match_desc,
                        "Teams": f"{team1} vs {team2}",
                        "Status": match_status,
                        "Venue ID": venue_id,
                        "Venue": f"{ground}, {city}"
                    })
    
    return ipl_match_list


@tool
def additional_info(match_id: str) -> str: # about pitch, probable players, injuries about ground,
    """zxcv"""
    url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/match/{match_id}/commentary"
    headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '8cb69a0f-bcaa-45b5-a016-229a2e7594f6'
    }

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        raise Exception(f"Failed to get commentary: {response.status_code}")

    data = response.json()
    full_text = ""

    # Concatenate all commText entries
    for item in data.get("commentaryList", []):
        full_text += item.get("commText", "") + " "

    # Remove ALL markers like B0$, B1$, B14$ (anywhere in the text)
    cleaned = re.sub(r'\s*B\d+\$', '', full_text)
    
    # Remove escaped newlines and excess spaces
    cleaned = cleaned.replace("\\n", " ")
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    return cleaned

In [None]:
# 1. Researcher Agent: gathers match details (teams, date, venue, weather, pitch, odds).
research_agent = create_react_agent(
    model = LLM,
    name = "researcher",
    tools = [search, match_info, additional_info],
    prompt = (
        "You are an agent which is phenomenal at maths. Answer the query of the user to the best you can."
    )
)

In [30]:
inputs = {"messages": [{"role": "user", "content": "what is the output when we add, multiply, divide, and find diff of 2 and 20"}]}
result = research_agent.invoke(inputs)
for r in result['messages']:
    print(r)
print(result["messages"][-1].content)

content='what is the output when we add, multiply, divide, and find diff of 2 and 20' additional_kwargs={} response_metadata={} id='24d471ae-1813-4530-b5a5-b29aaffc4df6'
content='' additional_kwargs={'tool_calls': [{'id': 'call_dZcP5Zhc0ooHxC62rwmKRCf7', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'add_num'}, 'type': 'function'}, {'id': 'call_JYMf0QZGr1BdIJvztkDsmOqP', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'multiply_num'}, 'type': 'function'}, {'id': 'call_w9u1r8iEMUuLWR9Npq1oUO2J', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'div_num'}, 'type': 'function'}, {'id': 'call_RyoPERFckjxjzj7yyiOp4plB', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'diff_num'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 174, 'total_tokens': 262, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt

In [91]:
# TOOLS -> DATA COLLECTOR AGENT
@tool
def player_details(player_names: List[str]) -> List[dict]:
    """This tool returns the player ID and name for a list of player names.
        It takes a list containing names of players as input and output a list 
        of dict with each dict containing the player id and name of that player."""
    
    id_headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': 'b0242771-45ea-4c07-be42-a6da38cdec41'
    }

    results = []

    for name in player_names:
        id_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player?search={name.replace(' ', '+')}"
        data = fetch_stats(id_url, id_headers)
        players = data.get("player", [])
        if not players:
            results.append({"player name": name, "error": "No player found"})
            continue

        player_id = players[0].get("id")
        player_name = players[0].get("name")
        is_overseas = True
        if players[0].get("teamName") == "India":
            is_overseas = False

        if not player_id:
            results.append({"player name": name, "error": "Player ID not found"})
            continue

        role_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player/{player_id}"
        role_headers = {
            'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
            'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
            'x-apihub-endpoint': 'a055bf38-0796-4fab-8fe3-6f042f04cdba'
        }
        info = fetch_stats(role_url, role_headers)
        player_role = info.get("role", "Unknown").lower()
        
        is_wicketkeeper = "wk" in player_role

        batting_style = None
        bowling_style = None
        if "batsman" in player_role:
            batting_style = info.get("bat", "Unknown")
        elif "bowler" in player_role:
            bowling_style = info.get("bowl", "Unknown")
        else:
            batting_style = info.get("bat", "Unknown")
            bowling_style = info.get("bowl", "Unknown")

        results.append({
            "name": player_name,
            "role": player_role,
            "is_wicketkeeper": is_wicketkeeper,
            "is_overseas": is_overseas,
            "batting_style": batting_style,
            "bowling_style": bowling_style
        })

    return results

In [None]:
print(player_details.invoke({"player_names": ["virat kohli", "hardik pandya", "jasprit bumrah", "pant", "rickelton"]}))

[{'name': 'Virat Kohli', 'role': 'batsman', 'is_wicketkeeper': False, 'is_overseas': False, 'batting_style': 'Right Handed Bat', 'bowling_style': None}, {'name': 'Hardik Pandya', 'role': 'batting allrounder', 'is_wicketkeeper': False, 'is_overseas': False, 'batting_style': 'Right Handed Bat', 'bowling_style': 'Right-arm fast-medium'}, {'name': 'Jasprit Bumrah', 'role': 'bowler', 'is_wicketkeeper': False, 'is_overseas': False, 'batting_style': None, 'bowling_style': 'Right-arm fast'}, {'name': 'Rishabh Pant', 'role': 'wk-batsman', 'is_wicketkeeper': True, 'is_overseas': False, 'batting_style': 'Left Handed Bat', 'bowling_style': None}, {'name': 'Ryan Rickelton', 'role': 'wk-batsman', 'is_wicketkeeper': True, 'is_overseas': True, 'batting_style': 'Left Handed Bat', 'bowling_style': None}]


In [156]:
@tool
def player_ipl_stats(player_ids: List[str]) -> List[dict]: # Overall ipl stats
    """It fetches IPL batting and bowling stats for a list of player IDs.
    It returns stats based on player role:
    - Batter: only batting stats
    - Bowler: only bowling stats
    - All-rounder: both
    """

    def fetch_stats(url: str, headers: dict) -> dict:
        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            return {}
        return response.json()

    def extract_ipl_stats(data: dict) -> dict:
        headers_list = data.get("headers", [])
        values_list = data.get("values", [])

        try:
            ipl_index = headers_list.index("IPL")
        except ValueError:
            return {}

        stats = {}
        for item in values_list:
            row = item.get("values", [])
            if len(row) > ipl_index:
                stat_name = row[0]
                stat_value = row[ipl_index]
                stats[stat_name] = stat_value
        return stats

    results = []

    for player_id in player_ids:
        # Fetch role info first
        role_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player/{player_id}"
        role_headers = {
            'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
            'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
            'x-apihub-endpoint': 'a055bf38-0796-4fab-8fe3-6f042f04cdba'
        }
        role_info = fetch_stats(role_url, role_headers)
        player_role = role_info.get("role", "Unknown").lower()

        name_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player/{player_id}"

        name_headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': 'a055bf38-0796-4fab-8fe3-6f042f04cdba'
        }
        name_info = fetch_stats(name_url, name_headers)
        player_name = name_info.get("name", "Unknown")       

        player_result = {
            "Name": player_name,
            "ID": player_id,
            "Role": player_role.title(),
        }

        # Fetch batting if applicable
        if "batsman" in player_role or "allrounder" in player_role:
            batting_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player/{player_id}/batting"
            batting_headers = {
                'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
                'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
                'x-apihub-endpoint': '07a4d9b5-092e-4035-adc7-253bc3532a81'
            }
            batting_data = fetch_stats(batting_url, batting_headers)
            ipl_batting_stats = extract_ipl_stats(batting_data)
            player_result["Batting Stats"] = ipl_batting_stats

        # Fetch bowling if applicable
        if "bowler" in player_role or "allrounder" in player_role:
            bowling_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player/{player_id}/bowling"
            bowling_headers = {
                'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
                'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
                'x-apihub-endpoint': '5ba067de-b9a5-446f-916b-9dfbef717211'
            }
            bowling_data = fetch_stats(bowling_url, bowling_headers)
            ipl_bowling_stats = extract_ipl_stats(bowling_data)
            player_result["Bowling Stats"] = ipl_bowling_stats

        results.append(player_result)

    return results


In [157]:

ipl_stats = player_ipl_stats({"player_ids": ["576", "7909", "9647"]})  # innings, affregate functions, balls
for player in ipl_stats:
    print(player)

{'Name': 'Rohit Sharma', 'ID': '576', 'Role': 'Batsman', 'Batting Stats': {'Matches': '270', 'Innings': '265', 'Runs': '6957', 'Balls': '5277', 'Highest': '109', 'Average': '29.6', 'SR': '131.84', 'Not Out': '30', 'Fours': '630', 'Sixes': '298', 'Ducks': '18', '50s': '46', '100s': '2', '200s': '0', '300s': '0', '400s': '0'}}
{'Name': 'Mohammed Shami', 'ID': '7909', 'Role': 'Bowler', 'Bowling Stats': {'Matches': '119', 'Innings': '119', 'Balls': '2606', 'Runs': '3748', 'Maidens': '3', 'Wickets': '133', 'Avg': '28.18', 'Eco': '8.63', 'SR': '19.59', 'BBI': '4/11', 'BBM': '4/11', '4w': '2', '5w': '0', '10w': '0'}}
{'Name': 'Hardik Pandya', 'ID': '9647', 'Role': 'Batting Allrounder', 'Batting Stats': {'Matches': '150', 'Innings': '138', 'Runs': '2712', 'Balls': '1849', 'Highest': '91', 'Average': '28.25', 'SR': '146.68', 'Not Out': '42', 'Fours': '206', 'Sixes': '145', 'Ducks': '6', '50s': '10', '100s': '0', '200s': '0', '300s': '0', '400s': '0'}, 'Bowling Stats': {'Matches': '150', 'Inning

In [158]:
@tool
def get_team_id(series_id: str) -> List[dict]:
    """dcftv"""
    url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/series/{series_id}/squads"

    headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '038d223b-aca5-4096-8eb1-184dd0c09513'
    }

    response = requests.get(url, headers=headers)
    data = response.json()

    # Extract relevant team info from squads
    team_list = []
    for squad in data.get("squads", []):
        if "teamId" in squad and "squadType" in squad:
            team_list.append({
                "teamName": squad["squadType"],
                "teamId": squad["teamId"]
            })

    return team_list 

In [159]:
@tool
def get_recent_match_ids(series_id: str, team_ids: List[str]) -> List[dict]:
    """Get up to 5 most recent match IDs for a team in a given series (IPL season)."""

    url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/series/{series_id}"
    headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '661c6b89-b558-41fa-9553-d0aca64fcb6f'
    }

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch series data: {response.status_code}")
    
    # Convert team_ids to integers for correct comparison
    team_ids = [int(team_id) for team_id in team_ids]

    data = response.json()
    match_entries = data.get("matchDetails", [])
    
    # Dictionary to collect matches for each team
    team_matches = {team_id: [] for team_id in team_ids}

    for entry in match_entries:
        day_matches = entry.get("matchDetailsMap", {}).get("match", [])
        for match in day_matches:
            info = match.get("matchInfo", {})
            if info.get("state") != "Complete":
                continue

            t1 = info.get("team1", {}).get("teamId")
            t2 = info.get("team2", {}).get("teamId")
            match_id = info.get("matchId")
            start_date = int(info.get("startDate", 0))

            for team_id in team_ids:
                if team_id == t1 or team_id == t2:
                    team_matches[team_id].append({
                        "matchId": match_id,
                        "startDate": start_date
                    })

    # Format result: sort each list by date and pick top 5
    result = []
    for team_id in team_ids:
        sorted_matches = sorted(team_matches[team_id], key=lambda x: x["startDate"], reverse=True)
        recent_ids = [m["matchId"] for m in sorted_matches[:5]]
        result.append({
            "teamId": team_id,
            "matchIds": recent_ids
        })

    return result

In [None]:
@tool
def get_selected_player_stats(player_ids: List[str], match_ids: List[str]) -> List[dict]:
    """
    Fetch and aggregate batting and bowling stats for a given list of player IDs across multiple matches.
    Player roles are fetched individually. Only players in the input list are processed.
    """
    player_stats = defaultdict(lambda: {
        'player_name': '',
        'player_id': 0,
        'role': None,
        'is_wicketkeeper': False,
        'is_overseas': False,
        'Batting Stats': {
            'runs': 0,
            'balls': 0,
            '4s': 0,
            '6s': 0,
            'strike_rate': 0,
            'average': 0,
            'innings': 0,
            'notouts': 0,
        },
        'Bowling Stats': {
            'runs_conceded': 0,
            'balls_bowled': 0,
            'dot_balls': 0,
            'wicket': 0,
            'economy': 0,
        }
    })

    def fetch_json(url: str, headers: dict) -> dict:
        response = requests.get(url, headers=headers)
        return response.json() if response.status_code == 200 else {}

    scorecard_headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '5f260335-c228-4005-9eec-318200ca48d6'
    }

    for match_id in match_ids:
        url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/match/{match_id}/scorecard"
        data = fetch_json(url, scorecard_headers)
        scorecards = data.get("scoreCard", [])

        for innings in scorecards:
            # Batting stats
            for batsman in innings.get("batTeamDetails", {}).get("batsmenData", []):
                pid = str(batsman["batId"])
                if pid not in player_ids:
                    continue

                player = player_stats[pid]
                player['player_name'] = batsman["batName"]
                player['player_id'] = pid
                player['is_wicketkeeper'] |= batsman.get("isKeeper", False)
                player['is_overseas'] |= batsman.get("isOverseas", False)

                stats = player['Batting Stats']
                stats['runs'] += batsman.get("runs", 0)
                stats['balls'] += batsman.get("balls", 0)
                stats['4s'] += batsman.get("fours", 0)
                stats['6s'] += batsman.get("sixes", 0)
                stats['innings'] += 1
                if batsman.get("outDesc", "").lower() == "not out":
                    stats['notouts'] += 1

            # Bowling stats
            for bowler in innings.get("bowlTeamDetails", {}).get("bowlersData", []):
                pid = str(bowler["bowlerId"])
                if pid not in player_ids:
                    continue

                player = player_stats[pid]
                player['player_name'] = bowler["bowlName"]
                player['player_id'] = pid
                player['is_wicketkeeper'] |= bowler.get("isKeeper", False)
                player['is_overseas'] |= bowler.get("isOverseas", False)

                stats = player['Bowling Stats']
                stats['balls_bowled'] += bowler.get("balls", 0)
                stats['wicket'] += bowler.get("wickets", 0)
                stats['dot_balls'] += bowler.get("dotBalls", 0)
                stats['runs_conceded'] += bowler.get("runs", 0)

    # Final aggregation with role fetch and derived metrics
    final_stats = []
    for pid in player_ids:
        pid = str(pid)
        player = player_stats.get(pid)
        if not player:
            continue

        # Fetch player role
        role_url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/browse/player/{pid}"
        role_headers = {
            'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
            'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
            'x-apihub-endpoint': 'a055bf38-0796-4fab-8fe3-6f042f04cdba'
        }
        role_data = fetch_json(role_url, role_headers)
        role = role_data.get("role", "Unknown").lower()
        player['role'] = role.title()

        # Batting averages
        bat = player['Batting Stats']
        if bat['balls'] > 0:
            bat['strike_rate'] = round((bat['runs'] / bat['balls']) * 100, 2)
        outs = bat['innings'] - bat['notouts']
        if outs > 0:
            bat['average'] = round(bat['runs'] / outs, 2)

        # Bowling economy
        bowl = player['Bowling Stats']
        if bowl['balls_bowled'] > 0:
            overs = bowl['balls_bowled'] / 6
            bowl['economy'] = round(bowl['runs_conceded'] / overs, 2)

        # Final formatted output
        filtered_player = {
            "player_id": player['player_id'],
            "player_name": player['player_name'],
            "role": player['role'],
            "is_wicketkeeper": player['is_wicketkeeper'],
            "is_overseas": player['is_overseas']
        }

        if "batter" in role:
            filtered_player["Batting Stats"] = bat
        elif "bowler" in role:
            filtered_player["Bowling Stats"] = bowl
        elif "allrounder" in role:
            filtered_player["Batting Stats"] = bat
            filtered_player["Bowling Stats"] = bowl
        else:
            # Optional fallback
            filtered_player["Batting Stats"] = bat
            filtered_player["Bowling Stats"] = bowl

        final_stats.append(filtered_player)

    return final_stats


In [None]:

ipl_stats = get_selected_player_stats({"team_id": "971", "match_ids": ["118880", "118862"]})  # innings, affregate functions, balls
for player in ipl_stats:
    print(player)

{'player_id': 10808, 'player_name': 'Mohammed Siraj', 'role': 'Bowler', 'is_wicketkeeper': False, 'is_overseas': False, 'Bowling Stats': {'runs_conceded': 0, 'balls_bowled': 80, 'dot_balls': 0, 'wicket': 0, 'economy': 0.0}}
{'player_id': 18637, 'player_name': 'Arshad Khan', 'role': 'Bowling Allrounder', 'is_wicketkeeper': False, 'is_overseas': False, 'Batting Stats': {'runs': 21, 'balls': 17, '4s': 0, '6s': 3, 'strike_rate': 123.53, 'average': 10.5, 'innings': 2, 'notouts': 0}, 'Bowling Stats': {'runs_conceded': 0, 'balls_bowled': 50, 'dot_balls': 0, 'wicket': 1, 'economy': 0.0}}
{'player_id': 10551, 'player_name': 'Prasidh Krishna', 'role': 'Bowler', 'is_wicketkeeper': False, 'is_overseas': False, 'Bowling Stats': {'runs_conceded': 0, 'balls_bowled': 80, 'dot_balls': 0, 'wicket': 2, 'economy': 0.0}}
{'player_id': 13320, 'player_name': 'Gerald Coetzee', 'role': 'Bowler', 'is_wicketkeeper': False, 'is_overseas': True, 'Bowling Stats': {'runs_conceded': 0, 'balls_bowled': 30, 'dot_balls'

In [None]:
# 2. Data Collector Agent: 
data_collector_agent = create_react_agent(
    model = LLM,
    name = "data_miner",
    tools = [search, player_id_search],
    prompt = (
        "You are an agent which is phenomenal at maths. Answer the query of the user to the best you can."
    )
)

In [None]:
# Player Comparison agent
player_comparator = create_react_agent(
    model = LLM,
    name = "player_faceoff",
    tools = [search, player_id_search],
    prompt = (
        "You are an agent which is phenomenal at maths. Answer the query of the user to the best you can."
    )
)

In [None]:
@tool
def get_match_ids_by_venue(IPL_IDs_dict: Dict[str, str], target_venue: str, player_ids: List[str]) -> List[int]:
    """
    Returns all match IDs from IPL seasons where the match was played at the given venue.
    Supports partial/loose venue name matching (e.g., 'Eden' will match 'Eden Gardens').
    """
    match_ids = []

    headers = {
        'x-apihub-key': '9HN92wz6l7bberNNuKkhDCXeb4YH4lXo2fIKuVdgCpB82jpHlM',
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '661c6b89-b558-41fa-9553-d0aca64fcb6f'
    }

    # Normalize target venue
    target_venue_clean = re.sub(r'\s+', ' ', target_venue.strip()).lower()

    for season, series_id in IPL_IDs_dict.items():
        try:
            url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/series/{series_id}"
            response = requests.get(url, headers=headers)
            if response.status_code != 200:
                print(f"Failed to fetch series {season}: {response.status_code}")
                continue

            data = response.json()
            match_details = data.get("matchDetails", [])

            for match_day in match_details:
                for match in match_day.get("matchDetailsMap", {}).get("match", []):
                    match_info = match.get("matchInfo", {})
                    venue_info = match_info.get("venueInfo", {})
                    venue_name = venue_info.get("ground", "")

                    # Clean and normalize venue name
                    venue_name_clean = re.sub(r'\s+', ' ', venue_name.strip()).lower()

                    # Check if input venue (even partial) is in the match venue
                    if target_venue_clean in venue_name_clean:
                        match_ids.append(match_info.get("matchId"))

            match_ids_recent = match_ids[:5]
            sghfgrj = get_selected_player_stats({"player_ids": player_ids, "match_ids": match_ids})

        except Exception as e:
            print(f"Error processing series {season}: {e}")

    

In [165]:
print(get_match_ids_by_venue.invoke({"IPL_IDs_dict": IPL_IDs_dict, "target_venue": "chinnaswamy"}))

Failed to fetch series Indian Premier League 2014: 400
Failed to fetch series Indian Premier League 2015: 400
Failed to fetch series Indian Premier League 2016: 400
Failed to fetch series Indian Premier League 2017: 400
Failed to fetch series Indian Premier League 2018: 400
Failed to fetch series Indian Premier League 2019: 400
Failed to fetch series Indian Premier League 2020: 400
Failed to fetch series Indian Premier League 2021: 400
Failed to fetch series Indian Premier League 2022: 400
Failed to fetch series Indian Premier League 2023: 400
Failed to fetch series Indian Premier League 2024: 400
Failed to fetch series Indian Premier League 2025: 400
[10557, 10546, 10544, 10537, 10532, 10516, 10503, 10668, 10671, 10625, 10627, 10647, 10650, 10659, 9906, 9917, 9933, 9945, 9948, 9956, 9967, 11185, 11190, 11199, 11202, 11215, 11225, 11231, 11242, 11254, 11867, 11916, 11876, 11885, 11891, 11895, 11913, 11936]


In [6]:
def get_espn_player_id(player_name: str) -> Optional[int]:
    query = f"{player_name} site:espncricinfo.com/cricketers"
    url = f"https://www.bing.com/search?q={query.replace(' ', '+')}"
    headers={"User-Agent": "Mozilla/5.0"}
    response = requests.get(url, headers=headers)
    
    # Look for ESPNcricinfo player URL
    match = re.search(r"espncricinfo\.com/cricketers/[^/]+-(\d+)", response.text)
    if match:
        player_id = match.group(1)
        return player_id
    return None

def resolve_to_id(name: str, mapping: Dict[str, int]) -> Optional[int]:
    """
    Fuzzy-match the provided name against mapping keys.
    Returns the mapped ID if a close match is found, else None.
    """
    if not name:
        return None
    # Exact case-insensitive match
    for key, val in mapping.items():
        if key.lower() == name.lower():
            return val
    # Substring match
    for key, val in mapping.items():
        if name.lower() in key.lower():
            return val
    # Fuzzy close match
    keys = list(mapping.keys())
    matches = difflib.get_close_matches(name, keys, n=1, cutoff=0.6)
    if matches:
        return mapping[matches[0]]
    return None

In [7]:
print(get_espn_player_id("jasprit bumrah"))

625383


In [8]:
def get_espn_stats(player_id: int, opposition_id: int, venue_id: int, data_type: str) -> Dict:
    """
    Fetch and aggregate stats over the last n matches for a player.
    - batting: total_runs, total_balls_faced, total_4s, total_6s, average, strike_rate
    - bowling: total_overs, total_maidens, total_runs_conceded, total_wkts, average, economy
    - allround: includes both batting and bowling aggregates

    Returns a dict with keys depending on data_type.
    """
    n = 5  # number of recent matches

    def scrape_table(datatype):
        # Build URL + params
        url = f"https://stats.espncricinfo.com/ci/engine/player/{player_id}.html"
        params = {
            'class': 6,
            'template': 'results',
            'type': datatype,
            'view': 'match'
        }
        if opposition_id is not None:
            params['opposition'] = opposition_id
        elif venue_id is not None:
            params['ground'] = venue_id

        r = requests.get(url, params=params, headers={"User-Agent": "Mozilla/5.0"})
        r.raise_for_status()
        soup = BeautifulSoup(r.text, 'html.parser')

        # find the Match by match list table
        tbl = None
        for t in soup.find_all('table', class_='engineTable'):
            cap = t.find('caption')
            if cap and 'Match by match list' in cap.text:
                tbl = t
                break
        if not tbl:
            raise RuntimeError("Match-by-match table not found")

        # parse headers
        headers = [th.get_text(strip=True) for th in tbl.find('tr').find_all('th')]
        rows = tbl.find_all('tr')[1:]
        last = rows[-n:]
        return headers, last

    def aggregate_batting(headers, rows):
        # find indices
        i_run = headers.index('Bat1')
        i_bf  = headers.index('BF')
        i_4s  = headers.index('4s')
        i_6s  = headers.index('6s')

        total_runs = total_balls = total_4s = total_6s = 0
        inns = not_outs = 0

        for r in rows:
            tds = r.find_all('td')

            raw_html = str(tds[i_run])               # e.g. '<td>82*</td>'
            m = re.search(r'>(\d+\*)<', raw_html)
            if m:
                run_txt = m.group(1)                 # '82*'
                not_out = True
                run_txt = run_txt[:-1].strip()       # '82'
            else:
                run_txt = tds[i_run].get_text(strip=True)
                not_out = run_txt.endswith('*')
                if not_out:
                    run_txt = run_txt[:-1].strip()
            # --------------------------------------------------

            if not run_txt.isdigit():
                continue

            runs = int(run_txt)
            to_int = lambda idx: int(tds[idx].get_text(strip=True) or 0) if tds[idx].get_text(strip=True).isdigit() else 0
            balls = to_int(i_bf)
            fours = to_int(i_4s)
            sixes = to_int(i_6s)

            inns += 1
            not_outs += 1 if not_out else 0
            total_runs  += runs
            total_balls += balls
            total_4s    += fours
            total_6s    += sixes

        dismissals = max(inns - not_outs, 0)
        avg = (total_runs / dismissals) if dismissals else float(total_runs)
        sr  = (total_runs / total_balls * 100) if total_balls else 0.0

        return {
            'innings': inns,
            'not_outs': not_outs,
            'dismissals': dismissals,
            'total_runs': total_runs,
            'total_balls': total_balls,
            'total_4s': total_4s,
            'total_6s': total_6s,
            'average': round(avg, 2),
            'strike_rate': round(sr, 2)
        }

    def aggregate_bowling(headers, rows):
        # find indices
        i_ov = headers.index('Overs')
        i_md = headers.index('Mdns')
        i_cd = headers.index('Runs')
        i_wk = headers.index('Wkts')

        total_overs = total_maidens = total_conceded = total_wkts = 0.0

        for r in rows:
            tds = r.find_all('td')
            def to_float(idx):
                txt = tds[idx].get_text(strip=True)
                try:
                    return float(txt)
                except:
                    return 0.0

            total_overs    += to_float(i_ov)
            total_maidens  += to_float(i_md)
            total_conceded += to_float(i_cd)
            total_wkts     += to_float(i_wk)

        avg = (total_conceded / total_wkts) if total_wkts else None
        econ = (total_conceded / total_overs) if total_overs else None

        return {
            'innings': len(rows),
            'total_overs': round(total_overs, 1),
            'total_maidens': int(total_maidens),
            'total_conceded': int(total_conceded),
            'total_wickets': int(total_wkts),
            'bowling_average': round(avg, 2) if avg is not None else None,
            'economy_rate': round(econ, 2) if econ is not None else None
        }

    if data_type == 'batting':
        hdrs, rs = scrape_table('batting')
        return aggregate_batting(hdrs, rs)
    elif data_type == 'bowling':
        hdrs, rs = scrape_table('bowling')
        return aggregate_bowling(hdrs, rs)
    elif data_type == 'allround':
        out = {}
        hdrs_bat, rs_bat = scrape_table('batting')
        hdrs_bwl, rs_bwl = scrape_table('bowling')
        out['batting'] = aggregate_batting(hdrs_bat, rs_bat)
        out['bowling'] = aggregate_bowling(hdrs_bwl, rs_bwl)
        return out
    else:
        raise ValueError("data_type must be 'batting', 'bowling', or 'allround'")

In [9]:
def combine_recent_stats(player_name: str, player_role: str,
                         overall_stats: dict, opp_stats: dict, venue_stats: dict, opposition_label: str, venue_label: str):
    n = 5 # number of recent matches
    result = {
        "name": player_name,
        "role": player_role,
        "recent_stats": []
    }

    # 1) Overall
    result["recent_stats"].append({
        "title": f"last_{n}_innings",
        "data": overall_stats
    })

    # 2) Versus opposition
    if opp_stats is not None and opposition_label:
        safe_label = opposition_label.replace(' ', '_')
        result["recent_stats"].append({
            "title": f"last_{n}_innings_vs_{safe_label}",
            "data": opp_stats
        })

    # 3) At venue
    if venue_stats is not None and venue_label:
        # only take first part before comma if present
        base = venue_label.split(',')[0].replace(' ', '_')
        result["recent_stats"].append({
            "title": f"last_{n}_innings_at_{base}",
            "data": venue_stats
        })

    return result

In [10]:
@tool
def recent_stats(player_details: List[Dict[str, str]], venue_name: str):
    """
    For each player in player_details, fetches aggregated stats over the last `n` matches
    in three categories: overall, vs specified opposition, and at specified venue.

    Args:
        player_details: A list of dicts, each with keys:
            - name (str): Player name to lookup
            - role (str): 'batting', 'bowling', or other (treated as 'allround')
            - opposition (str): Opponent team name filter
        venue_name: Name of venue to filter by
        n: Number of recent matches to include in aggregation

    Returns:
        A list of summary dicts, one per player, each with the structure:
        {
          "player name": <int>,
          "role": <str>,
          "recent_stats": [
            {
              "title": "overall_last_<n>",
              "data": { ... overall stats keys ... }
            },
            {
              "title": "vs_<Opposition>_last_<n>",
              "data": { ... vs-opposition stats keys ... }
            },
            {
              "title": "at_<Venue>_last_<n>",
              "data": { ... at-venue stats keys ... }
            }
          ]
        }

    Example Return Value:
    [
      {
        "name": "virat",
        "role": "batsman"
        "recent_stats": [
          {
            "title": "overall_last_5",
            "data": {
              "innings": 5,
              "not_outs": 1,
              "dismissals": 4,
              "total_runs": 321,
              "total_balls": 240,
              "total_4s": 28,
              "total_6s": 9,
              "average": 80.25,
              "strike_rate": 133.75
            }
          },
          {
            "title": "vs_Mumbai_Indians_last_5",
            "data": { ... }
          },
          {
            "title": "at_Wankhede_Stadium_last_5",
            "data": { ... }
          }
        ]
      },
      ...
    ]
    """

    # Static Mapping
    opposition_ids = {
        "Royal Challengers Bengaluru": 4340,
        "Kolkata Knight Riders": 4341,
        "Punjab Kings": 4342,
        "Chennai Super Kings": 4343,
        "Delhi Capitals": 4344,
        "Rajasthan Royals": 4345,
        "Mumbai Indians": 4346,
        "Sunrisers Hyderabad": 5143,
        "Lucknow Super Giants": 6903,
        "Gujrat Titans": 6904
    }

    # Static Mapping
    venue_ids = {
        "Arun Jaitley Stadium, Delhi": 333,
        "Ekana Cricket Stadium, Lucknow": 3355,
        "Eden Gardens, Kolkata":292,
        "Chinnaswamy Stadium, Bengaluru": 683,
        "MA Chidambaram Stadium, Chepauk, Chennai": 291,
        "Narendra Modi Stadium, Ahmedabad": 840,
        "Sawai Mansingh Stadium, Jaipur": 664,
        "Wankhede Stadium, Mumbai": 713,
        "Maharaja Yadavindra Singh International Cricket Stadium, Mullanpur, CHandigarh": 3585,
        "Rajiv Gandhi International stadium, Hyderabad": 1981,
        "Barsapara Cricket Stadium, Guwahati": 2865,
        "Himachal Pradesh Cricket Association Stadium, Dharamshala": 1920,
        "Brabourne Stadium, Mumbai": 393,
        "Barabati Stadium, Cuttack, ": 442,
        "Dr DY Patil Sports Academy, Navi Mumbai": 2361,
        "Dr Y.S. Rajasekhara Reddy ACA-VDCA Cricket Stadium, Visakhapatnam": 1896,
        "Holkar Cricket Stadium, Indore": 1055
    }
    
    results = []

    for detail in player_details:
        name = detail.get("name", "").strip()
        opposition = detail.get("opposition", "").strip()
        role = detail.get("role", "").strip().lower()

        # Retry logic for resolving player ID
        player_id = None
        err = "Unknown error"
        for attempt in range(10):
            try:
                player_id = get_espn_player_id(name)
                if player_id is not None:
                    break  # Success
            except Exception as e:
                err = str(e)
            time.sleep(0.25)

        if player_id is None:
            results.append({"name": name, "error": f"Could not resolve ID after 10 attempts. Last error: {err}"})
            continue

        # Resolve opposition and venue IDs
        opposition_id = resolve_to_id(opposition, opposition_ids)
        venue_id = resolve_to_id(venue_name, venue_ids)

        if role == "batsman":
            data_type = "batting"
        elif role == "bowler":
            data_type = "bowling"
        else:
            data_type = "allround"

        overall_stats_dict = get_espn_stats(player_id = player_id, opposition_id = None, venue_id = None, data_type = data_type)
        opp_stats_dict = get_espn_stats(player_id = player_id, opposition_id = opposition_id, venue_id = None, data_type = data_type)
        
        venue_stats_dict = get_espn_stats(player_id = player_id, opposition_id = None, venue_id = venue_id, data_type = data_type)
    return combine_recent_stats(name, role, overall_stats_dict, opp_stats_dict, venue_stats_dict, opposition, venue_name)



In [11]:
print(recent_stats.invoke({"player_details": [{"name": "hardik pandya", "role": "battingallrounder", "opposition": "kolkata"}], "venue_name": "eden"}))

{'name': 'hardik pandya', 'role': 'battingallrounder', 'recent_stats': [{'title': 'last_5_innings', 'data': {'batting': {'innings': 5, 'not_outs': 1, 'dismissals': 4, 'total_runs': 83, 'total_balls': 54, 'total_4s': 8, 'total_6s': 3, 'average': 20.75, 'strike_rate': 153.7}, 'bowling': {'innings': 5, 'total_overs': 5.0, 'total_maidens': 0, 'total_conceded': 59, 'total_wickets': 1, 'bowling_average': 59.0, 'economy_rate': 11.8}}}, {'title': 'last_5_innings_vs_kolkata', 'data': {'batting': {'innings': 4, 'not_outs': 0, 'dismissals': 4, 'total_runs': 96, 'total_balls': 76, 'total_4s': 6, 'total_6s': 3, 'average': 24.0, 'strike_rate': 126.32}, 'bowling': {'innings': 5, 'total_overs': 12.0, 'total_maidens': 0, 'total_conceded': 120, 'total_wickets': 3, 'bowling_average': 40.0, 'economy_rate': 10.0}}}, {'title': 'last_5_innings_at_eden', 'data': {'batting': {'innings': 5, 'not_outs': 2, 'dismissals': 3, 'total_runs': 162, 'total_balls': 89, 'total_4s': 13, 'total_6s': 10, 'average': 54.0, 'st

In [4]:
# 3 Agents More -> Strategizer, Selevctor, FAQ's solver
# https://allcric.com/blog/a-guide-to-analyzing-pitch-conditions-for-fantasy-cricket-success/

In [None]:
def create_vector_store():
    """
    Build or load FAISS vectorstore for the Fantasy Cricket Guide PDF.
    If the vectorstore directory already exists, skips re-embedding and loading.
    """
    FAISS_DB_PATH = "FAISS_VECTORSTORE"

    # If embeddings already exist, skip rebuilding
    if os.path.isdir(FAISS_DB_PATH) and \
       os.path.exists(os.path.join(FAISS_DB_PATH, "index.faiss")):
        emd_model = OpenAIEmbeddings(model="text-embedding-3-small")
        db = FAISS.load_local(FAISS_DB_PATH, emd_model, allow_dangerous_deserialization = True)
        return db

    # otherwise build the vectorstore
    # step 1: laod the pdf
    loader = PyPDFLoader("FANTASY CRICKET GUIDE.pdf")
    docs = loader.load() # in the document format, where each document has pafe_content and metadate

    # step 2: create chunks 
    text_splitter = RecursiveCharacterTextSplitter(
            chunk_size = 1000,
            chunk_overlap = 100,
        )
    chunks = text_splitter.split_documents(docs)

    # step 3: initilaize the emb model
    emd_model = OpenAIEmbeddings(model = "text-embedding-3-small") # 3072 dimensions, model by openai

    # step 4: make emd fo the chunks and store it in the vectorstore
    db = FAISS.from_documents(chunks, emd_model)
    db.save_local(FAISS_DB_PATH)
    return db

def clean_text(text: str) -> str:
    return re.sub(r'\s+', ' ', text).strip()


In [58]:
@tool 
def fantasy_guide_RAG(query: str) -> Dict[str, Union[str, List[str]]]:
    """ 
    return {
        "query": query,                    # str
        "retrieved_text": List[str]        # list of str
    }
    """
    # 1) Load or build your FAISS store
    db = create_vector_store()  

    # 2) Perform similarity search
    docs = db.similarity_search(query, k = 3) # top 3 docs will be retrieved

    # 3) Extract the content from each Document
    retrieved_texts: List[str] = [clean_text(doc.page_content) for doc in docs]

    # 4) Return structured dict
    return {
        "query": query,
        "retrieved_text": retrieved_texts
    }

In [59]:
print(fantasy_guide_RAG.invoke({"query": "tell me when creating a fantasy team do there is a cap on the numebr of overseas or not"}))

{'query': 'tell me when creating a fantasy team do there is a cap on the numebr of overseas or not', 'retrieved_text': ['any Super Over), normal scoring applies and winners are decided by points. • Is there a maximum cap on the number of overseas players in the team? Yes, when creating a fantasy cricket team, there is typically a cap on the number of overseas players you can include—most platforms like Dream11, MPL, or My11Circle allow a maximum of 4 overseas players in your playing XI. This restriction mirrors real-world rules, such as those in the IPL, where teams can field only four foreign players per match. It’s designed to maintain competitive balance and push users to scout strong domestic talent rather than stacking their team with international stars. So, you’ll need to be strategic—reserve those overseas slots for high-impact players and make sure they’re actually', 'lineup is locked. You should always wait for the playing XI and toss news, as any player who is not in the fin

In [47]:
Fantasy_FAQ_Agent = create_react_agent(
    model = LLM,
    name = "fantasy_expert",
    tools = [search, fantasy_guide_RAG],
    prompt = ""
)