In [199]:
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_community.tools.tavily_search.tool import TavilySearchResults
from langchain.tools import tool
from collections import defaultdict
from dotenv import load_dotenv
from typing import List, Dict, Optional, Union, Any, Tuple
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from bs4 import BeautifulSoup
from langchain.tools import DuckDuckGoSearchRun
import difflib
import time
import requests
import urllib.parse
import re
import os

In [10]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()
# Chat model shared by the agents built later in this notebook (passed as `model=`).
LLM = ChatOpenAI(model = "gpt-4.1")
search = TavilySearchResults() # will be used to search the web

# Second web-search tool instance (DuckDuckGo).
search_2 = DuckDuckGoSearchRun()

In [11]:
def fetch_stats(url: str, headers: dict) -> dict:
    """
    GET `url` with `headers` and return the decoded JSON body.

    Returns:
        The parsed JSON on a 200 response; an empty dict when the status is
        not 200 or the body is not valid JSON.

    Raises:
        requests.RequestException: on network-level failures (connection
        error, timeout); these are deliberately not swallowed.
    """
    # A timeout keeps a stalled endpoint from hanging the notebook forever.
    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        return {}
    try:
        return response.json()
    except ValueError:
        # requests signals an undecodable body with a ValueError subclass.
        return {}

In [None]:
# TOOLS
@tool
def match_info():
    """
    List upcoming Indian Premier League matches from the Cricbuzz API.

    Takes no arguments. Returns a list of dicts, one per match, with the keys
    "Match ID", "Match Desc", "Teams" ("<team1> vs <team2>"), "Status",
    "Venue ID" and "Venue" ("<ground>, <city>"). The list is empty when no
    series whose name contains "Indian Premier League" is present.
    The "Match ID" value can be passed to the `additional_info` tool.

    Raises:
        RuntimeError: if the CRICBUZZ_API_KEY environment variable is not set.
        Exception: if the API answers with a non-200 status code.
    """
    # The key is read from the environment (load_dotenv() makes a .env file work)
    # so the credential never lives in the notebook itself.
    api_key = os.environ.get("CRICBUZZ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "CRICBUZZ_API_KEY is not set; add it to the environment or the .env file."
        )

    url = "https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/matches/upcoming"
    headers = {
        'x-apihub-key': api_key,
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '1943a818-98e9-48ea-8d1c-1554e116ef44'
    }

    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"API request failed: {response.status_code}")

    data = response.json()
    ipl_match_list = []

    for type_match in data.get("typeMatches", []):
        for series_match in type_match.get("seriesMatches", []):
            series = series_match.get("seriesAdWrapper", {})
            if "Indian Premier League" not in series.get("seriesName", ""):
                continue
            for match in series.get("matches", []):
                # Named `info` (not `match_info`) so the tool's own name is not shadowed.
                info = match.get("matchInfo", {})
                team1 = info.get("team1", {}).get("teamName", "Team 1")
                team2 = info.get("team2", {}).get("teamName", "Team 2")
                venue = info.get("venueInfo", {})
                ground = venue.get("ground", "Unknown Ground")
                city = venue.get("city", "Unknown City")

                ipl_match_list.append({
                    "Match ID": info.get("matchId"),
                    "Match Desc": info.get("matchDesc"),
                    "Teams": f"{team1} vs {team2}",
                    "Status": info.get("status"),
                    "Venue ID": venue.get("id", "Unknown ID"),
                    "Venue": f"{ground}, {city}"
                })

    return ipl_match_list

@tool
def additional_info(match_id: str) -> str: # about pitch, probable players, injuries about ground,
    """
    Fetch the Cricbuzz commentary feed for one match and return it as a single cleaned string.

    Intended as the source for pitch notes, probable players, injuries and
    ground information (per the author's note on this tool).

    Args:
        match_id: Cricbuzz match identifier, e.g. the "Match ID" returned by `match_info`.

    Returns:
        All "commText" entries joined together, with B0$/B1$/... markers removed,
        escaped newlines replaced by spaces and runs of whitespace collapsed.
        Empty string when the feed has no commentary entries.

    Raises:
        RuntimeError: if the CRICBUZZ_API_KEY environment variable is not set.
        Exception: if the API answers with a non-200 status code.
    """
    # Credential comes from the environment / .env file, not from the source.
    api_key = os.environ.get("CRICBUZZ_API_KEY")
    if not api_key:
        raise RuntimeError(
            "CRICBUZZ_API_KEY is not set; add it to the environment or the .env file."
        )

    # Escape the id so an unexpected value cannot alter the request path.
    safe_id = urllib.parse.quote(str(match_id), safe="")
    url = f"https://Cricbuzz-Official-Cricket-API.proxy-production.allthingsdev.co/match/{safe_id}/commentary"
    headers = {
        'x-apihub-key': api_key,
        'x-apihub-host': 'Cricbuzz-Official-Cricket-API.allthingsdev.co',
        'x-apihub-endpoint': '8cb69a0f-bcaa-45b5-a016-229a2e7594f6'
    }

    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to get commentary: {response.status_code}")

    data = response.json()

    # Concatenate all commText entries; `or ""` tolerates an explicit null value.
    full_text = " ".join(
        (item.get("commText") or "") for item in data.get("commentaryList", [])
    )

    # Remove ALL markers like B0$, B1$, B14$ (anywhere in the text)
    cleaned = re.sub(r'\s*B\d+\$', '', full_text)

    # Remove escaped newlines and excess spaces
    cleaned = cleaned.replace("\\n", " ")
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()

    return cleaned

In [None]:
# 1. Researcher Agent: gathers match details (teams, date, venue, weather, pitch, odds).
# The system prompt describes the research job so it matches the cricket tools
# registered below (web search, upcoming-match list, match commentary).
research_agent = create_react_agent(
    model = LLM,
    name = "researcher",
    tools = [search, match_info, additional_info],
    prompt = (
        "You are a cricket research agent. Use the available tools to gather details about "
        "upcoming IPL matches: teams, date, venue, weather, pitch conditions and odds. "
        "Answer the query of the user to the best you can, relying on tool results rather than guesses."
    )
)

In [30]:
# Smoke test: send one user message through the researcher agent.
# NOTE(review): the saved output below shows tool calls to add_num / multiply_num /
# div_num / diff_num, none of which are in research_agent's tool list above — the
# output looks like it came from an earlier version of the agent; re-run to confirm.
inputs = {"messages": [{"role": "user", "content": "what is the output when we add, multiply, divide, and find diff of 2 and 20"}]}
result = research_agent.invoke(inputs)
# Dump every message of the returned conversation, then only the final answer text.
for r in result['messages']:
    print(r)
print(result["messages"][-1].content)

content='what is the output when we add, multiply, divide, and find diff of 2 and 20' additional_kwargs={} response_metadata={} id='24d471ae-1813-4530-b5a5-b29aaffc4df6'
content='' additional_kwargs={'tool_calls': [{'id': 'call_dZcP5Zhc0ooHxC62rwmKRCf7', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'add_num'}, 'type': 'function'}, {'id': 'call_JYMf0QZGr1BdIJvztkDsmOqP', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'multiply_num'}, 'type': 'function'}, {'id': 'call_w9u1r8iEMUuLWR9Npq1oUO2J', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'div_num'}, 'type': 'function'}, {'id': 'call_RyoPERFckjxjzj7yyiOp4plB', 'function': {'arguments': '{"a": 2, "b": 20}', 'name': 'diff_num'}, 'type': 'function'}], 'refusal': None} response_metadata={'token_usage': {'completion_tokens': 88, 'prompt_tokens': 174, 'total_tokens': 262, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt

In [108]:
# Two more agents to build -> Strategizer, Selector
# https://allcric.com/blog/a-guide-to-analyzing-pitch-conditions-for-fantasy-cricket-success/

In [None]:



def _name_variants(full_name: str) -> list:
    """
    Generate possible abbreviated variants for a multi-word name.
    - If two words (First Last): ["First Last", "F Last"]
    - If three words (A B C): ["A B C", "AB C", "A BC"]
    Additional variants can be added as needed.
    """
    parts = full_name.strip().split()
    variants = [full_name.strip()]

    if len(parts) == 2:
        first, last = parts
        variants.append(f"{first[0]} {last}")
    elif len(parts) == 3:
        a, b, c = parts
        variants.append(f"{a[0]}{b[0]} {c}")  # e.g. "AB C"
        variants.append(f"{a[0]} {b} {c}")    # e.g. "A B C" → but already full

    return variants


def fetch_cricmetric_table_via_scrapedo(
    batsman: str, bowler: str
) -> str:
    """
    Fetch the CricMetric batsman-vs-bowler matchup page through Scrape.do and
    return only its T20I / TWENTY20 tables as raw HTML.

    1. Tries the full `batsman` vs `bowler` name pair first.
    2. If nothing matched, tries the abbreviated spellings from `_name_variants`
       (every batsman variant against every bowler variant); identical pairs are
       requested only once.
    3. On each page, keeps the first <table class="table"> inside every
       <div class="panel panel-default"> whose panel-heading text contains
       "T20I" or "TWENTY20".
    4. Returns those tables concatenated, or "" if no attempt matched.

    Raises:
        RuntimeError: if the SCRAPE_DO_TOKEN environment variable is not set.
        requests.HTTPError: if Scrape.do answers with an error status
        (the first failing request aborts the remaining attempts).
    """
    # Token comes from the environment / .env file instead of the source code.
    token = os.environ.get("SCRAPE_DO_TOKEN")
    if not token:
        raise RuntimeError(
            "SCRAPE_DO_TOKEN is not set; add it to the environment or the .env file."
        )

    base_matchup = "https://www.cricmetric.com/matchup.py"

    def attempt_fetch(name_a: str, name_b: str) -> str:
        """Fetch one name pair and return only its T20I/TWENTY20 tables ("" if none)."""
        # urlencode turns spaces into "+" and escapes characters such as "'" or "&"
        # that would otherwise corrupt the query string.
        query = urllib.parse.urlencode(
            {"batsman": name_a, "bowler": name_b, "groupby": "match"}
        )
        matchup_url = f"{base_matchup}?{query}"
        quoted = urllib.parse.quote(matchup_url, safe="")
        # HTTPS so the token in the query string is not sent in clear text.
        scrape_do = f"https://api.scrape.do/?token={token}&url={quoted}&render=true"
        headers = {"User-Agent": "Mozilla/5.0"}
        # render=true can be slow, hence the generous (but finite) timeout.
        resp = requests.get(scrape_do, headers=headers, timeout=60)
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        filtered_html = []
        # Each panel wraps one format section (ODI, T20I, ...).
        for panel in soup.find_all("div", class_="panel panel-default"):
            heading_div = panel.find("div", class_="panel-heading")
            label = heading_div.get_text(strip=True).upper() if heading_div else ""
            if "T20I" in label or "TWENTY20" in label:
                tbl = panel.find("table", class_="table")
                if tbl:
                    filtered_html.append(str(tbl))

        return "".join(filtered_html)

    # The first variant of each name is the full name, so the first pair tried
    # is full-vs-full, followed by the abbreviated combinations.
    tried = set()
    for bv in _name_variants(batsman):
        for ov in _name_variants(bowler):
            if (bv, ov) in tried:
                continue
            tried.add((bv, ov))
            table_html = attempt_fetch(bv, ov)
            if table_html:
                return table_html

    # If none worked, return empty
    return ""


def parse_cricmetric_total_row(table_html: str, batsman: str, bowler: str) -> Dict[str, Any]:
    """
    Aggregate the "Total" rows of one or more <table class="table"> blocks
    (already filtered to T20I/TWENTY20 by fetch_cricmetric_table_via_scrapedo).

    Steps:
      1) Column names are read from the <th> cells of the first <tr> of the FIRST table
         and reused for every table.
      2) <tbody> rows are counted across all tables; "Innings" is that count minus one,
         never below zero.
         NOTE(review): the reason for the "- 1" is not visible here (presumably one
         body row is not an innings) — confirm against the page markup.
      3) The first <tfoot><tr> of each table is summed column-wise; cells that are not
         numeric count as 0. The first column is skipped.
      4) SR and Avg are recomputed from the aggregated Runs, Balls and Outs
         (0.0 when Balls / Outs is zero).

    Returns:
        {
          "Title": "<batsman> V/S <bowler>",
          "Stats": {"Innings", "Runs", "Balls", "Outs", "Dots", "4s", "6s", "SR", "Avg"}
        }
        with every value in "Stats" formatted as a string.

    Raises:
        RuntimeError: if no table is found, the first table has no <tr>, or a
        total row's cell count differs from the header count.
    """
    soup = BeautifulSoup(table_html, "html.parser")
    tables = soup.find_all("table", class_="table")
    if not tables:
        raise RuntimeError("No <table class='table'> blocks found to extract totals.")

    # Extract headers from the first table
    first_header_row = tables[0].find("tr")
    if not first_header_row:
        raise RuntimeError("No <tr> found in first table to extract headers.")
    headers = [th.get_text(strip=True) for th in first_header_row.find_all("th")]

    # Running total for every column except the first (label) column
    running_totals: Dict[str, float] = {col: 0.0 for col in headers[1:]}
    total_innings = 0

    for tbl in tables:
        tbody = tbl.find("tbody")
        if not tbody:
            continue
        total_innings += len(tbody.find_all("tr"))

        tfoot = tbl.find("tfoot")
        total_row = tfoot.find("tr") if tfoot else None
        if not total_row:
            continue
        cells = total_row.find_all("td")
        if len(cells) != len(headers):
            raise RuntimeError(
                f"Header count ({len(headers)}) != Total row cell count ({len(cells)})."
            )

        # Add this table's totals to the running totals
        for col_name, cell in zip(headers[1:], cells[1:]):
            text = cell.get_text(strip=True).replace(",", "")
            try:
                val = float(text)
            except ValueError:
                val = 0.0
            running_totals[col_name] += val

    total_runs = running_totals.get("Runs", 0.0)
    total_balls = running_totals.get("Balls", 0.0)
    total_outs = running_totals.get("Outs", 0.0)

    combined_sr = (total_runs / total_balls) * 100.0 if total_balls > 0 else 0.0
    combined_avg = total_runs / total_outs if total_outs > 0 else 0.0

    stats: Dict[str, str] = {}
    # Clamp so an empty set of body rows reports "0" rather than "-1".
    stats["Innings"] = str(max(total_innings - 1, 0))
    for key in ("Runs", "Balls", "Outs", "Dots", "4s", "6s"):
        stats[key] = str(int(running_totals.get(key, 0.0)))
    stats["SR"] = f"{combined_sr:.1f}"
    stats["Avg"] = f"{combined_avg:.1f}"

    return {
        "Title": f"{batsman} V/S {bowler}",
        "Stats": stats,
    }


def players_faceoff(
    batsman: str, bowler: str
) -> Dict[str, str]:
    """
    Convenience wrapper around the fetch + parse pair for one batsman/bowler matchup.

    Retrieves the filtered table HTML with `fetch_cricmetric_table_via_scrapedo`;
    when that comes back empty the result is an empty dict, otherwise the HTML is
    handed to `parse_cricmetric_total_row` and its dict is returned unchanged.
    """
    html = fetch_cricmetric_table_via_scrapedo(batsman, bowler)
    return parse_cricmetric_total_row(html, batsman, bowler) if html else {}


In [196]:
# Manual check of players_faceoff; an empty dict means no T20I/TWENTY20 table was found
# for this pair (that is the only path on which players_faceoff returns {}).
print(players_faceoff("Ayush mhatre", "jasprit bumrah"))


{}


In [None]:
@tool
def head_2_head(team_A: Dict[str, Union[str, List[Dict[str, str]]]], team_B: Dict[str, Union[str, List]]) -> List[Dict[str, Any]]:
    """
    Computes head-to-head (faceoff) stats between batsmen/allrounders of one team
    and bowlers/allrounders of the other team, in both directions.

    Args:
        team_A (Dict): {
            "teamName": str,
            "Players": [
                {"name": str, "role": str},
                ...
            ]
        }
        team_B (Dict): same format as team_A

    Returns:
        List[Dict[str, Any]]: one entry per (batsman, bowler) pair — first every
        team_A batter against every team_B bowler, then every team_B batter
        against every team_A bowler:
          {
            "batsman": str,
            "bowler": str,
            "stats": output of players_faceoff (empty dict when no data was found)
          }
    """
    def categorize_players(players: List[Dict[str, str]]):
        """
        Splits a list of player dicts into batting-side and bowling-side name lists.
        Any role containing 'batsman' or 'allround' goes into batting_side.
        Any role containing 'bowler' or 'allround' goes into bowling_side.
        Players without a name are skipped; a missing role matches neither side.
        """
        batting_side = []
        bowling_side = []
        for p in players:
            name = p.get("name")
            if not name:
                continue
            role = str(p.get("role", "")).lower()
            if "batsman" in role or "allround" in role:
                batting_side.append(name)
            if "bowler" in role or "allround" in role:
                bowling_side.append(name)
        return batting_side, bowling_side

    def faceoffs(batters: List[str], bowlers: List[str]) -> List[Dict[str, Any]]:
        """One result entry per batter/bowler pair, always under the "stats" key."""
        return [
            {"batsman": bats, "bowler": bowl, "stats": players_faceoff(bats, bowl)}
            for bats in batters
            for bowl in bowlers
        ]

    # Categorize each team's players
    tA_bats, tA_bowl = categorize_players(team_A.get("Players", []))
    tB_bats, tB_bowl = categorize_players(team_B.get("Players", []))

    # Both directions use the same entry shape (the second direction previously
    # stored its result under "data", unlike the first and unlike the docstring).
    return faceoffs(tA_bats, tB_bowl) + faceoffs(tB_bats, tA_bowl)

In [112]:
# Manual check of the head_2_head tool: two batsmen in team_A against two bowlers in team_B.
print(head_2_head.invoke({
    "team_A": {
    "teamName": "Mumbai Indians",
    "Players": [
        {"name": "Rohit Sharma", "role": "batsman"},
        {"name": "Virat Kohli", "role": "batsman"},
    ]
},

"team_B": {
    "teamName": "Kolkata Knight Riders",
    "Players": [
        {"name": "Jasprit Bumrah", "role": "bowler"},
        {"name": "tim southee", "role": "bowler"},
    ]
}}))


[{'batsman': 'Rohit Sharma', 'bowler': 'Jasprit Bumrah', 'stats': {}}, {'batsman': 'Rohit Sharma', 'bowler': 'tim southee', 'stats': {'Title': 'Rohit Sharma V/S tim southee', 'Stats': {'Innings': '5', 'Runs': '51', 'Balls': '130', 'Outs': '4', 'Dots': '106', '4s': '6', '6s': '0', 'SR': '39.2', 'Avg': '12.8'}}}, {'batsman': 'Virat Kohli', 'bowler': 'Jasprit Bumrah', 'stats': {'Title': 'Virat Kohli V/S Jasprit Bumrah', 'Stats': {'Innings': '17', 'Runs': '150', 'Balls': '101', 'Outs': '5', 'Dots': '37', '4s': '15', '6s': '6', 'SR': '148.5', 'Avg': '30.0'}}}, {'batsman': 'Virat Kohli', 'bowler': 'tim southee', 'stats': {'Title': 'Virat Kohli V/S tim southee', 'Stats': {'Innings': '8', 'Runs': '133', 'Balls': '276', 'Outs': '3', 'Dots': '218', '4s': '16', '6s': '1', 'SR': '48.2', 'Avg': '44.3'}}}]


In [114]:
def compute_faceoff_score(faceoff: dict) -> dict:
    """
    Turn one batsman-vs-bowler faceoff dict into a signed advantage score.

    Args:
        faceoff: {"Title": str, "Stats": {...}} as produced by
            parse_cricmetric_total_row; values in "Stats" may be strings.
            An empty dict is accepted.

    Returns:
        {"Title", "Stats", "advantage_score"} where advantage_score is
        batter_score - bowler_score rounded to 2 decimals (positive favours the
        batter, negative the bowler).
        When there is too little data — fewer than 5 innings AND fewer than 20
        balls, or zero/negative innings or balls — advantage_score is None and a
        "reason" key explains why.

    Scoring:
        batter_score = 0.5 * SR score + 0.4 * Avg score + 0.1 * boundaries per ball
        bowler_score = 0.2 * dots per ball + 0.8 * outs per innings
    """
    title = faceoff.get('Title', 'Unknown')
    stats = faceoff.get('Stats', {})

    def to_float(val, default=0.0):
        """float(val), or `default` when val is missing or not numeric."""
        try:
            return float(val)
        except (TypeError, ValueError):
            return default

    min_inns = 5
    min_balls = 20
    inns = to_float(stats.get('Innings', 0))
    runs = to_float(stats.get('Runs', 0))
    balls = to_float(stats.get('Balls', 0))
    outs = to_float(stats.get('Outs', 0))
    dots = to_float(stats.get('Dots', 0))
    fours = to_float(stats.get('4s', 0))
    sixes = to_float(stats.get('6s', 0))
    sr = to_float(stats.get('SR', 0))
    # None for a missing / '-' / '' average, i.e. "no average available"
    avg = to_float(stats.get('Avg'), default=None)

    # Too little data, or denominators that would make the per-ball / per-innings
    # ratios below undefined (previously a ZeroDivisionError).
    if (min_inns > inns and min_balls > balls) or inns <= 0 or balls <= 0:
        return {
            'Title': title,
            'Stats': stats,
            'advantage_score': None,
            'reason': "Insufficient stats to decide the face-off"
        }

    # Define weights
    batter_weights = {
        'sr': 0.5,
        'avg': 0.4,
        'boundaries': 0.1
    }

    bowler_weights = {
        'dots': 0.2,
        'outs': 0.8,
    }

    boundaries = fours + sixes

    # Normalize SR: a strike rate of 200 or more counts as excellent (score 1)
    sr_score = min(1, sr / 200)

    # Normalize Avg: 50 or more shows pure dominance. A batter who was never
    # dismissed has no real average (upstream reports it as 0.0), so runs are
    # used instead — not-out means the batter dominated this bowler.
    if avg is None or outs == 0:
        avg_score = min(1, runs / 50)
    else:
        avg_score = min(1, avg / 50)

    dot_score = dots / balls            # share of dot balls
    out_score = outs / inns             # dismissals per innings (bowler dominance)
    boundary_score = boundaries / balls  # boundaries per ball (batter dominance)

    if sr <= 100:  # bowler has done some great work, must be rewarded
        # NOTE(review): for sr <= 100 sr_score is at most 0.5, so this always
        # yields 0. The original comment spoke of treating SR 90 as 40, which
        # would be a 0.25 penalty — confirm the intended size.
        sr_score = max(sr_score - 0.5, 0)

    # Weighted Score
    batter_score = (
        sr_score * batter_weights['sr'] +
        avg_score * batter_weights['avg'] +
        boundary_score * batter_weights['boundaries']
    )

    bowler_score = (
        dot_score * bowler_weights['dots'] +
        out_score * bowler_weights['outs']
    )

    raw_score = batter_score - bowler_score  # between -1 to 1

    return {
        'Title': title,
        'Stats': stats,
        'advantage_score': round(raw_score, 2)  # Batter-positive, Bowler-negative
    }


In [118]:
# Worked example: score a single faceoff dict shaped like the "stats" entries printed by head_2_head above.
example = {'Title': 'Virat Kohli V/S Jasprit Bumrah', 'Stats': {'Innings': '17', 'Runs': '150', 'Balls': '101', 'Outs': '9', 'Dots': '37', '4s': '15', '6s': '6', 'SR': '148.5', 'Avg': '30.0'}}

result = compute_faceoff_score(example)
print(result)


{'Title': 'Virat Kohli V/S Jasprit Bumrah', 'Stats': {'Innings': '17', 'Runs': '150', 'Balls': '101', 'Outs': '9', 'Dots': '37', '4s': '15', '6s': '6', 'SR': '148.5', 'Avg': '30.0'}, 'advantage_score': 0.14}


In [175]:
# Manual check of compute_score for a bowling all-rounder.
# NOTE(review): compute_score is defined in a later cell of this notebook, so on a
# fresh kernel this cell fails unless that cell has been run first.
print(compute_score({'Batting': {'Matches': 55, 'Innings': 41, 'Runs': 518, 'Balls': 430, 'Outs': 31, '4s': 31, '6s': 18, '50s': 0, '100s': 0, 'SR': 120.46, 'Avg': 16.7}, 'Bowling': {'Matches': 55, 'innings': 50, 'Overs': 144.5, 'Maidens': 1, 'Runs': 1018, 'Wkts': 37, 'Eco': 7.02, 'Avg': 27.51, 'SR': 23.49}} , "bowling allrounder"))

0.5465304862379438


In [None]:
 # to do -> captain, vice-captain, batting position or bowling time, minInn cond for recent_stats, opp, ground
# complete name 
# use a separate tool just for role


In [210]:
# Scrape.do API token, read from the environment (a .env file works because
# load_dotenv() is called in the configuration cell) instead of being hard-coded.
# An unset variable leaves this empty, in which case Scrape.do requests will be rejected.
SCRAPER_TOKEN = os.environ.get("SCRAPE_DO_TOKEN", "")
# Browser-like User-Agent sent with every Scrape.do request.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def get_player_pace_spin_stats(player_name: str, role: str) -> Dict[str, Dict[str, Any]]:
    """
    Fetches a player's T20 career stats from CricMetric (through Scrape.do), grouped by
    opposing player type, and aggregates them into two buckets: pace and spin.

    Args:
        player_name: e.g. "sachin tendulkar"
        role: either "batsman" or "bowler" (forwarded as the page's `role` parameter)

    Returns:
        {
            "pace": {
                "Runs": int, "Balls": int, "Outs": int,
                "4s": int, "6s": int, "50s": int, "100s": int,
                "SR": float,   # runs per 100 balls, 0.0 when Balls is 0
                "Avg": float   # runs per out, 0.0 when Outs is 0
            },
            "spin": { ... same fields ... }
        }
        Rows whose type contains "fast" or "medium" count as pace; rows containing
        "chinaman", "orthodox", "legbreak" or "offbreak" count as spin; other rows
        are ignored.
        Returns {"pace": {}, "spin": {}} when the stats table, its header/body, or
        one of the required columns is missing.

    Raises:
        RuntimeError: if SCRAPER_TOKEN is empty.
        requests.HTTPError: if Scrape.do answers with an error status.
    """
    if not SCRAPER_TOKEN:
        raise RuntimeError("Scrape.do token is empty; cannot fetch CricMetric stats.")

    # urlencode turns spaces into "+" and escapes any other special characters in the name.
    query = urllib.parse.urlencode({
        "player": player_name,
        "role": role,
        "format": "All_T20",
        "groupby": "opp_player_type",
    })
    raw_url = f"https://www.cricmetric.com/playerstats.py?{query}"

    # Wrap with Scrape.do (HTTPS so the token is not sent in clear text)
    quoted = urllib.parse.quote(raw_url, safe="")
    scrape_url = f"https://api.scrape.do/?token={SCRAPER_TOKEN}&url={quoted}&render=true"

    resp = requests.get(scrape_url, headers=HEADERS, timeout=60)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")

    no_data: Dict[str, Dict[str, Any]] = {"pace": {}, "spin": {}}

    # Locate the single <table class="table scoretable">
    table = soup.find("table", class_="table scoretable")
    if not table:
        return no_data
    thead = table.find("thead")
    tbody = table.find("tbody")
    if thead is None or tbody is None:
        return no_data

    # Map column name -> cell index
    header_cells = [th.get_text(strip=True) for th in thead.find_all("th")]
    col_index = {name: idx for idx, name in enumerate(header_cells)}
    required = ("Versus Player Type", "Runs", "Balls", "Outs", "4s", "6s")
    if any(name not in col_index for name in required):
        return no_data

    def parse_int(text: str) -> int:
        """Integer value of a cell, 0 when the cell is not a plain number."""
        try:
            return int(text.replace(",", "").strip())
        except ValueError:
            return 0

    def cell_int(cells: List[str], column: str) -> int:
        """Integer in `column` of this row; 0 when the column or the cell is absent."""
        idx = col_index.get(column)
        if idx is None or idx >= len(cells):
            return 0
        return parse_int(cells[idx])

    def new_accumulator() -> Dict[str, Any]:
        return {"Runs": 0, "Balls": 0, "Outs": 0, "4s": 0, "6s": 0, "50s": 0, "100s": 0}

    pace_acc = new_accumulator()
    spin_acc = new_accumulator()
    type_idx = col_index["Versus Player Type"]
    spin_keywords = ("chinaman", "orthodox", "legbreak", "offbreak")

    # Process each row in <tbody>
    for row in tbody.find_all("tr"):
        cells = [td.get_text(strip=True) for td in row.find_all("td")]
        if type_idx >= len(cells):
            continue  # short row — nothing to classify
        vs_type = cells[type_idx].lower()

        is_pace = "fast" in vs_type or "medium" in vs_type
        is_spin = any(keyword in vs_type for keyword in spin_keywords)
        if not (is_pace or is_spin):
            continue

        # Pace wins when a label matches both groups (same precedence as before).
        acc = pace_acc if is_pace else spin_acc
        acc["Runs"] += cell_int(cells, "Runs")
        acc["Balls"] += cell_int(cells, "Balls")
        acc["Outs"] += cell_int(cells, "Outs")
        acc["4s"] += cell_int(cells, "4s")
        acc["6s"] += cell_int(cells, "6s")
        # The "50" / "100" columns are optional; cell_int yields 0 when they are absent.
        acc["50s"] += cell_int(cells, "50")
        acc["100s"] += cell_int(cells, "100")

    def finalize(acc: Dict[str, Any]) -> Dict[str, Any]:
        """Add the derived SR and Avg to an accumulator's raw totals."""
        total_runs = acc["Runs"]
        total_balls = acc["Balls"]
        total_outs = acc["Outs"]
        sr = (total_runs / total_balls) * 100.0 if total_balls > 0 else 0.0
        avg = (total_runs / total_outs) if total_outs > 0 else 0.0

        return {
            "Runs": total_runs,
            "Balls": total_balls,
            "Outs": total_outs,
            "4s": acc["4s"],
            "6s": acc["6s"],
            "50s": acc["50s"],
            "100s": acc["100s"],
            "SR": round(sr, 2),
            "Avg": round(avg, 2),
        }

    return {
        "pace": finalize(pace_acc),
        "spin": finalize(spin_acc)
    }


In [None]:
def get_complete_player_stats(players_details: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Attach a pace/spin breakdown to every player under the "type stats" key.

    Args:
        players_details: list of player dicts, each with at least "name" and "role".

    Returns:
        The same list; each dict is updated in place with
        "type stats": {"pace": {...}, "spin": {...}}.
        Players whose role is exactly "bowler" (case-insensitive) get empty
        breakdowns; everyone else gets their batting breakdown from
        get_player_pace_spin_stats.
    """
    for player in players_details:
        if player["role"].lower() == "bowler":
            player["type stats"] = {
                "pace": {},
                "spin": {}
            }
        else:
            # get_player_pace_spin_stats documents only "batsman" / "bowler" as valid
            # roles, so all-rounder style labels are looked up as batting stats.
            player["type stats"] = get_player_pace_spin_stats(player["name"], "batsman")

    return players_details



    
    

In [209]:
# Manual check of get_player_pace_spin_stats for one batsman.
stats = get_player_pace_spin_stats("rohit sharma", "batsman")
print(stats["pace"])  # Aggregated stats against pace bowling
print(stats["spin"])  # Aggregated stats against spin bowling


{'Runs': 7751, 'Balls': 5404, 'Outs': 247, '4s': 777, '6s': 367, '50s': 13, '100s': 0, 'SR': 143.43, 'Avg': 31.38}
{'Runs': 3826, 'Balls': 3169, 'Outs': 115, '4s': 269, '6s': 156, '50s': 0, '100s': 0, 'SR': 120.73, 'Avg': 33.27}


In [None]:
from langchain_openai import ChatOpenAI
from langgraph_supervisor import create_supervisor
from langgraph.prebuilt import create_react_agent
from langchain_community.tools.tavily_search.tool import TavilySearchResults
from langchain.tools import tool
from collections import defaultdict
from dotenv import load_dotenv
from typing import List, Dict, Optional, Union, Any, Tuple
from bs4 import BeautifulSoup
from langchain.tools import DuckDuckGoSearchRun


# NOTE(review): repeats the load_dotenv() / LLM setup already done in the configuration
# cell near the top of the notebook; LLM is rebound to a new ChatOpenAI instance here.
load_dotenv()
LLM = ChatOpenAI(model = "gpt-4.1")

def compute_score(stats: Dict[str, Dict[str, Union[int, float]]], role: str) -> float:
    """
    Calculate a performance score for a player in batting, bowling, or all-rounder roles.

    Sub-metrics:
      • Batting (weights 0.35 / 0.45 / 0.20):
          - Strike Rate (SR) divided by a 200 SR benchmark.
          - Batting Average (Avg) divided by a 50 Avg benchmark.
          - Conversion Rate: (50s + 100s) divided by Innings.
      • Bowling (weights 0.30 / 0.30 / 0.40):
          - 12 divided by the bowling Strike Rate (balls per wicket).
          - 12 divided by the bowling Average (runs per wicket).
          - 6.0 divided by the Economy Rate (runs per over).
    A missing field, or a zero denominator (no innings, no wickets, ...), makes
    that sub-metric contribute 0 instead of raising. Sub-scores are not capped
    at 1.

    Args:
        stats: may contain a "Batting" dict (SR, Avg, 50s, 100s, Innings) and/or a
            "Bowling" dict (SR, Avg, Eco); an absent section scores 0.
        role: a role containing "batsman" returns the batting score, one
            containing "bowler" the bowling score; "batting allrounder" returns
            0.7 * batting + 0.3 * bowling; any other role returns
            0.3 * batting + 0.7 * bowling.

    Returns:
        The player's score as a float.
    """
    batting_weight = {
        'bat_sr': 0.35,
        'bat_avg': 0.45,
        'conversion': 0.20,
    }
    bowling_weight = {
        'bowl_sr': 0.30,
        'bowl_avg': 0.30,
        'eco': 0.40,
    }

    def value(details, key):
        """Numeric field from a stats section; missing or None counts as 0."""
        return details.get(key) or 0

    def ratio(numerator, denominator):
        """numerator / denominator, or 0.0 when the denominator is zero."""
        return numerator / denominator if denominator else 0.0

    bat_score = 0.0
    bowl_score = 0.0

    # Batting calculations
    if "Batting" in stats:
        batting_details = stats["Batting"]
        bat_sr_benchmark = 200.0
        bat_avg_benchmark = 50.0

        bat_sr_score = ratio(value(batting_details, "SR"), bat_sr_benchmark)
        bat_avg_score = ratio(value(batting_details, "Avg"), bat_avg_benchmark)
        conversion_score = ratio(
            value(batting_details, "50s") + value(batting_details, "100s"),
            value(batting_details, "Innings"),
        )

        bat_score = (
            batting_weight["bat_sr"] * bat_sr_score +
            batting_weight["bat_avg"] * bat_avg_score +
            batting_weight["conversion"] * conversion_score
        )

    # Bowling calculations (lower raw numbers are better, hence benchmark / value)
    if "Bowling" in stats:
        bowling_details = stats["Bowling"]
        bowl_avg_benchmark = 12.0
        bowl_sr_benchmark = 12.0
        bowl_eco_benchmark = 6.0

        bowl_sr_score = ratio(bowl_sr_benchmark, value(bowling_details, "SR"))
        bowl_avg_score = ratio(bowl_avg_benchmark, value(bowling_details, "Avg"))
        eco_score = ratio(bowl_eco_benchmark, value(bowling_details, "Eco"))

        bowl_score = (
            bowling_weight["bowl_sr"] * bowl_sr_score +
            bowling_weight["bowl_avg"] * bowl_avg_score +
            bowling_weight["eco"] * eco_score
        )

    role_lower = role.lower()
    if "batsman" in role_lower:
        return bat_score
    if "bowler" in role_lower:
        return bowl_score
    # All-rounder
    if role_lower == "batting allrounder":
        return 0.7 * bat_score + 0.3 * bowl_score
    return 0.3 * bat_score + 0.7 * bowl_score


def overall_score(player_stats_dict: Dict[str, Union[str, List[Dict[str, Any]]]]) -> float: # will do for single player
    """
    Blend one player's individual scores into a single overall score.

    Expected keys of `player_stats_dict`:
      - "role": str, forwarded (lower-cased) to compute_score.
      - "stats": list whose first three items each carry a "data" dict for, in
        order, recent form, record vs. the opponent, and record at the venue.
      - "head_2_head_stats": list of dicts with an "advantage_score"; entries
        whose score is missing or None (insufficient data) are ignored.
      - "pitch_stats": numeric pitch score, used as-is.

    Weights: recent 0.30, vs opponent 0.15, at venue 0.15, head-to-head 0.25,
    pitch 0.15. The head-to-head component is the mean of the usable advantage
    scores, or 0.0 when there are none.

    NOTE(review): advantage_score is batter-positive / bowler-negative; it is
    averaged without flipping the sign for bowlers — confirm that is intended.
    """
    role = player_stats_dict["role"].lower()

    weight = {
        "recent": 0.3,
        "vs_opp": 0.15,
        "at_venue": 0.15,
        "head_2_head": 0.25,
        "pitch": 0.15
    }

    stat_entries = player_stats_dict["stats"]
    recent_score = compute_score(stat_entries[0]["data"], role)
    vs_opp_score = compute_score(stat_entries[1]["data"], role)
    at_venue_score = compute_score(stat_entries[2]["data"], role)

    # Skip faceoffs that had too little data to score; an empty list would
    # otherwise divide by zero and a None score would break the sum.
    advantages = [
        entry["advantage_score"]
        for entry in player_stats_dict["head_2_head_stats"]
        if entry.get("advantage_score") is not None
    ]
    head_2_head_score = sum(advantages) / len(advantages) if advantages else 0.0

    pitch_score = player_stats_dict["pitch_stats"]

    return (
        weight["recent"] * recent_score +
        weight["vs_opp"] * vs_opp_score +
        weight["at_venue"] * at_venue_score +
        weight["head_2_head"] * head_2_head_score +
        weight["pitch"] * pitch_score
    )

@tool
def select_players(players_overall_details:
    List[Dict[str, Union[str, List[Dict[str, Any]]]]]) -> Tuple[List[Dict[str, Union[str, List[Dict[str, Any]]]]], List[Dict[str, Any]]]:
    """
    Compute an overall score for every candidate player.

    Args:
        players_overall_details: list of player dicts. Each needs "name" and
            "role" plus the inputs read by overall_score: "stats",
            "head_2_head_stats" and "pitch_stats".

    Returns:
        A pair (players, summary):
          - players: the input list, each dict updated in place with an
            "overall_score" key.
          - summary: one {"name", "role", "overall_score"} dict per player, in
            input order.
    """
    result = []
    for player in players_overall_details:
        score = overall_score(player)
        player["overall_score"] = score
        result.append({
            "name": player["name"],
            "role": player["role"],
            "overall_score": score
        })
    return players_overall_details, result

# Player Selector agent
# The system prompt describes the selection job so it matches the single tool
# registered below (select_players, which scores each candidate player).
player_selector = create_react_agent(
    model = LLM,
    name = "selector",
    tools = [select_players],
    prompt = (
        "You are a cricket player-selection agent. Use the select_players tool to compute an "
        "overall score for every candidate player, then recommend players based on those scores. "
        "Answer the query of the user to the best you can."
    )
)