In [8]:

import requests
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
from sklearn.linear_model import LinearRegression


In [9]:
# Parameters
try:
    parameters
except NameError:
    parameters = {}

In [10]:


raw_players = parameters.get("user_players", [])

if raw_players and isinstance(raw_players[0], dict):
    user_players = [(p["name"], p["role"], p["id"]) for p in raw_players]
else:
    user_players = [
        ("Virat Kohli", "BAT", "253802"),
        ("Rohit Sharma", "BAT", "34102"),
        ("Ravindra Jadeja", "ALL", "234675")
    ]

print("🟢 Players passed to notebook:")
for name, role, pid in user_players:
    print(f"- {name} ({role}) ➝ ID: {pid}")

# Convert list of dicts to list of tuples if needed


🟢 Players passed to notebook:
- Virat Kohli (BAT) ➝ ID: 253802
- Rohit Sharma (BAT) ➝ ID: 34102
- Ravindra Jadeja (ALL) ➝ ID: 234675


In [11]:
if isinstance(user_players, list) and user_players and isinstance(user_players[0], dict):
    user_players = [(p["name"], p["role"], p["id"]) for p in user_players]

# Initialize empty player_stats dictionary
player_stats = {}


In [12]:

def fetch_player_stats(player_name, player_type, pid, stats_dict):
    player_id = pid
    if not player_id:
        print(f"Could not find ESPN ID for {player_name}")
        return
    headers = {"User-Agent": "Mozilla/5.0"}
    if player_name not in stats_dict:
        stats_dict[player_name] = {"Batting": None, "Bowling": None, "Fielding": None, "player_type": player_type}

    def scrape_data(url, stat_type):
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, "html.parser")
        tables = soup.find_all("table", class_="engineTable")
        if len(tables) >= 4:
            data =  pd.read_html(str(tables[3]))[0]
            return data.head(16).iloc[::-1]
        return None

    if player_type in ["BAT", "ALL", "WK", "BOWL"]:
        batting_url = f"https://stats.espncricinfo.com/ci/engine/player/{player_id}.html?class=6;host=6;orderby=start;orderbyad=reverse;template=results;type=batting;view=innings"
        batting_data = scrape_data(batting_url, "Batting")
        # columns_for_batting = ["4s", "6s", "SR", "Runs","BF"] 
        
        if batting_data is not None:
            stats_dict[player_name]["Batting"] = batting_data

    if player_type in ["BOWL", "ALL"]:
        bowling_url = f"https://stats.espncricinfo.com/ci/engine/player/{player_id}.html?class=6;host=6;orderby=start;orderbyad=reverse;template=results;type=bowling;view=innings"
        bowling_data = scrape_data(bowling_url, "Bowling")
        
        if bowling_data is not None:
            stats_dict[player_name]["Bowling"] = bowling_data

    if player_type in ["BAT", "ALL", "WK", "BOWL"]:
        fielding_url = f"https://stats.espncricinfo.com/ci/engine/player/{player_id}.html?class=6;host=6;orderby=start;orderbyad=reverse;template=results;type=fielding;view=innings"
        fielding_data = scrape_data(fielding_url, "Fielding")
        
        if fielding_data is not None:
            stats_dict[player_name]["Fielding"] = fielding_data


In [13]:
for name, role, pid in user_players:
    fetch_player_stats(name, role, pid, player_stats)

  data =  pd.read_html(str(tables[3]))[0]
  data =  pd.read_html(str(tables[3]))[0]
  data =  pd.read_html(str(tables[3]))[0]
  data =  pd.read_html(str(tables[3]))[0]
  data =  pd.read_html(str(tables[3]))[0]
  data =  pd.read_html(str(tables[3]))[0]
  data =  pd.read_html(str(tables[3]))[0]


In [32]:
# Creating lag 1 series

def create_lagged_features(scores, lag=1):
    X, y = [], []
    for i in range(len(scores) - lag):
        X.append(scores[i:i+lag])
        y.append(scores[i+lag])
    return np.array(X).reshape(-1,1), np.array(y)

# Applying linear regression on the lag 1 series

def train_and_predict(scores):
    if len(scores) < 3:
        return "N/A"  # Return N/A if not enough data

    X, y = create_lagged_features(scores)

    model = LinearRegression()
    model.fit(X, y)
    
    last_5_scores = np.array(np.mean(scores[-5:])).reshape(1, -1)
    next_score = model.predict(last_5_scores)
    
    # Making sure that negative values or not predicted
    if next_score[0] < 0:
        return 0
    return round(next_score[0], 2)  # Rounding off to 2 decimals

columns_to_predict = ["4s", "6s", "SR", "Runs", "Wkts", "Econ", "Ct","St","BF"]

# Initiating a dictionary to store predictions for all players
all_predictions = {}

# Loop through each player in player_stats

for player_name, stats in player_stats.items():
    # Initializing predictions dictionary for this player
    
    player_type = stats.get("player_type", "UNKNOWN")  # Edge case if no player type given then we take it as unknown
    predictions = {col: "N/A" for col in columns_to_predict}  # filling n/a as default values for every column

    # Checking if batting data exists
    if "Batting" in stats and stats["Batting"] is not None:
        df_batting = stats["Batting"]  # DataFrame for batting stats

        for col in ["4s", "6s", "SR", "Runs","BF"]:
            if col in df_batting.columns:
                df_batting[col] = df_batting[col].astype(str).str.replace("*", "", regex=False)  # Removing * from score as if player is not out then the score contains *
                df_batting[col] = df_batting[col].replace(["-", "DNB", "TDNB", "sub"], np.nan)  # Replacing invalid values
    
                if player_type == "BOWL":
                    df_batting[col] = df_batting[col].fillna(0) # Filling na values in batting stats with 0 for bowlers
                    
                 # If player is not blower then we are filling na values in batting stats with mean
                else:
                    df_batting[col] = pd.to_numeric(df_batting[col], errors='coerce')
        
                    mean_value = df_batting[col].mean()
                    
                    if np.isnan(mean_value): # Just to handle corner cases
                        mean_value = 0
                    
                    df_batting[col] = df_batting[col].fillna(mean_value)  # Filling na values with mean
    
                scores = df_batting[col].dropna().astype(float).values
                # Storing all scores as avg of 3 matches so that scores wont deviate more, This is the main idea of our model
                window_size = 3
                scores = np.convolve(scores, np.ones(window_size)/window_size, mode='valid')
                predictions[col] = train_and_predict(scores) # Finding the predicted batting stats 

    # Checking if bowling data exists
    if "Bowling" in stats and stats["Bowling"] is not None:
        df_bowling = stats["Bowling"] # Dataframe for bowling

        for col in ["Wkts", "Econ"]:
            if col in df_bowling.columns:
                df_bowling[col] = df_bowling[col].replace(["TDNB"], np.nan)
                df_bowling[col] = pd.to_numeric(df_bowling[col], errors='coerce')
                mean_value = df_bowling[col].mean()
                    
                if np.isnan(mean_value): # Just to handle corner cases
                    mean_value = 0
                    
                df_bowling[col] = df_bowling[col].fillna(mean_value)

                df_bowling[col] = df_bowling[col].replace(["-","DNB", "sub","absent"], np.nan)  # Replace invalid values
                df_bowling[col] = df_bowling[col].astype(float)  # Convert to float

                df_bowling[col] = df_bowling[col].fillna(0)  # Fill with mean
                
                scores = df_bowling[col].dropna().astype(float).values
                # Storing all scores as avg of 3 matches so that scores wont deviate more, This is the main idea of our model
                window_size = 3
                scores = np.convolve(scores, np.ones(window_size)/window_size, mode='valid')
                predictions[col] = train_and_predict(scores) # Finding the predicted bowling stats

    # Checking if fielding data exists
    if "Fielding" in stats and stats["Fielding"] is not None:
        df_fielding = stats["Fielding"]  # DataFrame for fielding


        for col in ["Ct","St"]:
            if col in df_fielding.columns:
                df_fielding[col] = df_fielding[col].replace(["-", "DNB", "TDNB", "sub","absent"], np.nan)  # Replace invalid values
                df_fielding[col] = df_fielding[col].astype(float)

                df_fielding[col] = df_fielding[col].fillna(0)  # Filling na values with 0
                scores = df_fielding[col].dropna().values
                # Storing all scores as avg of 3 matches so that scores wont deviate more, This is the main idea of our model
                window_size = 3
                scores = np.convolve(scores, np.ones(window_size)/window_size, mode='valid')
                predictions[col] = train_and_predict(scores) # Finding the predicted fielding stats

    all_predictions[player_name] = predictions

# Converting the dictionary to DataFrame
predicted_df = pd.DataFrame.from_dict(all_predictions, orient="index")

# Printing the final DataFrame
print(predicted_df)

# Function to calculate dream11 score

def calculate_dream11_score(df):
    scores = {}

    for player, stats in df.iterrows():
        score = 0

        # Convert columns to numeric values safely
        stats["4s"] = pd.to_numeric(stats["4s"], errors='coerce')
        stats["6s"] = pd.to_numeric(stats["6s"], errors='coerce')
        stats["Runs"] = pd.to_numeric(stats["Runs"], errors='coerce')
        stats["Wkts"] = pd.to_numeric(stats["Wkts"], errors='coerce')
        stats["Econ"] = pd.to_numeric(stats["Econ"], errors='coerce')
        stats["SR"] = pd.to_numeric(stats["SR"], errors='coerce')
        stats["Ct"] = pd.to_numeric(stats["Ct"], errors='coerce')
        stats["St"] = pd.to_numeric(stats["St"], errors='coerce')
        stats["BF"] = pd.to_numeric(stats["BF"], errors='coerce')

        # Score based on runs
        if not pd.isna(stats["Runs"]):
            score += stats["Runs"]  # Runs add directly
            if stats["Runs"] >= 25: score += 4
            if stats["Runs"] >= 50: score += 4
            if stats["Runs"] >= 75: score += 4
            if stats["Runs"] >= 100: score += 4
            if stats["Runs"] == 0: score -= 2
        # Score for 4's
        if not pd.isna(stats["4s"]):
            score += stats["4s"] * 4

        # Score for 6's
        if not pd.isna(stats["6s"]):
            score += stats["6s"] * 6

        # Score for wicket
        if not pd.isna(stats["Wkts"]):
            score += stats["Wkts"] * 31

        # Score for Economy Rate
        if not pd.isna(stats["Econ"]):
            if stats["Econ"] < 5: score += 6
            elif 5 <= stats["Econ"] < 6: score += 4
            elif 6 <= stats["Econ"] < 7: score += 2
            elif 10 <= stats["Econ"] < 11: score -= 2
            elif 11 <= stats["Econ"] < 12: score -= 4
            elif stats["Econ"] >= 12: score -= 6

        # Score for strike Rate
        if not pd.isna(stats["SR"]):
            if stats["SR"] > 170 and stats["BF"] >= 10: score += 6
            elif 150 < stats["SR"] and stats["BF"] >= 10 <= 170: score += 4
            elif 130 < stats["SR"] and stats["BF"] >= 10 <= 150: score += 2
            elif 60 < stats["SR"] and stats["BF"] >= 10 <= 70: score -= 2
            elif 50 < stats["SR"] and stats["BF"] >= 10 <= 60: score -= 4
            elif stats["SR"] < 50 and stats["BF"] >= 10: score -= 6
        # Score for catches
        if not pd.isna(stats["Ct"]):
            score += stats["Ct"] * 8
        # Score for stumps
        if not pd.isna(stats["St"]):
            score += stats["St"] * 12
            
        scores[player] = score

    return pd.DataFrame.from_dict(scores, orient='index', columns=['Dream11 Score'])

# Finding Dream11 scores for the predicted stats
dream11_scores = calculate_dream11_score(predicted_df)
print(dream11_scores)
# Creating main_df from player_details
main_data = []

for name, stats in player_stats.items():
    main_data.append({
        "Player": name,
        "Team": stats.get("team", "Unknown"),
        "PlayerType": stats.get("player_type", "Unknown")
    })

main_df = pd.DataFrame(main_data)


# Normalize names to avoid merge issues
main_df["Player"] = main_df["Player"].str.strip()
dream11_scores.index = dream11_scores.index.str.strip()
predicted_df.index = predicted_df.index.str.strip()

# Merging DataFrames
df = main_df.merge(dream11_scores, left_on="Player", right_index=True, how="inner")
df = df.merge(predicted_df, left_on="Player", right_index=True, how="inner")


# Sorting by Dream11 Score (Highest First)
df = df.sort_values(by="Dream11 Score", ascending=False)

# Initializing the team
final_team = []
selected_roles = {"BAT": 0, "BOWL": 0, "WK": 0, "ALL": 0}
selected_teams = set()
selected_players = set()

# Ensure one player per role
for role in ["BAT", "BOWL", "WK", "ALL"]:
    filtered = df[df["PlayerType"] == role]
    if not filtered.empty:
        player = filtered.iloc[0]
        if player["Player"] not in selected_players:
            final_team.append(player)
            selected_roles[role] += 1
            selected_teams.add(player["Team"])
            selected_players.add(player["Player"])
    else:
        print(f"⚠️ No player found for role {role}. Skipping.")

# Ensure one player per team
for team in df["Team"].unique():
    if team not in selected_teams:
        filtered = df[df["Team"] == team]
        for _, player in filtered.iterrows():
            if player["Player"] not in selected_players:
                final_team.append(player)
                role = player.get("PlayerType", "Unknown")
                if role not in selected_roles:
                    selected_roles[role] = 0
                selected_roles[role] += 1
                selected_teams.add(team)
                selected_players.add(player["Player"])
                break

# Fill remaining players based on score
remaining_players = df[~df["Player"].isin(selected_players)]
for _, player in remaining_players.iterrows():
    if len(final_team) >= 11:
        break
    if player["Player"] not in selected_players:
        final_team.append(player)
        role = player.get("PlayerType", "Unknown")
        if role not in selected_roles:
            selected_roles[role] = 0
        selected_roles[role] += 1
        selected_teams.add(player["Team"])
        selected_players.add(player["Player"])

# Convert to dataframe
final_team_df = pd.DataFrame(final_team)

# Assign C/VC
final_team_df["C/VC"] = "NA"
top2 = final_team_df.nlargest(2, "Dream11 Score")
if not top2.empty:
    final_team_df.loc[top2.index[0], "C/VC"] = "C"
if len(top2) > 1:
    final_team_df.loc[top2.index[1], "C/VC"] = "VC"

# Order and print
final_team_df["C/VC"] = pd.Categorical(final_team_df["C/VC"], categories=["C", "VC", "NA"], ordered=True)
final_team_df = final_team_df.sort_values(by="C/VC")
final_team_df = final_team_df[["Player", "Team", "C/VC"]]

print(final_team_df)
print("Best 11:\n", final_team_df.to_string(index=False))


                   4s    6s      SR   Runs  Wkts   Econ    Ct   St     BF
Virat Kohli      5.51  1.25  147.43  45.58   N/A    N/A  0.19  0.0  30.23
Rohit Sharma     2.98  1.11  135.03  28.10   N/A    N/A  0.11  0.0  19.79
Ravindra Jadeja  2.90  0.85  128.70  27.71  0.75  10.84  0.49  0.0  18.47
                 Dream11 Score
Virat Kohli              82.64
Rohit Sharma             53.56
Ravindra Jadeja          71.58
⚠️ No player found for role BOWL. Skipping.
⚠️ No player found for role WK. Skipping.
            Player     Team C/VC
0      Virat Kohli  Unknown    C
2  Ravindra Jadeja  Unknown   VC
1     Rohit Sharma  Unknown   NA
Best 11:
          Player    Team C/VC
    Virat Kohli Unknown    C
Ravindra Jadeja Unknown   VC
   Rohit Sharma Unknown   NA
