# Cricket Fantasy Team builder and points prediction.

### Importing necessary packages.

In [1]:
import requests
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

### Fetching data from api.

In [2]:
# Fetch the player pool for one gameday from the API exposed by fantasy.iplt20.com,
# the official IPL fantasy site (managed by My11Circle).
#
# Note:
# The only part of the URL that changes from match to match is the trailing gamedayId.
# It is simply the match number within the tournament (easy to look up online), so to
# build a team for a given day you need the gamedayId of that day's match.
# For IPL 2025: 42 matches had been played as of 24/04/2025 and match 43 (CSK v SRH)
# was on 25/04/2025. The value below (44) is the fixture this notebook was last run for.
# TODO: derive the id from the date automatically; for now it has to be edited by hand.
GAMEDAY_ID = 44

api_url = f"https://fantasy.iplt20.com/daily/api/feed/gamedayplayers?lang=en&gamedayId={GAMEDAY_ID}"

# `data` is bound up front so the following cells can test it even when the request fails;
# without this a non-200 response left the name undefined and the next cell raised NameError.
data = None

# Plain GET; the payload is printed once to inspect its structure before transforming it.
# The timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(api_url, timeout=30)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    print('Error fetching API. Check url or a server error.')

{'Data': {'Value': {'players': [{'id': 4311, 'name': 'Marcus Peter Stoinis', 'short_name': 'Marcus Stoinis', 'team_id': 1107, 'team_name': 'Punjab Kings', 'team_short_name': 'PBKS', 'skill_name': 'ALL-ROUNDER', 'skill_id': 3, 'is_fp': '0', 'is_injured': '0', 'price': 9.0, 'is_active': 1, 'sel_per': 3.46, 'cap_sel_per': 0.03, 'vcap_sel_per': 0.11, 'ov_pts': 111.0, 'gd_pts': 0.0, 'is_announced': 'NP', 'player_desc': 'null', 'profile_url': 'null', 'gamedayId': 44, 'playing_order': 0, 'is_dreamteam': 0}, {'id': 65430, 'name': 'Venkatesh Rajasekaran Iyer', 'short_name': 'Venkatesh Iyer', 'team_id': 1106, 'team_name': 'Kolkata Knight Riders', 'team_short_name': 'KKR', 'skill_name': 'ALL-ROUNDER', 'skill_id': 3, 'is_fp': '0', 'is_injured': '0', 'price': 9.0, 'is_active': 1, 'sel_per': 28.27, 'cap_sel_per': 0.64, 'vcap_sel_per': 0.82, 'ov_pts': 253.0, 'gd_pts': 12.0, 'is_announced': 'P', 'player_desc': 'null', 'profile_url': 'null', 'gamedayId': 44, 'playing_order': 4, 'is_dreamteam': 0}, {'id

### Transforming the response data, saving it to a CSV, and loading that CSV back into the environment.

In [3]:
# Extract the player records from the API payload. They live under
# data['Data']['Value']['players'] (see the printed response above).

if data and 'Data' in data and 'Value' in data['Data'] and 'players' in data['Data']['Value']:
    players = data['Data']['Value']['players']
    df_full = pd.DataFrame(players)

    # Keep only players flagged as active and not injured.
    # `is_active` is assumed to mean "available for this match"; `is_injured` arrives as the string '0'/'1'.
    df_full = df_full[(df_full['is_active'] == 1) & (df_full['is_injured'] == '0')]

    # Select the required columns and give them readable names.
    # An explicit old -> new mapping avoids relying on column position, and `rename`
    # returns a new frame, so the writes below never touch a slice of df_full.
    column_names = {
        'name': 'Player',
        'team_name': 'Team',
        'skill_name': 'Role',
        'price': 'Credits',
        'sel_per': 'Selection_Per',
        'cap_sel_per': 'Captain_Per',
        'vcap_sel_per': 'ViceCaptain_Per',
        'ov_pts': 'Season_Points',
    }
    df_players = df_full[list(column_names)].rename(columns=column_names)

    # Normalise the role labels. 'BATTER' and 'ALL ROUNDER' are included so this cell
    # produces the same labels as fetch_gameday_data does for the historical data;
    # any label not listed here is kept as-is.
    mapper = {
        'WICKET KEEPER': 'Wicketkeeper',
        'BATSMAN': 'Batsman',
        'BATTER': 'Batsman',
        'BOWLER': 'Bowler',
        'ALL-ROUNDER': 'Allrounder',
        'ALL ROUNDER': 'Allrounder'
    }
    df_players['Role'] = df_players['Role'].map(mapper).fillna(df_players['Role'])

    # Persist the snapshot to CSV: it is reloaded further down and doubles as a backup
    # for other uses outside this notebook.
    df_players.to_csv('ipl_fantasy_players.csv', index = False)
    print("Gameday players available to pick for fantasy team")
    print(df_players.head())
else:
    print("No player data found in API response")

Gameday players available to pick for fantasy team
                       Player                   Team        Role  Credits  \
0        Marcus Peter Stoinis           Punjab Kings  Allrounder      9.0   
1  Venkatesh Rajasekaran Iyer  Kolkata Knight Riders  Allrounder      9.0   
2        Andre Dwayne Russell  Kolkata Knight Riders  Allrounder      9.0   
3                Marco Jansen           Punjab Kings  Allrounder      9.0   
4    Varun Chakravarthy Vinod  Kolkata Knight Riders      Bowler      9.0   

   Selection_Per  Captain_Per  ViceCaptain_Per  Season_Points  
0           3.46         0.03             0.11          111.0  
1          28.27         0.64             0.82          253.0  
2          38.41         0.77             1.47          284.0  
3          74.01         1.70             4.25          395.0  
4          74.66         2.02             4.12          377.0  


### Using optimization techniques and linear programming to select the best team.

In [4]:
# Importing necessary libraries
from pulp import LpProblem, LpMaximize, LpVariable, lpSum, LpMinimize

In [5]:
# Reload the player pool from the CSV snapshot written earlier in this notebook.
df = pd.read_csv('ipl_fantasy_players.csv')

In [6]:
# Create the linear-programming problem; the name "FantasyTeam" is only a label for identification.
# LpMaximize is used because the objective (the summed Season_Points of the picked players) should be as large as possible.
# The goal is the team whose players have collected the most points so far while respecting the constraints imposed by the app.
# The constraints are added further below.
prob = LpProblem("FantasyTeam", LpMaximize)

In [7]:
# Decision variables: one binary LpVariable per player, keyed by player name.
# A value of 1 means the player is picked for the team, 0 means left out.
player_vars = {}
for name in df['Player']:
    player_vars[name] = LpVariable(name, cat='Binary')

In [8]:
# Objective function: the total Season_Points of the selected players.
# Adding a bare expression to the problem sets it as the quantity the solver maximizes.
prob += lpSum(
    points * player_vars[name]
    for name, points in zip(df['Player'], df['Season_Points'])
)

### Constraints 

In [9]:
# 1. Squad size: exactly 11 players must be picked.
prob += lpSum(player_vars.values()) == 11

# 2. Budget: the credits of the picked players may not exceed 100.
prob += lpSum(
    cost * player_vars[name] for name, cost in zip(df['Player'], df['Credits'])
) <= 100

# 3. Role limits: 3 to 6 batters, 3 to 6 bowlers, 1 to 4 all-rounders and 1 to 4 wicketkeepers.
role_limits = {'Batsman': (3, 6), 'Bowler': (3, 6), 'Allrounder': (1, 4), 'Wicketkeeper': (1, 4)}
for role, (min_count, max_count) in role_limits.items():
    role_players = df.loc[df['Role'] == role, 'Player']
    prob += lpSum(player_vars[name] for name in role_players) >= min_count
    prob += lpSum(player_vars[name] for name in role_players) <= max_count

# 4. Team limit: at most 7 of the 11 players may come from either of the two sides.
for team in df['Team'].unique():
    team_players = df.loc[df['Team'] == team, 'Player']
    prob += lpSum(player_vars[name] for name in team_players) <= 7

### Solver

In [10]:
# Solve the model (no solver is passed, so PuLP picks its default one).
# The returned status code is inspected in the next cell before the team is read out.
status = prob.solve()

### Getting the team in an organized manner with Captain and Vice Captain options.

In [11]:
# Read the solution back into a DataFrame and report the team.
# Status 1 is PuLP's "Optimal"; anything else means no usable solution is available.
if status != 1:
    print("No optimal solution found.")
else:
    team = []
    total_credits = 0
    for p, var in player_vars.items():
        # The solver reports binaries as floats, so a picked player may come back as
        # 0.9999999 rather than exactly 1. Thresholding at 0.5 avoids silently dropping
        # such a player (an exact `== 1` comparison would).
        if var.varValue is not None and var.varValue > 0.5:
            player_data = df[df['Player'] == p].iloc[0]
            team.append({
                'Player': p,
                'Team': player_data['Team'],
                'Role': player_data['Role'],
                'Credits': player_data['Credits'],
                'Season_Points': player_data['Season_Points'],
                'Captain_Per': player_data['Captain_Per'],
                'ViceCaptain_Per': player_data['ViceCaptain_Per']
            })
            total_credits += player_data['Credits']
    df_team = pd.DataFrame(team)

    # Captain and vice-captain are the two selected players with the most Season_Points.
    if not df_team.empty:
        df_team = df_team.sort_values(by = 'Season_Points', ascending = False)
        captain = df_team.iloc[0]['Player']
        vice_captain = df_team.iloc[1]['Player']

        # Printing the selected team.
        print("\nSelected Fantasy Team:")
        print(df_team[['Player', 'Team', 'Role', 'Credits', 'Season_Points', 'Captain_Per', 'ViceCaptain_Per']])
        print(f"\nTotal Credits Used: {total_credits}")
        print(f"Captain: {captain}")
        print(f"Vice-Captain: {vice_captain}")
    else:
        print("No players selected. Check optimization constraints.")


Selected Fantasy Team:
                      Player                   Team          Role  Credits  \
10             Priyansh Arya           Punjab Kings       Batsman      7.0   
4        Sunil Philip Narine  Kolkata Knight Riders    Allrounder      9.0   
8          Prabhsimran Singh           Punjab Kings  Wicketkeeper      8.0   
3       Shreyas Santosh Iyer           Punjab Kings       Batsman      9.0   
9    Ajinkya Madhukar Rahane  Kolkata Knight Riders       Batsman      8.0   
0               Marco Jansen           Punjab Kings    Allrounder      9.0   
7       Harshit Pradeep Rana  Kolkata Knight Riders        Bowler      8.0   
1   Varun Chakravarthy Vinod  Kolkata Knight Riders        Bowler      9.0   
5        Vaibhav Gopal Arora  Kolkata Knight Riders        Bowler      8.0   
2             Arshdeep Singh           Punjab Kings        Bowler      9.0   
6              Nehal Wadhera           Punjab Kings       Batsman      8.0   

    Season_Points  Captain_Per  ViceCap

# The optimizer above builds a team with Season_Points as the main objective.
# I also wanted to offer a riskier team.
# The logic behind a risky team is simple:
# its players are the ones selected less often by other users,
# so the most weight goes to a low selection percentage,
# with Season_Points as the secondary criterion.
# That is what I have implemented below.

In [12]:
# Create the "risky team" model. It is a minimisation problem: the objective is a cost
# that grows with a player's popularity and shrinks with the points scored so far.
prob = LpProblem("RiskyFantasyTeam", LpMinimize)

# One binary decision variable per player, keyed by player name.
player_vars = {row['Player']: LpVariable(row['Player'], cat = 'Binary') for _, row in df.iterrows()}

# Season_Points is rescaled by the best score in the pool so that it is on the same
# 0-100 scale as Selection_Per before the two are combined.
max_points = df['Season_Points'].max()
if not max_points > 0:
    # No positive score in the pool (e.g. start of the season) or a NaN maximum:
    # dividing by it would put NaN/inf coefficients into the objective, so fall back to 1.
    max_points = 1.0

selection_weight = 0.7  # Primary weight for Selection_Per. A 70% weightage to it.
points_weight = 0.3    # Secondary weight for Season_Points. A 30% weightage to it.

# Per-player cost = weighted selection percentage minus weighted, rescaled season points.
# Since the model minimises, popular players are penalised and high scorers are rewarded,
# so rarely-picked players with decent points get preference.
prob += lpSum((selection_weight * row['Selection_Per'] - points_weight * (row['Season_Points'] / max_points) * 100) *
              player_vars[row['Player']] for _, row in df.iterrows())

### Constraints

In [13]:
# 1. Squad size: exactly 11 players must be picked.
prob += lpSum(player_vars.values()) == 11

# 2. Budget: the credits of the picked players may not exceed 100.
prob += lpSum(
    cost * player_vars[name] for name, cost in zip(df['Player'], df['Credits'])
) <= 100

# 3. Role limits: 3 to 6 batters, 3 to 6 bowlers, 1 to 4 all-rounders and 1 to 4 wicketkeepers.
role_limits = {'Batsman': (3, 6), 'Bowler': (3, 6), 'Allrounder': (1, 4), 'Wicketkeeper': (1, 4)}
for role, (min_count, max_count) in role_limits.items():
    role_players = df.loc[df['Role'] == role, 'Player']
    prob += lpSum(player_vars[name] for name in role_players) >= min_count
    prob += lpSum(player_vars[name] for name in role_players) <= max_count

# 4. Team limit: at most 7 of the 11 players may come from either of the two sides.
for team in df['Team'].unique():
    team_players = df.loc[df['Team'] == team, 'Player']
    prob += lpSum(player_vars[name] for name in team_players) <= 7

### Solver

In [14]:
# Solve the risky-team model (no solver is passed, so PuLP picks its default one).
# The returned status code is inspected in the next cell before the team is read out.
status = prob.solve()

### Getting the team in an organized manner with Captain and Vice Captain options.

In [15]:
# Read the risky-team solution back into a DataFrame and report it.
# Status 1 is PuLP's "Optimal"; anything else means no usable solution is available.
if status != 1:
    print("No optimal solution found.")
else:
    team = []
    total_credits = 0
    for p, var in player_vars.items():
        # The solver reports binaries as floats, so a picked player may come back as
        # 0.9999999 rather than exactly 1. Thresholding at 0.5 avoids silently dropping
        # such a player (an exact `== 1` comparison would).
        if var.varValue is not None and var.varValue > 0.5:
            player_data = df[df['Player'] == p].iloc[0]
            team.append({
                'Player': p,
                'Team': player_data['Team'],
                'Role': player_data['Role'],
                'Credits': player_data['Credits'],
                'Season_Points': player_data['Season_Points'],
                'Captain_Per': player_data['Captain_Per'],
                'ViceCaptain_Per': player_data['ViceCaptain_Per']
            })
            total_credits += player_data['Credits']
    df_team = pd.DataFrame(team)

    # Captain and vice-captain are the two selected players with the most Season_Points.
    if not df_team.empty:
        df_team = df_team.sort_values(by = 'Season_Points', ascending = False)
        captain = df_team.iloc[0]['Player']
        vice_captain = df_team.iloc[1]['Player']

        # Printing the selected team.
        print("\nSelected Risky Fantasy Team:")
        print(df_team[['Player', 'Team', 'Role', 'Credits', 'Season_Points', 'Captain_Per', 'ViceCaptain_Per']])
        print(f"\nTotal Credits Used: {total_credits}")
        print(f"Captain: {captain}")
        print(f"Vice-Captain: {vice_captain}")
    else:
        print("No players selected. Check optimization constraints.")


Selected Risky Fantasy Team:
                      Player                   Team          Role  Credits  \
6        Vaibhav Gopal Arora  Kolkata Knight Riders        Bowler      8.0   
5             Shashank Singh           Punjab Kings       Batsman      8.0   
7            Quinton de Kock  Kolkata Knight Riders  Wicketkeeper      8.0   
4      Rinku Khanchand Singh  Kolkata Knight Riders       Batsman      8.0   
10  Lachlan Hammond Ferguson           Punjab Kings        Bowler      7.0   
3            Moeen Munir Ali  Kolkata Knight Riders    Allrounder      8.0   
9            Ramandeep Singh  Kolkata Knight Riders       Batsman      7.0   
0       Marcus Peter Stoinis           Punjab Kings    Allrounder      9.0   
1      Xavier Colin Bartlett           Punjab Kings        Bowler      8.0   
2              Harpreet Brar           Punjab Kings        Bowler      8.0   
8      Spencer Henry Johnson  Kolkata Knight Riders        Bowler      7.0   

    Season_Points  Captain_Per  V

# Points Prediction using Regression model

###  - To build a regression model that predicts points, data from previous gamedays is required.
###  - Below, the data-extraction code used above is wrapped in a function.
###  - That function is then called for each previous gameday and the results are appended for train-test purposes.

In [16]:
from datetime import datetime, timedelta

In [17]:
# URL template for the gameday players feed; the {} placeholder is filled with a gameday id via str.format.
base_url = "https://fantasy.iplt20.com/daily/api/feed/gamedayplayers?lang=en&gamedayId={}"

In [18]:
# Function to fetch data for a single gameday.
def fetch_gameday_data(gameday_id):
    """Fetch and tidy the player list of one gameday.

    Parameters
    ----------
    gameday_id : int
        Value substituted into ``base_url`` as the ``gamedayId`` query parameter.

    Returns
    -------
    pandas.DataFrame
        One row per active, non-injured player with the columns ``Player``, ``Team``,
        ``Role``, ``Credits``, ``Selection_Per``, ``Captain_Per``, ``ViceCaptain_Per``,
        ``GameDay_Points`` and ``Gameday_ID``. An empty DataFrame is returned (and a
        message printed) when the request fails, the status is not 200, or the payload
        contains no players, so callers can always test ``.empty``.
    """
    api_url = base_url.format(gameday_id)
    try:
        # Timeout so one unresponsive request cannot stall a loop over many gamedays.
        response = requests.get(api_url, timeout=30)
    except requests.RequestException as exc:
        print(f"Error fetching API for gamedayId={gameday_id}: {exc}")
        return pd.DataFrame()

    if response.status_code != 200:
        print("Error fetching API. Check url or a server error.")
        # Previously this branch fell off the end and returned None.
        return pd.DataFrame()

    data = response.json()
    has_players = 'Data' in data and 'Value' in data['Data'] and 'players' in data['Data']['Value']
    players = data['Data']['Value']['players'] if has_players else None
    if not players:
        # Covers both a missing key and an empty list (which would have no columns to filter on).
        print(f"No player data for gamedayId={gameday_id}")
        return pd.DataFrame()

    df_full = pd.DataFrame(players)
    df_full = df_full[(df_full['is_active'] == 1) & (df_full['is_injured'] == '0')]

    # Select and rename in one step; `rename` returns a new frame, so the column
    # assignments below never write to a slice of df_full.
    column_names = {
        'name': 'Player',
        'team_name': 'Team',
        'skill_name': 'Role',
        'price': 'Credits',
        'sel_per': 'Selection_Per',
        'cap_sel_per': 'Captain_Per',
        'vcap_sel_per': 'ViceCaptain_Per',
        'gd_pts': 'GameDay_Points',
    }
    df_players = df_full[list(column_names)].rename(columns=column_names)
    df_players['Gameday_ID'] = gameday_id

    # Normalise the role spellings; labels not listed here are kept unchanged.
    role_mapping = {
        'WICKET KEEPER': 'Wicketkeeper',
        'BATSMAN': 'Batsman',
        'BOWLER': 'Bowler',
        'ALL-ROUNDER': 'Allrounder',
        'BATTER': 'Batsman',
        'ALL ROUNDER': 'Allrounder'
    }
    df_players['Role'] = df_players['Role'].map(role_mapping).fillna(df_players['Role'])
    return df_players

# Collect one DataFrame per past gameday; each frame holds the players of that match.
# Adjust LAST_COMPLETED_GAMEDAY to the number of matches already played.
LAST_COMPLETED_GAMEDAY = 41

historical_data = []
for gameday_id in range(1, LAST_COMPLETED_GAMEDAY + 1):
    df_players = fetch_gameday_data(gameday_id)
    # Skip gamedays for which nothing usable came back (None or an empty frame);
    # calling `.empty` on None would abort the whole collection with AttributeError.
    if df_players is not None and not df_players.empty:
        historical_data.append(df_players)

In [19]:
# Stack the per-gameday frames into one table and save it as a CSV, which is
# read back later as the training data.
if historical_data:
    df_historical = pd.concat(historical_data, ignore_index = True)
    df_historical.to_csv('ipl_historical_players.csv', index = False)
    print("Historical data saved to ipl_historical_players.csv")
    print(df_historical.head())
else:
    # Nothing is simulated anywhere in this notebook, so say what actually happened:
    # no file was written by this run.
    print("No historical data collected; ipl_historical_players.csv was not written.")

Historical data saved to ipl_historical_players.csv
                       Player                         Team          Role  \
0    Liam Stephen Livingstone  Royal Challengers Bengaluru    Allrounder   
1  Venkatesh Rajasekaran Iyer        Kolkata Knight Riders    Allrounder   
2        Andre Dwayne Russell        Kolkata Knight Riders    Allrounder   
3       Rajat Manohar Patidar  Royal Challengers Bengaluru       Batsman   
4            Philip Dean Salt  Royal Challengers Bengaluru  Wicketkeeper   

   Credits  Selection_Per  Captain_Per  ViceCaptain_Per  GameDay_Points  \
0      9.0          58.52         1.28             2.81            23.0   
1      9.0          67.66         3.18             4.73            11.0   
2      9.0          75.70         4.93             8.28             9.0   
3      9.0          80.05         6.21             5.81            55.0   
4      9.0          86.47        10.20             8.20            87.0   

   Gameday_ID  
0           1  
1       

In [20]:
# Importing necessary libraries.
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Training-Testing and model building

In [21]:
# Load the current gameday's player pool (the CSV saved earlier in this notebook) into df_current.
# These are the rows the model will predict points for.
df_current = pd.read_csv('ipl_fantasy_players.csv')

In [22]:
# Load the data of the previous gamedays (the CSV saved earlier in this notebook) into df_previous.
# This is the data the model is trained and tested on.
df_previous = pd.read_csv('ipl_historical_players.csv')

### Feature selection

In [23]:
# Defining features and target variable.
# The three percentage columns are numeric; 'Role' is categorical and is one-hot encoded further below.
# The target is the points a player scored on that gameday.
features = ['Selection_Per', 'Captain_Per', 'ViceCaptain_Per', 'Role']
target = 'GameDay_Points'

### Defining X and y for previous data and current data

In [24]:
# Target vector y and feature matrix X from the historical data.
# Role is one-hot encoded with its first level dropped to avoid a redundant column.
y = df_previous[target]
X = pd.get_dummies(df_previous[features], columns=['Role'], prefix='Role', drop_first=True)

In [25]:
# Build the feature matrix for the current gameday with exactly the columns the model is trained on.
# Encoding Role independently with drop_first=True can produce different columns than X
# (a role missing from today's pool, or a different first level being dropped), which makes
# model.predict fail or misalign. So: encode every role, then align to X's columns.
# Roles absent today become all-zero columns; roles unseen in training are dropped.
X_current = pd.get_dummies(df_current[features], columns=['Role'], prefix='Role')
X_current = X_current.reindex(columns=X.columns, fill_value=0)

### Model building

In [26]:
# Hold out 20% of the historical rows for testing (fixed seed for reproducibility),
# then fit an ordinary least-squares regression on the remaining 80%.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)

### Checking R² values.

In [27]:
# R² of the fitted model on the training split and on the held-out test split, rounded to two decimals.
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
for label, score in (("Training R²:", train_score), ("Testing R²:", test_score)):
    print(label, round(score, 2))

Training R²: 0.27
Testing R²: 0.29


### Predictions

In [28]:
# Predict gameday points for the current player pool and store them in predicted_points.
# X_current must have the same feature columns the model was fitted on.
predicted_points = model.predict(X_current)

In [29]:
# Attach the predictions to the current player table as a new Predicted_Points column.
df_current['Predicted_Points'] = predicted_points

In [30]:
df_current

Unnamed: 0,Player,Team,Role,Credits,Selection_Per,Captain_Per,ViceCaptain_Per,Season_Points,Predicted_Points
0,Marcus Peter Stoinis,Punjab Kings,Allrounder,9.0,3.46,0.03,0.11,111.0,8.59911
1,Venkatesh Rajasekaran Iyer,Kolkata Knight Riders,Allrounder,9.0,28.27,0.64,0.82,253.0,22.053173
2,Andre Dwayne Russell,Kolkata Knight Riders,Allrounder,9.0,38.41,0.77,1.47,284.0,27.538511
3,Marco Jansen,Punjab Kings,Allrounder,9.0,74.01,1.7,4.25,395.0,46.58654
4,Varun Chakravarthy Vinod,Kolkata Knight Riders,Bowler,9.0,74.66,2.02,4.12,377.0,46.810209
5,Arshdeep Singh,Punjab Kings,Bowler,9.0,78.23,0.96,2.49,353.0,49.327342
6,Shreyas Santosh Iyer,Punjab Kings,Batsman,9.0,93.06,36.19,15.5,442.0,48.853128
7,Sunil Philip Narine,Kolkata Knight Riders,Allrounder,9.0,95.27,23.16,23.38,467.0,49.302096
8,Yuzvendra Singh Chahal,Punjab Kings,Bowler,8.5,54.71,1.55,4.36,287.0,35.874591
9,Pravin Dubey,Punjab Kings,Bowler,8.0,0.02,0.0,0.0,0.0,6.660918


# We can definitely add more data to train the model.
# This will help build a better model. We can also try training different models.