In [52]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier


# Calculating batsmen features

In [53]:
import pandas as pd
import numpy as numpy
from sklearn.model_selection import train_test_split

In [54]:
batsmen=pd.read_csv('dataset/batsman_level_scorecard.csv')

In [55]:
batsmen.head()

Unnamed: 0,match id,batsman,batsman_id,batsman_details,is_batsman_captain,is_batsman_keeper,inning,runs,balls_faced,over_faced_first,wicket kind,out_by_bowler,out_by_fielder,bowler_id,bowler_details,is_bowler_keeper,is_bowler_captain,strike_rate,Fours,Sixes,match_dt
0,8638034,KD Ce,7907451.0,NZ:Right-hand bat:Right-arm medium-fast:,0.0,0.0,1,7,5,1.1,caught,JS Nm,TM Jn,2486896.0,NZ:Left-hand bat:Right-arm medium-fast:,0.0,0.0,140.0,1.0,,2021-01-01
1,8638034,TL St,4381761.0,NZ:Right-hand bat:None:,0.0,1.0,1,46,46,1.2,caught,R Ra,MG Bl,6718382.0,NZ:Left-hand bat:Slow left-arm orthodox:,0.0,0.0,100.0,4.0,1.0,2021-01-01
2,8638034,HR Cr,4949790.0,NZ:Right-hand bat:Right-arm offbreak:,0.0,0.0,1,9,10,12.1,caught,PF Yd,R Ra,4950294.0,NZ:Right-hand bat:Legbreak googly:,0.0,0.0,90.0,,,2021-01-01
3,8638034,BR Hn,3834305.0,NZ:Right-hand bat:Right-arm medium-fast:,0.0,0.0,1,28,22,13.6,caught,HK Bt,FH An,1585464.0,NZ:Left-hand bat:Right-arm medium-fast:,0.0,0.0,127.27,3.0,,2021-01-01
4,8638034,SC Kn,3776849.0,NZ:Right-hand bat:Right-arm fast-medium:,0.0,0.0,1,18,13,17.2,,,,,,,,138.46,,1.0,2021-01-01


In [56]:
# calculating 2 dictionaries to store {batsman_id: batsman average } and {batsman_id: batsman strike rate}

total_runs = {}  
balls_faced = {}
matches_played = {}

for i in range(len(batsmen)):
    batsman_id = batsmen['batsman_id'][i]
    runs = batsmen['runs'][i]
    balls = batsmen['balls_faced'][i]
    match_id = batsmen['match id'][i]
    if batsman_id in total_runs:
        total_runs[batsman_id] += runs
        balls_faced[batsman_id] += balls
        matches_played[batsman_id] += 1
    else:
        total_runs[batsman_id] = runs
        balls_faced[batsman_id] = balls
        matches_played[batsman_id] = 1
        
batsman_avg = {}
batsman_sr = {}

for batsman_id in total_runs:
    batsman_avg[batsman_id] = total_runs[batsman_id] / matches_played[batsman_id]
    batsman_sr[batsman_id] = total_runs[batsman_id] / balls_faced[batsman_id] * 100
 

In [57]:
batsman_avg

{7907451.0: 24.7,
 4381761.0: 22.661538461538463,
 4949790.0: 13.266666666666667,
 3834305.0: 12.583333333333334,
 3776849.0: 12.11111111111111,
 6718340.0: 6.5,
 31464.0: 3.3333333333333335,
 258649.0: 23.0,
 2653993.0: 34.507462686567166,
 6718326.0: 27.4625,
 6718382.0: 13.045454545454545,
 37351.0: 25.64516129032258,
 46794.0: 32.36842105263158,
 1594319.0: 16.16949152542373,
 7534687.0: 22.65,
 7537067.0: 7.904761904761905,
 5406540.0: 18.193548387096776,
 2231928.0: 23.9873417721519,
 181404.0: 15.530303030303031,
 1506098.0: 28.346153846153847,
 1749075.0: 28.13888888888889,
 36665.0: 20.8,
 2083409.0: 20.681818181818183,
 7869987.0: 24.58490566037736,
 7620283.0: 21.0,
 3063696.0: 22.96551724137931,
 34061.0: 12.555555555555555,
 3200756.0: 11.942307692307692,
 4756982.0: 10.052631578947368,
 7455818.0: 4.142857142857143,
 49496.0: 4.0,
 2076192.0: 32.2020202020202,
 4002340.0: 20.115384615384617,
 7620269.0: 22.333333333333332,
 2535420.0: 12.380952380952381,
 4967738.0: 9.217

# Bowler level features

In [58]:
bowlers=pd.read_csv('dataset/bowler_level_scorecard.csv')

In [59]:
bowlers.head()

Unnamed: 0,match id,bowler,bowler_id,bowler_details,is_bowler_captain,is_bowler_keeper,inning,runs,wicket_count,balls_bowled,economy,maiden,dots,Fours,Sixes,wides,noballs,match_dt
0,8638034,BV Ss,6718396.0,NZ:Right-hand bat:Right-arm medium-fast:,0.0,0.0,1,21,0,12,10.5,0,3.0,1,1,3,1,2021-01-01
1,8638034,HK Bt,1585464.0,NZ:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,34,1,24,8.5,0,7.0,3,1,2,0,2021-01-01
2,8638034,JS Nm,2486896.0,NZ:Left-hand bat:Right-arm medium-fast:,0.0,0.0,1,23,2,24,5.75,0,12.0,1,1,0,0,2021-01-01
3,8638034,LV vn Bk,3083667.0,NED:Right-hand bat:Right-arm medium-fast:,0.0,0.0,1,31,1,24,7.75,0,9.0,4,0,2,0,2021-01-01
4,8638034,PF Yd,4950294.0,NZ:Right-hand bat:Legbreak googly:,0.0,0.0,1,21,1,24,5.25,0,8.0,0,0,1,0,2021-01-01


In [60]:
# calculating 2 dictionaries to store {bowler_id: bowler strike rate } , {bowler_id: bowler economy} , {bowler_id: bowler extras rate}

total_matches = {}
total_wickets = {}
total_ball_bowled = {}
total_extras = {}
total_economy = {}

for i in range(len(bowlers)):
    bowler_id = bowlers['bowler_id'][i]
    wickets = bowlers['wicket_count'][i]
    balls = bowlers['balls_bowled'][i]
    runs = bowlers['runs'][i]
    extras = bowlers['wides'][i] + bowlers['noballs'][i]
    match_id = bowlers['match id'][i]
    economy=bowlers['economy'][i]
    if bowler_id in total_matches:
        total_matches[bowler_id] += 1
        total_wickets[bowler_id] += wickets
        total_ball_bowled[bowler_id] += balls
        total_extras[bowler_id] += extras
        total_economy[bowler_id] += economy
    else:
        total_matches[bowler_id] = 1
        total_wickets[bowler_id] = wickets
        total_ball_bowled[bowler_id] = balls
        total_extras[bowler_id] = extras
        total_economy[bowler_id] = economy
        
bowler_sr = {}
bowler_economy = {}
bowler_extras = {}

for bowler_id in total_matches:
    bowler_economy[bowler_id] = total_economy[bowler_id] / total_matches[bowler_id]
    bowler_extras[bowler_id] = total_extras[bowler_id] / total_ball_bowled[bowler_id]
    if total_wickets[bowler_id] == 0:
        bowler_sr[bowler_id] = 0
    else:
        bowler_sr[bowler_id] = total_ball_bowled[bowler_id] /total_wickets[bowler_id]
    

In [61]:
bowler_sr

{6718396.0: 14.192307692307692,
 1585464.0: 19.91304347826087,
 2486896.0: 16.27777777777778,
 3083667.0: 15.948275862068966,
 4950294.0: 21.789473684210527,
 6718382.0: 21.333333333333332,
 4950364.0: 22.0,
 3834305.0: 11.357142857142858,
 7543647.0: 16.23076923076923,
 3566240.0: 15.91304347826087,
 3776849.0: 14.5,
 6718340.0: 25.2,
 1612610.0: 29.9,
 5509524.0: 27.5,
 3876613.0: 30.0,
 5788320.0: 16.76923076923077,
 5497274.0: 22.266666666666666,
 8193310.0: 13.170731707317072,
 1594319.0: 21.675324675324674,
 1905847.0: 15.176470588235293,
 7620346.0: 12.0,
 181404.0: 25.25,
 8339701.0: 19.565217391304348,
 7537067.0: 22.5,
 3995991.0: 26.666666666666668,
 4967738.0: 19.64,
 2535420.0: 17.428571428571427,
 5419546.0: 22.333333333333332,
 3890984.0: 13.6,
 6818776.0: 19.543478260869566,
 7353828.0: 15.892857142857142,
 7455818.0: 47.0,
 4756982.0: 13.776119402985074,
 3200756.0: 15.191780821917808,
 34061.0: 22.93103448275862,
 1506077.0: 19.333333333333332,
 49496.0: 18.7741935483

# Combining the features of dataset

In [62]:
match=pd.read_csv('dataset/match_level_scorecard.csv')

In [63]:
match.head()

Unnamed: 0,match id,team1,team2,winner,by,win amount,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,umpire1,umpire2,inning1_runs,inning1_wickets,inning1_balls,inning2_runs,inning2_wickets,inning2_balls,team1_id,team1_roster_ids,team2_id,team2_roster_ids,series_type,winner_id,player_of_the_match_id
0,8638034,Nn Ds,Wn,Wn,wickets,9.0,Wn,field,By Ol,Mount Maunganui,2021-01-01,night match,Sr Sh,2020/21,17681,Dn Mw,TJ Pe,148,6,130,152.0,1.0,97.0,17982,7907451.0:4381761.0:31464.0:258649.0:4949790.0...,18570,2653993.0:6718326.0:6718382.0:2486896.0:228878...,other_domestic,18570,
1,8588005,Me Rs,Sy Tr,Sy Tr,runs,7.0,Sy Tr,field,Ca Ol,Carrara,2021-01-01,night match,Bg Bh Le,2020/21,6663,D Tr,PJ Ge,166,6,106,117.0,2.0,74.0,33942,37351.0:46794.0:5406540.0:2231928.0:181404.0:1...,33963,1506098.0:1749075.0:36665.0:2083409.0:7534652....,other_domestic,33963,1749075.0
2,8587837,Sy Ss,Be Ht,Be Ht,wickets,4.0,Sy Ss,bat,Be Ct Gd,Brisbane,2021-01-02,night match,Bg Bh Le,2020/21,1476,GJ Dn,PJ Ge,165,8,126,171.0,6.0,119.0,33956,7869987.0:7620283.0:2076192.0:4002340.0:306369...,33921,7620269.0:2286437.0:87191.0:5786766.0:3114803....,other_domestic,33921,3890984.0
3,8638041,Nn Ds,Oo,Oo,wickets,2.0,Oo,field,By Ol,Mount Maunganui,2021-01-02,night match,Sr Sh,2020/21,17681,GA St,TJ Pe,153,7,128,156.0,8.0,126.0,17982,7907451.0:4381761.0:31464.0:4949790.0:258649.0...,18360,2319638.0:256080.0:7918280.0:3913447.0:2690498...,other_domestic,18360,
4,8587921,Ht Hs,Me Ss,Ht Hs,runs,21.0,Me Ss,field,Be Ol,Hobart,2021-01-02,day/night match,Bg Bh Le,2020/21,6348,GA Ad,NR Je,164,5,126,143.0,9.0,123.0,33928,4223883.0:2161599.0:1655436.0:5788418.0:319948...,33949,363047.0:2275097.0:3901078.0:2275195.0:4230127...,other_domestic,33928,3125849.0


In [64]:
# # calculating 2 dictionaries to store {team_id: team average }
#  if team1 has won toss and choose bat then inning1 score will be team1 score and inning2 score will be team2 score

total_runs = {}
total_matches={}
for i in range(len(match)):
    
    toss_winner = match['toss winner'][i]
    team2=toss_winner
    
    if toss_winner == match['team2_id'][i]:
        team1 = match['team1_id'][i]
    else:
        team1 = match['team2_id'][i]
    
    if match['toss decision'][i] == 'bat':
        team2_score=match['inning1_runs'][i]
        team1_score=match['inning2_runs'][i]
    
    else:
        team1_score=match['inning1_runs'][i]
        team2_score=match['inning2_runs'][i]
    
    if team1 in total_runs:
        total_runs[team1] += team1_score
        total_matches[team1] += 1
    
    else:
        total_runs[team1] = team1_score
        total_matches[team1] = 1
        
    if team2 in total_runs:
        total_runs[team2] += team2_score
        total_matches[team2] += 1
    
    else:
        total_runs[team2] = team2_score
        total_matches[team2] = 1
        
team_avg = {}

for team_id in total_runs:
    team_avg[team_id] = total_runs[team_id] / total_matches[team_id]
    
    

In [65]:
team_avg

{18570: 149.0,
 'Wn': 167.58333333333334,
 33963: 138.28571428571428,
 'Sy Tr': 148.5,
 33921: 161.41176470588235,
 'Sy Ss': 154.47368421052633,
 18360: 157.3,
 'Oo': 149.69230769230768,
 33949: 154.6315789473684,
 'Me Ss': 128.0,
 17653: 161.83333333333334,
 'Cy': 143.375,
 33956: 160.05,
 33942: 155.8421052631579,
 'Me Rs': 138.16666666666666,
 17982: 141.86666666666667,
 'Nn Ds': 134.125,
 33928: 166.26315789473685,
 'Ht Hs': 157.8421052631579,
 'Ae Ss': 155.15,
 45961: 145.375,
 'Ud': 131.4,
 'Cl Ds': 170.16666666666666,
 32388: 173.0,
 'Ch': 167.33333333333334,
 15497: 123.16666666666667,
 'Pb': 158.1818181818182,
 13131: 150.0,
 'Rs': 133.875,
 13166: 125.42857142857143,
 'Ju  Kr': 118.28571428571429,
 45919: 162.0,
 'Jd': 173.75,
 12718: 141.0,
 'Hd Ia': 147.0,
 13705: 163.0,
 'Ma': 135.46153846153845,
 11374: 127.71428571428571,
 'Bl': 129.16666666666666,
 17583: 157.11111111111111,
 12389: 175.28571428571428,
 'Ga': 150.44444444444446,
 12669: 151.42857142857142,
 'Ha': 149.0,

In [66]:
def winpCrossLastn(team1_id, team2_id, date, n):
    '''
    Function to compute team1's win% against team2 from the current game in their past n encounters.
    
    Input-
    1. team1_id: ID of team1 to calculate win% of.
    2. team2_id: ID of team2 to calculate win% against.
    3: date: match date of the current game for which the feature is to be calculated.
    4. n: look-back window of games for both these teams.
    
    Output- None
    
    Returns- Float value denoting team1's win% against team2 in their past n games against each other.
    '''
    # filter out games where either team1_id is input team1 and team2_id is input team2, or where team2_id is input team1 and team1_id is input team2.
    # Also, match date is less than current games's input date, sort desc by date and get top n rows (games)
    df_rel = match[(match['match_dt']<date)&\
                      (((match['team1_id']==team1_id)&(match['team2_id']==team2_id))|((match['team1_id']==team2_id)&(match['team2_id']==team1_id)))]\
                        .sort_values(by='match_dt', ascending=False).head(n)
    win_count = df_rel[df_rel['winner_id']==team1_id].shape[0] # Counting number of rows (games) where winner is input team1.
    if win_count == 0:
        return 0
    return round(win_count*100/df_rel.shape[0], 2) # return Float denoting team1's win% against team2 in past n games rounded to 2 decimal places.


In [67]:
def win_percentage_last_n_matches(team_id, date,n):
    '''
    Function to calculate a team's win percentage in their last n games.

    '''
    # Filter games where team1 or team2 is the input team_id and the match date is before the input date
    df_rel = match[(match['match_dt'] < date) &
                            ((match['team1_id'] == team_id) | (match['team2_id'] == team_id))]\
                            .sort_values(by='match_dt', ascending=False).head(n)
    
    # Calculate the number of wins
    wins = df_rel[(df_rel['winner_id'] == team_id)].shape[0]
    
    # Calculate the number of matches
    total_matches = df_rel.shape[0]
    
    if total_matches == 0:
        return 0.0  # Avoid division by zero
    
    # Calculate win percentage
    win_percentage = (wins / total_matches) * 100
    return win_percentage


In [69]:
import requests
from datetime import datetime

def fetch_weather_data(latitude, longitude, start_date, end_date):
    """
    Fetches weather data for the given latitude, longitude, start date, and end date.

    Parameters:
        latitude (str): Latitude of the location.
        longitude (str): Longitude of the location.
        start_date (str): Start date in the format 'YYYY-MM-DD'.
        end_date (str): End date in the format 'YYYY-MM-DD'.

    Returns:
        dict: Parsed JSON response containing weather data, or None if the request failed.
    """
    # Construct the API endpoint dynamically to include temperature data
    api_endpoint = f"https://archive-api.open-meteo.com/v1/era5?latitude={latitude}&longitude={longitude}&start_date={start_date}&end_date={end_date}&hourly=relative_humidity_2m,temperature_2m"
    
    # Make the API call
    response = requests.get(api_endpoint)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Parse the JSON response
        data = response.json()
        return data
    else:
        print("Failed to retrieve data")
        return None

def calculate_average_humidity(data):
    """
    Calculates the average humidity from the given weather data.

    Parameters:
        data (dict): Parsed JSON response containing weather data.

    Returns:
        float: Average humidity.
    """
    # Extract the list of humidity values
    humidity_values = data['hourly']['relative_humidity_2m']
    
    # Calculate the sum of the humidity values
    total_humidity = sum(humidity_values)
    
    # Calculate the number of humidity values
    number_of_values = len(humidity_values)
    
    # Calculate the average humidity
    average_humidity = total_humidity / number_of_values
    
    return average_humidity

def calculate_average_temperature(data):
    """
    Calculates the average temperature from the given weather data.

    Parameters:
        data (dict): Parsed JSON response containing weather data.

    Returns:
        float: Average temperature.
    """
    # Extract the list of temperature values
    temperature_values = data['hourly']['temperature_2m']
    
    # Calculate the sum of the temperature values
    total_temperature = sum(temperature_values)
    
    # Calculate the number of temperature values
    number_of_values = len(temperature_values)
    
    # Calculate the average temperature
    average_temperature = total_temperature / number_of_values
    
    return average_temperature


In [70]:
train_data=pd.read_csv('dataset/train_data.csv')

In [71]:
train_data.head(5)

Unnamed: 0,match id,team1,team1_id,team1_roster_ids,team2,team2_id,team2_roster_ids,winner,winner_id,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15
0,9331181,Ba,11283,9373356.0:7857520.0:4232164.0:4566540.0:329940...,Hl Ph,12634,3500958.0:4231751.0:2735081.0:2035102.0:369833...,Hl Ph,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571
1,8797060,Ed,20,2089079.0:6139370.0:2076192.0:62432.0:2083409....,Wt Is,41,4690258.0:4069666.0:4230127.0:1942317.0:161392...,Ed,20,Wt Is,field,Kn Ol Bn Bs,Bridgetown,2022-01-23,day/night match,Ed tr of Wt Is,2021/22,1406,1.285714,1.952381,156.0,50.0,103.5
2,9433269,We,10576,3298427.0:2288789.0:7773338.0:3519011.0:368195...,Ne,8987,4003390.0:1749075.0:1626526.0:4172447.0:551672...,We,10576,Ne,field,Tt Be Nm,Nottingham,2023-06-02,day/night match,Vy Bt,2023,251,0.857143,0.672131,173.266667,0.0,154.333333
3,9587073,Ga An Ws,36084,8127230.0:4690328.0:4069666.0:7960847.0:469018...,Bs Rs,36070,3462080.0:2436405.0:1798705.0:7550857.0:574247...,Ga An Ws,36084,Ga An Ws,bat,Bn La Sm Ta Td,Tarouba,2023-09-10,day match,Cn Pr Le,2023,14300,2.166667,1.97561,164.266667,50.0,144.25
4,9516457,Pb Ks,30407,8127181.0:197658.0:4239038.0:2398346.0:5053082...,Gt Ts,48341,1958683.0:7491224.0:8059029.0:4377610.0:225245...,Gt Ts,48341,Gt Ts,field,Pb Ct An IS Ba Sm Mi Ch,Chandigarh,2023-04-13,night match,In Pr Le,2023,7118,0.818182,1.327869,164.666667,0.0,189.0


In [72]:
type(batsman_avg[7907451.0])

numpy.float64

In [73]:
len(train_data)

948

In [74]:
from geopy.geocoders import Nominatim

# Initialize Nominatim API
geolocator = Nominatim(user_agent="my_geocoder")

# Creating new columns in train_data and initializing with 0
train_data['batsmens_averages_1'] = 0
train_data['batsmens_strike_rates_1'] = 0
train_data['bowlers_strike_rates_1'] = 0
train_data['bowlers_economy_1'] = 0
train_data['bowlers_extras_1'] = 0

train_data['batsmens_averages_2'] = 0
train_data['batsmens_strike_rates_2'] = 0
train_data['bowlers_strike_rates_2'] = 0
train_data['bowlers_economy_2'] = 0
train_data['bowlers_extras_2'] = 0

train_data['team1_winp_team2_last10']=0
train_data['team1_average_score'] = 0
train_data['team2_average_score'] = 0
train_data['win_percentage_team1_last_5'] = 0
train_data['win_percentage_team2_last_5'] = 0
train_data['location_humidity'] = 0
train_data['location_temperature'] = 0

# Filling the values in the new columns
for i in range(len(train_data)):
    # Getting ids of batsmen and bowlers from team roster id separated by :
    team1_players_ids = train_data['team1_roster_ids'][i].split(':')
    team2_players_ids = train_data['team2_roster_ids'][i].split(':')
    # print(i,team1_players_ids,team2_players_ids)
    # For team1 check if id is present in the dictionary and add the values
    for player_id in team1_players_ids:
        player_id = int(float(player_id))
        if player_id in batsman_avg:
            train_data.at[i, 'batsmens_averages_1'] += batsman_avg[player_id]
            train_data.at[i, 'batsmens_strike_rates_1'] += batsman_sr[player_id]
        if player_id in bowler_sr:
            train_data.at[i, 'bowlers_strike_rates_1'] += bowler_sr[player_id]
            train_data.at[i, 'bowlers_economy_1'] += bowler_economy[player_id]
            train_data.at[i, 'bowlers_extras_1'] += bowler_extras[player_id]
            
    # For team2 check if id is present in the dictionary and add the values
    for player_id in team2_players_ids:
        player_id = int(float(player_id))
        if player_id in batsman_avg:
            train_data.at[i, 'batsmens_averages_2'] += batsman_avg[player_id]
            train_data.at[i, 'batsmens_strike_rates_2'] += batsman_sr[player_id]
        if player_id in bowler_sr:
            train_data.at[i, 'bowlers_strike_rates_2'] += bowler_sr[player_id]
            train_data.at[i, 'bowlers_economy_2'] += bowler_economy[player_id]
            train_data.at[i, 'bowlers_extras_2'] += bowler_extras[player_id]
    
    # filling team win% against each other in last 10 games
    team1_id = train_data['team1_id'][i]
    team2_id = train_data['team2_id'][i]
    date = train_data['match_dt'][i]
    
    train_data.at[i, 'team1_winp_team2_last10'] = winpCrossLastn(team1_id, team2_id, date, 10)

            
    # filling team average scores
    team1_id = train_data['team1_id'][i]
    team2_id = train_data['team2_id'][i]
    
    if team1_id in team_avg:
        train_data.at[i, 'team1_average_score'] = team_avg[team1_id]
    else:
        train_data.at[i, 'team1_average_score'] = 0
    if team2_id in team_avg:
        train_data.at[i, 'team2_average_score'] = team_avg[team2_id]
    else:
        train_data.at[i, 'team2_average_score'] = 0
        
    # filling win% of team1 in last 5 games
    
    train_data.at[i, 'win_percentage_team1_last_5'] = win_percentage_last_n_matches(team1_id, date, 5)
    train_data.at[i, 'win_percentage_team2_last_5'] = win_percentage_last_n_matches(team2_id, date, 5)
    
    city_name = train_data['city'][i]
    location = geolocator.geocode(city_name)
    start_date=date
    end_date=date
    latitude = location.latitude
    longitude = location.longitude  
    
    weather_data = fetch_weather_data(latitude, longitude, start_date, end_date)
    # Check if the data is successfully retrieved
    if weather_data:
        # Calculate and print the average humidity
        average_humidity = int(calculate_average_humidity(weather_data))
        train_data.at[i, 'location_humidity'] = average_humidity
        
        # Calculate and print the average temperature
        average_temperature = int(calculate_average_temperature(weather_data))
        train_data.at[i, 'location_temperature'] = average_temperature
        
    else:
        train_data.at[i, 'location_humidity'] = 0   
        train_data.at[i, 'location_temperature'] = 0



  train_data.at[i, 'batsmens_averages_1'] += batsman_avg[player_id]
  train_data.at[i, 'batsmens_strike_rates_1'] += batsman_sr[player_id]
  train_data.at[i, 'bowlers_strike_rates_1'] += bowler_sr[player_id]
  train_data.at[i, 'bowlers_economy_1'] += bowler_economy[player_id]
  train_data.at[i, 'bowlers_extras_1'] += bowler_extras[player_id]
  train_data.at[i, 'batsmens_averages_2'] += batsman_avg[player_id]
  train_data.at[i, 'batsmens_strike_rates_2'] += batsman_sr[player_id]
  train_data.at[i, 'bowlers_strike_rates_2'] += bowler_sr[player_id]
  train_data.at[i, 'bowlers_economy_2'] += bowler_economy[player_id]
  train_data.at[i, 'bowlers_extras_2'] += bowler_extras[player_id]
  train_data.at[i, 'team1_average_score'] = team_avg[team1_id]
  train_data.at[i, 'team2_average_score'] = team_avg[team2_id]
  train_data.at[i, 'location_humidity'] = average_humidity
  train_data.at[i, 'location_temperature'] = average_temperature
  train_data.at[i, 'team1_winp_team2_last10'] = winpCrossLastn

In [75]:
len(train_data)

948

In [76]:
# dropping team1_roster_ids , team2_roster_ids , winner , team1 , team2 ,  columns 
train_data = train_data.drop(['team1_roster_ids', 'team2_roster_ids', 'winner', 'team1', 'team2'], axis=1)

In [77]:
train_data.shape

(948, 35)

In [78]:
# printing entire train data 
pd.set_option('display.max_columns', None)
train_data.head()

Unnamed: 0,match id,team1_id,team2_id,winner_id,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,batsmens_averages_1,batsmens_strike_rates_1,bowlers_strike_rates_1,bowlers_economy_1,bowlers_extras_1,batsmens_averages_2,batsmens_strike_rates_2,bowlers_strike_rates_2,bowlers_economy_2,bowlers_extras_2,team1_winp_team2_last10,team1_average_score,team2_average_score,win_percentage_team1_last_5,win_percentage_team2_last_5,location_humidity,location_temperature
0,9331181,11283,12634,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571,90.053911,788.625462,159.04473,36.835913,0.149193,44.399321,302.028326,34.285714,15.667738,0.087202,100.0,137.666667,116.5,40.0,60.0,51.916667,23.5875
1,8797060,20,41,20,Wt Is,field,Kn Ol Bn Bs,Bridgetown,2022-01-23,day/night match,Ed tr of Wt Is,2021/22,1406,1.285714,1.952381,156.0,50.0,103.5,174.100571,1377.592279,107.384251,49.592843,0.16451,169.88834,1425.240685,134.616916,61.447239,0.439582,50.0,157.730769,157.791667,40.0,20.0,67.125,25.741667
2,9433269,10576,8987,10576,Ne,field,Tt Be Nm,Nottingham,2023-06-02,day/night match,Vy Bt,2023,251,0.857143,0.672131,173.266667,0.0,154.333333,171.38589,1284.825311,122.901785,52.625669,0.229346,171.362187,1412.343563,144.557938,73.312361,0.223853,0.0,166.944444,149.588235,40.0,60.0,66.083333,12.170833
3,9587073,36084,36070,36084,Ga An Ws,bat,Bn La Sm Ta Td,Tarouba,2023-09-10,day match,Cn Pr Le,2023,14300,2.166667,1.97561,164.266667,50.0,144.25,178.977078,1489.026067,126.950513,66.902475,0.403496,160.359012,1415.130719,156.06389,76.173784,0.575825,50.0,156.3,177.076923,80.0,40.0,84.416667,19.720833
4,9516457,30407,48341,48341,Gt Ts,field,Pb Ct An IS Ba Sm Mi Ch,Chandigarh,2023-04-13,night match,In Pr Le,2023,7118,0.818182,1.327869,164.666667,0.0,189.0,176.26162,1414.951455,135.063532,56.301939,0.263185,170.236359,1140.506596,124.558461,48.194061,0.177738,0.0,172.4,167.916667,80.0,60.0,30.708333,28.454167


In [79]:
train_data.head()

Unnamed: 0,match id,team1_id,team2_id,winner_id,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,batsmens_averages_1,batsmens_strike_rates_1,bowlers_strike_rates_1,bowlers_economy_1,bowlers_extras_1,batsmens_averages_2,batsmens_strike_rates_2,bowlers_strike_rates_2,bowlers_economy_2,bowlers_extras_2,team1_winp_team2_last10,team1_average_score,team2_average_score,win_percentage_team1_last_5,win_percentage_team2_last_5,location_humidity,location_temperature
0,9331181,11283,12634,12634,Hl Ph,field,Hr Ct Sm Ie,Indore,2022-10-20,day/night match,Sd Mq Ai Ty,2022/23,7398,1.666667,0.672131,139.0,100.0,157.178571,90.053911,788.625462,159.04473,36.835913,0.149193,44.399321,302.028326,34.285714,15.667738,0.087202,100.0,137.666667,116.5,40.0,60.0,51.916667,23.5875
1,8797060,20,41,20,Wt Is,field,Kn Ol Bn Bs,Bridgetown,2022-01-23,day/night match,Ed tr of Wt Is,2021/22,1406,1.285714,1.952381,156.0,50.0,103.5,174.100571,1377.592279,107.384251,49.592843,0.16451,169.88834,1425.240685,134.616916,61.447239,0.439582,50.0,157.730769,157.791667,40.0,20.0,67.125,25.741667
2,9433269,10576,8987,10576,Ne,field,Tt Be Nm,Nottingham,2023-06-02,day/night match,Vy Bt,2023,251,0.857143,0.672131,173.266667,0.0,154.333333,171.38589,1284.825311,122.901785,52.625669,0.229346,171.362187,1412.343563,144.557938,73.312361,0.223853,0.0,166.944444,149.588235,40.0,60.0,66.083333,12.170833
3,9587073,36084,36070,36084,Ga An Ws,bat,Bn La Sm Ta Td,Tarouba,2023-09-10,day match,Cn Pr Le,2023,14300,2.166667,1.97561,164.266667,50.0,144.25,178.977078,1489.026067,126.950513,66.902475,0.403496,160.359012,1415.130719,156.06389,76.173784,0.575825,50.0,156.3,177.076923,80.0,40.0,84.416667,19.720833
4,9516457,30407,48341,48341,Gt Ts,field,Pb Ct An IS Ba Sm Mi Ch,Chandigarh,2023-04-13,night match,In Pr Le,2023,7118,0.818182,1.327869,164.666667,0.0,189.0,176.26162,1414.951455,135.063532,56.301939,0.263185,170.236359,1140.506596,124.558461,48.194061,0.177738,0.0,172.4,167.916667,80.0,60.0,30.708333,28.454167


In [83]:
# wget http://nlp.stanford.edu/data/glove.6B.zip
# unzip glove.6B.zip

In [80]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# List of categorical columns to encode
categorical_columns = ['toss winner', 'toss decision', 'venue', 'match_dt', 'lighting', 'series_name', 'season', 'city']

# Initialize the label encoder
label_encoder = LabelEncoder()

# Apply label encoding to each categorical column
for col in categorical_columns:
    train_data[col] = label_encoder.fit_transform(train_data[col])

# Display the first 5 rows of the dataframe
train_data.head(5)


Unnamed: 0,match id,team1_id,team2_id,winner_id,toss winner,toss decision,venue,city,match_dt,lighting,series_name,season,ground_id,team_count_50runs_last15,team_winp_last5,team1only_avg_runs_last15,team1_winp_team2_last15,ground_avg_runs_last15,batsmens_averages_1,batsmens_strike_rates_1,bowlers_strike_rates_1,bowlers_economy_1,bowlers_extras_1,batsmens_averages_2,batsmens_strike_rates_2,bowlers_strike_rates_2,bowlers_economy_2,bowlers_extras_2,team1_winp_team2_last10,team1_average_score,team2_average_score,win_percentage_team1_last_5,win_percentage_team2_last_5,location_humidity,location_temperature
0,9331181,11283,12634,12634,44,1,44,55,212,1,58,2,7398,1.666667,0.672131,139.0,100.0,157.178571,90.053911,788.625462,159.04473,36.835913,0.149193,44.399321,302.028326,34.285714,15.667738,0.087202,100.0,137.666667,116.5,40.0,60.0,51.916667,23.5875
1,8797060,20,41,20,138,1,53,13,20,1,22,0,1406,1.285714,1.952381,156.0,50.0,103.5,174.100571,1377.592279,107.384251,49.592843,0.16451,169.88834,1425.240685,134.616916,61.447239,0.439582,50.0,157.730769,157.791667,40.0,20.0,67.125,25.741667
2,9433269,10576,8987,10576,81,1,111,83,370,1,79,3,251,0.857143,0.672131,173.266667,0.0,154.333333,171.38589,1284.825311,122.901785,52.625669,0.229346,171.362187,1412.343563,144.557938,73.312361,0.223853,0.0,166.944444,149.588235,40.0,60.0,66.083333,12.170833
3,9587073,36084,36070,36084,34,0,10,105,434,0,18,3,14300,2.166667,1.97561,164.266667,50.0,144.25,178.977078,1489.026067,126.950513,66.902475,0.403496,160.359012,1415.130719,156.06389,76.173784,0.575825,50.0,156.3,177.076923,80.0,40.0,84.416667,19.720833
4,9516457,30407,48341,48341,40,1,75,25,331,2,35,3,7118,0.818182,1.327869,164.666667,0.0,189.0,176.26162,1414.951455,135.063532,56.301939,0.263185,170.236359,1140.506596,124.558461,48.194061,0.177738,0.0,172.4,167.916667,80.0,60.0,30.708333,28.454167


In [81]:
# filling nan values with 0

train_data.fillna(0, inplace=True)

In [82]:
# changing column names of match id to match_id , toss winner to toss_winner , toss decision to toss_decision

train_data['match_id'] = train_data['match id']
train_data['toss_winner'] = train_data['toss winner']
train_data['toss_decision'] = train_data['toss decision']

train_data = train_data.drop(['match id', 'toss winner', 'toss decision'], axis=1)

In [83]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.ensemble import GradientBoostingClassifier
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Load the dataset
data = train_data

# Separate features and target
y = data["winner_id"]
X = data.drop("winner_id", axis=1)

# Encode the target variable
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=101)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)



In [88]:
# GBM
gbm = GradientBoostingClassifier()
gbm.fit(X_train, y_train)
gbm_pred = gbm.predict(X_test)
gbm_accuracy = accuracy_score(y_test, gbm_pred)
gbm_f1 = f1_score(y_test, gbm_pred, average='macro')

KeyboardInterrupt: 

In [43]:
# LightGBM
lgbm = lgb.LGBMClassifier()
lgbm.fit(X_train, y_train)
lgbm_pred = lgbm.predict(X_test)
lgbm_accuracy = accuracy_score(y_test, lgbm_pred)
lgbm_f1 = f1_score(y_test, lgbm_pred, average='macro')

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000977 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4464
[LightGBM] [Info] Number of data points in the train set: 853, number of used features: 27
[LightGBM] [Info] Start training from score -4.040709
[LightGBM] [Info] Start training from score -4.446174
[LightGBM] [Info] Start training from score -4.669318
[LightGBM] [Info] Start training from score -4.446174
[LightGBM] [Info] Start training from score -4.183810
[LightGBM] [Info] Start training from score -3.570706
[LightGBM] [Info] Start training from score -4.350864
[LightGBM] [Info] Start training from score -4.802849
[LightGBM] [Info] Start training from score -4.350864
[LightGBM] [Info] Start training from score -6.748760
[LightGBM] [Info] Start training from score -5.139322
[LightGBM] [Info] Start training from score -5.650147
[

In [29]:
# CatBoost
cat_model = CatBoostClassifier()
cat_model.fit(X_train, y_train)
cat_pred = cat_model.predict(X_test)
cat_accuracy = accuracy_score(y_test, cat_pred)
cat_f1 = f1_score(y_test, cat_pred, average='macro')

Learning rate set to 0.078488
0:	learn: 4.8861626	total: 631ms	remaining: 10m 30s
1:	learn: 4.8477091	total: 1.11s	remaining: 9m 14s
2:	learn: 4.8056110	total: 1.57s	remaining: 8m 41s
3:	learn: 4.7273124	total: 2.02s	remaining: 8m 23s
4:	learn: 4.6812731	total: 2.46s	remaining: 8m 10s
5:	learn: 4.6416458	total: 2.9s	remaining: 7m 59s
6:	learn: 4.5941377	total: 3.33s	remaining: 7m 52s
7:	learn: 4.5390293	total: 3.79s	remaining: 7m 50s
8:	learn: 4.4806930	total: 4.21s	remaining: 7m 43s
9:	learn: 4.4361426	total: 4.62s	remaining: 7m 37s
10:	learn: 4.3902647	total: 5.06s	remaining: 7m 34s
11:	learn: 4.3500483	total: 5.47s	remaining: 7m 30s
12:	learn: 4.3124577	total: 5.9s	remaining: 7m 27s
13:	learn: 4.2629442	total: 6.32s	remaining: 7m 24s
14:	learn: 4.2166724	total: 6.79s	remaining: 7m 25s
15:	learn: 4.1681640	total: 7.29s	remaining: 7m 28s
16:	learn: 4.1253357	total: 7.79s	remaining: 7m 30s
17:	learn: 4.0715496	total: 8.25s	remaining: 7m 30s
18:	learn: 4.0216584	total: 8.7s	remaining: 7

In [95]:
# Create DMatrix
xgb_train = xgb.DMatrix(X_train, label=y_train, enable_categorical=True)
xgb_test = xgb.DMatrix(X_test, label=y_test, enable_categorical=True)

# Define parameters and add num_class
n = 150
params = {
    'objective': 'multi:softprob',
    'max_depth': 9,
    'learning_rate': 0.38,
    'num_class': len(set(y))  # Number of unique classes
}

# Train the model
xgb_model = xgb.train(params=params, dtrain=xgb_train, num_boost_round=n)

# Make predictions
xgb_preds = xgb_model.predict(xgb_test)

# Convert predictions to class labels
xgb_preds = xgb_preds.argmax(axis=1)

# Evaluate the model
xgb_accuracy = accuracy_score(y_test, xgb_preds)
xgb_f1 = f1_score(y_test, xgb_preds, average='macro')

print(f'Accuracy of the model is: {xgb_accuracy*100:.2f}%, F1-Score: {xgb_f1:.2f}')

Accuracy of the model is: 48.42%, F1-Score: 0.34


In [45]:
print(f"GBM Accuracy: {100*gbm_accuracy:.2f}, F1-Score: {gbm_f1:.2f}")
print(f"LightGBM Accuracy: {100*gbm_accuracy:.2f}, F1-Score: {lgbm_f1:.2f}")
print(f"CatBoost Accuracy: {100*cat_accuracy:.2f}, F1-Score: {cat_f1:.2f}")

GBM Accuracy: 35.79, F1-Score: 0.21
LightGBM Accuracy: 35.79, F1-Score: 0.30
CatBoost Accuracy: 40.00, F1-Score: 0.28


In [31]:
# Doing grid search on xg boost 

from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from sklearn.metrics import classification_report

# Initialize the XGBClassifier
xgb_clf = XGBClassifier(objective='multi:softprob', num_class=len(set(y)), use_label_encoder=False)

# Define the parameter grid
param_grid = {
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.3, 0.5],
    'n_estimators': [50, 100, 200],
    'subsample': [0.6, 0.8, 1.0],
    'colsample_bytree': [0.6, 0.8, 1.0]
}

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=xgb_clf, param_grid=param_grid, scoring='accuracy', cv=3, verbose=1, n_jobs=-1)

# Perform the grid search
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print(f"Best parameters: {best_params}")

# Train the best model
best_xgb = grid_search.best_estimator_

# Make predictions
y_pred = best_xgb.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')
print(f'Accuracy of the model is: {accuracy*100:.2f}%, F1-Score: {f1:.2f}')
print(classification_report(y_test, y_pred))

Fitting 3 folds for each of 324 candidates, totalling 972 fits




ValueError: 
All the 972 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
324 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\sklearn.py", line 1471, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132], got [  0   1   2   3   4   5   6   7   8  10  11  12  13  14  15  16  18  19
  20  21  22  23  24  27  28  29  30  31  32  33  34  35  36  37  38  39
  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57
  58  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76
  77  78  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95
  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
 132 133 134 135 136 137 138]

--------------------------------------------------------------------------------
324 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\sklearn.py", line 1471, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132 133 134], got [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  22  24  25  26  27  28  29  30  31  32  33  34  35  36  37
  38  39  40  41  42  43  44  45  47  48  49  50  51  52  53  54  55  56
  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74
  75  76  77  78  80  81  82  83  84  85  86  87  88  89  90  91  92  93
  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111
 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129
 130 131 132 133 134 135 136 137 138]

--------------------------------------------------------------------------------
324 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\model_selection\_validation.py", line 729, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\core.py", line 730, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
  File "c:\Users\Dell\AppData\Local\Programs\Python\Python311\Lib\site-packages\xgboost\sklearn.py", line 1471, in fit
    raise ValueError(
ValueError: Invalid classes inferred from unique values of `y`.  Expected: [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  53
  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70  71
  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89
  90  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107
 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125
 126 127 128 129 130 131 132], got [  0   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33  34  35
  36  38  39  40  41  42  43  44  45  46  47  48  49  50  51  52  54  55
  57  58  59  60  62  63  64  65  66  67  68  69  70  71  72  73  74  75
  76  77  78  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94
  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 111 112
 113 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131
 132 133 134 135 136 137 138]
