In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load in 

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# Any results you write to the current directory are saved as output.
import matplotlib.pyplot as plt
import seaborn as sns
import gc
import warnings
import xgboost as xgb
warnings.filterwarnings("ignore")


/kaggle/input/pubg-finish-placement-prediction/train_V2.csv
/kaggle/input/pubg-finish-placement-prediction/sample_submission_V2.csv
/kaggle/input/pubg-finish-placement-prediction/test_V2.csv
/kaggle/input/pubgds/Importances.csv


In [2]:

def reduce_mem_usage(df):
    """Downcast the numeric columns of ``df`` to the smallest dtype that fits.

    Every plain NumPy integer or float column is inspected and, when its
    min/max allow it, converted to a narrower dtype.  Memory usage before and
    after is printed.

    Only columns whose dtype kind is signed integer, unsigned integer or float
    are touched.  Boolean, datetime, object, categorical and pandas extension
    columns are left exactly as they are (the previous string-prefix test sent
    bool / unsigned columns through the float branch and turned them into
    float16).

    Args:
        df: DataFrame to shrink.  Columns are reassigned on this object.

    Returns:
        The same DataFrame object, with downcast columns.
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    signed_types = (np.int8, np.int16, np.int32, np.int64)
    unsigned_types = (np.uint8, np.uint16, np.uint32, np.uint64)
    float_types = (np.float16, np.float32)

    for col in df.columns:
        col_type = df[col].dtype

        # Skip anything that is not a plain NumPy int / uint / float column.
        if not isinstance(col_type, np.dtype) or col_type.kind not in 'iuf':
            continue

        c_min = df[col].min()
        c_max = df[col].max()

        if col_type.kind == 'f':
            # NOTE: float16 keeps only ~3 significant decimal digits, so this
            # trades precision for memory on large-valued columns.
            # An all-NaN column fails every comparison and falls back to float64.
            target = np.float64
            for candidate in float_types:
                limits = np.finfo(candidate)
                if c_min > limits.min and c_max < limits.max:
                    target = candidate
                    break
            df[col] = df[col].astype(target)
        else:
            # min()/max() of an empty integer column is NaN: nothing to decide.
            if pd.isna(c_min) or pd.isna(c_max):
                continue
            lowest, highest = int(c_min), int(c_max)
            candidates = signed_types if col_type.kind == 'i' else unsigned_types
            for candidate in candidates:
                limits = np.iinfo(candidate)
                # Inclusive bounds: a column whose max is exactly 127 fits int8.
                if limits.min <= lowest and highest <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

def import_data(file):
    """Read a CSV file into a DataFrame and optimize its memory usage.

    Args:
        file: Path (or file-like object) accepted by ``pandas.read_csv``.

    Returns:
        The loaded DataFrame after ``reduce_mem_usage`` has downcast its
        numeric columns.
    """
    # ``keep_date_col`` was dropped from this call: it only matters when
    # ``parse_dates`` combines several columns, which is not the case here,
    # and the keyword is deprecated in pandas 2.2.
    df = pd.read_csv(file, parse_dates=True)
    return reduce_mem_usage(df)

In [3]:
def FeatureModification(df):
    """Build the engineered feature set from the raw match statistics.

    Steps, in order:
      1. head counts per ``groupId`` and per ``matchId``;
      2. per-player distance / kill ratios and within-match percentile ranks;
      3. min / max / mean / std of a fixed feature list over every
         (``matchId``, ``groupId``) pair, merged back as ``<feature>_<stat>``;
      4. within-match percentile rank of every group mean (``rankPer_*``);
      5. one-hot columns for ``matchType``.

    Infinite and missing values created along the way are replaced by 0, and
    ``reduce_mem_usage`` is applied after the larger steps.

    Args:
        df: DataFrame containing ``Id``, ``groupId``, ``matchId``,
            ``matchType`` and the numeric stat columns referenced below.
            New columns are added to this object up to the group merge.

    Returns:
        A new DataFrame with ``Id``, ``groupId`` and ``matchId`` dropped and
        all engineered columns appended.
    """
    # 2.3.1) / 2.3.2) Players sharing the same group / match.  A vectorised
    # lookup replaces the former row-wise ``df.apply``; the values are identical.
    df["players_in_the_group"] = df["groupId"].map(df["groupId"].value_counts())
    df["players_in_the_match"] = df["matchId"].map(df["matchId"].value_counts())

    # ---------------------------------------------------------------------
    # Per-player ratio features.  Divisions by zero yield inf / NaN here and
    # are zeroed in one pass further down.
    df["Total_Distance"] = 0.25*df["rideDistance"] + df["walkDistance"] + df["swimDistance"]
    df["WeaponsAcquired_Distance_Ratio"] = (df["weaponsAcquired"]/df["Total_Distance"]).fillna(0)
    df["kills_Distance_Ratio"] = df["kills"]/df["Total_Distance"]
    df["killstreaks_kills_rate"] = df["killStreaks"]/df["kills"]
    df["Headshot_kills_rate"] = (df["headshotKills"]/df["kills"]).where(df["kills"] != 0, 0)
    df["Headshot_Distance_Ratio"] = (df["headshotKills"]/df["Total_Distance"]).fillna(0)
    df["vehicleDestroys_Distance_Ratio"] = (df["vehicleDestroys"]/df["Total_Distance"]).fillna(0)
    # The fillna result used to be discarded; it is now assigned.
    df["vehicleDestroys_driveDistance_Ratio"] = (df["vehicleDestroys"]/df["rideDistance"]).fillna(0)
    df['_walkDistancePerDuration'] = df["walkDistance"]/df["matchDuration"]

    # Within-match percentile ranks.
    by_match = df.groupby('matchId')
    df['walkDistancePerc'] = by_match['walkDistance'].rank(pct=True).values
    df['killPerc'] = by_match['kills'].rank(pct=True).values
    df['killPlacePerc'] = by_match['killPlace'].rank(pct=True).values
    # NOTE(review): this overwrites the raw 'weaponsAcquired' counts with their
    # percentile rank; every later use of the column sees the rank.  Kept as-is
    # because downstream feature names depend on it -- confirm it is intended.
    df['weaponsAcquired'] = by_match['weaponsAcquired'].rank(pct=True).values
    df['_walkDistance_kills_Ratio2'] = df['walkDistancePerc'] / df['killPerc']
    df['_kill_kills_Ratio2'] = df['killPerc']/df['walkDistancePerc']
    df['_killPlace_walkDistance_Ratio2'] = df['walkDistancePerc']/df['killPlacePerc']
    df['_killPlace_kills_Ratio2'] = df['killPlacePerc']/df['killPerc']
    df['_totalDistance'] = df.groupby('matchId')['Total_Distance'].rank(pct=True).values
    df['_walkDistance_kills_Ratio3'] = df['walkDistancePerc'] / df['kills']
    df['_walkDistance_kills_Ratio4'] = df['kills'] / df['walkDistancePerc']
    df['_walkDistance_kills_Ratio5'] = df['killPerc'] / df['walkDistance']
    df['_walkDistance_kills_Ratio6'] = df['walkDistance'] / df['killPerc']
    # Share of the match's players that belong to this player's group.  The
    # previous expression divided the column by itself and was constantly 1.
    df['players_in_groupPer'] = df['players_in_the_group']/df['players_in_the_match']
    df['players_in_groupPerRank'] = df.groupby("matchId")["players_in_the_group"].rank(pct=True).values

    # np.Inf / np.NINF no longer exist in NumPy 2.0; use np.inf / -np.inf.
    df.replace([np.inf, -np.inf], np.nan, inplace=True)
    df.fillna(0, inplace=True)

    # Columns that get group-level statistics.
    features = [
        'assists',
        'boosts',
        'damageDealt',
        'DBNOs',
        'headshotKills',
        'heals',
        'killPoints',
        'kills',
        'killStreaks',
        'longestKill',
        'roadKills',
        'vehicleDestroys',
        'walkDistance',
        'weaponsAcquired',
        'winPoints',
        'Total_Distance',
        'WeaponsAcquired_Distance_Ratio',
        'kills_Distance_Ratio',
        'killstreaks_kills_rate',
        'Headshot_kills_rate',
        'Headshot_Distance_Ratio',
        'vehicleDestroys_Distance_Ratio',
        'vehicleDestroys_driveDistance_Ratio',
        '_walkDistancePerDuration',
        'walkDistancePerc',
        '_walkDistance_kills_Ratio2',
        '_kill_kills_Ratio2',
        '_killPlace_walkDistance_Ratio2',
        '_killPlace_kills_Ratio2',
        '_totalDistance',
        '_walkDistance_kills_Ratio3',
        '_walkDistance_kills_Ratio4',
        '_walkDistance_kills_Ratio5',
        '_walkDistance_kills_Ratio6']

    df = reduce_mem_usage(df)

    # Min / max / mean / std of each feature within the same group.  The
    # two-level aggregate columns are flattened to '<feature>_<stat>' before the
    # merge: merging a MultiIndex-column frame into a flat one is rejected by
    # newer pandas, and the resulting names are the same as before.
    group_keys = ["matchId", "groupId"]
    x = df.groupby(group_keys)[features].agg(["min", "max", "mean", "std"])
    x.columns = [name + "_" + stat for name, stat in x.columns]
    df = df.merge(x, left_on=group_keys, right_index=True)
    del x

    # std of a single-member group is NaN; zero it together with any inf.
    df.replace([np.inf, -np.inf, np.nan], 0, inplace=True)
    df = reduce_mem_usage(df)

    # Percentile rank of every group mean within its match.
    feature_means = [feature + "_mean" for feature in features]
    y = df.groupby("matchId")[feature_means].rank(pct=True)
    y.rename(lambda x: "rankPer_" + x, axis=1, inplace=True)
    y = reduce_mem_usage(y)
    df = pd.concat([df, y], axis=1, sort=False)
    del y

    df.drop(["Id", "groupId", "matchId"], axis=1, inplace=True)

    # One indicator column per match type; the 'matchType' column itself stays.
    df = pd.concat([df, pd.get_dummies(df["matchType"])], axis=1)
    df = reduce_mem_usage(df)

    return df

from sklearn.model_selection import train_test_split

#-------------------------------------------------------------------------------------------------------------------------------
# Complete list of candidate feature columns studied below; it is the default
# column selection of training_validation_splitting.

total_features=['assists',
 'boosts',
 'damageDealt',
 'DBNOs',
 'headshotKills',
 'heals',
 'killPlace',
 'killPoints',
 'kills',
 'killStreaks',
 'longestKill',
 'matchDuration',
 'maxPlace',
 'numGroups',
 'rankPoints',
 'revives',
 'rideDistance',
 'roadKills',
 'swimDistance',
 'teamKills',
 'vehicleDestroys',
 'walkDistance',
 'weaponsAcquired',
 'winPoints',
 'players_in_the_group',
 'players_in_the_match',
 'Total_Distance',
 'WeaponsAcquired_Distance_Ratio',
 'kills_Distance_Ratio',
 'killstreaks_kills_rate',
 'Headshot_kills_rate',
 'Headshot_Distance_Ratio',
 'vehicleDestroys_Distance_Ratio',
 'vehicleDestroys_driveDistance_Ratio',
 '_walkDistancePerDuration',
 'walkDistancePerc',
 'killPerc',
 'killPlacePerc',
 '_walkDistance_kills_Ratio2',
 '_kill_kills_Ratio2',
 '_killPlace_walkDistance_Ratio2',
 '_killPlace_kills_Ratio2',
 '_totalDistance',
 '_walkDistance_kills_Ratio3',
 '_walkDistance_kills_Ratio4',
 '_walkDistance_kills_Ratio5',
 '_walkDistance_kills_Ratio6',
 'players_in_groupPer',
 'players_in_groupPerRank',
 'assists_min',
 'assists_max',
 'assists_mean',
 'assists_std',
 'boosts_min',
 'boosts_max',
 'boosts_mean',
 'boosts_std',
 'damageDealt_min',
 'damageDealt_max',
 'damageDealt_mean',
 'damageDealt_std',
 'DBNOs_min',
 'DBNOs_max',
 'DBNOs_mean',
 'DBNOs_std',
 'headshotKills_min',
 'headshotKills_max',
 'headshotKills_mean',
 'headshotKills_std',
 'heals_min',
 'heals_max',
 'heals_mean',
 'heals_std',
 'killPoints_min',
 'killPoints_max',
 'killPoints_mean',
 'killPoints_std',
 'kills_min',
 'kills_max',
 'kills_mean',
 'kills_std',
 'killStreaks_min',
 'killStreaks_max',
 'killStreaks_mean',
 'killStreaks_std',
 'longestKill_min',
 'longestKill_max',
 'longestKill_mean',
 'longestKill_std',
 'roadKills_min',
 'roadKills_max',
 'roadKills_mean',
 'roadKills_std',
 'vehicleDestroys_min',
 'vehicleDestroys_max',
 'vehicleDestroys_mean',
 'vehicleDestroys_std',
 'walkDistance_min',
 'walkDistance_max',
 'walkDistance_mean',
 'walkDistance_std',
 'weaponsAcquired_min',
 'weaponsAcquired_max',
 'weaponsAcquired_mean',
 'weaponsAcquired_std',
 'winPoints_min',
 'winPoints_max',
 'winPoints_mean',
 'winPoints_std',
 'Total_Distance_min',
 'Total_Distance_max',
 'Total_Distance_mean',
 'Total_Distance_std',
 'WeaponsAcquired_Distance_Ratio_min',
 'WeaponsAcquired_Distance_Ratio_max',
 'WeaponsAcquired_Distance_Ratio_mean',
 'WeaponsAcquired_Distance_Ratio_std',
 'kills_Distance_Ratio_min',
 'kills_Distance_Ratio_max',
 'kills_Distance_Ratio_mean',
 'kills_Distance_Ratio_std',
 'killstreaks_kills_rate_min',
 'killstreaks_kills_rate_max',
 'killstreaks_kills_rate_mean',
 'killstreaks_kills_rate_std',
 'Headshot_kills_rate_min',
 'Headshot_kills_rate_max',
 'Headshot_kills_rate_mean',
 'Headshot_kills_rate_std',
 'Headshot_Distance_Ratio_min',
 'Headshot_Distance_Ratio_max',
 'Headshot_Distance_Ratio_mean',
 'Headshot_Distance_Ratio_std',
 'vehicleDestroys_Distance_Ratio_min',
 'vehicleDestroys_Distance_Ratio_max',
 'vehicleDestroys_Distance_Ratio_mean',
 'vehicleDestroys_Distance_Ratio_std',
 'vehicleDestroys_driveDistance_Ratio_min',
 'vehicleDestroys_driveDistance_Ratio_max',
 'vehicleDestroys_driveDistance_Ratio_mean',
 'vehicleDestroys_driveDistance_Ratio_std',
 '_walkDistancePerDuration_min',
 '_walkDistancePerDuration_max',
 '_walkDistancePerDuration_mean',
 '_walkDistancePerDuration_std',
 'walkDistancePerc_min',
 'walkDistancePerc_max',
 'walkDistancePerc_mean',
 'walkDistancePerc_std',
 '_walkDistance_kills_Ratio2_min',
 '_walkDistance_kills_Ratio2_max',
 '_walkDistance_kills_Ratio2_mean',
 '_walkDistance_kills_Ratio2_std',
 '_kill_kills_Ratio2_min',
 '_kill_kills_Ratio2_max',
 '_kill_kills_Ratio2_mean',
 '_kill_kills_Ratio2_std',
 '_killPlace_walkDistance_Ratio2_min',
 '_killPlace_walkDistance_Ratio2_max',
 '_killPlace_walkDistance_Ratio2_mean',
 '_killPlace_walkDistance_Ratio2_std',
 '_killPlace_kills_Ratio2_min',
 '_killPlace_kills_Ratio2_max',
 '_killPlace_kills_Ratio2_mean',
 '_killPlace_kills_Ratio2_std',
 '_totalDistance_min',
 '_totalDistance_max',
 '_totalDistance_mean',
 '_totalDistance_std',
 '_walkDistance_kills_Ratio3_min',
 '_walkDistance_kills_Ratio3_max',
 '_walkDistance_kills_Ratio3_mean',
 '_walkDistance_kills_Ratio3_std',
 '_walkDistance_kills_Ratio4_min',
 '_walkDistance_kills_Ratio4_max',
 '_walkDistance_kills_Ratio4_mean',
 '_walkDistance_kills_Ratio4_std',
 '_walkDistance_kills_Ratio5_min',
 '_walkDistance_kills_Ratio5_max',
 '_walkDistance_kills_Ratio5_mean',
 '_walkDistance_kills_Ratio5_std',
 '_walkDistance_kills_Ratio6_min',
 '_walkDistance_kills_Ratio6_max',
 '_walkDistance_kills_Ratio6_mean',
 '_walkDistance_kills_Ratio6_std',
 'rankPer_assists_mean',
 'rankPer_boosts_mean',
 'rankPer_damageDealt_mean',
 'rankPer_DBNOs_mean',
 'rankPer_headshotKills_mean',
 'rankPer_heals_mean',
 'rankPer_killPoints_mean',
 'rankPer_kills_mean',
 'rankPer_killStreaks_mean',
 'rankPer_longestKill_mean',
 'rankPer_roadKills_mean',
 'rankPer_vehicleDestroys_mean',
 'rankPer_walkDistance_mean',
 'rankPer_weaponsAcquired_mean',
 'rankPer_winPoints_mean',
 'rankPer_Total_Distance_mean',
 'rankPer_WeaponsAcquired_Distance_Ratio_mean',
 'rankPer_kills_Distance_Ratio_mean',
 'rankPer_killstreaks_kills_rate_mean',
 'rankPer_Headshot_kills_rate_mean',
 'rankPer_Headshot_Distance_Ratio_mean',
 'rankPer_vehicleDestroys_Distance_Ratio_mean',
 'rankPer_vehicleDestroys_driveDistance_Ratio_mean',
 'rankPer__walkDistancePerDuration_mean',
 'rankPer_walkDistancePerc_mean',
 'rankPer__walkDistance_kills_Ratio2_mean',
 'rankPer__kill_kills_Ratio2_mean',
 'rankPer__killPlace_walkDistance_Ratio2_mean',
 'rankPer__killPlace_kills_Ratio2_mean',
 'rankPer__totalDistance_mean',
 'rankPer__walkDistance_kills_Ratio3_mean',
 'rankPer__walkDistance_kills_Ratio4_mean',
 'rankPer__walkDistance_kills_Ratio5_mean',
 'rankPer__walkDistance_kills_Ratio6_mean',
 'crashfpp',
 'crashtpp',
 'duo',
 'duo-fpp',
 'flarefpp',
 'flaretpp',
 'normal-duo',
 'normal-duo-fpp',
 'normal-solo',
 'normal-solo-fpp',
 'normal-squad',
 'normal-squad-fpp',
 'solo',
 'solo-fpp',
 'squad',
 'squad-fpp']

In [4]:
def training_validation_splitting(df, features=total_features, test_size=0.2, random_state=None):
    """Split ``df`` into training and validation feature / target frames.

    Args:
        df: DataFrame containing the columns in ``features`` and the target
            column ``winPlacePerc``.
        features: Column labels used as model inputs.
        test_size: Fraction (or absolute number) of rows held out for
            validation; forwarded to ``train_test_split``.
        random_state: Seed forwarded to ``train_test_split``.  Pass an int to
            make the split reproducible; ``None`` keeps the previous
            non-deterministic behaviour.

    Returns:
        ``(X_train, X_valid, y_train, y_valid)`` as DataFrames with a fresh
        default index.  The X frames carry the feature names as columns; the
        y frames have a single unnamed column ``0``.
    """
    X = df.loc[:, features]
    feature_names = list(X.columns)

    # The split is done on the raw arrays, so the original index is discarded.
    X_train, X_valid, y_train, y_valid = train_test_split(
        X.values,
        df["winPlacePerc"].values,
        test_size=test_size,
        random_state=random_state,
    )

    return (
        pd.DataFrame(X_train, columns=feature_names),
        pd.DataFrame(X_valid, columns=feature_names),
        pd.DataFrame(y_train),
        pd.DataFrame(y_valid),
    )


In [5]:
from sklearn.externals import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

def feature_assessment(features):
    """Fit a one-tree random forest on ``features`` and rank them by importance.

    Reads the module-level ``X_train_df``, ``X_valid_df``, ``y_train`` and
    ``y_valid``.

    Args:
        features: Column labels to select from the training / validation frames.

    Returns:
        ``(mae, ranked_features, importances)`` where ``mae`` is the validation
        mean absolute error (also printed), ``ranked_features`` is the index of
        ``importances`` and ``importances`` is a one-column ("Importance")
        DataFrame sorted in descending order.
    """
    train_subset = X_train_df.loc[:, features]
    valid_subset = X_valid_df.loc[:, features]

    column_names = list(train_subset.columns)
    train_matrix = train_subset.values
    valid_matrix = valid_subset.values
    # Release the intermediate frames; only the raw arrays are needed from here.
    del train_subset, valid_subset

    # A single tree that may consider every feature at each split.
    forest = RandomForestRegressor(
        n_estimators=1,
        max_features=len(column_names),
        verbose=2,
    )
    forest.fit(train_matrix, y_train)

    importances = pd.DataFrame(
        forest.feature_importances_,
        index=column_names,
        columns=["Importance"],
    ).sort_values(by="Importance", ascending=False)
    ranked_features = importances.index

    predictions = forest.predict(valid_matrix)
    del forest

    mae = mean_absolute_error(predictions, y_valid, multioutput="uniform_average")
    print(mae)
    return mae, ranked_features, importances

In [6]:
# Drop i features, get mae, new features, features importances
def feature_selection(features, i):
    """Drop the last ``i`` features, re-assess the rest and report the result.

    Args:
        features: Ordered sequence of feature labels; the last ``i`` entries
            are the ones removed.
        i: Number of trailing features to drop.

    Returns:
        ``(mae, features, im)`` where ``mae`` and ``features`` come from
        ``feature_assessment`` on the retained features and ``im`` is a list of
        ``(feature, importance)`` pairs.  Returns ``None`` when ``features``
        does not contain more than ``i`` entries.
    """
    if len(features) <= i:
        return None

    # Slice by explicit length: ``features[0:-i]`` is empty when ``i`` is 0.
    keep_count = len(features) - i
    # Remember what is removed *before* ``features`` is rebound to the newly
    # ranked survivors; otherwise the report lists kept features instead.
    dropped = list(features[keep_count:])

    mae, features, importances = feature_assessment(features[:keep_count])
    im = [(name, importances["Importance"][name]) for name in importances.index]

    print("we drop the features:")
    for name in reversed(dropped):
        print(name)
    print("We have"+" "+str(len(features))+" "+"features")
    return mae, features, im

In [7]:
# The triple-quoted string below is not executed: it records the sequence of
# feature_selection calls that (going by the variable names) narrowed the
# feature set step by step down to 75 features.  The outcome of that run is
# hard-coded in features_75 so the selection does not have to be repeated.
# NOTE(review): presumably the list is in the importance order returned by
# feature_assessment -- confirm before relying on the ordering.
"""
mae, features,importances=feature_assessment(total_features)
mae_232, features_232, im_232=feature_selection(features,3)
mae_229, features_229, im_229=feature_selection(features_232,3)
mae_221, features_221, im_221=feature_selection(features_229,8)
mae_220, features_220, im_220=feature_selection(features_221,1)
mae_199, features_199, im_199=feature_selection(features_220,21)
mae_159, features_159, im_159=feature_selection(features_199,40)
mae_116, features_116, im_116=feature_selection(features_159,43)
mae_98, features_98, im_98=feature_selection(features_116,18)
mae_76, features_76, im_76=feature_selection(features_98,22)
mae_75, features_75, im_75=feature_selection(features_76,1)
"""
features_75=['rankPer_Total_Distance_mean',
 'killPlacePerc',
 '_killPlace_kills_Ratio2_max',
 '_killPlace_walkDistance_Ratio2_mean',
 'killPerc',
 '_killPlace_walkDistance_Ratio2_min',
 '_totalDistance_max',
 'boosts_max',
 'numGroups',
 'players_in_the_group',
 'rankPer__killPlace_walkDistance_Ratio2_mean',
 '_walkDistancePerDuration_std',
 'boosts_mean',
 'rankPer_boosts_mean',
 'players_in_groupPerRank',
 'rankPer_kills_mean',
 '_totalDistance_mean',
 'rankPer_walkDistance_mean',
 'killstreaks_kills_rate',
 'kills_mean',
 'rankPer_heals_mean',
 'rankPer__killPlace_kills_Ratio2_mean',
 'rankPer_killstreaks_kills_rate_mean',
 '_totalDistance_std',
 'matchDuration',
 '_walkDistancePerDuration_max',
 'rankPer_assists_mean',
 '_walkDistance_kills_Ratio2_min',
 'Total_Distance_min',
 'rankPer_kills_Distance_Ratio_mean',
 '_walkDistance_kills_Ratio2_mean',
 '_killPlace_kills_Ratio2_mean',
 'rankPer_DBNOs_mean',
 'weaponsAcquired_max',
 'rankPer_killStreaks_mean',
 '_killPlace_walkDistance_Ratio2_max',
 '_walkDistancePerDuration_mean',
 'walkDistancePerc_max',
 'rankPer_weaponsAcquired_mean',
 'damageDealt_max',
 'rankPer__walkDistance_kills_Ratio2_mean',
 'rankPer__walkDistance_kills_Ratio3_mean',
 'rankPer__walkDistance_kills_Ratio6_mean',
 'weaponsAcquired_mean',
 'rankPer__walkDistance_kills_Ratio4_mean',
 'Total_Distance_max',
 'rankPer_WeaponsAcquired_Distance_Ratio_mean',
 '_killPlace_kills_Ratio2_min',
 'weaponsAcquired_min',
 'damageDealt_mean',
 'rankPer_damageDealt_mean',
 'rankPer_longestKill_mean',
 'WeaponsAcquired_Distance_Ratio_max',
 'WeaponsAcquired_Distance_Ratio_min',
 'rankPer_Headshot_kills_rate_mean',
 'rankPer_Headshot_Distance_Ratio_mean',
 'rankPer__kill_kills_Ratio2_mean',
 'rankPer__walkDistance_kills_Ratio5_mean',
 '_killPlace_kills_Ratio2_std',
 '_walkDistance_kills_Ratio6_std',
 'walkDistance_max',
 '_walkDistance_kills_Ratio4_max',
 'longestKill_max',
 'rankPer__totalDistance_mean',
 'rankPer_walkDistancePerc_mean',
 'walkDistancePerc_mean',
 'kills_Distance_Ratio_max',
 'weaponsAcquired_std',
 'players_in_the_match',
 'maxPlace',
 'rankPer_vehicleDestroys_driveDistance_Ratio_mean',
 'longestKill_mean',
 'boosts_std',
 'kills_Distance_Ratio_mean',
 'rankPer__walkDistancePerDuration_mean']

In [8]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

def training_RF(number_of_trees, number_of_features, number_of_depth=None):
    """Train a random forest and report its validation mean absolute error.

    Reads the module-level ``X_train``, ``y_train``, ``X_valid`` and
    ``y_valid``.

    Args:
        number_of_trees: Passed as ``n_estimators``.
        number_of_features: Passed as ``max_features``.
        number_of_depth: Passed as ``max_depth``; ``None`` by default.

    Returns:
        ``(mae, model)``: the validation MAE (also printed) and the fitted
        regressor.
    """
    forest = RandomForestRegressor(
        n_estimators=number_of_trees,
        max_depth=number_of_depth,
        max_features=number_of_features,
        verbose=2,
    )
    forest.fit(X_train, y_train)

    predictions = forest.predict(X_valid)
    validation_mae = mean_absolute_error(predictions, y_valid, multioutput="uniform_average")
    print(validation_mae)
    return validation_mae, forest

In [9]:
from sklearn.ensemble import GradientBoostingRegressor

def training_GBR(number_of_estimators, rate_of_learning, sub_sample, number_of_features, number_of_depth):
    """Train a gradient boosting regressor and report its validation MAE.

    Reads the module-level ``X_train``, ``y_train``, ``X_valid`` and
    ``y_valid``.  The model always uses ``loss='lad'`` and
    ``criterion='mse'``.

    Args:
        number_of_estimators: Passed as ``n_estimators``.
        rate_of_learning: Passed as ``learning_rate``.
        sub_sample: Passed as ``subsample``.
        number_of_features: Passed as ``max_features``.
        number_of_depth: Passed as ``max_depth``.

    Returns:
        ``(mae, model)``: the validation MAE (also printed) and the fitted
        regressor.
    """
    settings = {
        "n_estimators": number_of_estimators,
        "learning_rate": rate_of_learning,
        "subsample": sub_sample,
        "loss": 'lad',
        "verbose": 2,
        "max_features": number_of_features,
        "max_depth": number_of_depth,
        "criterion": 'mse',
    }
    booster = GradientBoostingRegressor(**settings)
    booster.fit(X_train, y_train)

    validation_mae = mean_absolute_error(
        booster.predict(X_valid), y_valid, multioutput="uniform_average"
    )

    for line in ("--------------------------------------------------------",
                 "The mean absolute error is:",
                 validation_mae):
        print(line)
    return validation_mae, booster

In [10]:
def training_XGB(X_t, y_t, X_v, y_v, parameters, verbose):
    """Train an XGBoost regressor and predict on the validation set.

    Args:
        X_t: Training features.
        y_t: Training targets.
        X_v: Validation features.
        y_v: Validation targets.
        parameters: Mapping that must contain ``max_depth``,
            ``learning_rate``, ``n_estimators``, ``gamma``,
            ``min_child_weight``, ``colsample_bytree``, ``subsample``,
            ``reg_lambda`` and ``base_score``.  Every entry is printed.
        verbose: When truthy, training and validation sets are passed as
            ``eval_set`` so the MAE is tracked during fitting.

    Returns:
        ``(y, bst)``: the validation predictions and the fitted regressor.

    Raises:
        KeyError: If a required entry is missing from ``parameters``.
    """
    print("We begin to train the model with xgboost:")
    print("-----------------------------------------------")

    for name, value in parameters.items():
        print("The "+str(name)+" is: "+str(value))

    bst = xgb.XGBRegressor(max_depth=parameters["max_depth"],
                           learning_rate=parameters["learning_rate"],
                           n_estimators=parameters["n_estimators"],
                           gamma=parameters["gamma"],
                           min_child_weight=parameters["min_child_weight"],
                           colsample_bytree=parameters["colsample_bytree"],
                           subsample=parameters["subsample"],
                           reg_lambda=parameters["reg_lambda"],
                           base_score=parameters["base_score"])
    if verbose:
        bst.fit(X_t, y_t, eval_set=[(X_t, y_t), (X_v, y_v)], eval_metric="mae")
    else:
        bst.fit(X_t, y_t, eval_metric="mae")

    y = bst.predict(X_v)

    # The validation MAE used to be computed and silently discarded; report it
    # like the other training helpers do.
    mae = mean_absolute_error(y_v, y)
    print("--------------------------------------------------------")
    print("The mean absolute error is:")
    print(mae)

    return y, bst

In [11]:
# Load the train and test CSVs (each is downcast by import_data) and stack them
# into one frame; the outer index level "train" / "test" records where each row
# came from.
train_df=import_data("/kaggle/input/pubg-finish-placement-prediction/train_V2.csv")
test_df=import_data("/kaggle/input/pubg-finish-placement-prediction/test_V2.csv")
total_df=pd.concat([train_df,test_df],keys=["train","test"])
# The separate frames are no longer needed once combined.
del train_df
del test_df

Memory usage of dataframe is 983.90 MB
Memory usage after optimization is: 288.39 MB
Decreased by 70.7%
Memory usage of dataframe is 413.18 MB
Memory usage after optimization is: 121.74 MB
Decreased by 70.5%


In [12]:
# Re-run the dtype downcast on the combined frame.  The recorded output below
# reports a 0.0% decrease for this call.
total_df=reduce_mem_usage(total_df)

Memory usage of dataframe is 478.17 MB
Memory usage after optimization is: 478.17 MB
Decreased by 0.0%


In [None]:
# Run the feature engineering on the combined train + test frame; the returned
# frame replaces total_df.
total_df=FeatureModification(total_df)

In [None]:
# Write the engineered frame to CSV.
# NOTE(review): the output path is an empty string -- presumably a placeholder;
# supply a real file name before running this cell.
total_df.to_csv("")