In [26]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression,Lasso,Ridge
from sklearn.neighbors import KNeighborsRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
import warnings
warnings.filterwarnings(action='ignore')

In [4]:
#loading the dataset
df=pd.read_csv('/kaggle/input/ps4-games/games_data.csv')
#showing the dataset
df

Unnamed: 0.1,Unnamed: 0,game,score,leaderbord,gamers,comp_perc,rating,url,min_comp_time,max_comp_time
0,0,A Boy and His Blob,638,2.02,2194,16.5,3.2,https://www.truetrophies.com/game/A-Boy-and-Hi...,15,20
1,1,A Hat in Time,1992,1.53,7062,35.9,4.2,https://www.truetrophies.com/game/A-Hat-in-Tim...,15,20
2,2,A Hero and a Garden,1364,1.01,503,97.6,5.0,https://www.truetrophies.com/game/A-Hero-and-a...,0,1
3,3,A Hero and a Garden (EU),1363,1.01,581,97.8,2.9,https://www.truetrophies.com/game/A-Hero-and-a...,0,1
4,4,A King's Tale: Final Fantasy XV,637,2.02,21914,14.1,3.3,https://www.truetrophies.com/game/A-Kings-Tale...,4,5
...,...,...,...,...,...,...,...,...,...,...
1579,1579,36 Fragments of Midnight,1367,1.06,8472,82.3,2.5,https://www.truetrophies.com/game/36-Fragments...,0,1
1580,1580,36 Fragments of Midnight (Asia),1335,1.03,2131,88.9,2.4,https://www.truetrophies.com/game/36-Fragments...,0,1
1581,1581,36 Fragments of Midnight (EU),1382,1.07,12273,79.2,2.4,https://www.truetrophies.com/game/36-Fragments...,0,1
1582,1582,428: Shibuya Scramble,1943,1.47,916,41.5,4.2,https://www.truetrophies.com/game/428-Shibuya-...,40,50


In [5]:
#getting preliminary information about the dataset
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1584 entries, 0 to 1583
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Unnamed: 0     1584 non-null   int64  
 1   game           1584 non-null   object 
 2   score          1584 non-null   int64  
 3   leaderbord     1584 non-null   float64
 4   gamers         1584 non-null   int64  
 5   comp_perc      1584 non-null   float64
 6   rating         1584 non-null   float64
 7   url            1584 non-null   object 
 8   min_comp_time  1584 non-null   int64  
 9   max_comp_time  1584 non-null   int64  
dtypes: float64(3), int64(5), object(2)
memory usage: 123.9+ KB


In [15]:
def preprocess_input(df):
    df=df.copy()
    #dropping unecessary columns
    df=df.drop(df.columns[0:2],axis=1)
    df=df.drop('url',axis=1)
    #splitting  between target and feature
    y=df['rating']
    x=df.drop('rating',axis=1)
    #train_test_split
    x_train,x_test,y_train,y_test=train_test_split(x,y,train_size=0.7,random_state=123,shuffle=True)
    #scalering the dataset
    scaler=StandardScaler()
    scaler.fit(x_train)
    x_train=pd.DataFrame(scaler.transform(x_train),columns=x_train.columns,index=x_train.index)
    x_test=pd.DataFrame(scaler.transform(x_test),columns=x_test.columns,index=x_test.index)
    return x_train,x_test,y_train,y_test

In [16]:
x_train,x_test,y_train,y_test=preprocess_input(df)
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(1108, 6)
(476, 6)
(1108,)
(476,)


In [17]:
x_train

Unnamed: 0,score,leaderbord,gamers,comp_perc,min_comp_time,max_comp_time
468,-0.297321,-0.614341,-0.458540,0.647965,-0.367328,-0.248148
409,-1.093007,-0.513783,-0.521352,0.343093,-0.539160,-0.329020
842,0.811007,0.190123,0.985142,-1.246193,0.801125,0.317950
840,-0.391677,-0.727469,-0.548587,1.246418,-0.539160,-0.329020
448,-0.363511,-0.727469,-0.476329,1.204074,-0.539160,-0.329020
...,...,...,...,...,...,...
1122,-0.380410,-0.727469,-0.521843,1.187137,-0.573526,-0.342498
1346,1.072245,1.359110,0.414349,-1.141747,0.285631,0.048379
1406,-0.311404,-0.576632,0.752329,0.498352,-0.298596,-0.221191
1389,-1.097232,-0.538922,-0.129301,0.213240,-0.504793,-0.315541


In [18]:
y_train

468     3.3
409     3.0
842     4.1
840     1.8
448     2.2
       ... 
1122    3.2
1346    3.8
1406    4.3
1389    3.6
1534    3.6
Name: rating, Length: 1108, dtype: float64

In [30]:
models={'Linear Regression':LinearRegression(),
'Linear Regression(L2 Regularization)':Lasso(),
'Linear Regression(L1 Regularization)':Ridge(),
'K-Neigbors':KNeighborsRegressor(),
'Neural Network':MLPRegressor(),
'Decision Tree':DecisionTreeRegressor(),
'Random Forest':RandomForestRegressor(),
'Gradient Boosting':GradientBoostingRegressor(),
'LightGBM':XGBRegressor(),
'Cat Boost':CatBoostRegressor(verbose=0)}

In [28]:
models

{'Linear Regression': LinearRegression(),
 'Linear Regression(L2 Regularization)': Lasso(),
 'Linear Regression(L1 Regularization)': Ridge(),
 'K-Neigbors': KNeighborsRegressor(),
 'Neural Network': MLPRegressor(),
 'Decision Tree': DecisionTreeRegressor(),
 'Random Forest': RandomForestRegressor(),
 'Gradient Boosting': GradientBoostingRegressor(),
 'LightGBM': XGBRegressor(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, device=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=None, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
       

In [31]:
for name,model in models.items():
    model.fit(x_train,y_train)
    print(name)
    print(model.score(x_test,y_test))

Linear Regression
0.5255245053622062
Linear Regression(L2 Regularization)
-0.0006854769590591836
Linear Regression(L1 Regularization)
0.5254909117341899
K-Neigbors
0.5596502431621174
Neural Network
0.5963047680481254
Decision Tree
0.27361025129923067
Random Forest
0.6038390043963828
Gradient Boosting
0.6114538550056396
LightGBM
0.5376297583774523
Cat Boost
0.607590521082221


Index(['Unnamed: 0', 'game'], dtype='object')