In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
import xgboost as xgb


In [2]:
df = pd.read_excel ("EPS_Dataset.xlsx")
df = df. drop( ['Bank name', 'Year'] , axis=1)

In [3]:
# Data cleaning

# Outliers detection
col_list = df.columns 
for i in col_list:
    q1 = np.percentile(df[i] , 25)
    q3 = np.percentile(df[i] , 75)
    iqr = q3-q1
    lower_bound = q1 - (5 * iqr)
    upper_bound = q3 + (5 * iqr)
   
    df = df.loc [ (df[i] >= lower_bound) & (df[i] <= upper_bound) ]
   

In [4]:
# Extract x and y
y = df ['Basic EPS (Rs.)']
x = df. drop( ['Basic EPS (Rs.)'] , axis=1)

In [5]:
# VIF for numerical columns

from statsmodels.stats.outliers_influence import variance_inflation_factor


numeric_columns = x.columns
vif_data = x
total_columns = vif_data.shape[1]
columns_to_be_kept = []
column_index = 0

In [6]:
for i in range (0,total_columns):
   
    vif_value = variance_inflation_factor(vif_data, column_index)
    print (column_index,vif_value)
   
   
    if vif_value <= 280:
        columns_to_be_kept.append( numeric_columns[i] )
        column_index = column_index+1
   
    else:
        vif_data = vif_data.drop([ numeric_columns[i] ] , axis=1)

0 63.834334311485975
1 81.37140870798974
2 1413.4762312589748
2 1412.0958055848223
2 15126.960463568766
2 36.45798070275257
3 274807.3146241795
3 422.1655599862487
3 1602.8262658267167
3 97.22717205038882
4 82.38502507488876
5 78.46941998567789
6 48.635709903507205
7 6.655066392654436


In [7]:
x = x [columns_to_be_kept]
# Apply StandardScaler
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)
x = x_scaled

In [8]:
# train test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [9]:
# Define the hyperparameter grid
param_grid = {
  'colsample_bytree': [0.1, 0.3, 0.5, 0.7, 0.9],
  'learning_rate'   : [0.001, 0.01, 0.1, 1],
  'max_depth'       : [3, 5, 8, 10],
  'alpha'           : [1, 10, 100],
  'n_estimators'    : [10,50,100]
}



In [10]:
index = 0

answers_grid = {
    'combination'       :[],
    'train_RMSE'        :[],
    'test_RMSE'         :[],
    'train_R2'          :[],
    'test_R2'           :[],
    'train_std_diff'    :[],
    'test_std_diff'     :[],
    'colsample_bytree'  :[],
    'learning_rate'     :[],
    'max_depth'         :[],
    'alpha'             :[],
    'n_estimators'      :[]

    }

In [11]:
# Loop through each combination of hyperparameters
for colsample_bytree in param_grid['colsample_bytree']:
  for learning_rate in param_grid['learning_rate']:
    for max_depth in param_grid['max_depth']:
      for alpha in param_grid['alpha']:
          for n_estimators in param_grid['n_estimators']:
             
              index = index + 1
             
              # Define and train the XGBoost model
              model = xgb.XGBRegressor(objective='reg:squarederror',
                                      colsample_bytree=colsample_bytree,
                                      learning_rate=learning_rate,
                                      max_depth=max_depth,
                                      alpha=alpha,
                                      n_estimators=n_estimators)
               
       
     


              model.fit(x_train, y_train)
       
              # Predict on training and testing sets
              y_pred_train = model.predict(x_train)
              y_pred_test = model.predict(x_test)
       
       
              # Calculate train and test results
              train_rmse = np.sqrt (mean_squared_error(y_train, y_pred_train))
              test_rmse= np.sqrt (mean_squared_error(y_test, y_pred_test))
              train_r2 = r2_score(y_train, y_pred_train)
              test_r2  = r2_score(y_test, y_pred_test)
              train_std_diff = train_rmse  / np.std(y_train)
              test_std_diff = test_rmse / np.std(y_test)
       
       
              # Include into the lists
              answers_grid ['combination']   .append(index)
              answers_grid ['train_RMSE']    .append(train_rmse)
              answers_grid ['test_RMSE']     .append(test_rmse)
              answers_grid ['train_R2']      .append(train_r2)
              answers_grid ['test_R2']       .append(test_r2)
              answers_grid ['train_std_diff'].append(train_std_diff)
              answers_grid ['test_std_diff'] .append(test_std_diff)
              answers_grid ['colsample_bytree']   .append(colsample_bytree)
              answers_grid ['learning_rate']      .append(learning_rate)
              answers_grid ['max_depth']          .append(max_depth)
              answers_grid ['alpha']              .append(alpha)
              answers_grid ['n_estimators']       .append(n_estimators)
       
       
              # Print results for this combination
              print(f"Combination {index}")
              print(f"colsample_bytree: {colsample_bytree}, learning_rate: {learning_rate}, max_depth: {max_depth}, alpha: {alpha}, n_estimators: {n_estimators}")
              print(f"Train RMSE: {train_rmse:.2f}")
              print(f"Test RMSE : {test_rmse:.2f}")
              print(f"Train R2  : {train_r2:.2f}")
              print(f"Test R2   : {test_r2:.2f}")
              print(f"Train std_diff: {train_std_diff:.2f}")
              print(f"Test std_diff : {test_std_diff:.2f}")
              print("-" * 30)

Combination 1
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 18.46
Test RMSE : 19.15
Train R2  : 0.01
Test R2   : -0.35
Train std_diff: 1.00
Test std_diff : 1.16
------------------------------
Combination 2
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 18.19
Test RMSE : 19.03
Train R2  : 0.03
Test R2   : -0.33
Train std_diff: 0.98
Test std_diff : 1.15
------------------------------
Combination 3
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 17.83
Test RMSE : 18.85
Train R2  : 0.07
Test R2   : -0.30
Train std_diff: 0.96
Test std_diff : 1.14
------------------------------
Combination 4
colsample_bytree: 0.1, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 18.47
Test RMSE : 19.15
Train R2  : 0.01
Test R2   : -0.35
Train std_diff: 1.00
Test std_diff : 1.16
------------------------------
Combination 5
colsampl

Combination 39
colsample_bytree: 0.1, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 12.88
Test RMSE : 16.79
Train R2  : 0.52
Test R2   : -0.03
Train std_diff: 0.70
Test std_diff : 1.02
------------------------------
Combination 40
colsample_bytree: 0.1, learning_rate: 0.01, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 18.03
Test RMSE : 19.07
Train R2  : 0.05
Test R2   : -0.34
Train std_diff: 0.97
Test std_diff : 1.16
------------------------------
Combination 41
colsample_bytree: 0.1, learning_rate: 0.01, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 15.88
Test RMSE : 18.25
Train R2  : 0.27
Test R2   : -0.22
Train std_diff: 0.86
Test std_diff : 1.11
------------------------------
Combination 42
colsample_bytree: 0.1, learning_rate: 0.01, max_depth: 3, alpha: 10, n_estimators: 100
Train RMSE: 13.42
Test RMSE : 17.11
Train R2  : 0.47
Test R2   : -0.07
Train std_diff: 0.72
Test std_diff : 1.04
------------------------------
Combination 43
cols

Combination 74
colsample_bytree: 0.1, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 4.68
Test RMSE : 16.31
Train R2  : 0.94
Test R2   : 0.02
Train std_diff: 0.25
Test std_diff : 0.99
------------------------------
Combination 75
colsample_bytree: 0.1, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 1.89
Test RMSE : 15.04
Train R2  : 0.99
Test R2   : 0.17
Train std_diff: 0.10
Test std_diff : 0.91
------------------------------
Combination 76
colsample_bytree: 0.1, learning_rate: 0.1, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 14.33
Test RMSE : 18.66
Train R2  : 0.40
Test R2   : -0.28
Train std_diff: 0.77
Test std_diff : 1.13
------------------------------
Combination 77
colsample_bytree: 0.1, learning_rate: 0.1, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 6.21
Test RMSE : 16.67
Train R2  : 0.89
Test R2   : -0.02
Train std_diff: 0.34
Test std_diff : 1.01
------------------------------
Combination 78
colsample_bytre

Combination 112
colsample_bytree: 0.1, learning_rate: 1, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 5.36
Test RMSE : 20.11
Train R2  : 0.92
Test R2   : -0.48
Train std_diff: 0.29
Test std_diff : 1.22
------------------------------
Combination 113
colsample_bytree: 0.1, learning_rate: 1, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 2.49
Test RMSE : 18.57
Train R2  : 0.98
Test R2   : -0.27
Train std_diff: 0.13
Test std_diff : 1.12
------------------------------
Combination 114
colsample_bytree: 0.1, learning_rate: 1, max_depth: 3, alpha: 10, n_estimators: 100
Train RMSE: 2.48
Test RMSE : 18.58
Train R2  : 0.98
Test R2   : -0.27
Train std_diff: 0.13
Test std_diff : 1.13
------------------------------
Combination 115
colsample_bytree: 0.1, learning_rate: 1, max_depth: 3, alpha: 100, n_estimators: 10
Train RMSE: 11.42
Test RMSE : 17.76
Train R2  : 0.62
Test R2   : -0.16
Train std_diff: 0.62
Test std_diff : 1.08
------------------------------
Combination 116
colsample_byt

Combination 147
colsample_bytree: 0.3, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 17.54
Test RMSE : 18.71
Train R2  : 0.10
Test R2   : -0.28
Train std_diff: 0.95
Test std_diff : 1.13
------------------------------
Combination 148
colsample_bytree: 0.3, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 18.44
Test RMSE : 19.14
Train R2  : 0.01
Test R2   : -0.34
Train std_diff: 1.00
Test std_diff : 1.16
------------------------------
Combination 149
colsample_bytree: 0.3, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 18.08
Test RMSE : 18.97
Train R2  : 0.05
Test R2   : -0.32
Train std_diff: 0.98
Test std_diff : 1.15
------------------------------
Combination 150
colsample_bytree: 0.3, learning_rate: 0.001, max_depth: 3, alpha: 10, n_estimators: 100
Train RMSE: 17.62
Test RMSE : 18.73
Train R2  : 0.09
Test R2   : -0.29
Train std_diff: 0.95
Test std_diff : 1.13
------------------------------
Combination

Combination 183
colsample_bytree: 0.3, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 11.16
Test RMSE : 15.95
Train R2  : 0.64
Test R2   : 0.07
Train std_diff: 0.60
Test std_diff : 0.97
------------------------------
Combination 184
colsample_bytree: 0.3, learning_rate: 0.01, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 17.71
Test RMSE : 18.93
Train R2  : 0.08
Test R2   : -0.32
Train std_diff: 0.96
Test std_diff : 1.15
------------------------------
Combination 185
colsample_bytree: 0.3, learning_rate: 0.01, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 14.68
Test RMSE : 17.65
Train R2  : 0.37
Test R2   : -0.14
Train std_diff: 0.79
Test std_diff : 1.07
------------------------------
Combination 186
colsample_bytree: 0.3, learning_rate: 0.01, max_depth: 3, alpha: 10, n_estimators: 100
Train RMSE: 11.76
Test RMSE : 16.23
Train R2  : 0.60
Test R2   : 0.03
Train std_diff: 0.64
Test std_diff : 0.98
------------------------------
Combination 187
c

Combination 219
colsample_bytree: 0.3, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 1.02
Test RMSE : 13.97
Train R2  : 1.00
Test R2   : 0.28
Train std_diff: 0.05
Test std_diff : 0.85
------------------------------
Combination 220
colsample_bytree: 0.3, learning_rate: 0.1, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 12.44
Test RMSE : 17.47
Train R2  : 0.55
Test R2   : -0.12
Train std_diff: 0.67
Test std_diff : 1.06
------------------------------
Combination 221
colsample_bytree: 0.3, learning_rate: 0.1, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 4.37
Test RMSE : 14.32
Train R2  : 0.94
Test R2   : 0.25
Train std_diff: 0.24
Test std_diff : 0.87
------------------------------
Combination 222
colsample_bytree: 0.3, learning_rate: 0.1, max_depth: 3, alpha: 10, n_estimators: 100
Train RMSE: 2.95
Test RMSE : 14.07
Train R2  : 0.97
Test R2   : 0.27
Train std_diff: 0.16
Test std_diff : 0.85
------------------------------
Combination 223
colsample

Combination 254
colsample_bytree: 0.3, learning_rate: 1, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 0.28
Test RMSE : 21.57
Train R2  : 1.00
Test R2   : -0.71
Train std_diff: 0.02
Test std_diff : 1.31
------------------------------
Combination 255
colsample_bytree: 0.3, learning_rate: 1, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 0.28
Test RMSE : 21.57
Train R2  : 1.00
Test R2   : -0.71
Train std_diff: 0.02
Test std_diff : 1.31
------------------------------
Combination 256
colsample_bytree: 0.3, learning_rate: 1, max_depth: 3, alpha: 10, n_estimators: 10
Train RMSE: 3.52
Test RMSE : 19.01
Train R2  : 0.96
Test R2   : -0.33
Train std_diff: 0.19
Test std_diff : 1.15
------------------------------
Combination 257
colsample_bytree: 0.3, learning_rate: 1, max_depth: 3, alpha: 10, n_estimators: 50
Train RMSE: 2.46
Test RMSE : 19.02
Train R2  : 0.98
Test R2   : -0.33
Train std_diff: 0.13
Test std_diff : 1.15
------------------------------
Combination 258
colsample_bytree:

Combination 288
colsample_bytree: 0.3, learning_rate: 1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.27
Test RMSE : 18.09
Train R2  : 0.69
Test R2   : -0.20
Train std_diff: 0.55
Test std_diff : 1.10
------------------------------
Combination 289
colsample_bytree: 0.5, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 18.39
Test RMSE : 19.08
Train R2  : 0.01
Test R2   : -0.34
Train std_diff: 0.99
Test std_diff : 1.16
------------------------------
Combination 290
colsample_bytree: 0.5, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 17.91
Test RMSE : 18.81
Train R2  : 0.06
Test R2   : -0.30
Train std_diff: 0.97
Test std_diff : 1.14
------------------------------
Combination 291
colsample_bytree: 0.5, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 17.32
Test RMSE : 18.44
Train R2  : 0.13
Test R2   : -0.25
Train std_diff: 0.94
Test std_diff : 1.12
------------------------------
Combination 292

Combination 324
colsample_bytree: 0.5, learning_rate: 0.001, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 17.93
Test RMSE : 18.92
Train R2  : 0.06
Test R2   : -0.31
Train std_diff: 0.97
Test std_diff : 1.15
------------------------------
Combination 325
colsample_bytree: 0.5, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 17.32
Test RMSE : 18.41
Train R2  : 0.13
Test R2   : -0.24
Train std_diff: 0.94
Test std_diff : 1.12
------------------------------
Combination 326
colsample_bytree: 0.5, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 13.42
Test RMSE : 16.28
Train R2  : 0.47
Test R2   : 0.03
Train std_diff: 0.72
Test std_diff : 0.99
------------------------------
Combination 327
colsample_bytree: 0.5, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 9.95
Test RMSE : 14.42
Train R2  : 0.71
Test R2   : 0.24
Train std_diff: 0.54
Test std_diff : 0.87
------------------------------
Combination 328
c

Combination 359
colsample_bytree: 0.5, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 16.04
Test RMSE : 18.20
Train R2  : 0.25
Test R2   : -0.22
Train std_diff: 0.87
Test std_diff : 1.10
------------------------------
Combination 360
colsample_bytree: 0.5, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 14.37
Test RMSE : 17.44
Train R2  : 0.40
Test R2   : -0.12
Train std_diff: 0.78
Test std_diff : 1.06
------------------------------
Combination 361
colsample_bytree: 0.5, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 9.99
Test RMSE : 14.27
Train R2  : 0.71
Test R2   : 0.25
Train std_diff: 0.54
Test std_diff : 0.86
------------------------------
Combination 362
colsample_bytree: 0.5, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 2.03
Test RMSE : 13.40
Train R2  : 0.99
Test R2   : 0.34
Train std_diff: 0.11
Test std_diff : 0.81
------------------------------
Combination 363
col

Combination 393
colsample_bytree: 0.5, learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 100
Train RMSE: 1.93
Test RMSE : 13.06
Train R2  : 0.99
Test R2   : 0.37
Train std_diff: 0.10
Test std_diff : 0.79
------------------------------
Combination 394
colsample_bytree: 0.5, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 10
Train RMSE: 14.33
Test RMSE : 17.64
Train R2  : 0.40
Test R2   : -0.14
Train std_diff: 0.77
Test std_diff : 1.07
------------------------------
Combination 395
colsample_bytree: 0.5, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 10.96
Test RMSE : 16.44
Train R2  : 0.65
Test R2   : 0.01
Train std_diff: 0.59
Test std_diff : 1.00
------------------------------
Combination 396
colsample_bytree: 0.5, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.73
Test RMSE : 16.35
Train R2  : 0.66
Test R2   : 0.02
Train std_diff: 0.58
Test std_diff : 0.99
------------------------------
Combination 397

Combination 432
colsample_bytree: 0.5, learning_rate: 1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.39
Test RMSE : 18.56
Train R2  : 0.69
Test R2   : -0.26
Train std_diff: 0.56
Test std_diff : 1.12
------------------------------
Combination 433
colsample_bytree: 0.7, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 18.39
Test RMSE : 19.06
Train R2  : 0.01
Test R2   : -0.33
Train std_diff: 0.99
Test std_diff : 1.15
------------------------------
Combination 434
colsample_bytree: 0.7, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 17.88
Test RMSE : 18.76
Train R2  : 0.07
Test R2   : -0.29
Train std_diff: 0.97
Test std_diff : 1.14
------------------------------
Combination 435
colsample_bytree: 0.7, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 17.27
Test RMSE : 18.34
Train R2  : 0.13
Test R2   : -0.23
Train std_diff: 0.93
Test std_diff : 1.11
------------------------------
Combination 436

Combination 468
colsample_bytree: 0.7, learning_rate: 0.001, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 17.90
Test RMSE : 18.91
Train R2  : 0.07
Test R2   : -0.31
Train std_diff: 0.97
Test std_diff : 1.15
------------------------------
Combination 469
colsample_bytree: 0.7, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 17.24
Test RMSE : 18.15
Train R2  : 0.13
Test R2   : -0.21
Train std_diff: 0.93
Test std_diff : 1.10
------------------------------
Combination 470
colsample_bytree: 0.7, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 13.24
Test RMSE : 15.87
Train R2  : 0.49
Test R2   : 0.08
Train std_diff: 0.72
Test std_diff : 0.96
------------------------------
Combination 471
colsample_bytree: 0.7, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 9.77
Test RMSE : 14.39
Train R2  : 0.72
Test R2   : 0.24
Train std_diff: 0.53
Test std_diff : 0.87
------------------------------
Combination 472
c

Combination 503
colsample_bytree: 0.7, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 15.92
Test RMSE : 18.14
Train R2  : 0.26
Test R2   : -0.21
Train std_diff: 0.86
Test std_diff : 1.10
------------------------------
Combination 504
colsample_bytree: 0.7, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 14.24
Test RMSE : 17.33
Train R2  : 0.41
Test R2   : -0.10
Train std_diff: 0.77
Test std_diff : 1.05
------------------------------
Combination 505
colsample_bytree: 0.7, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 9.65
Test RMSE : 12.85
Train R2  : 0.73
Test R2   : 0.39
Train std_diff: 0.52
Test std_diff : 0.78
------------------------------
Combination 506
colsample_bytree: 0.7, learning_rate: 0.1, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 2.07
Test RMSE : 12.16
Train R2  : 0.99
Test R2   : 0.46
Train std_diff: 0.11
Test std_diff : 0.74
------------------------------
Combination 507
col

Combination 540
colsample_bytree: 0.7, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.75
Test RMSE : 15.91
Train R2  : 0.66
Test R2   : 0.07
Train std_diff: 0.58
Test std_diff : 0.96
------------------------------
Combination 541
colsample_bytree: 0.7, learning_rate: 1, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 0.51
Test RMSE : 11.09
Train R2  : 1.00
Test R2   : 0.55
Train std_diff: 0.03
Test std_diff : 0.67
------------------------------
Combination 542
colsample_bytree: 0.7, learning_rate: 1, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 0.26
Test RMSE : 11.13
Train R2  : 1.00
Test R2   : 0.55
Train std_diff: 0.01
Test std_diff : 0.67
------------------------------
Combination 543
colsample_bytree: 0.7, learning_rate: 1, max_depth: 3, alpha: 1, n_estimators: 100
Train RMSE: 0.26
Test RMSE : 11.13
Train R2  : 1.00
Test R2   : 0.55
Train std_diff: 0.01
Test std_diff : 0.67
------------------------------
Combination 544
colsample_bytree

Combination 575
colsample_bytree: 0.7, learning_rate: 1, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 10.69
Test RMSE : 17.09
Train R2  : 0.67
Test R2   : -0.07
Train std_diff: 0.58
Test std_diff : 1.04
------------------------------
Combination 576
colsample_bytree: 0.7, learning_rate: 1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.69
Test RMSE : 17.09
Train R2  : 0.67
Test R2   : -0.07
Train std_diff: 0.58
Test std_diff : 1.04
------------------------------
Combination 577
colsample_bytree: 0.9, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 18.39
Test RMSE : 19.05
Train R2  : 0.01
Test R2   : -0.33
Train std_diff: 0.99
Test std_diff : 1.15
------------------------------
Combination 578
colsample_bytree: 0.9, learning_rate: 0.001, max_depth: 3, alpha: 1, n_estimators: 50
Train RMSE: 17.86
Test RMSE : 18.63
Train R2  : 0.07
Test R2   : -0.27
Train std_diff: 0.96
Test std_diff : 1.13
------------------------------
Combination 579
c

Combination 610
colsample_bytree: 0.9, learning_rate: 0.001, max_depth: 10, alpha: 100, n_estimators: 10
Train RMSE: 18.45
Test RMSE : 19.13
Train R2  : 0.01
Test R2   : -0.34
Train std_diff: 1.00
Test std_diff : 1.16
------------------------------
Combination 611
colsample_bytree: 0.9, learning_rate: 0.001, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 18.19
Test RMSE : 18.98
Train R2  : 0.04
Test R2   : -0.32
Train std_diff: 0.98
Test std_diff : 1.15
------------------------------
Combination 612
colsample_bytree: 0.9, learning_rate: 0.001, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 17.87
Test RMSE : 18.78
Train R2  : 0.07
Test R2   : -0.29
Train std_diff: 0.96
Test std_diff : 1.14
------------------------------
Combination 613
colsample_bytree: 0.9, learning_rate: 0.01, max_depth: 3, alpha: 1, n_estimators: 10
Train RMSE: 17.24
Test RMSE : 18.04
Train R2  : 0.13
Test R2   : -0.19
Train std_diff: 0.93
Test std_diff : 1.09
------------------------------
Combina

Combination 645
colsample_bytree: 0.9, learning_rate: 0.01, max_depth: 10, alpha: 10, n_estimators: 100
Train RMSE: 9.92
Test RMSE : 12.78
Train R2  : 0.71
Test R2   : 0.40
Train std_diff: 0.54
Test std_diff : 0.77
------------------------------
Combination 646
colsample_bytree: 0.9, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 10
Train RMSE: 17.87
Test RMSE : 18.75
Train R2  : 0.07
Test R2   : -0.29
Train std_diff: 0.96
Test std_diff : 1.14
------------------------------
Combination 647
colsample_bytree: 0.9, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 15.84
Test RMSE : 17.53
Train R2  : 0.27
Test R2   : -0.13
Train std_diff: 0.86
Test std_diff : 1.06
------------------------------
Combination 648
colsample_bytree: 0.9, learning_rate: 0.01, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 14.13
Test RMSE : 16.52
Train R2  : 0.42
Test R2   : -0.00
Train std_diff: 0.76
Test std_diff : 1.00
------------------------------
Combinati

Combination 681
colsample_bytree: 0.9, learning_rate: 0.1, max_depth: 10, alpha: 10, n_estimators: 100
Train RMSE: 1.98
Test RMSE : 10.49
Train R2  : 0.99
Test R2   : 0.60
Train std_diff: 0.11
Test std_diff : 0.64
------------------------------
Combination 682
colsample_bytree: 0.9, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 10
Train RMSE: 14.06
Test RMSE : 16.49
Train R2  : 0.42
Test R2   : 0.00
Train std_diff: 0.76
Test std_diff : 1.00
------------------------------
Combination 683
colsample_bytree: 0.9, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 10.80
Test RMSE : 15.01
Train R2  : 0.66
Test R2   : 0.17
Train std_diff: 0.58
Test std_diff : 0.91
------------------------------
Combination 684
colsample_bytree: 0.9, learning_rate: 0.1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.68
Test RMSE : 14.95
Train R2  : 0.67
Test R2   : 0.18
Train std_diff: 0.58
Test std_diff : 0.91
------------------------------
Combination 685


Combination 719
colsample_bytree: 0.9, learning_rate: 1, max_depth: 10, alpha: 100, n_estimators: 50
Train RMSE: 10.69
Test RMSE : 17.20
Train R2  : 0.67
Test R2   : -0.09
Train std_diff: 0.58
Test std_diff : 1.04
------------------------------
Combination 720
colsample_bytree: 0.9, learning_rate: 1, max_depth: 10, alpha: 100, n_estimators: 100
Train RMSE: 10.69
Test RMSE : 17.20
Train R2  : 0.67
Test R2   : -0.09
Train std_diff: 0.58
Test std_diff : 1.04
------------------------------


In [12]:
answers_grid_df = pd.DataFrame(answers_grid)
answers_grid_df .to_excel ('hyperparameter.xlsx', index=False)

In [13]:
# Getting best results with these-
# colsample_bytree  0.7
# learning_rate     1.0
# max_depth         8
# alpha             10.0
# n_estimators      10


# Retrain on the new parameters
model = xgb.XGBRegressor(objective='reg:squarederror',
                         colsample_bytree = 0.7,
                         learning_rate    = 1.0,
                         max_depth        = 8,
                         alpha            = 10.0,
                         n_estimators     = 10)
   

model.fit(x_train, y_train)

In [16]:
# Predicting on the testing set
y_pred = model.predict(x_test)

array([ 40.96468   ,  40.96468   ,  11.464754  ,   2.0101986 ,
         8.041091  ,  12.205851  ,  11.434292  ,  37.71998   ,
         0.81415385,   1.1230373 , -20.061218  ,   6.207073  ,
        10.324867  ,  11.144197  ,   0.776927  ,  -1.1843302 ,
        32.244133  ,   6.207073  ], dtype=float32)

In [17]:
# Calculate loss metrics
print()
r2 = r2_score(y_test, y_pred)
print('R-squared:', r2)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print('RMSE:', rmse)
stddev = y_test.std()
print('Stddev difference', rmse/stddev)



R-squared: 0.5836806714050802
RMSE: 10.649716662222037
Stddev difference 0.6270490228094715


In [18]:
# save the model
import pickle
filename = 'eps_v1.sav'
pickle.dump(model, open(filename,'wb'))


load_model = pickle.load(open(filename,'rb'))


arg = x_train[:2]
load_model.predict(arg)


array([ 30.832546, -21.062023], dtype=float32)

In [None]:
# ROCE (%)
# 1.91


# CASA (%)
# 39.47


# Return on Equity / Networth (%)
# 14.36


# Non-Interest Income/Total Assets (%)
# 0.68


# Operating Profit/Total Assets (%)
# 0.27


# Operating Expenses/Total Assets (%)
# 1.68

# Interest Expenses/Total Assets (%)
# 3.3


# Face_value
# 2



# Basic EPS (Rs.)
# 27.28

