In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_excel('/content/drive/MyDrive/ML Files/T5.xlsx')

# Features and target
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Split the data
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train_full_scaled = scaler.fit_transform(X_train_full)
X_test_scaled = scaler.transform(X_test)

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Initialize XGBoost Regressor
model = XGBRegressor(random_state=42)

# Set up K-Fold cross-validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error', n_jobs=-1)

# Fit the model with GridSearchCV
grid_search.fit(X_train_full_scaled, y_train_full)

# Get the best estimator
best_model = grid_search.best_estimator_
print(f"Best Parameters for XGBoost: {grid_search.best_params_}\n")

# Predict on both training and test sets with the best model
y_train_pred = best_model.predict(X_train_full_scaled)
y_test_pred = best_model.predict(X_test_scaled)

# Calculate metrics for the training set
mse_train = mean_squared_error(y_train_full, y_train_pred)
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(y_train_full, y_train_pred)
mape_train = mean_absolute_percentage_error(y_train_full, y_train_pred) * 100

# Print training set metrics
print("Training Set Metrics for XGBoost:")
print(f"MSE: {mse_train}")
print(f"RMSE: {rmse_train}")
print(f"R2: {r2_train}")
print(f"MAPE: {mape_train}\n")

# Calculate metrics for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test, y_test_pred)
mape_test = mean_absolute_percentage_error(y_test, y_test_pred) * 100

# Print test set metrics
print("Test Set Metrics for XGBoost:")
print(f"MSE: {mse_test}")
print(f"RMSE: {rmse_test}")
print(f"R2: {r2_test}")
print(f"MAPE: {mape_test}\n")


Best Parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}

Training Set Metrics for XGBoost:
MSE: 0.23099122560463164
RMSE: 0.4806154654238996
R2: 0.9369794076100573
MAPE: 4.035256257690388e+16

Test Set Metrics for XGBoost:
MSE: 1.6174613041649408
RMSE: 1.2717945212041688
R2: 0.5079876874613917
MAPE: 9.298420452181002e+16



In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_excel('/content/drive/MyDrive/ML Files/Codebert (1).xlsx')

# Features and target
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Split the data
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train_full_scaled = scaler.fit_transform(X_train_full)
X_test_scaled = scaler.transform(X_test)

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Initialize XGBoost Regressor
model = XGBRegressor(random_state=42)

# Set up K-Fold cross-validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error', n_jobs=-1)

# Fit the model with GridSearchCV
grid_search.fit(X_train_full_scaled, y_train_full)

# Get the best estimator
best_model = grid_search.best_estimator_
print(f"Best Parameters for XGBoost: {grid_search.best_params_}\n")

# Predict on both training and test sets with the best model
y_train_pred = best_model.predict(X_train_full_scaled)
y_test_pred = best_model.predict(X_test_scaled)

# Calculate metrics for the training set
mse_train = mean_squared_error(y_train_full, y_train_pred)
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(y_train_full, y_train_pred)
mape_train = mean_absolute_percentage_error(y_train_full, y_train_pred) * 100

# Print training set metrics
print("Training Set Metrics for XGBoost:")
print(f"MSE: {mse_train}")
print(f"RMSE: {rmse_train}")
print(f"R2: {r2_train}")
print(f"MAPE: {mape_train}\n")

# Calculate metrics for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test, y_test_pred)
mape_test = mean_absolute_percentage_error(y_test, y_test_pred) * 100

# Print test set metrics
print("Test Set Metrics for XGBoost:")
print(f"MSE: {mse_test}")
print(f"RMSE: {rmse_test}")
print(f"R2: {r2_test}")
print(f"MAPE: {mape_test}\n")


Best Parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 100}

Training Set Metrics for XGBoost:
MSE: 0.20578438022420217
RMSE: 0.4536346329638007
R2: 0.9438565100800678
MAPE: 3.369466534288435e+16

Test Set Metrics for XGBoost:
MSE: 1.6836982787154147
RMSE: 1.2975739973949134
R2: 0.48783919491926875
MAPE: 1.1198735097372502e+17



In [3]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV, KFold, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import pandas as pd
import numpy as np

# Load dataset
df = pd.read_excel('/content/drive/MyDrive/ML Files/Mathbert (1).xlsx')

# Features and target
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Split the data
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the data
scaler = StandardScaler()
X_train_full_scaled = scaler.fit_transform(X_train_full)
X_test_scaled = scaler.transform(X_test)

# Define parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7]
}

# Initialize XGBoost Regressor
model = XGBRegressor(random_state=42)

# Set up K-Fold cross-validation
kf = KFold(n_splits=10, shuffle=True, random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=kf, scoring='neg_mean_squared_error', n_jobs=-1)

# Fit the model with GridSearchCV
grid_search.fit(X_train_full_scaled, y_train_full)

# Get the best estimator
best_model = grid_search.best_estimator_
print(f"Best Parameters for XGBoost: {grid_search.best_params_}\n")

# Predict on both training and test sets with the best model
y_train_pred = best_model.predict(X_train_full_scaled)
y_test_pred = best_model.predict(X_test_scaled)

# Calculate metrics for the training set
mse_train = mean_squared_error(y_train_full, y_train_pred)
rmse_train = np.sqrt(mse_train)
r2_train = r2_score(y_train_full, y_train_pred)
mape_train = mean_absolute_percentage_error(y_train_full, y_train_pred) * 100

# Print training set metrics
print("Training Set Metrics for XGBoost:")
print(f"MSE: {mse_train}")
print(f"RMSE: {rmse_train}")
print(f"R2: {r2_train}")
print(f"MAPE: {mape_train}\n")

# Calculate metrics for the test set
mse_test = mean_squared_error(y_test, y_test_pred)
rmse_test = np.sqrt(mse_test)
r2_test = r2_score(y_test, y_test_pred)
mape_test = mean_absolute_percentage_error(y_test, y_test_pred) * 100

# Print test set metrics
print("Test Set Metrics for XGBoost:")
print(f"MSE: {mse_test}")
print(f"RMSE: {rmse_test}")
print(f"R2: {r2_test}")
print(f"MAPE: {mape_test}\n")


Best Parameters for XGBoost: {'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 50}

Training Set Metrics for XGBoost:
MSE: 0.134064343108958
RMSE: 0.3661479797963632
R2: 0.9634236568987421
MAPE: 2.185096309192575e+16

Test Set Metrics for XGBoost:
MSE: 1.57253205295462
RMSE: 1.254006400683274
R2: 0.52165462634376
MAPE: 1.071031959859057e+17

