In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import LinearRegression  # Linear Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate ordinary least squares on the 'Mathbert (1).xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the hyperparameters.

# Load the dataset
df = pd.read_excel('/content/Mathbert (1).xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
# NOTE(review): the previously recorded run of this cell shows test MSE around
# 1e21 on most folds, which suggests an ill-conditioned unregularised fit —
# presumably more (or collinear) features than the data supports; confirm.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__fit_intercept': [True, False],
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [2.1335538936632668e+21, 1.6289543078705874e+21, 1.7440403803866797e+21, 1.707800197156848e+20, 1.6508843770056448e+22, 3.012121355622338e+22, 2.9767889186469194e+21, 3.017925700901727e+21, 5703.806514665222, 1.1955093285719146e+22]
Average MSE: 7.025719383318385e+21
Standard Deviation of MSE: 9.259545833802157e+21

RMSE scores for each fold: [46190409109.07011, 40360306092.37977, 41761709500.29081, 13068282967.386526, 128486745503.40376, 173554641413.6579, 54559957099.02015, 54935650545.9044, 75.52354940457461, 109339349210.24153]
Average RMSE: 66225705151.68785
Standard Deviation of RMSE: 51379717403.661224

R2 scores for each fold: [-7.369171458506222e+20, -4.645961230194282e+20, -4.627446684371251e+20, -4.749108995783087e+19, -4.656248427854876e+21, -8.026485399070811e+21, -7.910068362557686e+20, -8.172746589798738e+20, -1548.394511137135, -3.7054329853779546e+21]
Average R2: -1.9708197334804285e+21
Standard Deviation of R2: 2.5152129298758364e+21

MAPE sc

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import LinearRegression  # Linear Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate ordinary least squares on the 'Codebert (1).xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the hyperparameters.

# Load the dataset
df = pd.read_excel('/content/Codebert (1).xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
# NOTE(review): the previously recorded run of this cell shows test MSE around
# 1e21 on most folds, which suggests an ill-conditioned unregularised fit —
# presumably more (or collinear) features than the data supports; confirm.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__fit_intercept': [True, False],
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [1.0058476636026123e+22, 1.369224058808431e+22, 4.351227002550909e+20, 4.3183031926516613e+21, 1.314302500463134e+20, 6.588523095780071e+21, 5.887513092867056e+22, 3.0969007562500053e+20, 5845.0405370390845, 2.6323759687522383e+21]
Average MSE: 9.704129343589137e+21
Standard Deviation of MSE: 1.6977981267401884e+22

RMSE scores for each fold: [100291956985.72305, 117013847847.52747, 20859594920.685562, 65713797582.027336, 11464303295.286346, 81169717849.5778, 242641980969.22665, 17598013399.955135, 76.4528648059645, 51306685419.66279]
Average RMSE: 70805989834.6125
Standard Deviation of RMSE: 68488255541.588196

R2 scores for each fold: [-3.474139517281581e+21, -3.9051812944888353e+21, -1.1545071543261294e+20, -1.2008484700307904e+21, -3.7069337118612005e+19, -1.7556624779081453e+21, -1.5644586271572199e+22, -8.386616372637976e+19, -1586.7596307268773, -8.158943231486499e+20]
Average R2: -2.70326985707078e+21
Standard Deviation of R2: 4.5189411879695454e+21

M

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import LinearRegression  # Linear Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate ordinary least squares on the 'T5.xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the hyperparameters.

# Load the dataset
df = pd.read_excel('/content/T5.xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
# NOTE(review): the previously recorded run of this cell shows test MSE in the
# thousands on most folds and ~1e21 on one, which suggests an ill-conditioned
# unregularised fit — presumably more (or collinear) features than the data
# supports; confirm.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', LinearRegression()),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__fit_intercept': [True, False],
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [2511.498195224288, 16.351753178216207, 2848.6732981153637, 3043.8381361495017, 2856.648144986097, 6532.594618697772, 3.8984832273748585e+21, 3943.3516041935195, 3668.1513297303377, 2528.0685137553687]
Average MSE: 3.8984832273748584e+20
Standard Deviation of MSE: 1.1695449682124575e+21

RMSE scores for each fold: [50.11485004691013, 4.043730106994803, 53.37296411213606, 55.170989987034865, 53.447620573661624, 80.82446794565227, 62437834903.00459, 62.79611137796288, 60.56526504301238, 50.27990168800421]
Average RMSE: 6243783537.362049
Standard Deviation of RMSE: 18731350455.214184

R2 scores for each fold: [-866.4569165234186, -3.6637042515334963, -754.8359288274156, -845.4408833164924, -804.705330990638, -1739.7590576373977, -1.0359239328543501e+21, -1066.8862427236213, -995.4246721363671, -782.5646478270927]
Average R2: -1.0359239328543502e+20
Standard Deviation of R2: 3.1077717985630506e+20

MAPE scores for each fold: [2.2211852299116692e+18, 3.874400106591

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Ridge  # Ridge Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate ridge regression on the 'Mathbert (1).xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the regularisation strength and solver.

# Load the dataset
df = pd.read_excel('/content/Mathbert (1).xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
# random_state is fixed because the grid below includes the stochastic 'sag'
# and 'saga' solvers; without it repeated runs can select different models.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Ridge(random_state=42)),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__alpha': [0.1, 1, 10, 100],  # Regularization strength
    'model__solver': ['auto', 'svd', 'cholesky', 'sag', 'saga', 'lsqr'],  # Solvers available
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [1.5948030053379014, 1.0728937824041376, 1.8905440117104833, 1.273474515382964, 1.4507405695442506, 1.1613524131198625, 1.9407540452259318, 1.6941744384820645, 1.380080691506213, 1.3947029525095713]
Average MSE: 1.4853520425223379
Standard Deviation of MSE: 0.2770640276005913

RMSE scores for each fold: [1.262855100689664, 1.0358058613486107, 1.3749705493975073, 1.1284832809496843, 1.204466923391527, 1.077660620566541, 1.393109487881671, 1.3016045630229116, 1.174768356530858, 1.1809754241767993]
Average RMSE: 1.2134700167955772
Standard Deviation of RMSE: 0.11332502309939745

R2 scores for each fold: [0.44916428763383953, 0.6939986043143129, 0.4983836545855693, 0.6458678663362226, 0.5908248578608599, 0.6905311212085514, 0.4842933915641753, 0.5412061217911555, 0.6251118541892593, 0.5677174404647807]
Average R2: 0.5787099199948725
Standard Deviation of R2: 0.08141363083138921

MAPE scores for each fold: [1.0300620970402534e+17, 8.379299356504226e+16, 2.023746846

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Ridge  # Ridge Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate ridge regression on the 'Codebert (1).xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the regularisation strength and solver.

# Load the dataset
df = pd.read_excel('/content/Codebert (1).xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
# random_state is fixed because the grid below includes the stochastic 'sag'
# and 'saga' solvers; without it repeated runs can select different models.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Ridge(random_state=42)),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__alpha': [0.1, 1, 10, 100],  # Regularization strength
    'model__solver': ['auto', 'svd', 'cholesky', 'sag', 'saga', 'lsqr'],  # Solvers available
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [1.7313809065367876, 1.6386384955851652, 1.53097376695836, 1.6349677086945367, 1.7524291497546658, 2.192598227988427, 2.227679931106646, 1.60100973410976, 1.8541733322747096, 1.447107428455751]
Average MSE: 1.7610958681464808
Standard Deviation of MSE: 0.24931768737177631

RMSE scores for each fold: [1.315819480983918, 1.280093158947881, 1.2373252470382878, 1.278658558292454, 1.3237934694485638, 1.4807424583594633, 1.4925414336314573, 1.2653101335679566, 1.361680334100008, 1.2029577833223206]
Average RMSE: 1.3238922057692313
Standard Deviation of RMSE: 0.09168039948626347

R2 scores for each fold: [0.40199107235360887, 0.5326418375267661, 0.5937881048258706, 0.5453426070507144, 0.5057348905153327, 0.41573211749493, 0.4080500490118417, 0.5664357528493641, 0.49632828947881924, 0.5514748843331294]
Average R2: 0.5017519605440377
Standard Deviation of R2: 0.06645172633482346

MAPE scores for each fold: [1.0836194158690565e+17, 1.0186165405845506e+17, 1.629968820162

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Ridge  # Ridge Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate ridge regression on the 'T5.xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the regularisation strength and solver.

# Load the dataset
df = pd.read_excel('/content/T5.xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
# random_state is fixed because the grid below includes the stochastic 'sag'
# and 'saga' solvers; without it repeated runs can select different models.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Ridge(random_state=42)),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__alpha': [0.1, 1, 10, 100],  # Regularization strength
    'model__solver': ['auto', 'svd', 'cholesky', 'sag', 'saga', 'lsqr'],  # Solvers available
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [1.5136529810814119, 1.519468880564102, 1.574817120958732, 1.65673513495066, 1.1232572434461288, 1.190392527016378, 2.275069419942024, 1.9343041855767225, 1.4458455132049817, 2.2244196781488093]
Average MSE: 1.6457962684889949
Standard Deviation of MSE: 0.37054346952514605

RMSE scores for each fold: [1.2303060517941915, 1.232667384400229, 1.254917176931901, 1.2871422357108246, 1.0598383100483435, 1.0910511110925913, 1.508333325210984, 1.3907926465065605, 1.2024331637163796, 1.4914488520055957]
Average RMSE: 1.27489302574176
Standard Deviation of RMSE: 0.1429826612006327

R2 scores for each fold: [0.4771930355545795, 0.566630354486999, 0.5821551870687856, 0.5392894469667595, 0.6831901224143236, 0.6827927195089435, 0.39545748344542286, 0.476177364751487, 0.6072473537886939, 0.3105500850769317]
Average R2: 0.5320683153062926
Standard Deviation of R2: 0.11350367784267335

MAPE scores for each fold: [9.080190737747763e+16, 9.756075923660677e+16, 1.5715506101840765

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Lasso  # Lasso Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate lasso regression on the 'Mathbert (1).xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the regularisation strength.

# Load the dataset
df = pd.read_excel('/content/Mathbert (1).xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Lasso()),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__alpha': [0.1, 1, 10, 100],  # Regularization strength
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [1.7121669357817164, 1.5005683240043262, 2.034866542827537, 1.7650724116120835, 1.6825353226954642, 1.7633196766629846, 1.9285196123378219, 1.7365274118065883, 1.809733427813069, 1.731735597332945]
Average MSE: 1.766504526287454
Standard Deviation of MSE: 0.13527045596430534

RMSE scores for each fold: [1.3084979693456602, 1.2249768667221133, 1.4264874842870292, 1.3285602777488432, 1.2971257929343107, 1.3279004769420728, 1.388711493557183, 1.3177736572744911, 1.3452633302863306, 1.3159542535107156]
Average RMSE: 1.328125160260875
Standard Deviation of RMSE: 0.050873224484779093

R2 scores for each fold: [0.40862746646173875, 0.5720210061818647, 0.46009068696806465, 0.5091626478231619, 0.5254481440232162, 0.530123193336429, 0.4875443846028775, 0.5297366506175473, 0.5084000425916377, 0.46325553042937284]
Average R2: 0.499440975303591
Standard Deviation of R2: 0.04384596883157975

MAPE scores for each fold: [1.2429256371834795e+17, 1.127759327065085e+17, 2.041439

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Lasso  # Lasso Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Purpose: evaluate lasso regression on the 'Codebert (1).xlsx' table.
# A 10-fold outer CV measures test error; inside every outer training split a
# 5-fold grid search picks the regularisation strength.

# Load the dataset
df = pd.read_excel('/content/Codebert (1).xlsx')

# Every column except 'putting marks' is a feature; 'putting marks' is the target.
# NumPy arrays are used because KFold yields row positions, not index labels,
# so positional indexing must not depend on the DataFrame's index.
X = df.drop(columns=['putting marks']).to_numpy()
y = df['putting marks'].to_numpy()

# The scaler is a pipeline step so it is fitted only on the rows a model is
# trained on. Fitting it on the whole table before splitting would leak
# test-fold statistics into training.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('model', Lasso()),
])

# Hyperparameter grid; keys carry the pipeline step name as a prefix
param_grid = {
    'model__alpha': [0.1, 1, 10, 100],  # Regularization strength
}

# Inner search: 5-fold CV on each outer training split, scored by negative MSE
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer evaluation: shuffled k-fold with a fixed seed for reproducible splits
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    # Tune on the training split only; the winner is refitted on all of it
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict the held-out fold (the pipeline applies the train-fitted scaler)
    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by max(|y_true|, eps), so a single zero target
    # inflates the score to ~1e15 and above. Restrict it to non-zero targets;
    # a fold without any yields NaN and is ignored in the summary below.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The search is re-run in every outer fold, so report each fold's winner
# rather than only the parameters left over from the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")


MSE scores for each fold: [1.8996775495798606, 1.6910776502940545, 2.1081447842944976, 1.6868595559459127, 1.5577950550031772, 1.848584030838457, 2.696501322834874, 1.7533283669104727, 1.957540752811732, 1.6467537662339193]
Average MSE: 1.8846262834746959
Standard Deviation of MSE: 0.31195473795893797

RMSE scores for each fold: [1.3782879051852195, 1.300414414828617, 1.4519451726199917, 1.2987915752521313, 1.2481166031277595, 1.3596264306192554, 1.6421027138504076, 1.3241330623885474, 1.399121421754285, 1.2832590409710423]
Average RMSE: 1.3685798340597257
Standard Deviation of RMSE: 0.107775327787715

R2 scores for each fold: [0.343862387525966, 0.5176855997400759, 0.440647836944279, 0.5309123453023209, 0.5606306003734238, 0.5074025585063215, 0.28347254755818296, 0.5251868385234455, 0.468249336660437, 0.48959530650525684]
Average R2: 0.4667645357639709
Standard Deviation of R2: 0.08395818621668144

MAPE scores for each fold: [1.3168107357602058e+17, 1.228482466981669e+17, 2.2763076702

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Lasso  # Lasso Regression model
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/T5.xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Standardisation is a pipeline step instead of a one-off transform of the whole
# dataset: the scaler is re-fitted on the training part of every outer and inner
# split, so no statistics from held-out rows leak into training.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('lasso', Lasso()),
])

# Hyperparameter grid (keys carry the pipeline step name as prefix)
param_grid = {
    'lasso__alpha': [0.1, 1, 10, 100],  # Regularization strength
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


MSE scores for each fold: [1.6933376671047107, 1.6003626320037734, 1.8271598126988076, 1.7527815823318385, 1.6104937882448804, 1.5220462745612406, 2.5173218340718186, 1.715804967002175, 1.5835635532741985, 2.0816597936186056]
Average MSE: 1.7904531904912049
Standard Deviation of MSE: 0.28535504559116587

RMSE scores for each fold: [1.301283084922228, 1.2650543988318341, 1.3517247547850886, 1.323926577394622, 1.269052318954928, 1.2337123953990414, 1.5866070194196855, 1.3098873871452366, 1.2583972160149586, 1.4427958253400257]
Average RMSE: 1.334244097820765
Standard Deviation of RMSE: 0.10122192411556939

R2 scores for each fold: [0.4151309867023686, 0.5435585450975153, 0.5152013272069496, 0.5125805235206599, 0.5457671491696703, 0.5944160026397138, 0.3310849931839259, 0.5353484286032333, 0.5698373233433796, 0.35479793597064746]
Average R2: 0.49177232154380635
Standard Deviation of R2: 0.08692766810536952

MAPE scores for each fold: [8.949668823381803e+16, 1.1034472835848608e+17, 1.83207

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/Mathbert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Scale -> polynomial expansion -> re-scale -> Lasso, all inside one pipeline.
# The initial scaling is a pipeline step instead of a one-off transform of the
# whole dataset, so it is re-fitted on the training part of every outer and inner
# split and no statistics from held-out rows leak into training.
pipeline = Pipeline([
    ('input_scaler', StandardScaler()),  # Standardize the raw features
    ('poly', PolynomialFeatures()),  # To generate polynomial features
    ('scaler', StandardScaler()),  # Standardize features again after polynomial transformation
    ('lasso', Lasso(max_iter=10000))  # Lasso Regression
])

# Define the hyperparameter grid
param_grid = {
    'poly__degree': [2],  # Degree of the polynomial features
    'lasso__alpha': [0.1, 1, 10, 100],  # Regularization strength for Lasso
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


MSE scores for each fold: [1.2804018578244376, 1.5296424301054847, 1.5997239676778554, 1.491022539942742, 1.3045981498411625, 1.5946382551730958, 1.896714077926521, 1.4687554535890843, 1.2503750782939878, 1.2513676622171312]
Average MSE: 1.4667239472591502
Standard Deviation of MSE: 0.1943403535010725

RMSE scores for each fold: [1.1315484337068553, 1.2367871401763058, 1.2648019480052421, 1.2210743384179121, 1.1421900673010437, 1.2627898697618285, 1.3772124302105762, 1.2119222143310537, 1.1182017162810955, 1.1186454586763097]
Average RMSE: 1.2085173616868223
Standard Deviation of RMSE: 0.07880186394161424

R2 scores for each fold: [0.5577566212823604, 0.5637287435262526, 0.575546675788617, 0.5853713701903802, 0.6320436991960509, 0.5750722112156693, 0.4959959059695799, 0.6022511050891055, 0.6603453714304194, 0.6121436360556403]
Average R2: 0.5860255339744075
Standard Deviation of R2: 0.04268296624010193

MAPE scores for each fold: [1.0274929203011469e+17, 1.2033232878846115e+17, 1.89475

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/Codebert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Scale -> polynomial expansion -> re-scale -> Lasso, all inside one pipeline.
# The initial scaling is a pipeline step instead of a one-off transform of the
# whole dataset, so it is re-fitted on the training part of every outer and inner
# split and no statistics from held-out rows leak into training.
pipeline = Pipeline([
    ('input_scaler', StandardScaler()),  # Standardize the raw features
    ('poly', PolynomialFeatures()),  # To generate polynomial features
    ('scaler', StandardScaler()),  # Standardize features again after polynomial transformation
    ('lasso', Lasso(max_iter=10000))  # Lasso Regression
])

# Define the hyperparameter grid
param_grid = {
    'poly__degree': [2],  # Degree of the polynomial features
    'lasso__alpha': [0.1, 1, 10, 100],  # Regularization strength for Lasso
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


MSE scores for each fold: [1.754400066030498, 1.4552423056842894, 2.094612962951398, 1.5866860729878585, 1.4567664346640636, 2.153989334477702, 2.4167941091937917, 1.6927378627704734, 1.9582466193570442, 1.5039765040169943]
Average MSE: 1.8073452272134112
Standard Deviation of MSE: 0.31629528905192567

RMSE scores for each fold: [1.3245376801097422, 1.2063342429377892, 1.4472777767074978, 1.2596372783416099, 1.2069657968078729, 1.4676475511776328, 1.5546041647936595, 1.3010525980030452, 1.3993736525163836, 1.2263671978722337]
Average RMSE: 1.3393797939267464
Standard Deviation of RMSE: 0.11578857816795605

R2 scores for each fold: [0.39404038811527464, 0.5849484973224386, 0.4442382229532169, 0.5587689288679526, 0.5891252885038898, 0.42602033909860737, 0.3577977093975777, 0.5415951561946329, 0.4680575935239736, 0.5338485435429408]
Average R2: 0.48984406675205056
Standard Deviation of R2: 0.07842793922277437

MAPE scores for each fold: [1.277373967233513e+17, 1.1094258727044672e+17, 2.18

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/T5.xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Scale -> polynomial expansion -> re-scale -> Lasso, all inside one pipeline.
# The initial scaling is a pipeline step instead of a one-off transform of the
# whole dataset, so it is re-fitted on the training part of every outer and inner
# split and no statistics from held-out rows leak into training.
pipeline = Pipeline([
    ('input_scaler', StandardScaler()),  # Standardize the raw features
    ('poly', PolynomialFeatures()),  # To generate polynomial features
    ('scaler', StandardScaler()),  # Standardize features again after polynomial transformation
    ('lasso', Lasso(max_iter=10000))  # Lasso Regression
])

# Define the hyperparameter grid
param_grid = {
    'poly__degree': [2],  # Degree of the polynomial features
    'lasso__alpha': [0.1, 1, 10, 100],  # Regularization strength for Lasso
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


MSE scores for each fold: [1.3650368944587932, 1.3589944035469403, 1.9087827929990908, 1.6153230926641675, 1.0319274547949806, 0.8828111348165062, 2.0189694761117742, 1.4781682967052763, 1.2700458170431435, 1.7694628063515476]
Average MSE: 1.469952216949222
Standard Deviation of MSE: 0.3470995226921796

RMSE scores for each fold: [1.1683479338188574, 1.1657591533189609, 1.3815870558886583, 1.270953615465241, 1.0158383015002834, 0.9395802971627844, 1.4209044570666158, 1.2157994475674334, 1.126963094800865, 1.330211564508273]
Average RMSE: 1.2035944921097974
Standard Deviation of RMSE: 0.1459880663348248

R2 scores for each fold: [0.5285241702902432, 0.6123994834954148, 0.49354437511990545, 0.5508054488318459, 0.708949296754234, 0.7647548074211704, 0.4635096066797858, 0.5997020436110452, 0.6550019688150621, 0.4515621340331464]
Average R2: 0.5828753335051854
Standard Deviation of R2: 0.09939101168414814

MAPE scores for each fold: [7.386656256622578e+16, 1.0165555617003302e+17, 1.73319257

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/Mathbert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Standardisation is a pipeline step instead of a one-off transform of the whole
# dataset: the scaler is re-fitted on the training part of every outer and inner
# split, so no statistics from held-out rows leak into training.
# max_iter is raised from the default so coordinate descent has room to converge,
# matching the Lasso pipelines elsewhere in this notebook.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('elasticnet', ElasticNet(max_iter=10000)),
])

# Hyperparameter grid (keys carry the pipeline step name as prefix)
param_grid = {
    'elasticnet__alpha': [0.1, 1, 10, 100],  # Regularization strength
    'elasticnet__l1_ratio': [0.1, 0.5, 0.7, 1.0]  # Ratio of L1 regularization (Lasso) to L2 (Ridge)
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


MSE scores for each fold: [1.5557377846084777, 1.123279707038237, 1.777343858060546, 1.5560648457519872, 1.5768257335565101, 1.6090417228198455, 1.8074762773167878, 1.7117199408575994, 1.6353470381624446, 1.5456520252758454]
Average MSE: 1.589848893344828
Standard Deviation of MSE: 0.17942177660890032

RMSE scores for each fold: [1.2472921809297441, 1.0598489076459139, 1.333170603508998, 1.247423282511589, 1.2557172187863437, 1.26848008373007, 1.3444241433851103, 1.3083271536040209, 1.2788068807143809, 1.2432425448301894]
Average RMSE: 1.2586732999646362
Standard Deviation of RMSE: 0.07476909322013656

R2 scores for each fold: [0.4626571883978897, 0.6796279708799394, 0.5284189497294569, 0.5672841841050638, 0.5552630792841704, 0.5712340782484556, 0.5197086085708826, 0.5364546812681794, 0.5557707439376596, 0.5209313836793841]
Average R2: 0.5497350868101083
Standard Deviation of R2: 0.05258695333564081

MAPE scores for each fold: [1.064946437672908e+17, 9.383562564211392e+16, 1.8532578076

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/Codebert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Standardisation is a pipeline step instead of a one-off transform of the whole
# dataset: the scaler is re-fitted on the training part of every outer and inner
# split, so no statistics from held-out rows leak into training.
# max_iter is raised from the default so coordinate descent has room to converge,
# matching the Lasso pipelines elsewhere in this notebook.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('elasticnet', ElasticNet(max_iter=10000)),
])

# Hyperparameter grid (keys carry the pipeline step name as prefix)
param_grid = {
    'elasticnet__alpha': [0.1, 1, 10, 100],  # Regularization strength
    'elasticnet__l1_ratio': [0.1, 0.5, 0.7, 1.0]  # Ratio of L1 regularization (Lasso) to L2 (Ridge)
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


MSE scores for each fold: [1.7965654758494847, 1.605698791732396, 1.8356610371255728, 1.658277004697856, 1.6859786967420531, 1.639871161488429, 2.26197540012468, 1.6033385700876503, 1.7849209790133849, 1.449970431615225]
Average MSE: 1.7322257548476732
Standard Deviation of MSE: 0.20656126351999732

RMSE scores for each fold: [1.340360203769675, 1.2671617070178518, 1.3548656896997475, 1.2877410472210071, 1.2984524237499244, 1.280574543511009, 1.5039865026404593, 1.2662300620691527, 1.3360093484004463, 1.2041471802131272]
Average RMSE: 1.31395287082924
Standard Deviation of RMSE: 0.07585254173244123

R2 scores for each fold: [0.37947670001266054, 0.5420366122171687, 0.5129457049616659, 0.5388606785722526, 0.5244769551735146, 0.5630188700904892, 0.3989368901056356, 0.5658050883405985, 0.5151401506018336, 0.5505875080416345]
Average R2: 0.5091285158117453
Standard Deviation of R2: 0.062482213067819224

MAPE scores for each fold: [1.073844491090978e+17, 1.0187403941563606e+17, 1.9740853882

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/T5.xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Standardisation is a pipeline step instead of a one-off transform of the whole
# dataset: the scaler is re-fitted on the training part of every outer and inner
# split, so no statistics from held-out rows leak into training.
# max_iter is raised from the default so coordinate descent has room to converge,
# matching the Lasso pipelines elsewhere in this notebook.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('elasticnet', ElasticNet(max_iter=10000)),
])

# Hyperparameter grid (keys carry the pipeline step name as prefix)
param_grid = {
    'elasticnet__alpha': [0.1, 1, 10, 100],  # Regularization strength
    'elasticnet__l1_ratio': [0.1, 0.5, 0.7, 1.0]  # Ratio of L1 regularization (Lasso) to L2 (Ridge)
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(


MSE scores for each fold: [1.6338947004525242, 1.5031533312922383, 1.5419652914473927, 1.7299949615035244, 1.4378819167283547, 1.3618252721265898, 2.2190265139270973, 1.8477812078321518, 1.3944831394908477, 1.9588676092247652]
Average MSE: 1.6628873944025486
Standard Deviation of MSE: 0.2626534919526497

RMSE scores for each fold: [1.2782389058593562, 1.226031537641768, 1.2417589506210103, 1.3152927284462286, 1.1991171405364676, 1.1669726955360138, 1.489639726218087, 1.3593311619440467, 1.1808823563297268, 1.399595516292034]
Average RMSE: 1.2856860719424739
Standard Deviation of RMSE: 0.09949230530890535

R2 scores for each fold: [0.4356622427706236, 0.5712837329765095, 0.590871733500702, 0.5189171047050046, 0.5944515856191627, 0.6371105486036664, 0.4103494771315579, 0.4996083713892544, 0.6211995416315041, 0.392856878676102]
Average R2: 0.5272311217004086
Standard Deviation of R2: 0.08505742650011869

MAPE scores for each fold: [8.918360795568531e+16, 1.0354317176378603e+17, 1.51649506

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.svm import SVR  # Support Vector Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import numpy as np

# Load the dataset. Every column except 'putting marks' is used as a feature and
# 'putting marks' is the regression target.
df = pd.read_excel('/content/Mathbert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Standardisation is a pipeline step instead of a one-off transform of the whole
# dataset: the scaler is re-fitted on the training part of every outer and inner
# split, so no statistics from held-out rows leak into training.
model = Pipeline([
    ('scaler', StandardScaler()),
    ('svr', SVR()),
])

# Hyperparameter grid (keys carry the pipeline step name as prefix)
param_grid = {
    'svr__C': [0.1, 1, 10, 100],  # Regularization parameter
    'svr__epsilon': [0.01, 0.1, 0.2, 0.5],  # Epsilon-tube within which no penalty is associated
    'svr__kernel': ['linear', 'rbf', 'poly'],  # Different kernel types
}

# Inner 5-fold grid search: model selection inside each outer training fold
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer k-fold cross-validation for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so rows are selected with .iloc; plain
    # y[...] is label-based and only works while the index happens to be 0..n-1.
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Tune on the training fold only and keep the refitted best pipeline
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the held-out fold
    y_pred = best_model.predict(X_test)
    y_true = y_test.to_numpy()

    # Calculate metrics
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # MAPE divides by |y_true|; sklearn clamps a zero denominator to machine
    # epsilon, which makes the score explode when a target is exactly 0.
    # The percentage error is therefore computed over non-zero targets only.
    nonzero = y_true != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_true[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan  # undefined for a fold whose targets are all zero

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregates: folds with an undefined MAPE are ignored
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# The grid search is re-run in every outer fold, so report each fold's winner;
# grid_search.best_params_ on its own only reflects the last fold.
print(f"Best Parameters for each fold: {best_params_per_fold}")
print(f"Best Parameters: {grid_search.best_params_}")


MSE scores for each fold: [1.2057107650462682, 0.9955305937983354, 1.5309943799033179, 1.106768544749199, 1.0304171237538178, 1.109477713619973, 1.7455890827562617, 1.3445193265227555, 1.2383414278285338, 1.24593766332796]
Average MSE: 1.255328662130642
Standard Deviation of MSE: 0.22103469848934135

RMSE scores for each fold: [1.0980486168864603, 0.9977627943546179, 1.2373335766491258, 1.0520306767148946, 1.0150946378312802, 1.0533174799745673, 1.3212074336591744, 1.159534098904709, 1.112807902482964, 1.1162157781217572]
Average RMSE: 1.116335299557955
Standard Deviation of RMSE: 0.09552047472397227

R2 scores for each fold: [0.5835544917153628, 0.7160634574025874, 0.59378263561169, 0.6922260308396346, 0.7093752791327643, 0.7043543198436218, 0.536153575047148, 0.6358950872291484, 0.663614218634857, 0.6138266423286683]
Average R2: 0.6448845737785482
Standard Deviation of R2: 0.05881732016250384

MAPE scores for each fold: [8.076177921747566e+16, 8.163117981279902e+16, 1.765887945837163

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.svm import SVR  # Support Vector Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the CodeBERT embedding table. Every column except 'putting marks' is
# used as a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/Codebert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = SVR()
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the SVR step.
param_grid = {
    'model__C': [0.1, 1, 10, 100],               # Regularization parameter
    'model__epsilon': [0.01, 0.1, 0.2, 0.5],     # Epsilon-tube within which no penalty is associated
    'model__kernel': ['linear', 'rbf', 'poly'],  # Different kernel types
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


MSE scores for each fold: [1.6380689573009672, 1.2277707980582193, 1.6464301574120708, 1.291348176432629, 1.2111286242741406, 1.5190642205500766, 1.9935143648053721, 1.7090048058964435, 1.6720111286661228, 1.3905007461965906]
Average MSE: 1.5298841979592632
Standard Deviation of MSE: 0.23703290178908779

RMSE scores for each fold: [1.2798706799130009, 1.1080481930215036, 1.2831329461174594, 1.136375015755199, 1.1005128914620403, 1.2325032334846333, 1.4119186820795921, 1.3072891057055602, 1.2930626932465892, 1.1791949568229125]
Average RMSE: 1.2331908397608493
Standard Deviation of RMSE: 0.09552251404352347

R2 scores for each fold: [0.4342204786549205, 0.6498259343566346, 0.5631541643963468, 0.6408974977521309, 0.6584063771361728, 0.5952106390488734, 0.4702736627184544, 0.5371899581501781, 0.5458112299821676, 0.5690198973767717]
Average R2: 0.566400983957265
Standard Deviation of R2: 0.0705770585459459

MAPE scores for each fold: [1.0801820360048814e+17, 9.41714505132991e+16, 1.8435873

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.svm import SVR  # Support Vector Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the T5 embedding table. Every column except 'putting marks' is used as
# a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/T5.xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = SVR()
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the SVR step.
param_grid = {
    'model__C': [0.1, 1, 10, 100],               # Regularization parameter
    'model__epsilon': [0.01, 0.1, 0.2, 0.5],     # Epsilon-tube within which no penalty is associated
    'model__kernel': ['linear', 'rbf', 'poly'],  # Different kernel types
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


MSE scores for each fold: [1.184201680976907, 1.3736072759875613, 1.6406238270126126, 1.5168111501696602, 0.9728631027352509, 1.0455650253388213, 2.0607849816757122, 1.656002285599053, 1.2074519390199432, 1.82233684982314]
Average MSE: 1.4480248118338663
Standard Deviation of MSE: 0.33487811944067236

RMSE scores for each fold: [1.0882103110046821, 1.1720099299867563, 1.2808683878574771, 1.2315888722173727, 0.9863382293793802, 1.022528740593056, 1.4355434447190067, 1.286857523426371, 1.0988411800710525, 1.3499395726561763]
Average RMSE: 1.1952726191911331
Standard Deviation of RMSE: 0.13909772696142308

R2 scores for each fold: [0.5909836046566064, 0.6082317276232658, 0.5646947528286732, 0.57819998556229, 0.7256081433852264, 0.7213853155684948, 0.45239818707076596, 0.5515433985573361, 0.6720051071211353, 0.43517403733933113]
Average R2: 0.5900224259713125
Standard Deviation of R2: 0.0937409778801375

MAPE scores for each fold: [8.010627527119288e+16, 9.843305627796958e+16, 1.7834448312

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor  # Decision Tree Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the MathBERT embedding table. Every column except 'putting marks' is
# used as a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/Mathbert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = DecisionTreeRegressor(random_state=42)
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the tree step.
param_grid = {
    'model__max_depth': [None, 5, 10, 15, 20],  # Maximum depth of the tree
    'model__min_samples_split': [2, 5, 10],     # Minimum number of samples required to split an internal node
    'model__min_samples_leaf': [1, 2, 4],       # Minimum number of samples required to be at a leaf node
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


MSE scores for each fold: [2.1362364161544183, 2.5037154614470993, 2.4153002068383893, 2.2961030243438416, 2.6001395889139567, 2.063252342715572, 3.903098733461449, 2.782753707448132, 3.175477857329307, 2.5548583662464663]
Average MSE: 2.6430935704898633
Standard Deviation of MSE: 0.5181717887430212

RMSE scores for each fold: [1.4615869512808393, 1.5823133259399351, 1.5541236137574093, 1.5152897493033606, 1.6124948337634935, 1.436402569865277, 1.9756261623752225, 1.6681587776492177, 1.781987053075669, 1.5983924318659877]
Average RMSE: 1.618637546887641
Standard Deviation of RMSE: 0.15200678371514334

R2 scores for each fold: [0.2621563264325337, 0.28591213951684824, 0.3591505644257237, 0.3614918451051915, 0.266641804737217, 0.4501992832229841, -0.03715038759032541, 0.24641150489552455, 0.13740623042990507, 0.20813194535528723]
Average R2: 0.25403512565308894
Standard Deviation of R2: 0.1276221488966932

MAPE scores for each fold: [9.127647678957733e+16, 5.337227236967271e+16, 1.764716

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor  # Decision Tree Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the CodeBERT embedding table. Every column except 'putting marks' is
# used as a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/Codebert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = DecisionTreeRegressor(random_state=42)
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the tree step.
param_grid = {
    'model__max_depth': [None, 5, 10, 15, 20],  # Maximum depth of the tree
    'model__min_samples_split': [2, 5, 10],     # Minimum number of samples required to split an internal node
    'model__min_samples_leaf': [1, 2, 4],       # Minimum number of samples required to be at a leaf node
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


MSE scores for each fold: [2.2341686973653885, 3.0123468899963215, 3.278133311484316, 2.2285779811505555, 1.850069535220024, 2.2562269721725414, 3.511865187502761, 2.8671018072934213, 2.8378880802897855, 2.473281256389334]
Average MSE: 2.654965971886445
Standard Deviation of MSE: 0.5027459094100611

RMSE scores for each fold: [1.4947135837227774, 1.7356113879542048, 1.8105616011294163, 1.4928422492515931, 1.360172612288611, 1.5020742232568074, 1.8739971151265844, 1.6932518440248105, 1.6846032412083818, 1.5726669248093614]
Average RMSE: 1.6220494782772548
Standard Deviation of RMSE: 0.1546656455290902

R2 scores for each fold: [0.22833108425282123, 0.14084472503626022, 0.13021583136786286, 0.3802694392642596, 0.47819583946786703, 0.39877678516073567, 0.06681009394982174, 0.22356946987920134, 0.22910985786119842, 0.23341644180313226]
Average R2: 0.25095395680431604
Standard Deviation of R2: 0.12364064440253292

MAPE scores for each fold: [1.0221327189547555e+17, 1.894282526973301e+17, 2.

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.tree import DecisionTreeRegressor  # Decision Tree Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the T5 embedding table. Every column except 'putting marks' is used as
# a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/T5.xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = DecisionTreeRegressor(random_state=42)
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the tree step.
param_grid = {
    'model__max_depth': [None, 5, 10, 15, 20],  # Maximum depth of the tree
    'model__min_samples_split': [2, 5, 10],     # Minimum number of samples required to split an internal node
    'model__min_samples_leaf': [1, 2, 4],       # Minimum number of samples required to be at a leaf node
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


MSE scores for each fold: [2.1936083649997133, 1.9597340098221017, 3.445679828161218, 2.821836668737222, 2.5262266470197887, 2.1453084950285373, 2.9597656138584205, 2.995657159095835, 2.0655641173075017, 2.73333595725637]
Average MSE: 2.5846716861286705
Standard Deviation of MSE: 0.46246015541224506

RMSE scores for each fold: [1.4810835104745828, 1.3999050002846984, 1.8562542466378946, 1.6798323335193968, 1.5894107861153417, 1.464687166267438, 1.7203969349712351, 1.7307966833501371, 1.437207054431442, 1.653280362569026]
Average RMSE: 1.6012854078621193
Standard Deviation of RMSE: 0.14337617897167285

R2 scores for each fold: [0.2423403879083038, 0.44106177888876674, 0.085760864510812, 0.21529404139659292, 0.2874886323095438, 0.428333547062833, 0.21351667912653438, 0.18875581251415652, 0.43890563301376173, 0.15281353527906583]
Average R2: 0.2694270912010371
Standard Deviation of R2: 0.12016190467261705

MAPE scores for each fold: [6.65243384844576e+16, 1.224770480922582e+17, 1.92354843

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor  # KNN Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the MathBERT embedding table. Every column except 'putting marks' is
# used as a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/Mathbert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = KNeighborsRegressor()
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the KNN step.
param_grid = {
    'model__n_neighbors': [3, 5, 7, 9, 11],     # Number of neighbors to use
    'model__weights': ['uniform', 'distance'],  # Weight function used in prediction
    'model__p': [1, 2],                         # Distance metric: 1 for Manhattan, 2 for Euclidean
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


  _data = np.array(data, dtype=dtype, copy=copy,


MSE scores for each fold: [1.2893750000000002, 1.5851041666666668, 2.2185638412983053, 1.4810104573773277, 1.0679386337868477, 1.5227969440949993, 2.548200622438624, 1.8012191537514657, 1.8619581948611816, 1.5006206650051197]
Average MSE: 1.6876787679280536
Standard Deviation of MSE: 0.41517892234592396

RMSE scores for each fold: [1.1355064949175764, 1.2590092003899997, 1.4894844213009766, 1.2169677306228492, 1.0334111639550096, 1.234016589878353, 1.5963084358727873, 1.3420950613691511, 1.36453588991319, 1.2249982306130567]
Average RMSE: 1.289633321883295
Standard Deviation of RMSE: 0.15660352172384714

R2 scores for each fold: [0.5546573499956275, 0.5479104313380281, 0.4113504476768136, 0.588155564234689, 0.6987924985010328, 0.5942159695951954, 0.3228797312856315, 0.5122176900543312, 0.49421359233219897, 0.5348886723207791]
Average R2: 0.5259281947334327
Standard Deviation of R2: 0.09765087764459389

MAPE scores for each fold: [8.100224329784435e+16, 1.1947049011496734e+17, 1.8984831

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor  # KNN Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the CodeBERT embedding table. Every column except 'putting marks' is
# used as a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/Codebert (1).xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = KNeighborsRegressor()
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the KNN step.
param_grid = {
    'model__n_neighbors': [3, 5, 7, 9, 11],     # Number of neighbors to use
    'model__weights': ['uniform', 'distance'],  # Weight function used in prediction
    'model__p': [1, 2],                         # Distance metric: 1 for Manhattan, 2 for Euclidean
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


  _data = np.array(data, dtype=dtype, copy=copy,


MSE scores for each fold: [1.6279783950617284, 1.6338541666666668, 2.3451865418754734, 1.6262191358024694, 1.5485575274348422, 1.6193705442531734, 2.4112871674491396, 2.0201390459487873, 1.7798748043818464, 1.459115805946792]
Average MSE: 1.8071583134820919
Standard Deviation of MSE: 0.31917114302050387

RMSE scores for each fold: [1.2759225662483318, 1.2782230504362948, 1.5314001899815324, 1.2752329731474439, 1.244410514032585, 1.2725449085408238, 1.5528319830068995, 1.4213159557075223, 1.3341194865460313, 1.2079386598444442]
Average RMSE: 1.339394028749191
Standard Deviation of RMSE: 0.11481267017669933

R2 scores for each fold: [0.4377057003535394, 0.5340063820422534, 0.37775376020665086, 0.5477754400185961, 0.563236005319792, 0.5684817278403463, 0.35926104075426046, 0.45293270494485083, 0.5165109045459308, 0.5477529361230451]
Average R2: 0.4905416602149265
Standard Deviation of R2: 0.07391435915518316

MAPE scores for each fold: [1.1665574034786077e+17, 9.789074190048374e+16, 1.563

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor  # KNN Regressor
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Imported here so this block is self-contained: the Pipeline lets the scaler
# be re-fit inside every cross-validation split.
from sklearn.pipeline import Pipeline

# Load the T5 embedding table. Every column except 'putting marks' is used as
# a feature; 'putting marks' is the regression target.
df = pd.read_excel('/content/T5.xlsx')
X = df.drop(columns=['putting marks'])
y = df['putting marks']

# Work on plain arrays so the fold indices produced by KFold are applied
# positionally. (Indexing the Series with y[train_index] is a label lookup and
# only works while the index happens to be 0..n-1.)
X_values = X.to_numpy()
y_values = y.to_numpy()

# Scaling lives inside the pipeline so its mean/std are learned from the
# training part of each split only. Fitting StandardScaler on the whole table
# up front would leak test-fold statistics into training and model selection.
model = KNeighborsRegressor()
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('model', model),
])

# Hyperparameter grid; the 'model__' prefix routes each entry to the KNN step.
param_grid = {
    'model__n_neighbors': [3, 5, 7, 9, 11],     # Number of neighbors to use
    'model__weights': ['uniform', 'distance'],  # Weight function used in prediction
    'model__p': [1, 2],                         # Distance metric: 1 for Manhattan, 2 for Euclidean
}

# Inner loop: 5-fold grid search, selecting on (negated) mean squared error
grid_search = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Outer loop: shuffled k-fold used for the final evaluation
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Per-fold results
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []

for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Tune on the training part only, then keep the refitted winner
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    # The search is re-run per fold, so record every fold's winner
    # (with the pipeline prefix stripped) instead of only the last one.
    best_params_per_fold.append(
        {name.split('__', 1)[-1]: value for name, value in grid_search.best_params_.items()}
    )

    y_pred = best_model.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # sklearn's MAPE divides by |y_true| (clamped to machine epsilon), so a
    # single zero target yields values around 1e16 and swamps the average.
    # The percentage error is therefore computed on non-zero targets only;
    # a fold without any non-zero target is recorded as NaN.
    nonzero = y_test != 0
    if nonzero.any():
        mape = mean_absolute_percentage_error(y_test[nonzero], y_pred[nonzero]) * 100  # Convert to percentage
    else:
        mape = np.nan

    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# NaN-aware aggregation: folds without a usable MAPE are skipped
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


MSE scores for each fold: [1.4857697568135626, 1.570191601564939, 1.6877893518518519, 1.5326826985302, 1.4553748582766441, 1.1414914292113656, 2.475855749689105, 1.8471738335955206, 1.3688140016698482, 2.3186578122944774]
Average MSE: 1.6883801093497515
Standard Deviation of MSE: 0.3971581545862304

RMSE scores for each fold: [1.2189215548235919, 1.2530728636296211, 1.2991494724826131, 1.2380156293561888, 1.206389181929548, 1.0684060226390366, 1.5734852238547095, 1.3591077343593922, 1.169963247999632, 1.522713962730518]
Average RMSE: 1.2909224893804851
Standard Deviation of RMSE: 0.14798390372416437

R2 scores for each fold: [0.48682373956707414, 0.5521636629339433, 0.5521803664653495, 0.5737863712986583, 0.5895177766427573, 0.6958235340476018, 0.34210356289641797, 0.49977285243395586, 0.628172362526326, 0.2813413551742586]
Average R2: 0.5201685583986342
Standard Deviation of R2: 0.11938406764110504

MAPE scores for each fold: [7.100787987416966e+16, 1.0695691540822226e+17, 1.548112371

In [None]:
import pandas as pd
from sklearn.model_selection import KFold, GridSearchCV
from sklearn.linear_model import SGDRegressor  # Linear Regression with Gradient Descent
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.preprocessing import StandardScaler
import numpy as np

# Nested cross-validation of an SGD-trained linear regressor.
#
# Outer loop: 10-fold KFold, used only to estimate generalisation error.
# Inner loop: GridSearchCV (5-fold) run on each outer training split to pick
# hyperparameters. Metrics (MSE, RMSE, R2, MAPE) are collected per outer fold.

# Load the dataset; every column except 'putting marks' is treated as a feature.
df = pd.read_excel('/content/Mathbert (1).xlsx')

X = df.drop(columns=['putting marks'])  # Features (all columns except 'putting marks')
y = df['putting marks']  # Target variable

# Define the SGDRegressor model (Linear Regression using Gradient Descent)
model = SGDRegressor(random_state=42)

# Define the hyperparameter grid for tuning the gradient descent parameters.
# NOTE(review): eta0 only applies to 'constant', 'invscaling' and 'adaptive',
# so the 'optimal' x eta0 combinations look redundant — consider splitting the
# grid into a list of dicts to cut the search time.
param_grid = {
    'alpha': [0.0001, 0.001, 0.01, 0.1],  # Regularization strength
    'max_iter': [1000, 2000, 3000],       # Number of iterations
    'penalty': ['l2', 'l1', 'elasticnet'],  # Type of regularization
    'learning_rate': ['constant', 'optimal', 'invscaling', 'adaptive'],
    'eta0': [0.001, 0.01, 0.1],           # Initial learning rate for 'constant', 'invscaling', or 'adaptive'
}

# Set up GridSearchCV (inner loop: hyperparameter selection)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1)

# Set up k-fold cross-validation for final evaluation (outer loop)
k = 10  # Number of folds
kf = KFold(n_splits=k, shuffle=True, random_state=42)

# Initialize lists to store metrics for each fold
mse_scores = []
rmse_scores = []
r2_scores = []
mape_scores = []
best_params_per_fold = []  # grid_search is re-fit per fold, so keep each fold's winner

# Perform k-fold cross-validation
for train_index, test_index in kf.split(X):
    # KFold yields positional indices, so index with .iloc rather than by label.
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Fit the scaler on the training rows only; fitting it on the full dataset
    # would leak the held-out fold's mean/variance into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X.iloc[train_index])
    X_test = scaler.transform(X.iloc[test_index])

    # Fit the GridSearchCV on the training set
    grid_search.fit(X_train, y_train)

    # Get the best model from Grid Search (refit on the whole training split)
    best_model = grid_search.best_estimator_
    best_params_per_fold.append(grid_search.best_params_)

    # Predict on the test set
    y_pred = best_model.predict(X_test)

    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_test, y_pred)

    # MAPE divides by |y_true|; a zero target makes it explode (the earlier
    # LinearRegression run on this same target reported values ~1e16-1e17).
    # Compute it over rows with a non-zero target only; NaN if none exist.
    y_test_values = y_test.to_numpy()
    nonzero_target = y_test_values != 0
    if nonzero_target.any():
        mape = mean_absolute_percentage_error(
            y_test_values[nonzero_target], y_pred[nonzero_target]
        ) * 100  # Convert to percentage
    else:
        mape = np.nan

    # Store the metrics
    mse_scores.append(mse)
    rmse_scores.append(rmse)
    r2_scores.append(r2)
    mape_scores.append(mape)

# Print the metrics for each fold and their averages
print(f"MSE scores for each fold: {mse_scores}")
print(f"Average MSE: {np.mean(mse_scores)}")
print(f"Standard Deviation of MSE: {np.std(mse_scores)}\n")

print(f"RMSE scores for each fold: {rmse_scores}")
print(f"Average RMSE: {np.mean(rmse_scores)}")
print(f"Standard Deviation of RMSE: {np.std(rmse_scores)}\n")

print(f"R2 scores for each fold: {r2_scores}")
print(f"Average R2: {np.mean(r2_scores)}")
print(f"Standard Deviation of R2: {np.std(r2_scores)}\n")

# nan-aware aggregation: a fold whose targets are all zero contributes NaN above.
print(f"MAPE scores for each fold: {mape_scores}")
print(f"Average MAPE: {np.nanmean(mape_scores)}")
print(f"Standard Deviation of MAPE: {np.nanstd(mape_scores)}\n")

# Print the best parameters found by Grid Search in every outer fold
# (grid_search.best_params_ alone would only show the last fold's choice).
for fold_number, fold_params in enumerate(best_params_per_fold, start=1):
    print(f"Best Parameters (fold {fold_number}): {fold_params}")


  _data = np.array(data, dtype=dtype, copy=copy,
  _data = np.array(data, dtype=dtype, copy=copy,
