In [10]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVR  # Support Vector Regressor

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/Codebert test.xlsx"
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models
models = {
    "Linear Regression": LinearRegression(fit_intercept= True),
    "Ridge Regression": Ridge(alpha=100,solver='saga'),
    "Lasso Regression": Lasso(alpha=0.1),
    "ElasticNet": ElasticNet(alpha=0.1, l1_ratio=0.5),
    "Polynomial Regression": make_pipeline(PolynomialFeatures(degree=2), LinearRegression()),
    "SVM": SVR(kernel='rbf', C=100, epsilon=0.5),
    "KNN Regressor": KNeighborsRegressor(n_neighbors=3, weights='uniform'),
    "Decision Tree": DecisionTreeRegressor(max_depth=5, min_samples_leaf=2, min_samples_split=10),
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:}, R²: {r2:.4f}")

Linear Regression - RMSE: 0.017503546989144043, R²: 0.9998
Ridge Regression - RMSE: 0.3099869029135285, R²: 0.9331
Lasso Regression - RMSE: 0.6158453894516058, R²: 0.7359
ElasticNet - RMSE: 0.4761261830754729, R²: 0.8421
Polynomial Regression - RMSE: 0.017506990189741334, R²: 0.9998
SVM - RMSE: 0.4302788758837052, R²: 0.8711
KNN Regressor - RMSE: 0.6719561402846401, R²: 0.6856
Decision Tree - RMSE: 0.39622002617799507, R²: 0.8907


In [11]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVR  # Support Vector Regressor

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/Mathbert test.xlsx"
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models
models = {
    "Linear Regression": LinearRegression(fit_intercept= False),
    "Ridge Regression": Ridge(alpha=100, solver='saga'),
    "Lasso Regression": Lasso(alpha=0.1),
    "ElasticNet": ElasticNet(alpha=0.1, l1_ratio=0.5),
    "Polynomial Regression": make_pipeline(PolynomialFeatures(degree=2), Lasso(alpha=0.1)),
    "SVM": SVR(kernel='rbf', C=100, epsilon=0.5),
    "KNN Regressor": KNeighborsRegressor(n_neighbors=3,p=1, weights='uniform'),
    "Decision Tree": DecisionTreeRegressor(max_depth=5, min_samples_leaf=4, min_samples_split=10),
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:}, R²: {r2:.4f}")

Linear Regression - RMSE: 3.931269979815481, R²: -9.7629
Ridge Regression - RMSE: 0.19745010183449213, R²: 0.9728
Lasso Regression - RMSE: 0.47777322849867204, R²: 0.8410
ElasticNet - RMSE: 0.3363660607977755, R²: 0.9212
Polynomial Regression - RMSE: 0.2637026278941464, R²: 0.9516
SVM - RMSE: 0.4177642023694111, R²: 0.8785
KNN Regressor - RMSE: 0.6916774942061404, R²: 0.6668
Decision Tree - RMSE: 0.3573260236098106, R²: 0.9111


In [12]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVR  # Support Vector Regressor

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/Codebert test.xlsx"
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models
models = {
    "Linear Regression": LinearRegression(fit_intercept= True),
    "Ridge Regression": Ridge(alpha=100, solver='saga'),
    "Lasso Regression": Lasso(alpha=0.1),
    "ElasticNet": ElasticNet(alpha=0.1, l1_ratio=0.5),
    "Polynomial Regression": make_pipeline(PolynomialFeatures(degree=2), Lasso(alpha=0.1)),
    "SVM": SVR(kernel='rbf', C=10, epsilon=0.5),
    "KNN Regressor": KNeighborsRegressor(n_neighbors=3,p=1, weights='uniform'),
    "Decision Tree": DecisionTreeRegressor(max_depth=5, min_samples_leaf=2, min_samples_split=10),
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:}, R²: {r2:.4f}")

Linear Regression - RMSE: 0.017503546989144043, R²: 0.9998
Ridge Regression - RMSE: 0.31004066927331764, R²: 0.9331
Lasso Regression - RMSE: 0.6158453894516058, R²: 0.7359
ElasticNet - RMSE: 0.4761261830754729, R²: 0.8421
Polynomial Regression - RMSE: 0.33631270106186717, R²: 0.9212
SVM - RMSE: 0.4302788758837052, R²: 0.8711
KNN Regressor - RMSE: 0.6653377277285557, R²: 0.6917
Decision Tree - RMSE: 0.39622002617799507, R²: 0.8907


In [13]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.svm import SVR  # Support Vector Regressor

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/T5 test.xlsx"
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Models
models = {
    "Linear Regression": LinearRegression(fit_intercept= False),
    "Ridge Regression": Ridge(alpha=100, solver='saga'),
    "Lasso Regression": Lasso(alpha=0.1),
    "ElasticNet": ElasticNet(alpha=0.1, l1_ratio=0.5),
    "Polynomial Regression": make_pipeline(PolynomialFeatures(degree=2), Lasso(alpha=0.1)),
    "SVM": SVR(kernel='rbf', C=10, epsilon=0.5),
    "KNN Regressor": KNeighborsRegressor(n_neighbors=11,p=1, weights='distance'),
    "Decision Tree": DecisionTreeRegressor(max_depth=5, min_samples_leaf=4, min_samples_split=2),
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:}, R²: {r2:.4f}")

Linear Regression - RMSE: 3.824839498523273, R²: -9.1880
Ridge Regression - RMSE: 0.2515210992137939, R²: 0.9559
Lasso Regression - RMSE: 0.5312515211080925, R²: 0.8035
ElasticNet - RMSE: 0.41316077009784113, R²: 0.8811
Polynomial Regression - RMSE: 0.3012644001744052, R²: 0.9368
SVM - RMSE: 0.43801026059029935, R²: 0.8664
KNN Regressor - RMSE: 0.020211302086361082, R²: 0.9997
Decision Tree - RMSE: 0.34099174447051, R²: 0.9190


In [15]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
import pandas as pd
import numpy as np

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/Mathbert test.xlsx"  # Update the path to your file
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define models
models = {
    "Random Forest": RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "AdaBoost": AdaBoostRegressor(n_estimators=200, learning_rate=0.1, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=5, random_state=42, objective='reg:squarederror')
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:.4f}, R²: {r2:.4f}")


Random Forest - RMSE: 0.2873, R²: 0.9425
Gradient Boosting - RMSE: 0.0208, R²: 0.9997
AdaBoost - RMSE: 0.2215, R²: 0.9658
XGBoost - RMSE: 0.0203, R²: 0.9997


In [16]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
import pandas as pd
import numpy as np

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/Codebert test.xlsx"  # Update the path to your file
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define models
models = {
    "Random Forest": RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "AdaBoost": AdaBoostRegressor(n_estimators=200, learning_rate=0.1, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42, objective='reg:squarederror')
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:.4f}, R²: {r2:.4f}")


Random Forest - RMSE: 0.3360, R²: 0.9214
Gradient Boosting - RMSE: 0.0180, R²: 0.9998
AdaBoost - RMSE: 0.2869, R²: 0.9427
XGBoost - RMSE: 0.0189, R²: 0.9998


In [18]:
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Ridge, Lasso, LinearRegression, ElasticNet
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler, PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from xgboost import XGBRegressor
import pandas as pd
import numpy as np

# Function to compute metrics
def test_metrics(model, X_test, y_test):
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    return rmse, r2

# Load the test data
file_path = r"/content/T5 test.xlsx"  # Update the path to your file
test_data = pd.read_excel(file_path)
X_test = test_data.iloc[:, :-1]
y_test = test_data.iloc[:, -1]

# Replace synthetic data with real training data if available
# Simulate with a subset of the test data for demonstration
X_train = X_test.copy()
y_train = y_test.copy()

# Scale features to ensure consistency
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Define models
models = {
    "Random Forest": RandomForestRegressor(n_estimators=100, max_depth=10, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42),
    "AdaBoost": AdaBoostRegressor(n_estimators=200, learning_rate=0.1, random_state=42),
    "XGBoost": XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42, objective='reg:squarederror')
}

# Evaluate models
for name, model in models.items():
    model.fit(X_train_scaled, y_train)
    rmse, r2 = test_metrics(model, X_test_scaled, y_test)
    print(f"{name} - RMSE: {rmse:.4f}, R²: {r2:.4f}")


Random Forest - RMSE: 0.2956, R²: 0.9392
Gradient Boosting - RMSE: 0.0214, R²: 0.9997
AdaBoost - RMSE: 0.2404, R²: 0.9598
XGBoost - RMSE: 0.0229, R²: 0.9996
