In [1]:
import pandas as pd

# Load the Parkinsons Telemonitoring dataset
parkinsons_data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/telemonitoring/parkinsons_updrs.data')

# Print the first few rows of the dataset
parkinsons_data.head()


Unnamed: 0,subject#,age,sex,test_time,motor_UPDRS,total_UPDRS,Jitter(%),Jitter(Abs),Jitter:RAP,Jitter:PPQ5,...,Shimmer(dB),Shimmer:APQ3,Shimmer:APQ5,Shimmer:APQ11,Shimmer:DDA,NHR,HNR,RPDE,DFA,PPE
0,1,72,0,5.6431,28.199,34.398,0.00662,3.4e-05,0.00401,0.00317,...,0.23,0.01438,0.01309,0.01662,0.04314,0.01429,21.64,0.41888,0.54842,0.16006
1,1,72,0,12.666,28.447,34.894,0.003,1.7e-05,0.00132,0.0015,...,0.179,0.00994,0.01072,0.01689,0.02982,0.011112,27.183,0.43493,0.56477,0.1081
2,1,72,0,19.681,28.695,35.389,0.00481,2.5e-05,0.00205,0.00208,...,0.181,0.00734,0.00844,0.01458,0.02202,0.02022,23.047,0.46222,0.54405,0.21014
3,1,72,0,25.647,28.905,35.81,0.00528,2.7e-05,0.00191,0.00264,...,0.327,0.01106,0.01265,0.01963,0.03317,0.027837,24.445,0.4873,0.57794,0.33277
4,1,72,0,33.642,29.187,36.375,0.00335,2e-05,0.00093,0.0013,...,0.176,0.00679,0.00929,0.01819,0.02036,0.011625,26.126,0.47188,0.56122,0.19361


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor

# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/telemonitoring/parkinsons_updrs.data"
data = pd.read_csv(url)

# Split the dataset into features and target
X = data.drop(['total_UPDRS'], axis=1).values  # Replace 'target_variable' with the actual target variable name
y = data['total_UPDRS'].values

# List of regression models
models = [
    ("Lasso Regression", Lasso()),
    ("Random Forest Regression", RandomForestRegressor()),
    ("XGBoost", XGBRegressor()),
    ("Support Vector Regression (SVR)", SVR()),
    ("k-Nearest Neighbors (k-NN) Regression", KNeighborsRegressor()),
    ("AdaBoost Regression", AdaBoostRegressor()),
    ("Extra Trees Regression", ExtraTreesRegressor())
]

kf = KFold(n_splits=10, shuffle=True, random_state=42)

for name, model in models:
    mse_train_list = []
    mse_test_list = []
    r2_train_list = []
    r2_test_list = []

    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)

        mse_train_list.append(mean_squared_error(y_train, y_train_pred))
        mse_test_list.append(mean_squared_error(y_test, y_test_pred))

        r2_train_list.append(r2_score(y_train, y_train_pred))
        r2_test_list.append(r2_score(y_test, y_test_pred))

    print(f"{name}:")
    print(f"  Training set - Mean Squared Error (MSE): {np.mean(mse_train_list):.2f}, R-squared (R2) Score: {np.mean(r2_train_list):.2f}")
    print(f"  Testing set - Mean Squared Error (MSE): {np.mean(mse_test_list):.2f}, R-squared (R2) Score: {np.mean(r2_test_list):.2f}")
    print()



Lasso Regression:
  Training set - Mean Squared Error (MSE): 11.40, R-squared (R2) Score: 0.90
  Testing set - Mean Squared Error (MSE): 11.43, R-squared (R2) Score: 0.90

Random Forest Regression:
  Training set - Mean Squared Error (MSE): 0.01, R-squared (R2) Score: 1.00
  Testing set - Mean Squared Error (MSE): 0.08, R-squared (R2) Score: 1.00

XGBoost:
  Training set - Mean Squared Error (MSE): 0.01, R-squared (R2) Score: 1.00
  Testing set - Mean Squared Error (MSE): 0.11, R-squared (R2) Score: 1.00

Support Vector Regression (SVR):
  Training set - Mean Squared Error (MSE): 13.74, R-squared (R2) Score: 0.88
  Testing set - Mean Squared Error (MSE): 13.79, R-squared (R2) Score: 0.88

k-Nearest Neighbors (k-NN) Regression:
  Training set - Mean Squared Error (MSE): 0.48, R-squared (R2) Score: 1.00
  Testing set - Mean Squared Error (MSE): 1.26, R-squared (R2) Score: 0.99

AdaBoost Regression:
  Training set - Mean Squared Error (MSE): 7.23, R-squared (R2) Score: 0.94
  Testing set 