In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import requests
import zipfile
from io import BytesIO
from io import StringIO
from sklearn.linear_model import Lasso
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import matplotlib.pyplot as plt
import time

from sklearn.datasets import load_digits
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import ExtraTreesClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import MinMaxScaler

2023-04-26 12:56:15.863640: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
def run_regression(X, y, random_state=42, test_size=0.2):
    """Benchmark a fixed panel of regressors on a single hold-out split.

    The data is split once with ``train_test_split``; every model is then
    fitted on the training part and scored on the held-out part.

    Parameters
    ----------
    X, y : array-like
        Feature matrix and target vector, passed straight to
        ``train_test_split``.
    random_state : int, default 42
        Seed used for the split and for every estimator that accepts one.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.

    Returns
    -------
    dict
        Maps each model label to a dict with keys ``'test_mse'``,
        ``'test_r2'`` and ``'elapsed_time'`` (seconds spent in ``fit`` only;
        prediction time is not included).
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Insertion order of this dict is the order in which models are fitted
    # and in which they appear in the returned results.
    estimators = {
        'Lasso': Lasso(alpha=0.1),
        'RandomForest': RandomForestRegressor(n_estimators=100, random_state=random_state),
        'XGBoost': XGBRegressor(n_estimators=100, random_state=random_state),
        'SVR': SVR(kernel='linear'),
        'k-NN': KNeighborsRegressor(n_neighbors=5),
        'AdaBoost': AdaBoostRegressor(n_estimators=100, random_state=random_state),
        'ExtraTrees': ExtraTreesRegressor(n_estimators=100, random_state=random_state),
    }

    def _evaluate(estimator):
        # Only the call to fit() is timed.
        tic = time.time()
        estimator.fit(X_train, y_train)
        fit_seconds = time.time() - tic

        predictions = estimator.predict(X_test)
        return {
            'test_mse': mean_squared_error(y_test, predictions),
            'test_r2': r2_score(y_test, predictions),
            'elapsed_time': fit_seconds,
        }

    return {label: _evaluate(estimator) for label, estimator in estimators.items()}

In [4]:


# Load and preprocess the datasets
def load_and_preprocess_parkinsons():
    """Download the Parkinsons telemonitoring table and split it into X and y.

    Returns
    -------
    tuple
        ``(X, y)`` as NumPy arrays. ``y`` is the ``total_UPDRS`` column; ``X``
        is every remaining column except ``subject#``, ``motor_UPDRS`` and
        ``total_UPDRS``.
    """
    source = 'https://archive.ics.uci.edu/ml/machine-learning-databases/parkinsons/telemonitoring/parkinsons_updrs.data'
    frame = pd.read_csv(source)

    target = frame['total_UPDRS'].values
    # The subject identifier and both UPDRS scores are kept out of the features.
    features = frame.drop(columns=['subject#', 'motor_UPDRS', 'total_UPDRS']).values
    return features, target

def load_and_preprocess_energy():
    """Download the ENB2012 spreadsheet and split it into X and y.

    Returns
    -------
    tuple
        ``(X, y)`` as NumPy arrays. ``y`` is the ``Y1`` column; ``X`` is every
        column other than ``Y1`` and ``Y2`` (``Y2`` is discarded).
    """
    source = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00242/ENB2012_data.xlsx'
    sheet = pd.read_excel(source)

    response_columns = ['Y1', 'Y2']
    features = sheet.drop(columns=response_columns).values
    target = sheet['Y1'].values
    return features, target

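# Earlier attempt, kept for reference only. pd.read_csv(..., compression='zip')
# requires the archive to contain exactly one data file; presumably that is why
# the active loader below opens 'train.csv' from the archive explicitly instead.
# def load_and_preprocess_superconductivity():
#     url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00464/superconduct.zip'
#     df = pd.read_csv(url, compression='zip')
#     X = df.drop(['critical_temp'], axis=1).values
#     y = df['critical_temp'].values
#     return X, y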

def load_and_preprocess_superconductivity():
    """Download the superconductivity archive and split ``train.csv`` into X and y.

    The zip archive is fetched into memory, and only its ``train.csv`` member
    is read.

    Returns
    -------
    tuple
        ``(X, y)`` as NumPy arrays. ``y`` is the ``critical_temp`` column;
        ``X`` is every other column.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    requests.Timeout
        If the server stops responding for longer than the timeout.
    """
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00464/superconduct.zip'

    # Without a timeout requests can block forever; without the status check
    # an HTML error page would only surface later as an obscure BadZipFile.
    response = requests.get(url, timeout=60)
    response.raise_for_status()

    # Context managers close both the archive and the member once the CSV has
    # been parsed, instead of leaving them open until garbage collection.
    with zipfile.ZipFile(BytesIO(response.content)) as archive:
        with archive.open('train.csv') as member:
            df = pd.read_csv(member)

    X = df.drop(['critical_temp'], axis=1).values
    y = df['critical_temp'].values
    return X, y

def load_and_preprocess_forest_fires():
    """Download the forest-fires table, one-hot encode it and split into X and y.

    Every non-numeric column is expanded by ``pd.get_dummies``. The feature
    matrix is always returned as a float array, regardless of the pandas
    version in use.

    Returns
    -------
    tuple
        ``(X, y)`` as NumPy arrays. ``y`` is the ``area`` column; ``X`` is
        every other column after one-hot encoding, with float dtype.
    """
    url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/forest-fires/forestfires.csv'
    df = pd.read_csv(url)

    # pandas >= 2.0 emits boolean indicator columns by default; mixing those
    # with numeric columns makes the frame convert to an object-dtype array.
    # Requesting float indicators keeps the whole matrix numeric.
    df = pd.get_dummies(df, dtype=float)

    X = df.drop(['area'], axis=1).to_numpy(dtype=float)
    y = df['area'].values
    return X, y

def load_and_preprocess_wine_quality():
    """Download the red-wine quality table and split it into X and y.

    The file is parsed with ``;`` as the field separator.

    Returns
    -------
    tuple
        ``(X, y)`` as NumPy arrays. ``y`` is the ``quality`` column; ``X`` is
        every other column.
    """
    source = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
    wines = pd.read_csv(source, sep=';')

    label = 'quality'
    return wines.drop(columns=[label]).values, wines[label].values

# Fetch the raw feature matrices and targets (one download per dataset).
X_parkinsons, y_parkinsons = load_and_preprocess_parkinsons()
X_energy, y_energy = load_and_preprocess_energy()
X_superconductivity, y_superconductivity = load_and_preprocess_superconductivity()
X_forest_fires, y_forest_fires = load_and_preprocess_forest_fires()
X_wine_quality, y_wine_quality = load_and_preprocess_wine_quality()

# Standardise each feature matrix with its own scaler; the fitted scalers stay
# available under scaler_<dataset>.
# NOTE(review): every scaler is fitted on the complete dataset, i.e. before
# run_regression makes its train/test split, so held-out rows contribute to
# the scaling statistics. Confirm whether that is intended.
scaler_parkinsons = StandardScaler()
X_parkinsons_scaled = scaler_parkinsons.fit_transform(X_parkinsons)

scaler_energy = StandardScaler()
X_energy_scaled = scaler_energy.fit_transform(X_energy)

scaler_superconductivity = StandardScaler()
X_superconductivity_scaled = scaler_superconductivity.fit_transform(X_superconductivity)

scaler_forest_fires = StandardScaler()
X_forest_fires_scaled = scaler_forest_fires.fit_transform(X_forest_fires)

scaler_wine_quality = StandardScaler()
X_wine_quality_scaled = scaler_wine_quality.fit_transform(X_wine_quality)

In [6]:
# Display name -> (scaled features, target), in the order they are reported.
datasets = {}
datasets['Parkinsons'] = (X_parkinsons_scaled, y_parkinsons)
datasets['Energy Efficiency'] = (X_energy_scaled, y_energy)
datasets['Superconductivity'] = (X_superconductivity_scaled, y_superconductivity)
datasets['Forest Fires'] = (X_forest_fires_scaled, y_forest_fires)
datasets['Wine Quality'] = (X_wine_quality_scaled, y_wine_quality)

# Run the regression benchmark on each dataset and print one line per model.
# The header is printed before the (potentially slow) fit so progress is visible.
for dataset_name in datasets:
    X, y = datasets[dataset_name]
    print(f"\n{dataset_name}:")
    results = run_regression(X, y)
    for model_name in results:
        metrics = results[model_name]
        print(f"{model_name}: Test MSE: {metrics['test_mse']:.4f}, Test R2: {metrics['test_r2']:.4f}")


Parkinsons:
Lasso: Test MSE: 93.4615, Test R2: 0.1566
RandomForest: Test MSE: 2.5748, Test R2: 0.9768
XGBoost: Test MSE: 4.8244, Test R2: 0.9565
SVR: Test MSE: 96.6305, Test R2: 0.1280
k-NN: Test MSE: 39.4418, Test R2: 0.6441
AdaBoost: Test MSE: 69.1746, Test R2: 0.3758
ExtraTrees: Test MSE: 2.2313, Test R2: 0.9799

Energy Efficiency:
Lasso: Test MSE: 9.9394, Test R2: 0.9046
RandomForest: Test MSE: 0.2444, Test R2: 0.9977
XGBoost: Test MSE: 0.1442, Test R2: 0.9986
SVR: Test MSE: 9.7768, Test R2: 0.9062
k-NN: Test MSE: 5.3575, Test R2: 0.9486
AdaBoost: Test MSE: 4.0551, Test R2: 0.9611
ExtraTrees: Test MSE: 0.2564, Test R2: 0.9975

Superconductivity:
Lasso: Test MSE: 328.8063, Test R2: 0.7143
RandomForest: Test MSE: 81.4574, Test R2: 0.9292
XGBoost: Test MSE: 89.1691, Test R2: 0.9225
SVR: Test MSE: 321.2115, Test R2: 0.7209
k-NN: Test MSE: 108.9181, Test R2: 0.9054
AdaBoost: Test MSE: 408.6658, Test R2: 0.6450
ExtraTrees: Test MSE: 78.7690, Test R2: 0.9316

Forest Fires:
Lasso: Test MS

In [None]:
def run_classifiers(X, y, output=None, random_state=42, test_size=0.2, epochs=50, batch_size=16, learning_rate=0.001):
    """Benchmark k-NN, ExtraTrees and a small dense network on one hold-out split.

    Parameters
    ----------
    X, y : array-like
        Feature matrix and class labels. Labels must be numeric because an
        MSE over the label values is reported; they do not need to be
        contiguous or to start at zero.
    output : int, optional
        Number of units in the network's softmax layer. Defaults to the
        number of distinct labels in ``y``.
    random_state : int, default 42
        Seed for the split and for ExtraTrees. The neural network is not
        seeded by this function.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    epochs, batch_size, learning_rate
        Training settings for the neural network.

    Returns
    -------
    dict
        Maps ``'k-NN'``, ``'ExtraTrees'`` and ``'neural_network'`` to a dict
        with keys ``'mse'``, ``'accuracy'`` and ``'elapsed_time'`` (seconds
        spent in ``fit`` only).

    Raises
    ------
    ValueError
        If ``output`` is smaller than the number of distinct labels in ``y``.
    """
    # Sorted distinct labels; a label's position in this array is the class
    # index the network is trained on.
    classes = np.unique(y)
    n_classes = len(classes)

    if output is None:
        output = n_classes
    elif output < n_classes:
        raise ValueError(
            f"output={output} is smaller than the number of distinct labels ({n_classes})"
        )

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)

    classifiers = [
        ('k-NN', KNeighborsClassifier(n_neighbors=3)),
        ('ExtraTrees', ExtraTreesClassifier(n_estimators=100, random_state=random_state)),
    ]

    results = {}
    for name, classifier in classifiers:
        start_time = time.time()
        classifier.fit(X_train, y_train)
        elapsed_time = time.time() - start_time
        y_pred_test = classifier.predict(X_test)

        results[name] = {
            # MSE treats the class labels as plain numbers.
            'mse': mean_squared_error(y_test, y_pred_test),
            'accuracy': accuracy_score(y_test, y_pred_test),
            'elapsed_time': elapsed_time
        }

    # Neural network
    model = Sequential()
    model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(output, activation='softmax'))

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

    # Map labels to contiguous indices 0..n_classes-1 and pin the one-hot
    # width to the softmax size. Letting to_categorical infer the width from
    # max(y_train) breaks whenever labels are not exactly 0..K-1 or the
    # highest class is absent from the training split.
    y_train_index = np.searchsorted(classes, y_train)
    y_train_categorical = to_categorical(y_train_index, num_classes=output)

    start_time = time.time()
    model.fit(X_train, y_train_categorical, epochs=epochs, batch_size=batch_size, verbose=0)
    elapsed_time_neural_network = time.time() - start_time

    # Only the first n_classes outputs correspond to real labels; translate
    # the winning index back to the original label value.
    probabilities = model.predict(X_test)
    predicted_index = np.argmax(probabilities[:, :n_classes], axis=1)
    y_pred_test_neural_network = classes[predicted_index]

    results['neural_network'] = {
        'mse': mean_squared_error(y_test, y_pred_test_neural_network),
        'accuracy': accuracy_score(y_test, y_pred_test_neural_network),
        'elapsed_time': elapsed_time_neural_network
    }

    return results