In [76]:
import pickle
from itertools import product

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

## Read Data

In [77]:
# Load the train and test tables from their CSV files, then report each
# frame's (rows, columns) shape as a quick sanity check.
train_slice_df, test_slice_df = (
    pd.read_csv(csv_name)
    for csv_name in ("slice_std_train.csv", "slice_std_test.csv")
)
print(train_slice_df.shape, test_slice_df.shape)

(42800, 361) (10700, 361)


In [78]:
# The last column is the target; every column before it is a feature.
# Labels are taken from the training frame for BOTH splits, so the test
# features are selected by name and follow the training column order.
feature_cols = train_slice_df.columns[:-1]
target_col = train_slice_df.columns[-1]

X_train, y_train = train_slice_df[feature_cols], train_slice_df[target_col]
X_test, y_test = test_slice_df[feature_cols], test_slice_df[target_col]

## Create Model

In [ ]:
# Use GPU acceleration (cuML) to grid-search the RBF-SVR hyper-parameters,
# selecting the combination with the lowest MAE.
#
# Selection is scored on a validation split carved out of the training data,
# NOT on the test set: choosing hyper-parameters by test-set score leaks the
# test data into model selection and makes the final test metrics optimistic.

# Aliased so this GPU estimator is never confused with sklearn.svm.SVR, which
# a later cell imports under the plain name `SVR`.
from cuml.svm import SVR as GpuSVR

Cs = [0.1, 1, 10, 50, 100, 200]
gammas = ['scale', 'auto']
epsilons = [0.001, 0.01, 0.1, 0.2]

# Hold out 20% of the training rows for model selection. The fixed seed keeps
# the split (and therefore the search result) reproducible across restarts.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Best candidate seen so far (lowest validation MAE)
best_params = None
best_score = float('inf')

# Parameter tuning: product() walks the grid in the same order as three
# nested loops over Cs, gammas and epsilons would.
for C, gamma, epsilon in product(Cs, gammas, epsilons):
    # Initialize and train the candidate on the fitting portion only
    svr = GpuSVR(kernel='rbf', C=C, gamma=gamma, epsilon=epsilon)
    svr.fit(X_fit, y_fit)

    # Score the candidate on the held-out validation rows
    val_mae = mean_absolute_error(y_val, svr.predict(X_val))

    # Keep the candidate if it beats the best validation MAE so far
    if val_mae < best_score:
        best_score = val_mae
        best_params = {'C': C, 'gamma': gamma, 'epsilon': epsilon}
        print(f"New best parameters: C={C}, Gamma={gamma}, Epsilon={epsilon}, MAE={val_mae}")

# A NaN score never compares below best_score, so a search in which every
# candidate fails would otherwise surface later as an opaque AttributeError.
if best_params is None:
    raise RuntimeError("Grid search found no candidate with a finite validation MAE")

# Refit the winning configuration on the full training set before testing
best_model = GpuSVR(kernel='rbf', **best_params)
best_model.fit(X_train, y_train)

# Final evaluation: the test set is touched only here, after selection is done
y_pred = best_model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)

print(f"Best Model RMSE: {rmse}")
print(f"Best Model MAE: {mae}")

In [None]:
from sklearn.svm import SVR

# Fixed hyper-parameters for the final CPU model. Every value appears in the
# grid searched earlier in this notebook (presumably the search winner —
# TODO confirm).
svr_params = {
    "kernel": "rbf",
    "C": 200,
    "gamma": "auto",
    "epsilon": 0.001,
    "verbose": True,
}

# Build and train the SVR model; fit() returns the fitted estimator itself,
# so construction and training can be chained.
svr_model = SVR(**svr_params).fit(X_train, y_train)

# Test-set predictions, consumed by the metrics cell below
y_pred = svr_model.predict(X_test)

## Compute Metrics

In [80]:
# Score the current test-set predictions (y_pred) against the true targets.
m_R2 = r2_score(y_test, y_pred)
m_MAE = mean_absolute_error(y_test, y_pred)
m_RMSE = np.sqrt(mean_squared_error(y_test, y_pred))  # RMSE = sqrt(MSE)

# Number of predictor variables = number of feature columns
m_num_variables = len(X_train.columns)

In [81]:
# Summary record for the SVM model. 'AIC' is stored as None: no AIC value is
# computed for this model anywhere in the notebook.
validation_svm = dict(
    AIC=None,
    R2=m_R2,
    RMSE=m_RMSE,
    MAE=m_MAE,
    num_variables=m_num_variables,
)

In [82]:
# Reference values recorded by the author. Their origin is not shown in this
# notebook — TODO confirm whether they are targets or results of an earlier run:
# For RMSE - 0.64238
# for MAE - 0.34035
# Bare expression: displays the metrics dict as this cell's output.
validation_svm

{'AIC': None,
 'R2': 0.9991770979784319,
 'RMSE': 0.6415018701405892,
 'MAE': 0.339972046748102,
 'num_variables': 360}

## Dump Answer

In [None]:
# Load the previously pickled results from answer4.pickle.
# NOTE: pickle.load can execute arbitrary code while deserializing — only
# open files from a trusted source.
with open('answer4.pickle', 'rb') as answer_file:
    old_results = pickle.load(answer_file)

In [83]:
# Add the SVM metrics to the existing 'Validation' entry. This mutates the
# loaded object in place, so re-running the cell simply overwrites the key.
validation_results = old_results['Validation']
validation_results['validation_svm'] = validation_svm

In [84]:
# Bundle the updated validation results together with the fitted SVR model
# into the object that gets pickled below.
res = dict(
    Validation=old_results['Validation'],
    SVM_Model=svr_model,
)

In [85]:
# Serialize the combined results (validation metrics + model) to answer5.pickle.
with open('answer5.pickle', 'wb') as out_file:
    pickle.dump(res, out_file)