# Imports

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import cross_val_predict
import os
from pathlib import Path
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Dados

In [2]:
# The data folder is resolved relative to the current working directory:
# <cwd>/../Partial Components Analysis
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
base_path = Path(parent_dir) / 'Partial Components Analysis'

# Calibration workbooks, one per file prefix (RAW, MSC, SNV, SG).
file_path_raw_cal, file_path_msc_cal, file_path_snv_cal, file_path_sg_cal = (
    base_path / workbook
    for workbook in (
        'RAW_calibration.xlsx',
        'MSC_calibration.xlsx',
        'SNV_calibration.xlsx',
        'SG_calibration.xlsx',
    )
)

df_raw_cal, df_msc_cal, df_snv_cal, df_sg_cal = map(
    pd.read_excel,
    (file_path_raw_cal, file_path_msc_cal, file_path_snv_cal, file_path_sg_cal),
)

# Validation workbooks, same prefixes and same order.
file_path_raw_val, file_path_msc_val, file_path_snv_val, file_path_sg_val = (
    base_path / workbook
    for workbook in (
        'RAW_validation.xlsx',
        'MSC_validation.xlsx',
        'SNV_validation.xlsx',
        'SG_validation.xlsx',
    )
)

df_raw_val, df_msc_val, df_snv_val, df_sg_val = map(
    pd.read_excel,
    (file_path_raw_val, file_path_msc_val, file_path_snv_val, file_path_sg_val),
)

# Testes

In [3]:
# Both splits come from the `df_msc_*` frames. The target is the 'SST' column;
# the features are every column from position 6 onward.
# NOTE(review): presumably the first six columns hold non-feature fields — confirm.
X_test = df_msc_val.iloc[:, 6:]
y_test = df_msc_val['SST'].values
X_train = df_msc_cal.iloc[:, 6:]
y_train = df_msc_cal['SST'].values

In [4]:
# Display the (rows, columns) shape of the training and test feature frames.
X_train.shape, X_test.shape

((175, 2151), (75, 2151))

In [5]:
# Remember the feature column labels; they are re-attached after scaling.
cols = X_train.columns

In [6]:
# Standardize the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()

# The scaled values are wrapped back into DataFrames carrying the original
# column labels. `cols` is passed directly: wrapping it in a list (`[cols]`)
# makes pandas build a one-level MultiIndex whose labels are 1-tuples instead
# of restoring the original labels.
X_train = pd.DataFrame(scaler.fit_transform(X_train), columns=cols)
X_test = pd.DataFrame(scaler.transform(X_test), columns=cols)

In [37]:
# Compute the regression metrics reported for each evaluation
def calculate_metrics(y_true, y_pred):
    """Return a dictionary of regression error metrics.

    Args:
        y_true: Observed target values (array-like).
        y_pred: Predicted target values, same number of elements as ``y_true``.

    Returns:
        dict with the keys:
            "Correlation coefficient": Pearson correlation of y_true and y_pred.
            "Mean absolute error": MAE.
            "Root mean squared error": RMSE.
            "Relative absolute error": 100 * MAE / mean(|y_true - mean(y_true)|).
            "Root relative squared error": 100 * RMSE / std(y_true), with the
                population standard deviation (numpy's default ``ddof=0``).
            "Total Number of Instances": ``len(y_true)``.

    Note:
        When ``y_true`` is constant, both relative errors divide by zero and
        the correlation is undefined, so those entries are non-finite.
    """
    n_instances = len(y_true)

    # Flatten both inputs so (n, 1) column vectors behave like 1-D vectors.
    # Without this, np.corrcoef treats every row as a separate variable and
    # the [0, 1] entry is NaN (or the call fails on mismatched shapes).
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()

    correlation_coefficient = np.corrcoef(y_true, y_pred)[0, 1]
    mae = mean_absolute_error(y_true, y_pred)
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    # Both relative errors compare the model against a baseline that always
    # predicts mean(y_true).
    mean_y_true = np.mean(y_true)
    relative_absolute_error = 100 * (mae / np.mean(np.abs(y_true - mean_y_true)))
    root_relative_squared_error = 100 * (rmse / np.std(y_true))

    return {
        "Correlation coefficient": correlation_coefficient,
        "Mean absolute error": mae,
        "Root mean squared error": rmse,
        "Relative absolute error": relative_absolute_error,
        "Root relative squared error": root_relative_squared_error,
        "Total Number of Instances": n_instances
    }

def display_metrics(title, metrics):
    """Print a titled report with one line per metric.

    Args:
        title: Heading printed between ``===`` markers.
        metrics: Mapping of metric name to numeric value.

    Integer values (for example an instance count) are printed as whole
    numbers; every other value is printed with four decimal places.
    """
    print(f"\n=== {title} ===")
    for metric, value in metrics.items():
        # bool is a subclass of int but is not a count, so it keeps the
        # float formatting used for every non-integer value.
        is_count = isinstance(value, (int, np.integer)) and not isinstance(value, bool)
        formatted = f"{int(value)}" if is_count else f"{value:.4f}"
        print(f"{metric}: {formatted}")

from sklearn.model_selection import LeaveOneOut

# RBF-kernel support vector regressor with fixed, hand-picked settings:
# C (regularization), gamma (RBF kernel coefficient), epsilon (width of the
# insensitive tube) and tol (solver stopping tolerance).
model = SVR(kernel='rbf', C=4, gamma=0.01, epsilon=0.2, tol=1e-12)

# 1) Fit on the whole training split and score the fit on that same data.
model.fit(X_train, y_train)
y_train_pred = model.predict(X_train)
training_metrics = calculate_metrics(y_train, y_train_pred)
display_metrics("Training Metrics", training_metrics)

# 2) Leave-one-out cross-validation on the training split: each sample is
#    predicted by an estimator fitted on all the other samples.
# NOTE(review): X_train was scaled with statistics from all training rows
# before this step, so each held-out sample still influenced the scaling.
loo = LeaveOneOut()
y_train_cv = cross_val_predict(model, X_train, y_train, cv=loo)
cv_metrics = calculate_metrics(y_train, y_train_cv)
display_metrics("Cross-Validation Metrics", cv_metrics)

# 3) Final evaluation of the model fitted in step 1 on the held-out test split.
y_pred_val = model.predict(X_test)
validation_metrics = calculate_metrics(y_test, y_pred_val)
display_metrics("Validation Metrics", validation_metrics)


=== Training Metrics ===
Correlation coefficient: 0.9936
Mean absolute error: 0.2565
Root mean squared error: 0.4067
Relative absolute error: 11.6179
Root relative squared error: 15.3114
Total Number of Instances: 175.0000

=== Cross-Validation Metrics ===
Correlation coefficient: 0.6227
Mean absolute error: 1.8401
Root mean squared error: 2.2896
Relative absolute error: 83.3368
Root relative squared error: 86.1919
Total Number of Instances: 175.0000

=== Validation Metrics ===
Correlation coefficient: 0.5886
Mean absolute error: 1.4584
Root mean squared error: 1.8290
Relative absolute error: 84.6202
Root relative squared error: 86.9141
Total Number of Instances: 75.0000


In [11]:
from sklearn.model_selection import GridSearchCV, LeaveOneOut
from sklearn.svm import SVR

# Hyperparameter grid for the polynomial-kernel SVR
# (5 * 2 * 4 * 3 * 3 = 360 combinations).
param_grid_poly = dict(
    C=[0.1, 1, 4, 10, 100],
    degree=[1, 2],            # polynomial degree
    coef0=[0, 0.5, 1, 10],    # independent term of the polynomial kernel
    epsilon=[0.01, 0.1, 0.2],
    tol=[1e-4, 1e-5, 1e-6],
)

loo = LeaveOneOut()

# Exhaustive search with leave-one-out folds, scored by negated mean squared
# error and spread over all available cores.
grid_search_poly = GridSearchCV(
    estimator=SVR(kernel='poly'),
    param_grid=param_grid_poly,
    scoring='neg_mean_squared_error',
    cv=loo,
    n_jobs=-1,
)
grid_search_poly.fit(X_train, y_train)

# Keep the winning estimator and its cross-validated score.
best_model_poly = grid_search_poly.best_estimator_
best_score_poly = grid_search_poly.best_score_

# The score is a negated MSE, so the sign is flipped when reporting it.
print(f"Best parameters for polynomial kernel: {grid_search_poly.best_params_}")
print(f"Best score for polynomial kernel: {-best_score_poly:.4f}")


KeyboardInterrupt: 

In [10]:
from sklearn.svm import SVR
from sklearn.model_selection import GridSearchCV, LeaveOneOut

# Hyperparameter grid for the RBF-kernel SVR (3 * 4 * 3 * 3 = 108 combinations).
param_grid = {
    'C': [0.1, 1, 10],
    'gamma': [0.001, 0.01, 0.1, 'auto'],
    'epsilon': [0.01, 0.1, 0.2],
    'tol': [1e-4, 1e-5, 1e-6]
}

loo = LeaveOneOut()

# Leave-one-out fits one model per training sample for every combination, so
# the search is spread over all available cores (n_jobs=-1), matching the
# polynomial-kernel search. Parallelism does not change the selected parameters.
grid_search = GridSearchCV(
    SVR(kernel='rbf'),
    param_grid,
    cv=loo,
    scoring='neg_mean_squared_error',
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Keep the winning estimator and report its parameters.
best_model = grid_search.best_estimator_
print(f"Best parameters: {grid_search.best_params_}")


KeyboardInterrupt: 