### Bibliotecas

In [10]:
import pandas as pd
import numpy as np
import pywt

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier, ShapeDTW, ElasticEnsemble
from aeon.utils.numba.stats import (
    row_iqr,
    row_mean,
    row_median,
    row_numba_max,
    row_numba_min,
    row_slope,
    row_std,
)

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB

from scipy.fftpack import fft

from tqdm import tqdm
from numba import jit
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

### Dados

In [14]:
"""try:
    train_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\Car_TRAIN.parquet')
    test_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\Car_TRAIN.parquet')
except FileNotFoundError:
    print("Ensure the Parquet files are in the correct path.")
    raise
    
    
X_train = train_data.drop('target', axis=1).values
y_train = train_data['target'].values

X_test = test_data.drop('target', axis=1).values
y_test = test_data['target'].values"""


### Função de transformação dos dados.

In [87]:
# Transform data atualizado
def transform_data(X):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)
    
    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    
    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    
    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])

    return {
        "TS": data_X,
        "FFT": data_FFT,
        "DWT": data_DWT,
        "PAA": X_paa,
        "SAX": X_sax
    }

### Acrescentando algumas métricas nas transformações como média, soma, máximo, minimo etc.

In [25]:
@jit
def transform_data_math(X):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)
    
    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
    stats_PAA = np.hstack([np.mean(X_paa, axis=1).reshape(-1,1), 
                           np.std(X_paa, axis=1).reshape(-1,1), 
                           np.max(X_paa, axis=1).reshape(-1,1), 
                           np.min(X_paa, axis=1).reshape(-1,1),
                           ])

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
    stats_SAX = np.hstack([np.mean(X_sax, axis=1).reshape(-1,1), 
                           np.std(X_sax, axis=1).reshape(-1,1), 
                           np.max(X_sax, axis=1).reshape(-1,1), 
                           np.min(X_sax, axis=1).reshape(-1,1),
                           ])

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    stats_X = np.hstack([np.mean(data_X, axis=1).reshape(-1,1), 
                         np.std(data_X, axis=1).reshape(-1,1), 
                         np.max(data_X, axis=1).reshape(-1,1), 
                         np.min(data_X, axis=1).reshape(-1,1),
                         ])

    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    stats_FFT = np.hstack([np.mean(data_FFT, axis=1).reshape(-1,1), 
                           np.std(data_FFT, axis=1).reshape(-1,1), 
                           np.max(data_FFT, axis=1).reshape(-1,1), 
                           np.min(data_FFT, axis=1).reshape(-1,1),
                           ])

    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
    stats_DWT = np.hstack([np.mean(data_DWT, axis=1).reshape(-1,1), 
                           np.std(data_DWT, axis=1).reshape(-1,1), 
                           np.max(data_DWT, axis=1).reshape(-1,1), 
                           np.min(data_DWT, axis=1).reshape(-1,1),
                           ])

    return {
        "TS": np.hstack([data_X, stats_X]),
        "FFT": np.hstack([data_FFT, stats_FFT]),
        "DWT": np.hstack([data_DWT, stats_DWT]),
        "PAA": np.hstack([X_paa, stats_PAA]),
        "SAX": np.hstack([X_sax, stats_SAX])
    }


### Transform_data junto de extração de caracteristicas - ERRO na função Predict

In [93]:
def transform_and_extract_features(X, default_fc_parameters=MinimalFCParameters()):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)
    
    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    
    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    
    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])

    id_column = np.arange(X.shape[0]).reshape(-1, 1)  # Criando um índice único para cada linha de X

    transformed_data = {
        "TS": np.hstack([id_column, data_X]),
        "FFT": np.hstack([id_column, data_FFT]),
        "DWT": np.hstack([id_column, data_DWT]),
        "PAA": np.hstack([id_column, X_paa]),
        "SAX": np.hstack([id_column, X_sax])
    }
    
    extracted_features_dict = {}
    for key, value in transformed_data.items():
        # Criando DataFrame
        df = pd.DataFrame(value, columns=[f'{key}_{i}' for i in range(value.shape[1])])
        df['id'] = df.index
        
        # Extrair características
        features = extract_features(df, column_id='id', default_fc_parameters=default_fc_parameters)
        clean_features = np.delete(features, np.where(np.std(features, axis=0) < 10e-5), axis=1)
        
        # Armazenar características
        extracted_features_dict[key] = clean_features
    
    return extracted_features_dict


### Teste utilizando Math no transform_data

In [23]:
@jit
def train_with_meta_classifier(X_train, y_train, base_option='random_forest', meta_option='1nn', random_state=42):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    
    X_train_transformed = transform_data_math(X_train)  # Transformar todo o conjunto de treino
    loo = LeaveOneOut()
    
    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, desc="Training Base Models"):
        model = select_model(base_option, random_state)
        scores = []
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
            score = model.score(X_trans[train_index], y_train[train_index])  # Score do modelo nos dados de treino
            scores.append(score)
        avg_score = np.mean(scores)
        trained_models[rep] = (model, avg_score)  # Salvar o modelo treinado e a média dos scores
        
    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, (model, _) in trained_models.items():
            proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades
        meta_features.append(instance_features)
    
    meta_features = np.array(meta_features)
    #np.savetxt("meta-features-train.csv", meta_features, delimiter=",")
    
    # Treinar o meta-classificador
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)
    
    return trained_models, meta_classifier

@jit
def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
    predictions = []
    meta_features_test = []
    
    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data_math(x_instance)
        
        instance_features = []
        for rep, (model, _) in trained_base_models.items():
            proba = model.predict_proba(x_transformed[rep].reshape(1, -1))  # Ajuste para acessar corretamente as características transformadas
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades
        
        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])
        meta_features_test.append(meta_feature)
    
    meta_features_test = np.array(meta_features_test)
    #np.savetxt("meta-features-test.csv", meta_features_test, delimiter=",")
    return predictions


In [28]:
dataset_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour',
                'GunPoint', 'Lightning2', 'Lightning7', 'MoteStrain', 'OliveOil','MedicalImages', 'Trace', 'TwoPatterns',
                'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl']

# Para cada conjunto de dados na lista
for dataset_name in dataset_list:
    # Carregue os dados de treinamento e teste
    X_train, y_train = load_classification(dataset_name, split="TRAIN")
    X_test, y_test = load_classification(dataset_name, split="test")
    
    # Achatando os dados para 2D, pois alguns algoritmos esperam 2D
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    dataset_accuracies = []
    trained_base_models, meta_classifier = train_with_meta_classifier(X_train_flat, y_train, base_option='svm', meta_option='rd')
    predictions_test_meta = predict_with_meta_classifier(X_test_flat, trained_base_models, meta_classifier)
    test_accuracy_meta = np.mean(predictions_test_meta == y_test)
    dataset_accuracies.append(test_accuracy_meta)
        
    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
        
#np.savetxt("Results_MSLOO_.csv", dataset_accuracies, delimiter=",")

Training Base Models: 100%|##########| 5/5 [08:08<00:00, 97.78s/it] 
Testing Instances: 100%|##########| 391/391 [00:02<00:00, 165.26it/s]


Acurácia Adiac: 0.7979539641943734


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.57it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 150.71it/s]


Acurácia Beef: 0.8666666666666667


Training Base Models: 100%|##########| 5/5 [00:05<00:00,  1.08s/it]
Testing Instances: 100%|##########| 60/60 [00:00<00:00, 128.08it/s]


Acurácia Car: 0.85


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 13.80it/s]
Testing Instances: 100%|##########| 900/900 [00:04<00:00, 215.96it/s]


Acurácia CBF: 0.8711111111111111


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 17.46it/s]
Testing Instances: 100%|##########| 28/28 [00:00<00:00, 153.70it/s]


Acurácia Coffee: 1.0


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 34.43it/s]
Testing Instances: 100%|##########| 306/306 [00:01<00:00, 162.00it/s]


Acurácia DiatomSizeReduction: 0.9705882352941176


Training Base Models: 100%|##########| 5/5 [00:02<00:00,  1.91it/s]
Testing Instances: 100%|##########| 100/100 [00:00<00:00, 222.82it/s]


Acurácia ECG200: 0.93


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 26.81it/s]
Testing Instances: 100%|##########| 861/861 [00:04<00:00, 214.19it/s]


Acurácia ECGFiveDays: 0.9988385598141696


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 11.57it/s]
Testing Instances: 100%|##########| 88/88 [00:00<00:00, 164.67it/s]


Acurácia FaceFour: 0.8409090909090909


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  7.53it/s]
Testing Instances: 100%|##########| 150/150 [00:00<00:00, 214.10it/s]


Acurácia GunPoint: 0.9466666666666667


Training Base Models: 100%|##########| 5/5 [00:04<00:00,  1.00it/s]
Testing Instances: 100%|##########| 61/61 [00:00<00:00, 118.86it/s]


Acurácia Lightning2: 0.7540983606557377


Training Base Models: 100%|##########| 5/5 [00:07<00:00,  1.50s/it]
Testing Instances: 100%|##########| 73/73 [00:00<00:00, 169.99it/s]


Acurácia Lightning7: 0.547945205479452


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 28.59it/s]
Testing Instances: 100%|##########| 1252/1252 [00:05<00:00, 233.37it/s]


Acurácia MoteStrain: 0.8674121405750799


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  6.85it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 132.77it/s]


Acurácia OliveOil: 0.9


Training Base Models: 100%|##########| 5/5 [04:48<00:00, 57.66s/it]
Testing Instances: 100%|##########| 760/760 [00:03<00:00, 210.31it/s]


Acurácia MedicalImages: 0.7368421052631579


Training Base Models: 100%|##########| 5/5 [00:10<00:00,  2.06s/it]
Testing Instances: 100%|##########| 100/100 [00:00<00:00, 175.74it/s]


Acurácia Trace: 0.92


Training Base Models: 100%|##########| 5/5 [1:38:53<00:00, 1186.61s/it]
Testing Instances: 100%|##########| 4000/4000 [00:23<00:00, 172.31it/s]


Acurácia TwoPatterns: 0.92925


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 37.53it/s]
Testing Instances: 100%|##########| 601/601 [00:02<00:00, 240.29it/s]


Acurácia SonyAIBORobotSurface1: 0.7753743760399334


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 25.16it/s]
Testing Instances: 100%|##########| 953/953 [00:03<00:00, 241.43it/s]


Acurácia SonyAIBORobotSurface2: 0.8551941238195173


Training Base Models: 100%|##########| 5/5 [00:56<00:00, 11.28s/it]
Testing Instances: 100%|##########| 300/300 [00:01<00:00, 232.34it/s]

Acurácia SyntheticControl: 0.97





In [29]:
dataset_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour',
                'GunPoint', 'Lightning2', 'Lightning7', 'MoteStrain', 'OliveOil','MedicalImages', 'Trace', 'TwoPatterns',
                'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl']

# Para cada conjunto de dados na lista
for dataset_name in dataset_list:
    # Carregue os dados de treinamento e teste
    X_train, y_train = load_classification(dataset_name, split="TRAIN")
    X_test, y_test = load_classification(dataset_name, split="test")
    
    # Achatando os dados para 2D, pois alguns algoritmos esperam 2D
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    dataset_accuracies = []
    trained_base_models, meta_classifier = train_with_meta_classifier(X_train_flat, y_train, base_option='3nn', meta_option='rd')
    predictions_test_meta = predict_with_meta_classifier(X_test_flat, trained_base_models, meta_classifier)
    test_accuracy_meta = np.mean(predictions_test_meta == y_test)
    dataset_accuracies.append(test_accuracy_meta)
        
    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")

Training Base Models:   0%|          | 0/5 [00:00<?, ?it/s]

### Seleção do modelo extrator e modelo classificador

In [27]:
@jit
def select_model(option, random_state):
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1, n_jobs=-1)
    elif option == '3nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=3, n_jobs=-1)
    elif option == 'svm':
        return SVC(C = 1000, gamma=0.01, kernel='rbf', probability=True)
    elif option == 'gbc':
        return GradientBoostingClassifier(n_estimators=100, random_state=random_state)
    elif option == 'nb':
        return GaussianNB()
    elif option == 'shape':
        return ShapeDTW(n_neighbors=1)
    elif option == 'ee':
        return ElasticEnsemble(proportion_of_param_options= 0.5,
                               proportion_train_in_param_finding= 0.5,
                               proportion_train_for_test=0.5,
                               n_jobs=-1,
                               random_state=random_state,
                               majority_vote=True)
    elif option == 'rd':
        return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    else:
        return RandomForestClassifier(n_estimators=200,
                                    n_jobs=-1,
                                    random_state=random_state)
        #return RandomForestClassifier(n_estimators=100,random_state=random_state)

### Treino do modelos extrator e classificador

In [None]:
def train_with_meta_classifier(X_train, y_train, base_option='random_forest', meta_option='1nn', random_state=42):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    
    X_train_transformed = transform_data(X_train)  # Transformar todo o conjunto de treino
    
    loo = LeaveOneOut()
    
    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, desc="Training Base Models"):
        model = select_model(base_option, random_state)
        scores = []
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
            score = model.score(X_trans[train_index], y_train[train_index])  # Score do modelo nos dados de treino
            scores.append(score)
        avg_score = np.mean(scores)
        trained_models[rep] = (model, avg_score)  # Salvar o modelo treinado e a média dos scores
        
    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, (model, _) in trained_models.items():
            proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades
        meta_features.append(instance_features)
    
    meta_features = np.array(meta_features)
    np.savetxt("meta-features-train.csv", meta_features, delimiter=",")
    
    # Treinar o meta-classificador
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)
    
    return trained_models, meta_classifier


### Predicao do meta-classificador

In [84]:
def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste
    
    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data(x_instance)
        
        instance_features = []
        for rep, (model, _) in trained_base_models.items():  # Ajuste para percorrer os modelos treinados e os scores médios
            proba = model.predict_proba(x_transformed[rep][0].reshape(1, -1))  # Ajuste aqui para pegar o primeiro elemento
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades
        
        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões
        
        meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista
    
    # Converter a lista de meta-recursos dos dados de teste em um array numpy
    meta_features_test = np.array(meta_features_test)

    # Salvar todos os meta-recursos dos dados de teste em um arquivo CSV
    np.savetxt("meta-features-test.csv", meta_features_test, delimiter=",")
    
    return predictions


### Testando um único modelo - Random Forest como extrator e SVM como meta-classificador

In [None]:
dataset_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour',
                'GunPoint', 'Lightning2', 'Lightning7', 'MoteStrain', 'OliveOil','MedicalImages', 'Trace', 'TwoPatterns', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl']
#'

# Para cada conjunto de dados na lista
for dataset_name in dataset_list:
    # Carregue os dados de treinamento e teste
    X_train, y_train = load_classification(dataset_name, split="TRAIN")
    X_test, y_test = load_classification(dataset_name, split="test")
    
    # Achatando os dados para 2D, pois alguns algoritmos esperam 2D
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    dataset_accuracies = []
    trained_base_models, meta_classifier = train_with_meta_classifier(X_train_flat, y_train, base_option='1nn', meta_option='rd')
    predictions_test_meta = predict_with_meta_classifier(X_test_flat, trained_base_models, meta_classifier)
    test_accuracy_meta = np.mean(predictions_test_meta == y_test)
    dataset_accuracies.append(test_accuracy_meta)
        
    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
        
#np.savetxt("Results_MSLOO_.csv", dataset_accuracies, delimiter=",")

In [None]:
total_time = 0
algos = ['1nn', '3nn', 'svm', 'nb', 'gbc', 'random_forest']
    # Carregue os dados de treinamento e teste
try:
    train_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\GunPoint_TRAIN.parquet')
    test_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\GunPoint_TEST.parquet')
except FileNotFoundError:
    print("Ensure the Parquet files are in the correct path.")
    raise

X_train = train_data.drop('target', axis=1).values
y_train = train_data['target'].values

X_test = test_data.drop('target', axis=1).values
y_test = test_data['target'].values

In [None]:
trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='svm', meta_option='rd')
predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
test_accuracy_meta = np.mean(predictions_test_meta == y_test)
print(f"Accuracy: {test_accuracy_meta}")

Training Base Models: 100%|##########| 5/5 [00:00<00:00,  7.09it/s]
Testing Instances: 100%|##########| 150/150 [00:00<00:00, 177.86it/s]

Accuracy: 0.9533333333333334





In [None]:
# Treino
trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, train_labels, base_option='svm', meta_option='rd', random_state=42)

# Teste
predictions_test_meta = predict_with_meta_classifier(xtest, trained_base_models, meta_classifier)

# Resultado
test_accuracy_meta = accuracy_score(test_labels, predictions_test_meta)

print(f'Accuracy: {test_accuracy_meta}')

Training Base Models: 100%|##########| 5/5 [00:04<00:00,  1.20it/s]
Testing Instances: 100%|##########| 60/60 [00:02<00:00, 28.46it/s]

Accuracy: 0.8833333333333333





### Testando um único modelo - SVM como extrator e meta-classificador

In [None]:
# Treino
trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, train_labels, base_option='random_forest', meta_option='svm', random_state=42)

# Teste
predictions_test_meta = predict_with_meta_classifier(xtest, trained_base_models, meta_classifier)

# Resultado
test_accuracy_meta = accuracy_score(test_labels, predictions_test_meta)

print(f'Accuracy: {test_accuracy_meta}')

In [None]:
#Teste utilizando o classificador SVM
meta_attrib_train = np.loadtxt("meta-features-train.csv", delimiter=",")
meta_attrib_test = np.loadtxt("meta-features-test.csv", delimiter=",")
from sklearn.metrics import accuracy_score

clf = SVC(probability=True)
clf.fit(meta_attrib_train, y_train)
y_hat = clf.predict(meta_attrib_test)
test_accuracy_meta = accuracy_score(y_hat, y_test)
print(f"Accuracy: {test_accuracy_meta}")

In [None]:
clf_2 = SVC(probability=True)
clf_2.fit(X_train, y_train)
y_hat_ = clf_2.predict(X_test)
test_accuracy_meta_2 = accuracy_score(y_hat_,y_test)
print(f"Accuracy: {test_accuracy_meta_2}")

### Gráfico das diferenças de dados

In [None]:
import matplotlib.pyplot as plt

y1 = y_hat  # depois da transformação
y2 = y_test  

z1 = y_hat_ #antes da transformação
z2 = y_test

#suavizar os dados do gráfico
window_size = 15
y1_smoothed = pd.Series(y1).rolling(window=window_size).mean()
y2_smoothed = pd.Series(y2).rolling(window=window_size).mean()
z1_smoothed = pd.Series(z1).rolling(window=window_size).mean()
z2_smoothed = pd.Series(z2).rolling(window=window_size).mean()

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5), layout='constrained')

# Conjunto de validação do classificador
axs[0].set_title('Antes da transformação')
axs[0].plot(z1_smoothed, label='Treino')
axs[0].plot(z2_smoothed, label='Teste')
axs[0].set_xlabel('Tempo (s)')
axs[0].set_ylabel('Treino')
axs[0].grid(True)

# Conjunto de validação do meta-classificador
axs[1].set_title('Depois da transformação')
axs[1].plot(y1_smoothed, label='Treino')
axs[1].plot(y2_smoothed, label='Teste')
axs[1].set_xlabel('Tempo (s)')
axs[1].set_ylabel('Treino')
axs[1].grid(True)

plt.legend()
plt.show()



In [None]:
w1 = y_hat  # meta-classificador
w2 = y_hat_ #classificação

# Suavizar os dados do gráfico
window_size = 15
w1_smoothed = pd.Series(w1).rolling(window=window_size).mean()
w2_smoothed = pd.Series(w2).rolling(window=window_size).mean()

# Plotar os dados
plt.figure(figsize=(10, 6))
plt.plot(w1_smoothed, label='w1 (Classificação usando meta-caracteristicas)')
plt.plot(w2_smoothed, label='w2 (classificação utilizando dados brutos)')
plt.xlabel('Tempo (s)')
plt.ylabel('Valores suavizados')
plt.title('Comparação entre os resultados de um SVM')
plt.legend()
plt.grid(True)
plt.show()


### Treino em loop de todas as opções de classificadores disponiveis no Select Model.

In [None]:
algos = ['1nn', '3nn', 'svm', 'nb', 'gbc', 'ee', 'shape', 'rf', 'rd']
for algo in algos:
    
    print(f'Meta-classificador com modelo extrator {algo.upper()}')
    
    # Training
    try:
        trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='svm', meta_option=algo)
        # Testing
        predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
        test_accuracy_meta = np.mean(predictions_test_meta == y_test)
        
        print(f'Acurácia do teste usando o meta-classificador com modelo extrator {algo}: {test_accuracy_meta}')
    except Exception as e:
        print(f"Ocorreu um erro no teste com o {algo}: {e}")
    print("-------------------------------")