### Bibliotecas

In [1]:
import pandas as pd
import numpy as np
from scipy.fftpack import fft
from scipy.stats import norm
import pywt
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
import xgboost as xgb
#from sklearn.model_selection import LeaveOneOut
from tqdm import tqdm

#SAX e DWT grindSearch
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin


### Dados

In [3]:
try:
    train_data = pd.read_csv('MSC\CSV_Dissertação\Beef_TRAIN.csv')
    test_data = pd.read_csv('MSC\CSV_Dissertação\Beef_TEST.csv')
except FileNotFoundError:
    print("Ensure the CSV files are in the correct path.")
    raise

X_train = train_data.drop('target', axis=1).values
y_train = train_data['target'].values

X_test = test_data.drop('target', axis=1).values
y_test = test_data['target'].values

### Função SAX

In [4]:
def sax_transform(series, w, a):
    paa = [series[i:i + w].mean() for i in range(0, len(series), w)]
    
    if np.std(paa) != 0:
        paa = (paa - np.mean(paa)) / np.std(paa)
    else:
        paa = paa - np.mean(paa)
    
    breakpoints = norm.ppf(np.linspace(0, 1, a+1)[1:-1])
    sax_symbols = np.array(range(a))
    sax_representation = sax_symbols[np.digitize(paa, breakpoints)]
    
    return sax_representation

### Função de transformação dos dados

In [5]:
def transform_data(X):
    a = 3
    w = 5
    

    X_sax = np.array([sax_transform(row, w, a) for row in X])
    X_fft = np.abs(fft(X, axis=1))
    
    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db4', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))
    
    X_paa = np.column_stack([X[:, i:i+2].mean(axis=1) for i in range(0, X.shape[1], 2)])

    return {
        "TS": X,
        "FFT": X_fft,
        "DWT": X_dwt,
        "PAA": X_paa,
        "SAX": X_sax
    }

### Seleção do modelo extrator e modelo classificador

In [6]:
def select_model(option, random_state):
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1)
    elif option == '5nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=1)
    elif option == 'svm':
        return svm.SVC(C=100, gamma=0.01, kernel='rbf', probability=True)
    elif option == 'gbc':
        return GradientBoostingClassifier(n_estimators=100, random_state=random_state)
    elif option == 'nb':
        return GaussianNB()
    elif option == 'xgb':
        return xgb.XGBClassifier(random_state=random_state)
    else:
        return RandomForestClassifier(n_estimators=100,random_state=random_state)

### Treino do modelos extrator e classificador

In [7]:
def train_with_meta_classifier(X_train, y_train, base_option='random_forest', meta_option='1nn', random_state=42):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    
    X_train_transformed = transform_data(X_train)  # Transformar todo o conjunto de treino

    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, desc="Training Base Models"):
        model = select_model(base_option, random_state)
        model.fit(X_trans, y_train)
        trained_models[rep] = model
        
    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, model in trained_models.items():
            proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            instance_features.extend(proba[0])
        meta_features.append(instance_features)
    
    meta_features = np.array(meta_features)
    
    # Treinar o meta-classificador
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)
    
    return trained_models, meta_classifier

### Predicao do meta-classificador

In [8]:
def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
    predictions = []
    
    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data(x_instance)
        
        instance_features = []
        for rep, model in trained_base_models.items():
            proba = model.predict_proba(x_transformed[rep])
            instance_features.extend(proba[0])
        
        meta_feature = np.array(instance_features).reshape(1, -1)
        
        # Utilizar o meta-classificador para a predição
        predicted_value = trained_meta_classifier.predict(meta_feature)[0]
        predictions.append(predicted_value)
    
    return predictions

### Testar um único modelo

In [9]:
# Treino
trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='1nn', meta_option='svm', random_state=42)

# Teste
predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)

# Resultado
test_accuracy_meta = np.mean(predictions_test_meta == y_test)

Training Base Models: 100%|##########| 5/5 [00:00<00:00, 999.79it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 140.12it/s]


In [11]:
test_accuracy_meta

0.7333333333333333

### Treino em loop de todas as opções de classificadores

In [10]:
algos = ['1nn', '5nn', 'svm', 'nb', 'gbc', 'xgb']
for algo in algos:
    
    print(f'Meta-classificador com modelo extrator {algo.upper()}')
    
    # Training
    try:
        trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option=algo, meta_option=algo)
        # Testing
        predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
    
        test_accuracy_meta = np.mean(predictions_test_meta == y_test)
        print(f'Acurácia do teste usando o meta-classificador com modelo extrator {algo}: {test_accuracy_meta}')
    except Exception as e:
        print(f"Ocorreu um erro no teste com o {algo}: {e}")
    print("-------------------------------")
print('RF (10x)')
for i in range(10):
    print(f'RF: Random seed = {i}')
    try:
        trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='random_forest', random_state=i)
    
        # Testing
        predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
    
        test_accuracy = np.mean(predictions_test_meta == y_test)
        print(f'Acurácia do teste usando seed {i}: {test_accuracy}')
    except Exception as e:
        print(f"Ocorreu um erro no teste com o RF (seed {i}): {e}")
    print("-------------------------------")

Meta-classificador com modelo extrator 1NN


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 1249.20it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 135.10it/s]


Acurácia do teste usando o meta-classificador com modelo extrator 1nn: 0.6666666666666666
-------------------------------
Meta-classificador com modelo extrator 5NN


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 999.69it/s]
Testing Instances: 100%|##########| 30/30 [00:22<00:00,  1.33it/s]


Acurácia do teste usando o meta-classificador com modelo extrator 5nn: 0.3333333333333333
-------------------------------
Meta-classificador com modelo extrator SVM


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 217.35it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 138.22it/s]


Acurácia do teste usando o meta-classificador com modelo extrator svm: 0.8333333333333334
-------------------------------
Meta-classificador com modelo extrator NB


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 714.19it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 138.22it/s]


Acurácia do teste usando o meta-classificador com modelo extrator nb: 0.7666666666666667
-------------------------------
Meta-classificador com modelo extrator GBC


Training Base Models: 100%|##########| 5/5 [00:18<00:00,  3.68s/it]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 103.78it/s]


Acurácia do teste usando o meta-classificador com modelo extrator gbc: 0.7333333333333333
-------------------------------
Meta-classificador com modelo extrator XGB


Training Base Models:   0%|          | 0/5 [00:00<?, ?it/s]


Ocorreu um erro no teste com o xgb: Invalid classes inferred from unique values of `y`.  Expected: [0 1 2 3 4], got [1 2 3 4 5]
-------------------------------
RF (10x)
RF: Random seed = 0


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.75it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.73it/s]


Acurácia do teste usando seed 0: 0.8
-------------------------------
RF: Random seed = 1


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.77it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.67it/s]


Acurácia do teste usando seed 1: 0.7666666666666667
-------------------------------
RF: Random seed = 2


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.75it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.63it/s]


Acurácia do teste usando seed 2: 0.7333333333333333
-------------------------------
RF: Random seed = 3


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.77it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.66it/s]


Acurácia do teste usando seed 3: 0.7333333333333333
-------------------------------
RF: Random seed = 4


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.76it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.20it/s]


Acurácia do teste usando seed 4: 0.8
-------------------------------
RF: Random seed = 5


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.76it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.65it/s]


Acurácia do teste usando seed 5: 0.8
-------------------------------
RF: Random seed = 6


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.48it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.35it/s]


Acurácia do teste usando seed 6: 0.7666666666666667
-------------------------------
RF: Random seed = 7


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.59it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.49it/s]


Acurácia do teste usando seed 7: 0.7666666666666667
-------------------------------
RF: Random seed = 8


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.79it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.49it/s]


Acurácia do teste usando seed 8: 0.7666666666666667
-------------------------------
RF: Random seed = 9


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.79it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 15.49it/s]

Acurácia do teste usando seed 9: 0.7666666666666667
-------------------------------



