In [1]:
import numpy as np
import pandas as pd
import pickle
import os
from pathlib import Path

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import onnxruntime as rt

try:
    from onnxmltools.convert import convert_xgboost, convert_lightgbm
    xgb_lgbm_available = True
except:
    xgb_lgbm_available = False
    print("onnxmltools nao disponivel, XGBoost e LightGBM nao serao convertidos")

print(f"ONNX Runtime version: {rt.__version__}")

ONNX Runtime version: 1.23.2


In [2]:
models_dir = '../models/'
onnx_dir = '../models/onnx/'
os.makedirs(onnx_dir, exist_ok=True)

X_train = np.load('../data/processed/X_train.npy')
X_test = np.load('../data/processed/X_test.npy')
y_test = np.load('../data/processed/y_test.npy')

print(f"Shape X_train: {X_train.shape}")
print(f"Shape X_test: {X_test.shape}")

Shape X_train: (472, 27)
Shape X_test: (119, 27)


## Funcao para converter modelo sklearn para ONNX

In [3]:
def convert_model_to_onnx(model, model_name, n_features, output_path):
    initial_type = [('float_input', FloatTensorType([None, n_features]))]
    
    try:
        onnx_model = convert_sklearn(model, initial_types=initial_type, target_opset=12)
        
        with open(output_path, "wb") as f:
            f.write(onnx_model.SerializeToString())
        
        print(f"Modelo {model_name} convertido com sucesso")
        return True
    except Exception as e:
        print(f"Erro ao converter {model_name}: {str(e)}")
        return False

## Funcao para testar modelo ONNX

In [4]:
def test_onnx_model(onnx_path, sklearn_model, X_sample):
    sess = rt.InferenceSession(onnx_path)
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name
    
    X_sample_float = X_sample.astype(np.float32)
    
    pred_onnx = sess.run([label_name], {input_name: X_sample_float})[0]
    
    pred_sklearn = sklearn_model.predict_proba(X_sample)[:, 1]
    
    if hasattr(pred_onnx[0], '__len__'):
        pred_onnx_proba = pred_onnx[:, 1]
    else:
        pred_onnx_proba = pred_onnx
    
    diff = np.abs(pred_sklearn - pred_onnx_proba).mean()
    
    print(f"Diferenca media entre predicoes: {diff:.6f}")
    
    if diff < 0.001:
        print("Modelo ONNX validado com sucesso")
        return True
    else:
        print("Atencao: diferenca significativa detectada")
        return False

## Carregar e converter modelos individuais

In [5]:
model_files = [f for f in os.listdir(models_dir) if f.endswith('.pkl')]

print(f"Total de modelos encontrados: {len(model_files)}")
print("Modelos:")
for f in model_files:
    print(f"  - {f}")

Total de modelos encontrados: 36
Modelos:
  - ensemble_rank_averaging.pkl
  - svm_rbf_bayessearch.pkl
  - ensemble_voting_optimized.pkl
  - advanced_xgboost_top15.pkl
  - logistic_regression_optimized.pkl
  - logistic_regression_bayessearch.pkl
  - svm_rbf_optimized.pkl
  - ensemble_weighted_rank.pkl
  - advanced_xgboost_all.pkl
  - advanced_mlp_all.pkl
  - advanced_lightgbm_top15.pkl
  - bayesopt_mlp_rfe.pkl
  - bayesopt_gradient_boosting_all.pkl
  - advanced_mlp_top10.pkl
  - svm_linear_baseline.pkl
  - bayesopt_mlp_all.pkl
  - naive_bayes_baseline.pkl
  - ensemble_baseline_advanced.pkl
  - logistic_regression_baseline.pkl
  - random_forest_bayessearch.pkl
  - bayesopt_gradient_boosting_rfe.pkl
  - advanced_lightgbm_top10.pkl
  - bayesopt_lightgbm_rfe.pkl
  - advanced_lightgbm_all.pkl
  - bayesopt_elastic_net_all.pkl
  - random_forest_baseline.pkl
  - random_forest_optimized.pkl
  - bayesopt_xgboost_rfe.pkl
  - bayesopt_svm_rbf_rfe.pkl
  - advanced_mlp_top15.pkl
  - bayesopt_xgboost_

In [6]:
conversion_results = []

for model_file in model_files:
    if 'ensemble' in model_file.lower():
        print(f"Pulando {model_file} (ensemble nao suportado)")
        continue
    
    print(f"\nProcessando: {model_file}")
    
    model_path = os.path.join(models_dir, model_file)
    
    try:
        with open(model_path, 'rb') as f:
            model_data = pickle.load(f)
    except Exception as e:
        print(f"  Erro ao carregar: {str(e)[:100]}")
        conversion_results.append({
            'model': model_file.replace('.pkl', ''),
            'converted': False,
            'validated': False,
            'path': None
        })
        continue
    
    if hasattr(model_data, 'best_estimator_'):
        model = model_data.best_estimator_
        print(f"  Extraindo best_estimator_")
    elif hasattr(model_data, 'predict_proba'):
        model = model_data
    elif isinstance(model_data, dict) and 'model' in model_data:
        model = model_data['model']
    else:
        print(f"  Formato nao reconhecido, pulando")
        continue
    
    model_type = type(model).__name__
    if 'XGB' in model_type or 'LightGBM' in model_type:
        print(f"  Tipo {model_type} - requer onnxmltools, pulando")
        conversion_results.append({
            'model': model_file.replace('.pkl', ''),
            'converted': False,
            'validated': False,
            'path': None
        })
        continue
    
    n_features = X_train.shape[1]
    model_name = model_file.replace('.pkl', '')
    onnx_path = os.path.join(onnx_dir, f"{model_name}.onnx")
    
    success = convert_model_to_onnx(model, model_name, n_features, onnx_path)
    
    if success:
        try:
            X_sample = X_test[:10]
            validation = test_onnx_model(onnx_path, model, X_sample)
        
        conversion_results.append({
            'model': model_name,
            'converted': True,
            'validated': validation,
            'path': onnx_path
        })
    else:
        conversion_results.append({
            'model': model_name,
            'converted': False,
            'validated': False,
            'path': None
        })

print("\n" + "="*50)
print("Conversao finalizada")

SyntaxError: expected 'except' or 'finally' block (3897015551.py, line 58)

## Resultados da conversao

In [7]:
df_results = pd.DataFrame(conversion_results)

print("\nResumo:")
print(f"Total de modelos processados: {len(df_results)}")
print(f"Convertidos com sucesso: {df_results['converted'].sum()}")
print(f"Validados com sucesso: {df_results['validated'].sum()}")

print("\nDetalhes:")
df_results

NameError: name 'conversion_results' is not defined

## Salvar informacoes dos modelos ONNX

In [8]:
model_info = {
    'n_features': X_train.shape[1],
    'feature_names_path': '../data/processed/feature_names.txt',
    'models': conversion_results,
    'preprocessing': {
        'scaler_path': '../data/processed/scaler.pkl',
        'label_encoder_path': '../data/processed/label_encoder.pkl'
    }
}

with open(os.path.join(onnx_dir, 'model_info.pkl'), 'wb') as f:
    pickle.dump(model_info, f)

print("Informacoes dos modelos salvas em model_info.pkl")

NameError: name 'conversion_results' is not defined

## Exemplo de uso do modelo ONNX

In [9]:
successful_models = df_results[df_results['validated'] == True]

if len(successful_models) > 0:
    example_model_path = successful_models.iloc[0]['path']
    print(f"Testando modelo: {successful_models.iloc[0]['model']}")
    
    sess = rt.InferenceSession(example_model_path)
    input_name = sess.get_inputs()[0].name
    label_name = sess.get_outputs()[0].name
    
    print(f"\nInput name: {input_name}")
    print(f"Input shape: {sess.get_inputs()[0].shape}")
    print(f"Output name: {label_name}")
    print(f"Output shape: {sess.get_outputs()[0].shape}")
    
    X_example = X_test[:5].astype(np.float32)
    predictions = sess.run([label_name], {input_name: X_example})[0]
    
    print(f"\nPredicoes para 5 exemplos:")
    if hasattr(predictions[0], '__len__') and len(predictions[0]) == 2:
        proba_class_1 = predictions[:, 1]
        print(f"Probabilidade de conversao: {proba_class_1}")
    else:
        print(f"Predicoes: {predictions}")
else:
    print("Nenhum modelo validado disponivel para exemplo")

NameError: name 'df_results' is not defined

## Criar arquivo de metadados para a API

In [10]:
with open('../data/processed/feature_names.txt', 'r') as f:
    feature_names = [line.strip() for line in f.readlines()]

api_metadata = {
    'task': 'binary_classification',
    'problem': 'Predicao de conversao MCI para Demencia em 3 anos',
    'target_classes': ['Nao-Conversor', 'Conversor'],
    'n_features': len(feature_names),
    'feature_names': feature_names,
    'models_available': [m['model'] for m in conversion_results if m['validated']],
    'best_model': successful_models.iloc[0]['model'] if len(successful_models) > 0 else None,
    'onnx_directory': onnx_dir
}

import json
with open(os.path.join(onnx_dir, 'api_metadata.json'), 'w') as f:
    json.dump(api_metadata, f, indent=2)

print("Metadados para API salvos em api_metadata.json")
print(f"\nModelos disponiveis para a API: {len(api_metadata['models_available'])}")

NameError: name 'conversion_results' is not defined