# Desenvolvimento do modelo de predição de preço

Exploração inicial de modelos de base

In [2]:
import pandas as pd
from dagshub.data_engine import datasources
import mlflow
import dagshub
from sklearn.model_selection import train_test_split
import mlflow.sklearn
import mlflow.catboost
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from sklearn.tree import DecisionTreeRegressor
from sklearn.neural_network import MLPRegressor
from xgboost import XGBRegressor
import lightgbm as lgb
from sklearn.svm import SVR
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as C
import mlflow.models.signature
from mlflow.models import infer_signature
from catboost import CatBoostRegressor

## Carregando Dataset

In [5]:
ds = datasources.get_datasource("wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance", "processed")

In [6]:
ds.all().dataframe

Output()

Unnamed: 0,path,datapoint_id,dagshub_download_url,media type,size
0,quantum_finance_test_processed.csv,103597636,https://dagshub.com/api/v1/repos/wagnerdataset...,text/plain,12949087
1,quantum_finance_train_processed.csv,103597637,https://dagshub.com/api/v1/repos/wagnerdataset...,text/plain,26058129


In [7]:
res = ds.head()

for dp in res:
    dataset_url = dp.download_url

Output()

In [8]:
dataset_url

'https://dagshub.com/api/v1/repos/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance/raw/main/data/processed/quantum_finance_train_processed.csv'

In [9]:
df = pd.read_csv(dataset_url)
df.head()

Unnamed: 0,Age,Occupation,Annual_Income,Num_Bank_Accounts,Num_Credit_Card,Interest_Rate,Num_of_Loan,Delay_from_due_date,Num_of_Delayed_Payment,Num_Credit_Inquiries,Credit_Mix,Outstanding_Debt,Credit_Utilization_Ratio,Credit_History_Age,Payment_of_Min_Amount,Total_EMI_per_month,Amount_invested_monthly,Payment_Behaviour,Monthly_Balance,Credit_Score
0,23.0,13,19114.12,3,4,3,4,3,7.0,4.0,3,809.98,26.82262,265.0,1,49.574949,80.415295,2,312.494089,2
1,23.0,13,19114.12,3,4,3,4,-1,,4.0,1,809.98,31.94496,219.0,1,49.574949,118.280222,3,284.629162,2
2,33.0,13,19114.12,3,4,3,4,3,7.0,4.0,1,809.98,28.609352,267.0,1,49.574949,81.699521,4,331.209863,2
3,23.0,13,19114.12,3,4,3,4,5,4.0,4.0,1,809.98,31.377862,268.0,1,49.574949,199.458074,5,223.45131,2
4,23.0,13,19114.12,3,4,3,4,6,,4.0,1,809.98,24.797347,269.0,1,49.574949,41.420153,1,341.489231,2


## Desenvolvimento e experimentos de modelos

In [10]:
dagshub.init(repo_owner="wagnerdataset", repo_name="fiap-ds-mlops-10dtsr-quantum-finance", mlflow=True)

In [11]:
mlflow.autolog()

2025/08/02 19:07:11 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2025/08/02 19:07:16 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2025/08/02 19:07:16 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [12]:
features = list(df.columns)
features.remove("Credit_Score")

features

['Age',
 'Occupation',
 'Annual_Income',
 'Num_Bank_Accounts',
 'Num_Credit_Card',
 'Interest_Rate',
 'Num_of_Loan',
 'Delay_from_due_date',
 'Num_of_Delayed_Payment',
 'Num_Credit_Inquiries',
 'Credit_Mix',
 'Outstanding_Debt',
 'Credit_Utilization_Ratio',
 'Credit_History_Age',
 'Payment_of_Min_Amount',
 'Total_EMI_per_month',
 'Amount_invested_monthly',
 'Payment_Behaviour',
 'Monthly_Balance']

In [13]:
X = df[features]

In [14]:
len(features)

19

In [15]:
y = df["Credit_Score"]
y

0        2
1        2
2        2
3        2
4        2
        ..
99995    0
99996    0
99997    0
99998    1
99999    0
Name: Credit_Score, Length: 100000, dtype: int64

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [17]:
def evaluate_and_log_model(kind, model_name, model, X_test, y_test):
    predictions = model.predict(X_test)

    mse = mean_squared_error(y_test, predictions)
    mae = mean_absolute_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    mape = mean_absolute_percentage_error(y_test, predictions)

    mlflow.log_metric("MSE", mse)
    mlflow.log_metric("MAE", mae)
    mlflow.log_metric("R2", r2)
    mlflow.log_metric("MAPE", mape)

    signature = infer_signature(X_test, predictions)

    if kind == "catboost":
        mlflow.catboost.log_model(model, model_name, signature=signature, input_example=X_test[:5])
    elif kind == "xgboost":
        mlflow.xgboost.log_model(model, model_name, signature=signature, input_example=X_test[:5])
    elif kind == "lightgbm":
        mlflow.lightgbm.log_model(model, model_name, signature=signature, input_example=X_test[:5])
    else:
        mlflow.sklearn.log_model(model, model_name, signature=signature, input_example=X_test[:5])

    print(f"Model {model_name} logged with MSE: {mse}, MAE: {mae}, R2: {r2}, MAPE: {mape}")

### Decision Tree Regressor

In [54]:
with mlflow.start_run(run_name="Decision Tree Regression"):
    param_grid = {
        'max_depth': [None, 3, 5, 10, 15],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
    
    ridge = DecisionTreeRegressor()

    grid_search = GridSearchCV(ridge, param_grid, cv=5, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False))
    grid_search.fit(X_train, y_train)

    best_model = grid_search.best_estimator_

    mlflow.log_param("best_max_depth", grid_search.best_params_['max_depth'])
    mlflow.log_param("best_min_samples_split", grid_search.best_params_['min_samples_split'])
    mlflow.log_param("best_min_samples_leaf", grid_search.best_params_['min_samples_leaf'])

    evaluate_and_log_model("sklearn", "ridge_regression", best_model, X_test, y_test)

2025/08/02 15:03:09 INFO mlflow.sklearn.utils: Logging the 5 best runs, 40 runs will be omitted.


🏃 View run invincible-rat-596 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/953c9fc501444c4ab89e7103563af7c3
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run rogue-asp-237 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/d68885d59b9f4e1ea9c028f79b6a490d
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run popular-conch-487 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/58fe172c006940f388df315a55716b95
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run rogue-pug-385 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/862bb51406d34d579c783cd43bf97960



Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Model ridge_regression logged with MSE: 0.38506666666666667, MAE: 0.34413333333333335, R2: 0.15817847573857158, MAPE: 504403158265495.7
🏃 View run Decision Tree Regression at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/b89a7a5d70954442a54c2098a47f4933
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0


🏃 View run dazzling-mule-65 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/1a41ef84525d4917b7a43f8f68e8ca85
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run dapper-stag-525 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/af3ba6e874a54ca6ba7f984e3255df6c
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run traveling-croc-651 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/ba733bf1172b448e808e7faa49098977
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run serious-lamb-652 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/628fdab1f64747a5a426362a6ef4

## XGBoost

In [20]:
with mlflow.start_run(run_name="XGBoost_Regressor_Advanced"):
   
    param_grid = {
        'n_estimators': [100, 200],
        'max_depth': [3, 5, 7],
        'learning_rate': [0.01, 0.1],
        'subsample': [0.8, 1.0],
        'colsample_bytree': [0.8, 1.0],
        'gamma': [0, 1],
        'reg_alpha': [0, 0.1],
        'reg_lambda': [1, 5],
        'min_child_weight': [1, 3]
    }
   
    xgb = XGBRegressor(random_state=42, verbosity=0)
    grid_search = GridSearchCV(xgb, param_grid, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_max_depth", best_model.max_depth)
    mlflow.log_param("best_learning_rate", best_model.learning_rate)
    evaluate_and_log_model("xgboost", "XGBoost Regressor", best_model, X_test, y_test)

2025/08/02 20:26:23 INFO mlflow.sklearn.utils: Logging the 5 best runs, 763 runs will be omitted.
  self.get_booster().save_model(fname)


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Model XGBoost Regressor logged with MSE: 0.21641436219215393, MAE: 0.35510239005088806, R2: 0.5268812775611877, MAPE: 591370959978496.0
🏃 View run XGBoost_Regressor_Advanced at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/75dbcb193d214caba6ff676c87d036cb
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0


## Nova abordagem XGBoost

In [18]:
with mlflow.start_run(run_name="XGBoost_Regressor"):
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [3, 5, 7, 9],
        'learning_rate': [0.01, 0.1, 0.2, 0.3]
    }
    xgb = XGBRegressor(random_state=42, verbosity=0)
    grid_search = GridSearchCV(xgb, param_grid, scoring=make_scorer(mean_absolute_percentage_error, greater_is_better=False), cv=5)
    grid_search.fit(X_train, y_train)
    best_model = grid_search.best_estimator_

    mlflow.log_param("best_n_estimators", best_model.n_estimators)
    mlflow.log_param("best_max_depth", best_model.max_depth)
    mlflow.log_param("best_learning_rate", best_model.learning_rate)
    evaluate_and_log_model("xgboost", "XGBoost Regressor", best_model, X_test, y_test)

2025/08/02 19:18:36 INFO mlflow.sklearn.utils: Logging the 5 best runs, 43 runs will be omitted.


🏃 View run delightful-fly-627 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/a39f3ab7d9de471d9ecb641726f9f502
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run luminous-croc-596 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/75f55e176e9c421bb1a65ef580edc264
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run popular-vole-367 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/c85482608bd549d7bb2af41946b8b094
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run ambitious-whale-798 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/98b16ea25cad463b86f6d89

  self.get_booster().save_model(fname)


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Model XGBoost Regressor logged with MSE: 0.1858082264661789, MAE: 0.3244054615497589, R2: 0.5937914848327637, MAPE: 497322383376384.0
🏃 View run XGBoost_Regressor at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/a174eb896c9347a59274ff008c685f18
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0


🏃 View run grandiose-stag-16 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/42021c916584420b8196982264c43331
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run blushing-hound-814 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/72aa5907e0c6455fba1d517915a5b6b9
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run orderly-shrike-537 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/13e0045721be43b5983a6b8a240ade83
🧪 View experiment at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0
🏃 View run caring-flea-49 at: https://dagshub.com/wagnerdataset/fiap-ds-mlops-10dtsr-quantum-finance.mlflow/#/experiments/0/runs/547246a4f36f45f98c98a330e6

## Registro de Modelo em Produção

In [21]:
run_id = "75dbcb193d214caba6ff676c87d036cb"

mlflow.register_model(model_uri=f"runs:/{run_id}/model", name="quantum-finance-model")

Registered model 'quantum-finance-model' already exists. Creating a new version of this model...
2025/08/02 20:56:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: quantum-finance-model, version 3
Created version '3' of model 'quantum-finance-model'.


<ModelVersion: aliases=[], creation_timestamp=1754178992778, current_stage='None', description='', last_updated_timestamp=1754178992778, name='quantum-finance-model', run_id='75dbcb193d214caba6ff676c87d036cb', run_link='', source='mlflow-artifacts:/9af8db55c0b241cc842637ef3f2e2fc4/75dbcb193d214caba6ff676c87d036cb/artifacts/model', status='READY', status_message=None, tags={}, user_id='', version='3'>