In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import math
from xgboost import XGBRFRegressor
import mlflow

In [2]:
df = pd.read_csv('../data/processed/casas.csv')

In [3]:
df.head()

Unnamed: 0,tamanho,ano,garagem,preco
0,159.0,2003,2,208500
1,117.0,1976,2,181500
2,166.0,2001,2,223500
3,160.0,1915,3,140000
4,204.0,2000,3,250000


In [4]:
X = df.drop('preco',axis=1)
y = df['preco'].copy()

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.25,random_state=42)

In [6]:
mlflow.set_experiment('house-prices-eda')

<Experiment: artifact_location='file:///C:/Users/vitor/Documents/ambientes/mlflow_win64x/mlflow_alura/notebooks/mlruns/602980415233333545', creation_time=1741946667148, experiment_id='602980415233333545', last_update_time=1741946667148, lifecycle_stage='active', name='house-prices-eda', tags={}>

## linear regression

In [7]:
mlflow.start_run()

<ActiveRun: >

In [8]:
lr = LinearRegression()
lr.fit(X_train,y_train)

In [9]:
mlflow.sklearn.log_model(lr,'lr')



<mlflow.models.model.ModelInfo at 0x2192befd1f0>

In [10]:
y_predicted = lr.predict(X_test)

In [11]:
X_test.iloc[0]

tamanho      99.0
ano        1963.0
garagem       1.0
Name: 892, dtype: float64

In [12]:
y_test.iloc[0]

np.int64(154500)

In [13]:
mse = mean_squared_error(y_test,y_predicted)
rmse = math.sqrt(mse)
r2 = r2_score(y_test,y_predicted)
mlflow.log_metric('mse',mse)
mlflow.log_metric('rmse',rmse)
mlflow.log_metric('r2',r2)

In [14]:
mse

2146554580.6444082

In [15]:
rmse

46330.92466856676

In [16]:
len(y_predicted), len(y_test)

(365, 365)

In [17]:
r2

0.6935817442732812

In [18]:
mlflow.end_run()

## XGBoost

In [19]:
xgb_params={
    'learning_rate':0.2,
    'n_estimators':50,
    'random_state':42
}

with mlflow.start_run():
    xgb = XGBRFRegressor(**xgb_params)
    xgb.fit(X_train,y_train)
    mlflow.xgboost.log_model(xgb,'xgboost')
    xgb_predited = xgb.predict(X_test)

    mse = mean_squared_error(y_test,xgb_predited)
    rmse = math.sqrt(mse)
    r2 = r2_score(y_test,xgb_predited)
    mlflow.log_metric('mse',mse)
    mlflow.log_metric('rmse',rmse)
    mlflow.log_metric('r2',r2)



MSE quanto menor melhor. Quer dizer que modelo está errando menos. É melhor

In [20]:
mse, rmse, r2

(5056111616.0, 71106.3401955128, 0.278245747089386)

In [21]:
mlflow.get_experiment_by_name('house-prices-eda')

<Experiment: artifact_location='file:///C:/Users/vitor/Documents/ambientes/mlflow_win64x/mlflow_alura/notebooks/mlruns/602980415233333545', creation_time=1741946667148, experiment_id='602980415233333545', last_update_time=1741946667148, lifecycle_stage='active', name='house-prices-eda', tags={}>

In [33]:
mlflow.search_runs()

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.r2,metrics.rmse,metrics.mse,tags.mlflow.runName,tags.mlflow.log-model.history,tags.mlflow.user,tags.mlflow.source.name,tags.mlflow.source.type
0,097b123faf32408588de27cfb241c5a9,602980415233333545,FINISHED,file:///C:/Users/vitor/Documents/ambientes/mlf...,2025-03-15 14:54:41.621000+00:00,2025-03-15 14:54:56.810000+00:00,0.278246,71106.340196,5056112000.0,crawling-bear-123,"[{""run_id"": ""097b123faf32408588de27cfb241c5a9""...",vitor,C:\Users\vitor\Documents\ambientes\mlflow_win6...,LOCAL
1,195466eef5c14380b7d016bee3636b72,602980415233333545,FINISHED,file:///C:/Users/vitor/Documents/ambientes/mlf...,2025-03-15 14:53:53.203000+00:00,2025-03-15 14:54:41.554000+00:00,0.693582,46330.924669,2146555000.0,awesome-lark-483,"[{""run_id"": ""195466eef5c14380b7d016bee3636b72""...",vitor,C:\Users\vitor\Documents\ambientes\mlflow_win6...,LOCAL
2,aa1c28f1cb8c4cceb0b2c1c891f7bdaa,602980415233333545,FINISHED,file:///C:/Users/vitor/Documents/ambientes/mlf...,2025-03-14 10:16:04.733000+00:00,2025-03-14 10:16:12.081000+00:00,0.278246,71106.340196,5056112000.0,traveling-elk-124,"[{""run_id"": ""aa1c28f1cb8c4cceb0b2c1c891f7bdaa""...",vitor,C:\Users\vitor\Documents\ambientes\mlflow_win6...,LOCAL
3,d56d335cdb95453599f9459e67eaaf5b,602980415233333545,FINISHED,file:///C:/Users/vitor/Documents/ambientes/mlf...,2025-03-14 10:15:48.556000+00:00,2025-03-14 10:16:04.706000+00:00,0.693582,46330.924669,2146555000.0,luminous-auk-264,"[{""run_id"": ""d56d335cdb95453599f9459e67eaaf5b""...",vitor,C:\Users\vitor\Documents\ambientes\mlflow_win6...,LOCAL


In [32]:
mlflow.get_run('097b123faf32408588de27cfb241c5a9')

<Run: data=<RunData: metrics={'mse': 5056111616.0, 'r2': 0.278245747089386, 'rmse': 71106.3401955128}, params={}, tags={'mlflow.log-model.history': '[{"run_id": "097b123faf32408588de27cfb241c5a9", '
                             '"artifact_path": "xgboost", "utc_time_created": '
                             '"2025-03-15 14:54:41.965566", "model_uuid": '
                             '"0e0584e3a918427782287b3b2f577690", "flavors": '
                             '{"python_function": {"loader_module": '
                             '"mlflow.xgboost", "python_version": "3.12.9", '
                             '"data": "model.xgb", "env": {"conda": '
                             '"conda.yaml", "virtualenv": "python_env.yaml"}}, '
                             '"xgboost": {"xgb_version": "2.1.4", "data": '
                             '"model.xgb", "model_class": '
                             '"xgboost.sklearn.XGBRFRegressor", '
                             '"model_format": "xgb", "code": null

In [25]:
dir(mlflow)

['ActiveRun',
 'Image',
 'LazyLoader',
 'MLFLOW_CONFIGURE_LOGGING',
 'MlflowClient',
 'MlflowException',
 'RunOperations',
 'VERSION',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_configure_mlflow_loggers',
 'active_run',
 'add_trace',
 'anthropic',
 'artifacts',
 'autogen',
 'autolog',
 'azure',
 'bedrock',
 'catboost',
 'client',
 'config',
 'contextlib',
 'create_experiment',
 'crewai',
 'data',
 'delete_expectation',
 'delete_experiment',
 'delete_feedback',
 'delete_run',
 'delete_tag',
 'disable_system_metrics_logging',
 'diviner',
 'doctor',
 'dspy',
 'enable_system_metrics_logging',
 'end_run',
 'entities',
 'environment_variables',
 'evaluate',
 'exceptions',
 'fastai',
 'flush_artifact_async_logging',
 'flush_async_logging',
 'flush_trace_async_logging',
 'gateway',
 'gemini',
 'get_artifact_uri',
 'get_current_active_span',
 'get_experiment',
 'get_experiment_by_n