In [1]:
from sagemaker.workflow.function_step import step
from sagemaker.workflow.pipeline import Pipeline
import sagemaker
from sagemaker.workflow.parameters import ParameterString, ParameterInteger
from sagemaker.workflow.execution_variables import ExecutionVariables
import utils

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


## Global variables

In [2]:
# Current user name, as returned by the `utils` helper module.
user = utils.get_username()

# --- SageMaker execution role, S3 locations and session ---
role = sagemaker.get_execution_role()
default_bucket = "mlops-chester"
default_prefix = "sagemaker/bank-attrition-detection"
default_path = f"{default_bucket}/{default_prefix}"  # "<bucket>/<prefix>", no s3:// scheme
sagemaker_session = sagemaker.Session(
    default_bucket=default_bucket,
    default_bucket_prefix=default_prefix,
)

# --- Pipeline and model settings shared by every step ---
instance_type = "ml.m5.2xlarge"
pipeline_name = "pipeline-inference"
model_version = "latest"
model_name = "attrition-detection-model"

# --- Pipeline parameters; concrete values are supplied when the pipeline is started ---
cod_month = ParameterString(name="PeriodoCargaClientes")
cod_month_start = ParameterInteger(name="PeriodoCargaRequerimientosInicio")
cod_month_end = ParameterInteger(name="PeriodoCargaRequerimientosFin")

# --- MLflow tracking ---
tracking_server_arn = 'arn:aws:sagemaker:us-east-2:762233743642:mlflow-tracking-server/mlops-mlflow-server'
experiment_name = "pipeline-inference-attrition-detection"

## Data pull

In [3]:
@step(
    name="DataPull",
    instance_type=instance_type
)
def data_pull(experiment_name: str, run_name: str, cod_month: str, cod_month_start: int, cod_month_end: int) -> tuple:
    """Pull the scoring population from Athena, preprocess it and log it to MLflow.

    The step reads the client snapshot for ``cod_month`` and the requirements
    between ``cod_month_start`` and ``cod_month_end`` from the ``bank_attrition``
    Athena database, applies the preprocessing artifacts stored under
    ``<default_prefix>/outputs/preprocess/`` in S3 (imputation values, label
    encoders, scaler), and writes the prepared dataset both to S3 and as an
    MLflow artifact of a nested "DataPull" run.

    Args:
        experiment_name: MLflow experiment to log into.
        run_name: Name of the parent MLflow run created by this step.
        cod_month: Month code used to filter the clients table (digits only).
        cod_month_start: First month code (inclusive) for the requirements table.
        cod_month_end: Last month code (inclusive) for the requirements table.

    Returns:
        A ``(run_id, data_pull_id)`` tuple: the parent MLflow run id and the id
        of the nested "DataPull" run that holds the prepared dataset.

    Raises:
        ValueError: If ``cod_month`` is not made of digits, or the clients query
            returns no rows.
        KeyError: If an imputation value is missing for a column to impute.
        subprocess.CalledProcessError: If installing awswrangler fails.
    """
    import mlflow
    import os
    import pickle
    import subprocess
    import sys
    import tempfile

    # Install into the interpreter that is running this step and fail loudly:
    # without check=True a failed install only shows up later as an ImportError.
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'awswrangler==3.12.0'], check=True)
    import awswrangler as wr
    import boto3
    import numpy as np
    import pandas as pd

    output_dir = tempfile.mkdtemp()
    s3 = boto3.client('s3')

    # The month codes are interpolated into SQL text, so make sure they are
    # plain numbers before building the queries.
    cod_month = str(cod_month)
    if not cod_month.isdigit():
        raise ValueError(f"cod_month must contain only digits, got {cod_month!r}")
    cod_month_start = int(cod_month_start)
    cod_month_end = int(cod_month_end)

    query_clientes = """
       SELECT
        TRY_CAST(id_correlativo AS BIGINT) AS id_correlativo,
        TRY_CAST(codmes AS BIGINT) AS codmes,
        TRY_CAST(flg_bancarizado AS BIGINT) AS flg_bancarizado,
        rang_ingreso,
        flag_lima_provincia,
        TRY_CAST(edad AS DOUBLE) AS edad,
        TRY_CAST(antiguedad AS DOUBLE) AS antiguedad,
        rang_sdo_pasivo_menos0,
        TRY_CAST(sdo_activo_menos0 AS BIGINT) AS sdo_activo_menos0,
        TRY_CAST(sdo_activo_menos1 AS BIGINT) AS sdo_activo_menos1,
        TRY_CAST(sdo_activo_menos2 AS BIGINT) AS sdo_activo_menos2,
        TRY_CAST(sdo_activo_menos3 AS BIGINT) AS sdo_activo_menos3,
        TRY_CAST(sdo_activo_menos4 AS BIGINT) AS sdo_activo_menos4,
        TRY_CAST(sdo_activo_menos5 AS BIGINT) AS sdo_activo_menos5,
        TRY_CAST(flg_seguro_menos0 AS BIGINT) AS flg_seguro_menos0,
        TRY_CAST(flg_seguro_menos1 AS BIGINT) AS flg_seguro_menos1,
        TRY_CAST(flg_seguro_menos2 AS BIGINT) AS flg_seguro_menos2,
        TRY_CAST(flg_seguro_menos3 AS BIGINT) AS flg_seguro_menos3,
        TRY_CAST(flg_seguro_menos4 AS BIGINT) AS flg_seguro_menos4,
        TRY_CAST(flg_seguro_menos5 AS BIGINT) AS flg_seguro_menos5,
        rang_nro_productos_menos0,
        TRY_CAST(flg_nomina AS BIGINT) AS flg_nomina,
        TRY_CAST(nro_acces_canal1_menos0 AS BIGINT) AS nro_acces_canal1_menos0,
        TRY_CAST(nro_acces_canal1_menos1 AS BIGINT) AS nro_acces_canal1_menos1,
        TRY_CAST(nro_acces_canal1_menos2 AS BIGINT) AS nro_acces_canal1_menos2,
        TRY_CAST(nro_acces_canal1_menos3 AS BIGINT) AS nro_acces_canal1_menos3,
        TRY_CAST(nro_acces_canal1_menos4 AS BIGINT) AS nro_acces_canal1_menos4,
        TRY_CAST(nro_acces_canal1_menos5 AS BIGINT) AS nro_acces_canal1_menos5,
        TRY_CAST(nro_acces_canal2_menos0 AS BIGINT) AS nro_acces_canal2_menos0,
        TRY_CAST(nro_acces_canal2_menos1 AS BIGINT) AS nro_acces_canal2_menos1,
        TRY_CAST(nro_acces_canal2_menos2 AS BIGINT) AS nro_acces_canal2_menos2,
        TRY_CAST(nro_acces_canal2_menos3 AS BIGINT) AS nro_acces_canal2_menos3,
        TRY_CAST(nro_acces_canal2_menos4 AS BIGINT) AS nro_acces_canal2_menos4,
        TRY_CAST(nro_acces_canal2_menos5 AS BIGINT) AS nro_acces_canal2_menos5,
        TRY_CAST(nro_acces_canal3_menos0 AS BIGINT) AS nro_acces_canal3_menos0,
        TRY_CAST(nro_acces_canal3_menos1 AS BIGINT) AS nro_acces_canal3_menos1,
        TRY_CAST(nro_acces_canal3_menos2 AS BIGINT) AS nro_acces_canal3_menos2,
        TRY_CAST(nro_acces_canal3_menos3 AS BIGINT) AS nro_acces_canal3_menos3,
        TRY_CAST(nro_acces_canal3_menos4 AS BIGINT) AS nro_acces_canal3_menos4,
        TRY_CAST(nro_acces_canal3_menos5 AS BIGINT) AS nro_acces_canal3_menos5,
        TRY_CAST(nro_entid_ssff_menos0 AS BIGINT) AS nro_entid_ssff_menos0,
        TRY_CAST(nro_entid_ssff_menos1 AS BIGINT) AS nro_entid_ssff_menos1,
        TRY_CAST(nro_entid_ssff_menos2 AS BIGINT) AS nro_entid_ssff_menos2,
        TRY_CAST(nro_entid_ssff_menos3 AS BIGINT) AS nro_entid_ssff_menos3,
        TRY_CAST(nro_entid_ssff_menos4 AS BIGINT) AS nro_entid_ssff_menos4,
        TRY_CAST(nro_entid_ssff_menos5 AS BIGINT) AS nro_entid_ssff_menos5,
        TRY_CAST(flg_sdo_otssff_menos0 AS BIGINT) AS flg_sdo_otssff_menos0,
        TRY_CAST(flg_sdo_otssff_menos1 AS BIGINT) AS flg_sdo_otssff_menos1,
        TRY_CAST(flg_sdo_otssff_menos2 AS BIGINT) AS flg_sdo_otssff_menos2,
        TRY_CAST(flg_sdo_otssff_menos3 AS BIGINT) AS flg_sdo_otssff_menos3,
        TRY_CAST(flg_sdo_otssff_menos4 AS BIGINT) AS flg_sdo_otssff_menos4,
        TRY_CAST(flg_sdo_otssff_menos5 AS BIGINT) AS flg_sdo_otssff_menos5
        FROM oot_clientes_sample
        WHERE codmes = '{}';
    """.format(cod_month)

    query_requerimientos = """
        SELECT *
        FROM oot_requerimientos_sample
        WHERE codmes between {} and {};
        """.format(cod_month_start, cod_month_end)

    train_s3_path = f"s3://{default_path}"

    mlflow.set_tracking_uri(tracking_server_arn)
    mlflow.set_experiment(experiment_name)

    def download_preprocess_artifact(filename):
        """Download one preprocessing artifact from S3 into the step's temp dir; return its local path."""
        s3_key = f'{default_prefix}/outputs/preprocess/{filename}'
        local_path = os.path.join(output_dir, filename)
        s3.download_file(default_bucket, s3_key, local_path)
        return local_path

    def prepare_impute_missing(df_data, x_cols):
        """Return a copy of ``df_data`` with NaNs in ``x_cols`` filled from the stored imputation table."""
        df_data_imputed = df_data.copy()
        df_impute_parameters = pd.read_csv(download_preprocess_artifact('imputacion_parametros.csv'))
        for col in x_cols:
            matches = df_impute_parameters.loc[df_impute_parameters["variable"] == col, "valor"]
            if matches.empty:
                raise KeyError(f"No imputation value stored for column {col!r}")
            df_data_imputed[col] = df_data_imputed[col].fillna(matches.iloc[0])
        return df_data_imputed

    def generar_variables_ingenieria(clientes_df):
        """Add the 6-month trend/average/count features to ``clientes_df`` (modified in place) and return it."""
        clientes_df["VAR_SDO_ACTIVO_6M"] = clientes_df["SDO_ACTIVO_MENOS0"] - clientes_df["SDO_ACTIVO_MENOS5"]
        clientes_df["PROM_SDO_ACTIVO_0M_2M"] = clientes_df[[f"SDO_ACTIVO_MENOS{i}" for i in range(3)]].mean(axis=1)
        clientes_df["PROM_SDO_ACTIVO_3M_5M"] = clientes_df[[f"SDO_ACTIVO_MENOS{i}" for i in range(3, 6)]].mean(axis=1)
        clientes_df["VAR_SDO_ACTIVO_3M"] = clientes_df["PROM_SDO_ACTIVO_0M_2M"] - clientes_df["PROM_SDO_ACTIVO_3M_5M"]
        clientes_df["PROM_SDO_ACTIVO_6M"] = clientes_df[[f"SDO_ACTIVO_MENOS{i}" for i in range(6)]].mean(axis=1)
        clientes_df["MESES_CON_SEGURO"] = clientes_df[[f"FLG_SEGURO_MENOS{i}" for i in range(6)]].sum(axis=1)
        for canal in [1, 2, 3]:
            base = f"NRO_ACCES_CANAL{canal}_MENOS"
            clientes_df[f"VAR_NRO_ACCES_CANAL{canal}_6M"] = clientes_df[f"{base}0"] - clientes_df[f"{base}5"]
            clientes_df[f"PROM_NRO_ACCES_CANAL{canal}_6M"] = clientes_df[[f"{base}{i}" for i in range(6)]].mean(axis=1)
            clientes_df[f"PROM_NRO_ACCES_CANAL{canal}_0M_2M"] = clientes_df[[f"{base}{i}" for i in range(3)]].mean(axis=1)
            clientes_df[f"PROM_NRO_ACCES_CANAL{canal}_3M_5M"] = clientes_df[[f"{base}{i}" for i in range(3, 6)]].mean(axis=1)
            clientes_df[f"VAR_NRO_ACCES_CANAL{canal}_3M"] = (clientes_df[f"PROM_NRO_ACCES_CANAL{canal}_0M_2M"] - clientes_df[f"PROM_NRO_ACCES_CANAL{canal}_3M_5M"])
        clientes_df["PROM_NRO_ENTID_SSFF_6M"] = clientes_df[[f"NRO_ENTID_SSFF_MENOS{i}" for i in range(6)]].mean(axis=1)
        clientes_df["VAR_NRO_ENTID_SSFF_6M"] = clientes_df["NRO_ENTID_SSFF_MENOS0"] - clientes_df["NRO_ENTID_SSFF_MENOS5"]
        clientes_df["PROM_NRO_ENTID_SSFF_0M_2M"] = clientes_df[[f"NRO_ENTID_SSFF_MENOS{i}" for i in range(3)]].mean(axis=1)
        clientes_df["PROM_NRO_ENTID_SSFF_3M_5M"] = clientes_df[[f"NRO_ENTID_SSFF_MENOS{i}" for i in range(3, 6)]].mean(axis=1)
        clientes_df["VAR_NRO_ENTID_SSFF_3M"] = (clientes_df["PROM_NRO_ENTID_SSFF_0M_2M"] - clientes_df["PROM_NRO_ENTID_SSFF_3M_5M"])
        clientes_df["MESES_CON_SALDO"] = clientes_df[[f"FLG_SDO_OTSSFF_MENOS{i}" for i in range(6)]].sum(axis=1)
        return clientes_df

    def construir_variables_requerimientos(df_reqs, id_col='ID_CORRELATIVO'):
        """Aggregate the requirements to one row per ``id_col``: counts, distinct counts and one-hot sums."""
        # groupby(...).size() always yields a Series, so it can be framed directly.
        total_reqs = df_reqs.groupby(id_col).size().rename('total_requerimientos').to_frame()
        n_tipo_req = df_reqs.groupby(id_col)['TIPO_REQUERIMIENTO2'].nunique().rename('nro_tipos_requerimiento').to_frame()
        n_dictamen = df_reqs.groupby(id_col)['DICTAMEN'].nunique().rename('nro_dictamenes').to_frame()
        n_producto = df_reqs.groupby(id_col)['PRODUCTO_SERVICIO_2'].nunique().rename('nro_productos_servicios').to_frame()
        n_submotivo = df_reqs.groupby(id_col)['SUBMOTIVO_2'].nunique().rename('nro_submotivos').to_frame()
        tipo_ohe = pd.get_dummies(df_reqs['TIPO_REQUERIMIENTO2'], prefix='tipo')
        tipo_ohe[id_col] = df_reqs[id_col]
        tipo_ohe = tipo_ohe.groupby(id_col).sum()
        dictamen_ohe = pd.get_dummies(df_reqs['DICTAMEN'], prefix='dictamen')
        dictamen_ohe[id_col] = df_reqs[id_col]
        dictamen_ohe = dictamen_ohe.groupby(id_col).sum()
        df_agregado = pd.concat([total_reqs, n_tipo_req, n_dictamen, n_producto, n_submotivo, tipo_ohe, dictamen_ohe], axis=1)
        return df_agregado

    def apply_label_encoders_to_test(df_test):
        """Encode the categorical client columns with the stored encoders (modifies ``df_test`` in place)."""
        df_test['RANG_SDO_PASIVO_MENOS0'] = df_test['RANG_SDO_PASIVO_MENOS0'].replace('Cero', 'Rango_SDO_00')
        df_test['FLAG_LIMA_PROVINCIA'] = df_test['FLAG_LIMA_PROVINCIA'].map({'Lima': 1, 'Provincia': 0})
        # NOTE(review): pickle.load executes arbitrary code; only safe while the
        # preprocess/ prefix of the bucket is writable by trusted jobs only.
        with open(download_preprocess_artifact('label_encoder_train.pkl'), 'rb') as f:
            encoders_clientes = pickle.load(f)
        for col, le in encoders_clientes.items():
            df_test[col] = le.transform(df_test[col])
        return df_test

    def aplicar_estandarizacion_test(df_test):
        """Scale every column except the id/month keys with the stored scaler."""
        # NOTE(review): same pickle trust caveat as for the label encoders.
        with open(download_preprocess_artifact('scaler_train.pkl'), 'rb') as f:
            scaler = pickle.load(f)
        no_escalar = ['ID_CORRELATIVO', 'CODMES']
        columnas_entrenamiento = getattr(scaler, 'feature_names_in_', None)
        if columnas_entrenamiento is not None:
            # The one-hot columns built from the requirements depend on which
            # categories appear in this scoring window. Align to the columns the
            # scaler was fitted on: categories absent here become 0 counts and
            # columns unseen at fit time are dropped, so transform() always gets
            # the fitted layout in the fitted order.
            columnas_a_escalar = pd.Index([c for c in columnas_entrenamiento if c not in no_escalar])
            df_predictoras = df_test.reindex(columns=columnas_a_escalar, fill_value=0)
        else:
            # Scaler was fitted without feature names: keep the positional behaviour.
            columnas_a_escalar = df_test.columns.difference(no_escalar)
            df_predictoras = df_test[columnas_a_escalar]
        df_escaladas = pd.DataFrame(scaler.transform(df_predictoras), columns=columnas_a_escalar, index=df_test.index)
        df_test_estandarizado = pd.concat([df_test[no_escalar], df_escaladas], axis=1)
        return df_test_estandarizado

    def prepare_dataset(df_data_test, df_requerimientos_test):
        """Run imputation, feature engineering, encoding, the left join with requirements and scaling."""
        x_cols_clientes = ['RANG_INGRESO', 'FLAG_LIMA_PROVINCIA', 'EDAD', 'ANTIGUEDAD']
        x_cols_requerimientos = ['DICTAMEN']
        df_data_imputed_clientes = prepare_impute_missing(df_data_test, x_cols_clientes)
        df_data_imputed_requerimientos = prepare_impute_missing(df_requerimientos_test, x_cols_requerimientos)
        df_data_feature_clientes = generar_variables_ingenieria(df_data_imputed_clientes)
        df_data_feature_requerimientos = construir_variables_requerimientos(df_data_imputed_requerimientos)
        df_data_encoder_clientes = apply_label_encoders_to_test(df_data_feature_clientes)
        df_final = df_data_encoder_clientes.merge(df_data_feature_requerimientos, on='ID_CORRELATIVO', how='left')
        # Clients without any requirement get 0 for every requirement feature.
        df_final = df_final.fillna(0)
        df_final = aplicar_estandarizacion_test(df_final)
        return df_final

    with mlflow.start_run(run_name=run_name) as run:
        run_id = run.info.run_id
        # Named data_pull_run so it does not shadow this function's own name.
        with mlflow.start_run(run_name="DataPull", nested=True) as data_pull_run:
            data_pull_id = data_pull_run.info.run_id
            df_data_test = wr.athena.read_sql_query(sql=query_clientes, database="bank_attrition")
            if df_data_test.empty:
                raise ValueError(f"No client rows found in oot_clientes_sample for codmes={cod_month}")
            df_data_test.columns = df_data_test.columns.str.upper()
            df_data_test['RANG_INGRESO'] = df_data_test['RANG_INGRESO'].replace('', np.nan)
            df_data_test['FLAG_LIMA_PROVINCIA'] = df_data_test['FLAG_LIMA_PROVINCIA'].replace('', np.nan)

            df_requerimientos_test = wr.athena.read_sql_query(sql=query_requerimientos, database="bank_attrition")
            df_requerimientos_test.columns = df_requerimientos_test.columns.str.upper()
            df_requerimientos_test['DICTAMEN'] = df_requerimientos_test['DICTAMEN'].replace('', np.nan)

            df_data_score_prepared = prepare_dataset(df_data_test, df_requerimientos_test)

            file_name = f"df_data_score_prepared_{cod_month}.csv"
            local_csv = os.path.join(output_dir, file_name)
            # S3 URIs are always '/'-separated, so build them explicitly instead of with os.path.join.
            df_data_score_prepared.to_csv(f"{train_s3_path}/inf-raw-data/{file_name}", index=False)
            df_data_score_prepared.to_csv(local_csv, index=False)
            mlflow.log_artifact(local_csv, artifact_path="inf-raw-data")

    return run_id, data_pull_id

## Model inference

In [4]:
@step(
    name="ModelInference",
    instance_type=instance_type
)
def model_inference(experiment_name: str, run_id: str, data_pull_id: str, cod_month: str) -> str:
    """Score the prepared dataset with the registered model and log the result.

    Loads ``models:/<model_name>/<model_version>`` from the MLflow registry,
    downloads the dataset logged by the "DataPull" run, predicts the positive
    class probability into a ``y_prob`` column and stores the scored dataset in
    S3 and as an artifact of a nested "ModelInference" run.

    Args:
        experiment_name: MLflow experiment to log into.
        run_id: Id of the parent MLflow run to attach the nested run to.
        data_pull_id: Id of the MLflow run holding the prepared dataset.
        cod_month: Month code that is part of the dataset file names.

    Returns:
        The id of the nested "ModelInference" MLflow run.

    Raises:
        ValueError: If the registered model has neither an xgboost nor a
            sklearn flavor.
    """
    import os
    import pandas as pd
    import mlflow
    from mlflow.artifacts import download_artifacts
    import boto3
    import tempfile

    output_dir = tempfile.mkdtemp()
    s3 = boto3.client("s3")

    train_s3_path = f"s3://{default_path}"

    mlflow.set_tracking_uri(tracking_server_arn)
    mlflow.set_experiment(experiment_name)
    model_uri = f"models:/{model_name}/{model_version}"
    # One registry lookup provides both the flavors and the artifact path.
    info = mlflow.models.get_model_info(model_uri)
    flavors = info.flavors

    if "xgboost" in flavors:
        import mlflow.xgboost
        model = mlflow.xgboost.load_model(model_uri)
    elif "sklearn" in flavors:
        import mlflow.sklearn
        model = mlflow.sklearn.load_model(model_uri)
    else:
        # Previously this fell through and failed later with a NameError on `model`.
        raise ValueError(
            f"Model {model_uri} has unsupported flavors {sorted(flavors)}; expected 'xgboost' or 'sklearn'"
        )

    name_model = info.artifact_path.replace('_model', '')

    df_data_score_path = download_artifacts(run_id=data_pull_id, artifact_path=f"inf-raw-data/df_data_score_prepared_{cod_month}.csv")
    df_data_score = pd.read_csv(df_data_score_path)

    # The model is scored on the columns listed in the feature-importance file
    # stored for this model; download it into the temp dir rather than the cwd.
    s3_key = f'{default_prefix}/outputs/train/feature_importance/{name_model}/feature_importance.csv'
    local_path = os.path.join(output_dir, 'feature_importance.csv')
    s3.download_file(default_bucket, s3_key, local_path)
    features = pd.read_csv(local_path)['variable'].to_list()

    y_pred = model.predict_proba(df_data_score[features])
    df_data_score['y_prob'] = y_pred[:, 1]

    file_name = f"df_data_score_prob_{cod_month}.csv"
    # S3 URIs are always '/'-separated, so build them explicitly instead of with os.path.join.
    s3_csv = f"{train_s3_path}/inf-proc-data/{file_name}"
    local_csv = os.path.join(output_dir, file_name)

    with mlflow.start_run(run_id=run_id):
        # Named inference_run so it does not shadow this function's own name.
        with mlflow.start_run(run_name="ModelInference", nested=True) as inference_run:
            model_inference_id = inference_run.info.run_id
            df_data_score.to_csv(s3_csv, index=False)
            df_data_score.to_csv(local_csv, index=False)
            mlflow.log_artifact(local_csv, artifact_path="inf-proc-data")
            mlflow.log_input(mlflow.data.from_pandas(df_data_score, s3_csv), context="ModelInference")
    return model_inference_id

## Data push

In [5]:
@step(
    name="DataPush",
    instance_type=instance_type
)
def data_push(experiment_name: str, run_id: str, model_inference_id: str, cod_month: str) -> None:
    """Post-process the scores, publish them as Parquet and register the Athena table.

    Downloads the scored dataset logged by the "ModelInference" run, derives the
    risk profile, model name, load date and rank, writes the result to
    ``s3://<default_path>/inf-posproc-data/output_<cod_month>.parquet``, creates
    the ``bank_attrition.attrition_detection`` external table if it does not
    exist, and logs the output in a nested "DataPush" MLflow run.

    Args:
        experiment_name: MLflow experiment to log into.
        run_id: Id of the parent MLflow run to attach the nested run to.
        model_inference_id: Id of the MLflow run holding the scored dataset.
        cod_month: Month code that is part of the input and output file names.

    Raises:
        subprocess.CalledProcessError: If installing awswrangler fails.
    """
    import pandas as pd
    import mlflow
    from mlflow.artifacts import download_artifacts
    import subprocess
    import sys

    # Install into the interpreter that is running this step and fail loudly:
    # without check=True a failed install only shows up later as an ImportError.
    subprocess.run([sys.executable, '-m', 'pip', 'install', 'awswrangler==3.12.0'], check=True)
    import awswrangler as wr
    import numpy as np
    from datetime import datetime
    import pytz
    import tempfile
    import os

    output_dir = tempfile.mkdtemp()

    ID_COL = "ID_CORRELATIVO"
    TIME_COL = "CODMES"
    PRED_COL = "y_prob"
    # Probability cut-offs of the risk profiles (at or above the value).
    HIGH_RISK_THRESHOLD = 0.415
    MEDIUM_RISK_THRESHOLD = 0.285

    mlflow.set_tracking_uri(tracking_server_arn)
    mlflow.set_experiment(experiment_name)

    df_path = download_artifacts(run_id=model_inference_id, artifact_path=f'inf-proc-data/df_data_score_prob_{cod_month}.csv')
    df = pd.read_csv(df_path)

    df['attrition_profile'] = np.where(df[PRED_COL] >= HIGH_RISK_THRESHOLD, 'High risk',
                                   np.where(df[PRED_COL] >= MEDIUM_RISK_THRESHOLD, 'Medium risk',
                                   'Low risk'))

    df['model'] = model_name
    timezone = pytz.timezone("America/Lima")
    df['load_date'] = datetime.now(timezone).strftime("%Y%m%d")
    # Rank 1 = highest probability; ties are broken by row order.
    df['order'] = df[PRED_COL].rank(method='first', ascending=False).astype(int)

    inf_posproc_s3_path = f"s3://{default_path}/inf-posproc-data"
    inf_posproc_s3_path_partition = f"{inf_posproc_s3_path}/output_{cod_month}.parquet"
    database = 'bank_attrition'
    table_name = f'{database}.attrition_detection'

    # Pushing data to S3 path
    df = df[[ID_COL, PRED_COL, 'model', 'attrition_profile', 'load_date', 'order', TIME_COL]]
    df.to_parquet(inf_posproc_s3_path_partition, engine='pyarrow', compression='snappy')

    # Creating table
    # - `order` is a reserved word in Athena DDL and must be quoted with backticks.
    # - The integer columns are written by pandas as 64-bit, so they are declared
    #   bigint; a 32-bit `int` column cannot read INT64 Parquet data.
    # NOTE(review): IF NOT EXISTS leaves an already-created table untouched; if
    # one exists with `int` columns it has to be dropped and recreated once.
    ddl = f"""
    CREATE EXTERNAL TABLE IF NOT EXISTS {table_name} (
    {ID_COL} bigint,
    {PRED_COL} double,
    model string,
    attrition_profile string,
    load_date string,
    `order` bigint,
    {TIME_COL} bigint
    )
    STORED AS parquet
    LOCATION '{inf_posproc_s3_path}'
    TBLPROPERTIES ('parquet.compression'='SNAPPY')
    """
    query_exec_id = wr.athena.start_query_execution(sql=ddl, database=database)
    wr.athena.wait_query(query_execution_id=query_exec_id)

    local_csv = os.path.join(output_dir, f"score_prob_{cod_month}.csv")
    with mlflow.start_run(run_id=run_id):
        with mlflow.start_run(run_name="DataPush", nested=True):
            mlflow.log_input(mlflow.data.from_pandas(df, inf_posproc_s3_path_partition), context="DataPush")
            df.to_csv(local_csv, index=False)
            mlflow.log_artifact(local_csv, artifact_path="inf-posproc-data")

## Pipeline

In [6]:
# Step 1: pull and preprocess the scoring data. The MLflow run is named after
# the pipeline execution id.
data_pull_step = data_pull(
    experiment_name=experiment_name,
    run_name=ExecutionVariables.PIPELINE_EXECUTION_ID,
    cod_month=cod_month,
    cod_month_start=cod_month_start,
    cod_month_end=cod_month_end,
)

# data_pull returns (run_id, data_pull_id); reference each element by index.
parent_run_id = data_pull_step[0]
data_pull_run_id = data_pull_step[1]

# Step 2: score the prepared data with the registered model.
model_inference_step = model_inference(
    experiment_name=experiment_name,
    run_id=parent_run_id,
    data_pull_id=data_pull_run_id,
    cod_month=cod_month,
)

# Step 3: post-process the scores and publish them.
data_push_step = data_push(
    experiment_name=experiment_name,
    run_id=parent_run_id,
    model_inference_id=model_inference_step,
    cod_month=cod_month,
)

In [7]:
# Assemble the three steps and the runtime parameters into the pipeline, then
# create or update its definition using the execution role.
# NOTE(review): `sagemaker_session` from the globals cell is not passed here,
# so the SDK's default session is used — confirm this is intended.
pipeline_steps = [data_pull_step, model_inference_step, data_push_step]
pipeline_parameters = [cod_month, cod_month_start, cod_month_end]
pipeline = Pipeline(
    name=pipeline_name,
    steps=pipeline_steps,
    parameters=pipeline_parameters,
)
pipeline.upsert(role_arn=role)

2025-06-29 17:29:12,359 sagemaker.remote_function INFO     Uploading serialized function code to s3://sagemaker-us-east-2-762233743642/pipeline-inference/DataPull/2025-06-29-17-29-12-160/function
2025-06-29 17:29:12,417 sagemaker.remote_function INFO     Uploading serialized function arguments to s3://sagemaker-us-east-2-762233743642/pipeline-inference/DataPull/2025-06-29-17-29-12-160/arguments
2025-06-29 17:29:12,615 sagemaker.remote_function INFO     Uploading serialized function code to s3://sagemaker-us-east-2-762233743642/pipeline-inference/ModelInference/2025-06-29-17-29-12-160/function
2025-06-29 17:29:12,670 sagemaker.remote_function INFO     Uploading serialized function arguments to s3://sagemaker-us-east-2-762233743642/pipeline-inference/ModelInference/2025-06-29-17-29-12-160/arguments
2025-06-29 17:29:12,945 sagemaker.remote_function INFO     Uploading serialized function code to s3://sagemaker-us-east-2-762233743642/pipeline-inference/DataPush/2025-06-29-17-29-12-160/funct

{'PipelineArn': 'arn:aws:sagemaker:us-east-2:762233743642:pipeline/pipeline-inference',
 'ResponseMetadata': {'RequestId': 'f3af7f2b-2f3a-49ab-b1b7-e712fadeccbb',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'x-amzn-requestid': 'f3af7f2b-2f3a-49ab-b1b7-e712fadeccbb',
   'content-type': 'application/x-amz-json-1.1',
   'content-length': '86',
   'date': 'Sun, 29 Jun 2025 17:29:14 GMT'},
  'RetryAttempts': 0}}

In [8]:
# Launch one execution: clients snapshot of 202408 and requirements from
# 202403 through 202408.
execution_parameters = {
    "PeriodoCargaClientes": '202408',
    "PeriodoCargaRequerimientosInicio": 202403,
    "PeriodoCargaRequerimientosFin": 202408,
}
pipeline.start(
    parameters=execution_parameters,
    execution_display_name="test-inference-full-1",
    execution_description="Testando inferece full 1",
)

_PipelineExecution(arn='arn:aws:sagemaker:us-east-2:762233743642:pipeline/pipeline-inference/execution/kjxwd7pwefsu', sagemaker_session=<sagemaker.session.Session object at 0x7f26aee20800>)