In [None]:
from datetime import datetime
# Timing marker: evaluates the current local time; as the last expression of
# the cell it is shown as the cell's output.
datetime.now()

In [None]:
#%%capture
import warnings
#warnings.filterwarnings('ignore')
!pip install kfp #--user
!pip install -U google-cloud-pipeline-components #--user

import google.cloud.aiplatform as aip
import json

from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component
#from datetime import datetime

TIMESTAMP = datetime.now().strftime("%Y%m%d%H%M%S")

PROJECT_ID = ''  ###BQ Dataset Project ID
dataset = ''  
dataset_id = PROJECT_ID + '.' + dataset
BUCKET_NAME = 'gs://<bucket>'
table = 'new_train'
DISPLAY_NAME = 'Fraud-Detection-AutoML'

aip.init(project=PROJECT_ID, staging_bucket=BUCKET_NAME)

In [None]:
# Timing marker: shows the current local time as the cell output
# (`datetime` is the class imported in an earlier cell).
datetime.now()

In [None]:
## Define Vertex AI Pipeline

from google_cloud_pipeline_components import aiplatform as gcc_aip

# Location under the staging bucket that is passed to the pipeline as its root.
PIPELINE_ROOT = f"{BUCKET_NAME}/pipeline_root/Fraud-Detection"

@dsl.pipeline(
    name="fraud-detection",
    pipeline_root=PIPELINE_ROOT,
    description="Fraud-Detection AutoML training"
)
def pipeline(
    bq_source: str = 'bq://' + dataset_id + '.' + table,
    display_name: str = DISPLAY_NAME,
    project: str = PROJECT_ID,
    gcp_region: str = "us-central1",
    api_endpoint: str = "us-central1-aiplatform.googleapis.com",
    thresholds_dict_str: str = '{"auRoc": 1.0}',
    model_name: str = DISPLAY_NAME+TIMESTAMP
):
    """Train an AutoML tabular fraud classifier and deploy it to an endpoint.

    The pipeline has four steps: create a tabular dataset from BigQuery, run
    an AutoML tabular training job on it, create an endpoint, and deploy the
    trained model to that endpoint.

    Args:
        bq_source: BigQuery URI (``bq://project.dataset.table``) of the
            training table.
        display_name: Display name used for the dataset, the endpoint and the
            deployed model.
        project: GCP project in which the resources are created.
        gcp_region: Region used for the dataset, training job and endpoint.
        api_endpoint: Currently not used by any step; kept so existing
            callers that pass it keep working.
        thresholds_dict_str: Currently not used by any step; kept so existing
            callers that pass it keep working.
        model_name: Display name of the training job and of the model it
            produces.
    """
    ###############################################
    ## Step 1 Create DS
    # Named from the `display_name` parameter rather than a repeated literal,
    # and created in the same region as the endpoint below.
    dataset_op = gcc_aip.TabularDatasetCreateOp(
        project=project,
        location=gcp_region,
        display_name=display_name,
        bq_source=bq_source
    ).set_caching_options(True)
    ###############################################

    ###############################################
    ## Step 2 AutoML Training Job
    training_op = gcc_aip.AutoMLTabularTrainingJobRunOp(
        project=project,
        location=gcp_region,
        display_name=model_name,
        model_display_name=model_name,
        optimization_prediction_type="classification",
        optimization_objective="maximize-au-prc",
        budget_milli_node_hours=24000,  # 24 node hours
        # The three fractions sum to 1.0.
        training_fraction_split=0.7,
        validation_fraction_split=0.13,
        test_fraction_split=0.17,
        column_transformations=[
            {"numeric": {"column_name": "card_transactions_amount"}},
            {"numeric": {"column_name": "card_transactions_transaction_distance"}},
            {"numeric": {"column_name": "card_transactions_transaction_hour_of_day"}},
            {"categorical": {"column_name": "category"}},
        ],
        dataset=dataset_op.outputs["dataset"],
        target_column="is_fraud",
    ).set_caching_options(False)  # caching disabled for the training step
    ###############################################

    ###############################################
    ## Step 3 Create Endpoint
    endpoint_op = gcc_aip.EndpointCreateOp(
        project=project,
        location=gcp_region,
        display_name=display_name,
    ).set_caching_options(True)
    ###############################################

    ###############################################
    ## Step 4 Deploy Model to the Endpoint
    # NOTE(review): a GPU accelerator is requested for an AutoML tabular model;
    # confirm that this accelerator type is supported/available for this model
    # type and region, otherwise drop the two accelerator arguments.
    deploy_op = gcc_aip.ModelDeployOp(  # noqa: F841
        model=training_op.outputs["model"],
        endpoint=endpoint_op.outputs["endpoint"],
        dedicated_resources_min_replica_count=1,
        dedicated_resources_max_replica_count=1,
        dedicated_resources_machine_type="n1-standard-2",
        dedicated_resources_accelerator_type="NVIDIA_TESLA_K80",
        dedicated_resources_accelerator_count=1,
        deployed_model_display_name=display_name
    ).set_caching_options(False)
    ###############################################
    # Nothing is returned: the task outputs are compile-time placeholders, so
    # stringifying them does not yield usable dataset/endpoint identifiers.

In [None]:
# Timing marker: shows the current local time as the cell output
# (`datetime` is the class imported in an earlier cell).
datetime.now()

In [None]:
## Create Pipeline instance

# Path of the compiled pipeline spec: written by the compiler below and read
# back by PipelineJob, so both must agree on it.
PIPELINE_PACKAGE_PATH = "fraud_detection_automl.json"

compiler.Compiler().compile(
    pipeline_func=pipeline, package_path=PIPELINE_PACKAGE_PATH
)

# The job is bound to `pipeline_job`, not `pipeline`, so the pipeline function
# is not overwritten; otherwise re-running this cell would pass a PipelineJob
# object to the compiler instead of the function.
pipeline_job = aip.PipelineJob(
    display_name="Fraud_Detection_AutoML",
    template_path=PIPELINE_PACKAGE_PATH,
    pipeline_root=PIPELINE_ROOT,
    parameter_values={
        "project": PROJECT_ID,
        "display_name": DISPLAY_NAME,
        "model_name": DISPLAY_NAME+TIMESTAMP,
    },
)

pipeline_job.submit()

# Optional cleanup of the compiled spec once the job has been submitted:
# ! rm -f {PIPELINE_PACKAGE_PATH}

In [None]:
# Timing marker: shows the current local time as the cell output
# (`datetime` is the class imported in an earlier cell).
datetime.now()