In [16]:
# Print the path of the Python interpreter running this kernel, as a quick
# check of which environment the notebook is executing in.
import sys
print(sys.executable)

/Users/zacharynguyen/anaconda3/envs/39-kubeflow-pipeline-babyweight/bin/python


In [17]:
import datetime
import importlib
import os
import shutil

import kfp
from kfp import dsl, compiler
from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.model import ModelUploadOp,ModelExportOp

In [18]:
# --- Notebook / project identifiers -----------------------------------------
NOTEBOOK = "pipeline_fraud"
PROJECT = "e2e-fraud-detection"
REGION = "us-central1"
BUCKET = PROJECT  # the bucket name reuses the project ID string
APPNAME = "fraud_detection_app"

# --- BigQuery names ---------------------------------------------------------
BQ_DATASET = "fraud_dataset"
BQ_TABLENAME = "tb-fraud"

# --- Credentials ------------------------------------------------------------
# NOTE(review): this is an absolute path on one developer's machine; anyone
# else running the notebook has to point it at their own key file.
GOOGLE_APPLICATION_CREDENTIALS = (
    "/Users/zacharynguyen/Documents/GitHub/2024/"
    "End-to-End-Vertex-AI-Pipeline-for-Fraud-Detection/"
    "key/e2e-fraud-detection-debf1c9863af.json"
)

# Export the settings as process environment variables in a single call.
os.environ.update(
    {
        "REGION": REGION,
        "PROJECT": PROJECT,
        "BUCKET": BUCKET,
        "GOOGLE_APPLICATION_CREDENTIALS": GOOGLE_APPLICATION_CREDENTIALS,
    }
)

In [19]:
# Cloud Storage location used as the pipeline root.
PIPELINE_URI = "gs://" + BUCKET + "/" + APPNAME
EXPERIMENT = "data-ingestion-pipeline"

# Public BigQuery table that serves as the source data.
BQ_SOURCE = "bigquery-public-data.ml_datasets.ulb_fraud_detection"

# Resources for serving BigQuery model exports: container images ...
TF_DEPLOY_IMAGE = "us-docker.pkg.dev/vertex-ai/prediction/tf2-cpu.2-3:latest"
XGB_DEPLOY_IMAGE = "us-docker.pkg.dev/vertex-ai/prediction/xgboost-cpu.0-82:latest"

# ... and the machine type.
DEPLOY_COMPUTE = "n1-standard-4"

# Model training: target column and column(s) to leave out.
VAR_TARGET = "Class"
VAR_OMIT = "transaction_id"  # add more variables to the string with space delimiters


In [20]:
# Import the local component module, then reload it so that changes made to
# the file since it was first imported take effect without a kernel restart.
from src import copy_table_and_return_path
importlib.reload(copy_table_and_return_path)

<module 'src.copy_table_and_return_path' from '/Users/zacharynguyen/Documents/GitHub/2024/End-to-End-Vertex-AI-Pipeline-for-Fraud-Detection/Data Sources/src/copy_table_and_return_path.py'>

In [21]:
@dsl.pipeline(name=APPNAME, pipeline_root=PIPELINE_URI)
def vertex_ai_pipeline(
    project: str = PROJECT,
    region: str = REGION,
):
    """Pipeline with a single step that prepares the BigQuery source table.

    Args:
        project: Project ID passed to the table-prep component; also used as
            the project part of the output dataset ID.
        region: Region passed to the table-prep component.
    """
    # The dataset ID is derived from the `project` pipeline parameter, not the
    # notebook-level PROJECT constant. Otherwise a run submitted with a
    # different `project` value would still target the dataset of the project
    # that was configured when the pipeline was compiled.
    data_preparing_task = copy_table_and_return_path.bq_table_prep_op(
        project=project,
        region=region,
        source_bq_table_id=BQ_SOURCE,
        out_bq_dataset_id=f"{project}.{BQ_DATASET}",
    ).set_display_name('prepped-bq-table-create')

In [22]:
# Create a fresh (local) directory to store the compiled pipeline file.
DIR = "temp"

# Clear whatever is left from a previous run, then recreate the directory.
# Pure-Python equivalent of `rm -rf` + `mkdir -p`, so the cell does not depend
# on a POSIX shell being available.
if os.path.islink(DIR) or os.path.isfile(DIR):
    # A plain file or symlink with this name is removed as a single entry.
    os.remove(DIR)
else:
    # A missing directory is not an error, matching `rm -rf`.
    shutil.rmtree(DIR, ignore_errors=True)
os.makedirs(DIR, exist_ok=True)

In [23]:
# File inside DIR that receives the compiled pipeline definition.
compiled_package = "/".join((DIR, "compiled_pipeline_package.json"))

# Compile the pipeline function into that file.
compiler.Compiler().compile(pipeline_func=vertex_ai_pipeline, package_path=compiled_package)

In [24]:
# Initialize the Vertex AI SDK with this notebook's project and region.
aiplatform.init(project=PROJECT, location=REGION)

In [25]:
# Define the pipeline run from the compiled template, passing the project and
# region as pipeline parameters and labelling the job with this notebook's name.
pipeline_job = aiplatform.PipelineJob(
    display_name=APPNAME,
    template_path=compiled_package,
    pipeline_root=PIPELINE_URI,
    parameter_values={"project": PROJECT, "region": REGION},
    labels={"notebook": NOTEBOOK},
)

In [26]:
# Service account e-mail passed to the pipeline job when it is run.
SERVICE_ACCOUNT = 'zacharynguyen@e2e-fraud-detection.iam.gserviceaccount.com'

In [27]:
# Submit the pipeline job under the configured service account.
response = pipeline_job.run(service_account=SERVICE_ACCOUNT)

Creating PipelineJob
PipelineJob created. Resource name: projects/993073267534/locations/us-central1/pipelineJobs/fraud-detection-app-20240315155850
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/993073267534/locations/us-central1/pipelineJobs/fraud-detection-app-20240315155850')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/fraud-detection-app-20240315155850?project=993073267534
PipelineJob projects/993073267534/locations/us-central1/pipelineJobs/fraud-detection-app-20240315155850 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/993073267534/locations/us-central1/pipelineJobs/fraud-detection-app-20240315155850 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/993073267534/locations/us-central1/pipelineJobs/fraud-detection-app-20240315155850 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/993073267534/locations/us-