### Installation
Install the packages required for executing this notebook.

In [1]:
import os

# The Vertex AI Workbench Notebook product has specific requirements
IS_WORKBENCH_NOTEBOOK = os.getenv("DL_ANACONDA_HOME") and not os.getenv("VIRTUAL_ENV")
IS_USER_MANAGED_WORKBENCH_NOTEBOOK = os.path.exists(
    "/opt/deeplearning/metadata/env_version"
)

# Vertex AI Notebook requires dependencies to be installed with '--user'
USER_FLAG = ""
if IS_WORKBENCH_NOTEBOOK:
    USER_FLAG = "--user"

! pip3 install --upgrade google-cloud-aiplatform {USER_FLAG} -q
! pip3 install -U google-cloud-storage {USER_FLAG} -q
! pip3 install {USER_FLAG} kfp google-cloud-pipeline-components --upgrade -q

[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
cloud-tpu-client 0.10 requires google-api-python-client==1.8.0, but you have google-api-python-client 1.12.11 which is incompatible.[0m[31m
[0m

## Restart the kernel
Once you've installed the additional packages, you need to restart the notebook kernel so it can find the packages.

In [2]:
import os

if not os.getenv("IS_TESTING"):
    # Automatically restart kernel after installs
    import IPython

    app = IPython.Application.instance()
    app.kernel.do_shutdown(True)

In [None]:
Check the versions of the packages you installed. The KFP SDK version should be >=1.6.

In [1]:
! python3 -c "import kfp; print('KFP SDK version: {}'.format(kfp.__version__))"

KFP SDK version: 1.8.14


In [2]:
import kfp
from kfp.v2 import dsl
from kfp.v2.dsl import component
from kfp.v2.dsl import (
    Input,
    Output,
    Artifact,
    Dataset,
)

#### Pipeline Configurations

In [17]:
#The Google Cloud project that this pipeline runs in.
project_id = "use your project id here"
# The region that this pipeline runs in
region = "us-west1"
# Specify a Cloud Storage URI that your pipelines service account can access. The artifacts of your pipeline runs are stored within the pipeline root.
pipeline_root_path = "your bucket e.g., gs://de_jads_temp"

#### Create the Components from Component Specifications 

In [4]:
from kfp.components import load_component_from_file

data_ingestion_op = load_component_from_file(
    './components/data_ingestor/component.yaml')

mlp_model_op = load_component_from_file(
    './components/mlp_trainer/component.yaml')

#### Define the Pipeline

In [12]:
# Define the workflow of the pipeline.
@kfp.dsl.pipeline(
    name="diabetes-predictor-mlp",
    pipeline_root=pipeline_root_path)
def pipeline(project_id: str, data_bucket: str, trainset_filename: str, model_repo: str):
    
    # The first step    
    di_op = data_ingestion_op(
        project_id=project_id,
        bucket=data_bucket,
        file_name=trainset_filename
    )

    # The second step 
    training_job_run_op = mlp_model_op(
        project_id=project_id,
        model_repo=model_repo,       
        feature_path=di_op.outputs['feature_path']
    )

#### Compile the Pipeline

In [15]:
from kfp.v2 import compiler
compiler.Compiler().compile(pipeline_func=pipeline,
        package_path='diabetes_predictor_mlp_pipeline.json')

#### Run the Pipeline

In [16]:
import google.cloud.aiplatform as aip

job = aip.PipelineJob(
    display_name="diabetes-predictor-mlp-pipeline",
    template_path="diabetes_predictor_mlp_pipeline.json",
    enable_caching=False,
    pipeline_root=pipeline_root_path,
    parameter_values={
        'project_id': project_id,
        'data_bucket': 'data_de2022',
        'trainset_filename': 'training_set.csv',
        'model_repo':'model_repo_de2022'
    }
)

job.run()

Creating PipelineJob
PipelineJob created. Resource name: projects/958343845263/locations/us-central1/pipelineJobs/diabetes-predictor-mlp-20221011125216
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/958343845263/locations/us-central1/pipelineJobs/diabetes-predictor-mlp-20221011125216')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/diabetes-predictor-mlp-20221011125216?project=958343845263
PipelineJob projects/958343845263/locations/us-central1/pipelineJobs/diabetes-predictor-mlp-20221011125216 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/958343845263/locations/us-central1/pipelineJobs/diabetes-predictor-mlp-20221011125216 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/958343845263/locations/us-central1/pipelineJobs/diabetes-predictor-mlp-20221011125216 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/95834384