In [None]:
!pip install kfp
!pip install google-cloud-pipeline-components

In [14]:
import kfp

from typing import NamedTuple

from kfp.v2.dsl import pipeline
from kfp.v2.dsl import component
from kfp.v2.dsl import OutputPath
from kfp.v2.dsl import InputPath


from kfp.v2.dsl import Output
from kfp.v2.dsl import Metrics

from kfp.v2 import compiler

# from kfp.google.client import AIPlatformClient


from google.cloud import aiplatform
from google.cloud.aiplatform import pipeline_jobs

# from google_cloud_pipeline_components import aiplatform as gcc_aip

from google_cloud_pipeline_components.v1.model import ModelUploadOp

In [15]:
# Google Cloud project passed as `project` when the Vertex AI SDK is initialised.
PROJECT_ID = "citric-program-331111"
# Cloud Storage location passed as `pipeline_root` to the pipeline definitions below.
PIPELINE_ROOT = "gs://cloud-ai-platform-4de6a256-deb4-4f4f-8b9f-8595e0183ea1/"
# Region passed as `location` when the Vertex AI SDK is initialised.
PROJECT_REGION = "us-central1"

In [16]:
# Initialise the Vertex AI SDK from the notebook-level settings; the region
# comes from PROJECT_REGION so it is defined in exactly one place.
aiplatform.init(project=PROJECT_ID, location=PROJECT_REGION)

In [17]:
# Lightweight component: joins the two input strings, `a` first, then `b`.
@component
def concat(a: str, b: str) -> str:
    return f"{a}{b}"


# Lightweight component with two named outputs: `before` is the input string
# unchanged and `after` is the same string with its characters reversed.
@component
def reverse(a: str) -> NamedTuple("outputs", [("before", str), ("after", str)]):
    flipped = a[::-1]
    return (a, flipped)


# Two-step demo pipeline: concatenate `a` and `b`, then feed the result to `reverse`.
@pipeline(
    name="basic-pipeline",
    pipeline_root=f"{PIPELINE_ROOT}basic-pipeline",
)
def basic_pipeline(a: str = "stres", b: str = "sed"):
    joined = concat(a=a, b=b)
    # The reverse step's result is not consumed by any later step, so it is
    # not bound to a name.
    reverse(a=joined.output)

# Compile `basic_pipeline` into the template file that the job cell below loads.
compiler.Compiler().compile(
    package_path="basic_pipeline.json",
    pipeline_func=basic_pipeline,
)

In [39]:
# Create a run of the compiled "basic-pipeline" template with explicit values
# for its two string parameters.
job = pipeline_jobs.PipelineJob(
    template_path="basic_pipeline.json",
    display_name="basic-pipeline",
    parameter_values=dict(a="stres", b="sed"),
)
# Start the run with sync=False (see the PipelineJob.run documentation for
# the blocking behaviour of this flag).
job.run(sync=False)

# job.submit()

## Vertex AI Pipelines tutorial (KFP)
Source: https://cloud.google.com/vertex-ai/docs/pipelines/build-pipeline#started


In [3]:
import kfp
from google.cloud import aiplatform
from google_cloud_pipeline_components.v1.dataset import ImageDatasetCreateOp
from google_cloud_pipeline_components.v1.automl.training_job import AutoMLImageTrainingJobRunOp
from google_cloud_pipeline_components.v1.endpoint import EndpointCreateOp, ModelDeployOp

# Lower-case aliases of the notebook-level settings; the pipeline definition
# and the job submission further down refer to these names.
project_id = PROJECT_ID
pipeline_root_path = PIPELINE_ROOT

# Define the workflow of the pipeline.
# Workflow definition for the AutoML image-classification pipeline.
#
# NOTE(review): this function is named `pipeline`, the same name as the
# `pipeline` decorator imported from `kfp.v2.dsl` in the first import cell.
# After this cell has run, `@pipeline(...)` in the "basic-pipeline" cell would
# resolve to this object instead of the decorator, so that cell cannot be
# re-run without re-importing. The name is kept because the compile cell
# passes `pipeline_func=pipeline`.
@kfp.dsl.pipeline(pipeline_root=pipeline_root_path, name="automl-image-training-v2")
def pipeline(project_id: str):
    # Step 1 - dataset creation, using a Google Cloud Pipeline Component.
    # `flowers_dataset` only holds the *definition* of the step's output, not
    # the object produced at execution time. That value cannot be inspected
    # here at the dsl.pipeline level; it can only be consumed by wiring it
    # into a downstream component as an input.
    flowers_dataset = ImageDatasetCreateOp(
        display_name="flowers",
        project=project_id,
        import_schema_uri=aiplatform.schema.dataset.ioformat.image.single_label_classification,
        gcs_source="gs://cloud-samples-data/vision/automl_classification/flowers/all_data_v2.csv",
    )

    # Step 2 - model training. The dataset output of step 1 is supplied via
    # `dataset=flowers_dataset.outputs["dataset"]`; the step's own outputs are
    # exposed through `automl_training`.
    automl_training = AutoMLImageTrainingJobRunOp(
        display_name="train-iris-automl-mbsdk-1",
        model_display_name="iris-classification-model-mbsdk",
        project=project_id,
        dataset=flowers_dataset.outputs["dataset"],
        prediction_type="classification",
        model_type="CLOUD",
        budget_milli_node_hours=8000,
        training_fraction_split=0.6,
        validation_fraction_split=0.2,
        test_fraction_split=0.2,
    )

    # Steps 3 and 4 - deployment: create an endpoint, then deploy the trained
    # model onto it.
    serving_endpoint = EndpointCreateOp(
        display_name="create-endpoint",
        project=project_id,
    )

    ModelDeployOp(
        endpoint=serving_endpoint.outputs["endpoint"],
        model=automl_training.outputs["model"],
        automatic_resources_max_replica_count=1,
        automatic_resources_min_replica_count=1,
    )

In [4]:
from kfp import compiler

# Compile the AutoML image pipeline into the YAML template that the
# submission cell loads.
compiler.Compiler().compile(
    package_path="image_classif_pipeline.yaml",
    pipeline_func=pipeline,
)

In [7]:
%env GOOGLE_APPLICATION_CREDENTIALS="/Users/yusali/Downloads/citric-program-331111-ab6d94dddf97.json"

env: GOOGLE_APPLICATION_CREDENTIALS="/Users/yusali/Downloads/citric-program-331111-ab6d94dddf97.json"


In [9]:
from google.oauth2 import service_account

# Load service-account credentials from a JSON key file on disk.
# NOTE(review): the key path is an absolute location on one machine, so this
# cell is not portable as written.
credentials = service_account.Credentials.from_service_account_file(
    filename="/Users/yusali/Downloads/default-compute-service-account.json"
)

In [11]:
import google.cloud.aiplatform as aip

# Point the SDK at the project and region, authenticating with the
# `credentials` object loaded from the service-account key file above.
aip.init(credentials=credentials, project=project_id, location=PROJECT_REGION)

# Describe a run of the compiled AutoML image pipeline template.
job = aip.PipelineJob(
    template_path="image_classif_pipeline.yaml",
    display_name="automl-image-training-v2",
    parameter_values=dict(project_id=project_id),
    pipeline_root=pipeline_root_path,
)

# Hand the job over to Vertex AI.
job.submit()

Creating PipelineJob
PipelineJob created. Resource name: projects/944377447452/locations/us-central1/pipelineJobs/automl-image-training-v2-20240306171039
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/944377447452/locations/us-central1/pipelineJobs/automl-image-training-v2-20240306171039')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/automl-image-training-v2-20240306171039?project=944377447452


In [20]:
%env GOOGLE_APPLICATION_CREDENTIALS="/Users/yusali/Downloads/default-compute-service-account.json"

env: GOOGLE_APPLICATION_CREDENTIALS="/Users/yusali/Downloads/default-compute-service-account.json"


In [37]:
from google.cloud import storage
from io import StringIO

def read_from_gcs(
    blob_path: str,
    bucket_name: str = "cloud-ai-platform-4de6a256-deb4-4f4f-8b9f-8595e0183ea1",
) -> StringIO:
    """Download a text object from Cloud Storage into an in-memory text buffer.

    Args:
        blob_path: Name of the object (its path inside the bucket) to download.
        bucket_name: Bucket to read from. Defaults to the bucket this notebook
            was written against, so existing one-argument calls are unchanged.

    Returns:
        A ``StringIO`` wrapping the object's text, positioned at the start, so
        it can be handed directly to readers such as ``pd.read_csv``.

    Note:
        The storage client is built with the notebook-level ``credentials``
        object, which must already be defined when this function is called.
    """
    client = storage.Client(credentials=credentials)
    bucket = client.get_bucket(bucket_name)
    text = bucket.blob(blob_path).download_as_text()
    return StringIO(text)


# pandas is used here but is not imported anywhere above in this notebook,
# so on a fresh kernel `pd` would be undefined; import it before use.
import pandas as pd

# Read the CSV object from Cloud Storage into a DataFrame.
df = pd.read_csv(read_from_gcs("final_dataset.csv"))

In [34]:
# Show the loaded DataFrame as this cell's output.
df

Unnamed: 0,courier_id,order_number,courier_location_timestamp,courier_lat,courier_lon,order_created_timestamp,restaurant_lat,restaurant_lon
0,a98737cbhoho5012hoho4b5bhoho867fhoho8475c658546d,281289453,2021-04-02T04:30:42.328Z,50.484520,-104.618876,2021-04-02T04:20:42Z,50.483696,-104.614350
1,39a26fa0hohof428hoho47a4hohoa320hoho12e3d831c23a,280949566,2021-04-01T06:14:47.386Z,50.442573,-104.550463,2021-04-01T06:05:18Z,50.442422,-104.550487
2,3813235ehoho7a42hoho4601hohob7eahoho799e8af5b535,281328578,2021-04-02T05:48:57.224Z,50.495920,-104.635605,2021-04-02T05:13:26Z,50.496595,-104.635606
3,9f033953hohocd53hoho488ahohoaf51hohoc57943e499ed,281317998,2021-04-02T05:12:17.252Z,50.449445,-104.611521,2021-04-02T04:59:57Z,50.449504,-104.611074
4,56f65bc8hohoba54hoho47dfhohoa09chohof7464b5d9848,281314132,2021-04-02T05:15:38.266Z,50.495254,-104.666383,2021-04-02T04:54:53Z,50.495160,-104.665733
...,...,...,...,...,...,...,...,...
20572,2f21e0c7hoho79b8hoho4ecdhohoaacbhohoad8b3e6565c2,281356256,2021-04-02T05:49:45.288Z,50.463855,-104.618036,2021-04-02T05:48:14Z,50.475204,-104.617475
20573,b3fe5a77hohofbb2hoho4c5fhohob9d5hoho1f87dd2ab9ed,281348386,2021-04-02T05:38:50.548Z,50.482136,-104.606574,2021-04-02T05:38:29Z,50.475204,-104.617475
20574,30a6cb7fhoho1825hoho407ehoho97f0hoho9d374a6b2f20,281353044,2021-04-02T05:46:23.316Z,50.473982,-104.631716,2021-04-02T05:44:18Z,50.475204,-104.617475
20575,30a6cb7fhoho1825hoho407ehoho97f0hoho9d374a6b2f20,281313038,2021-04-02T04:53:53.119Z,50.466229,-104.618022,2021-04-02T04:53:22Z,50.475204,-104.617475


# Pipeline testing 

In [51]:
from model.data_collection import data_ingestion
from model.feature_generation import generate_features
from model.training import data_split, training_pipeline_run


FileNotFoundError: [Errno 2] No such file or directory: './config.yaml'