In [1]:
!pip install kfp google-cloud-pipeline-components google-cloud-storage google-cloud-aiplatform

Collecting kfp
  Downloading kfp-1.8.16.tar.gz (304 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m304.5/304.5 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting google-cloud-pipeline-components
  Downloading google_cloud_pipeline_components-1.0.28-py3-none-any.whl (809 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m809.6/809.6 kB[0m [31m46.0 MB/s[0m eta [36m0:00:00[0m
Collecting PyYAML<6,>=5.3
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m636.6/636.6 kB[0m [31m55.4 MB/s[0m eta [36m0:00:00[0m
Collecting kubernetes<19,>=8.0.0
  Downloading kubernetes-18.20.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m72.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting google-api-python-client<2,>=1.7.8
  Downloading google_api_py

In [8]:
from kfp.v2.dsl import (Artifact,
                        Dataset,
                        Input,
                        Model,
                        Output,
                        Metrics,
                        ClassificationMetrics,
                        component, 
                        OutputPath, 
                        InputPath)

In [16]:
from typing import NamedTuple
import google.cloud.aiplatform as aip
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component
from kfp.v2 import compiler
# Project-level settings: the GCP project and the GCS location used both as
# the SDK staging bucket and (further down) as the pipeline root.
PROJECT_ID = "project-neuron-368805"
BUCKET_URI = "gs://char-rnn-classification/pipeline"

# Point the Vertex AI SDK at the project and staging bucket defined above.
aip.init(
    project=PROJECT_ID,
    staging_bucket=BUCKET_URI,
)

@component(output_component_file="data_loader.yaml", base_image="gcr.io/project-neuron-368805/name-classification:latest",)
def load_data(dataset:Output[Dataset]):
    """Download the names archive and unpack it under the output artifact.

    The archive at ``DATA_SOURCE_URL`` is fetched with ``utils.download_file``
    into ``<dataset.path>/artifact/download`` and extracted into that same
    directory.

    Args:
        dataset: Output ``Dataset`` artifact; ``dataset.path`` is used as the
            root directory for the downloaded and extracted files.
    """
    # Imports live inside the function because the component body runs in the
    # base image, where the project's `src` package is expected to exist.
    import os
    from zipfile import ZipFile

    from src import utils
    from src.logger import logging

    DATA_SOURCE_URL = "https://download.pytorch.org/tutorial/data.zip"

    # Directory layout: <dataset.path>/artifact/download/
    download_dir = os.path.join(dataset.path, "artifact", "download")

    logging.info("Download dataset")
    # NOTE(review): assumes utils.download_file creates download_dir if needed
    # and returns the path of the downloaded zip — confirm against src.utils.
    file_path = utils.download_file(url=DATA_SOURCE_URL, download_dir=download_dir)

    logging.info("Extract downloaded zip file")
    with ZipFile(file_path) as zip_file:
        zip_file.extractall(download_dir)

    

@component(
    output_component_file="pipeline.yaml",
    base_image="gcr.io/project-neuron-368805/name-classification:latest",
)
def train_model(dataset: Input[Dataset], model: Output[Model]):
    """Train the name classifier on the extracted dataset and save its parameters.

    Text files matching ``data/names/*.txt`` under
    ``<dataset.path>/artifact/download`` are loaded with ``utils.load_data``,
    a ``ModelTrainer`` is built from the loaded categories, training is run,
    one prediction is made for the input ``"Steve"`` (its result is not used),
    and ``model_trainer.model_params.to_dict()`` is written with
    ``utils.save_model``.

    Args:
        dataset: Input ``Dataset`` artifact whose ``path`` is the root
            directory holding the extracted data.
        model: Output ``Model`` artifact; the parameters are saved to
            ``f"{model.path}.pth"``.
    """
    # Imports live inside the function because the component body runs in the
    # base image, where the project's `src` package is expected to exist.
    import os

    from src import utils
    from src.logger import logging
    from src.components.model_trainer import ModelTrainer

    FILE_SEARCH_PATTERN = "data/names/*.txt"

    # Directory layout: <dataset.path>/artifact/download/
    download_dir = os.path.join(dataset.path, "artifact", "download")
    file_search_pattern = os.path.join(download_dir, FILE_SEARCH_PATTERN)

    # NOTE(review): the file is written next to model.path (with a ".pth"
    # suffix), not at model.path itself, so the artifact's own path does not
    # point at the saved file — confirm that downstream consumers expect this.
    model_file_path = f"{model.path}.pth"

    logging.info("Prepare dataset")
    # Use a separate name so the `dataset` input artifact is not overwritten
    # by the loaded data object.
    names_data = utils.load_data(file_search_pattern=file_search_pattern)

    logging.info("Created model trainer")
    model_trainer = ModelTrainer(
        category_lines=names_data.category_lines,
        all_categories=names_data.all_categories,
    )
    logging.info("Start model training")
    model_trainer.start_training()
    logging.info("Predict")
    # The returned value is discarded; this call only exercises prediction.
    model_trainer.predict(input_line="Steve")

    utils.save_model(model=model_trainer.model_params.to_dict(), path=model_file_path)
    
# Define the two-step pipeline (load data -> train model), then compile it to
# a job spec file that can be submitted to Vertex AI Pipelines.
@dsl.pipeline(name="clasification", pipeline_root=BUCKET_URI)
def pipleine():
    """Wire the data-loading step's `dataset` output into the training step."""
    load_task = load_data()
    train_model(dataset=load_task.outputs["dataset"])


compiler.Compiler().compile(
    pipeline_func=pipleine,
    package_path="training_pipeline.json",
)

# Submit the compiled spec ("training_pipeline.json") as a Vertex AI pipeline job.
DISPLAY_NAME = "classification"

job = aip.PipelineJob(display_name=DISPLAY_NAME, template_path="training_pipeline.json", pipeline_root=BUCKET_URI)

# Start the job; the recorded output below shows its state being reported
# repeatedly while it runs.
job.run()


Creating PipelineJob
PipelineJob created. Resource name: projects/134300826161/locations/us-central1/pipelineJobs/clasification-20221201130204
To use this PipelineJob in another session:
pipeline_job = aiplatform.PipelineJob.get('projects/134300826161/locations/us-central1/pipelineJobs/clasification-20221201130204')
View Pipeline Job:
https://console.cloud.google.com/vertex-ai/locations/us-central1/pipelines/runs/clasification-20221201130204?project=134300826161
PipelineJob projects/134300826161/locations/us-central1/pipelineJobs/clasification-20221201130204 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/134300826161/locations/us-central1/pipelineJobs/clasification-20221201130204 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/134300826161/locations/us-central1/pipelineJobs/clasification-20221201130204 current state:
PipelineState.PIPELINE_STATE_RUNNING
PipelineJob projects/134300826161/locations/us-central1/pipelineJobs/clasification-

In [2]:
from typing import NamedTuple
import google.cloud.aiplatform as aip
from kfp import dsl
from kfp.v2 import compiler
from kfp.v2.dsl import component
from kfp.v2 import compiler
# Project-level settings: the GCP project and the GCS location passed to the
# SDK as its staging bucket.
PROJECT_ID = "project-neuron-368805"
BUCKET_URI = "gs://char-rnn-classification/pipeline"

# Point the Vertex AI SDK at the project and staging bucket defined above.
aip.init(
    project=PROJECT_ID,
    staging_bucket=BUCKET_URI,
)


In [3]:
from kfp.components import create_component_from_func



In [17]:
import torch

In [18]:
# Display the version string of the PyTorch package imported in this kernel.
torch.__version__

'1.12.1'