In [1]:
import os
from zenml.repo import Repository
from zenml.datasources import CSVDatasource
from zenml.pipelines import TrainingPipeline
from zenml.steps.evaluator import TFMAEvaluator
from zenml.steps.preprocesser import StandardPreprocesser
from zenml.steps.split import RandomSplit
from zenml.steps.trainer import TFFeedForwardTrainer
from zenml.repo import Repository, ArtifactStore
from zenml.utils.naming_utils import transformed_label_name
from zenml.steps.deployer import GCAIPDeployer
from zenml.steps.deployer import CortexDeployer
from examples.cortex.predictor.tf import TensorFlowPredictor
from zenml.backends.orchestrator import OrchestratorGCPBackend
from zenml.metadata import MySQLMetadataStore
from zenml.backends.processing import ProcessingDataFlowBackend
from zenml.backends.training import SingleGPUTrainingGCAIPBackend

# Set up some variables

In [2]:
GCP_BUCKET=os.getenv('GCP_BUCKET')
GCP_PROJECT=os.getenv('GCP_PROJECT')
GCP_REGION=os.getenv('GCP_REGION')
GCP_CLOUD_SQL_INSTANCE_NAME=os.getenv('GCP_CLOUD_SQL_INSTANCE_NAME')
MODEL_NAME=os.getenv('MODEL_NAME')
CORTEX_ENV=os.getenv('CORTEX_ENV')
MYSQL_DB=os.getenv('MYSQL_DB')
MYSQL_USER=os.getenv('MYSQL_USER')
MYSQL_PWD=os.getenv('MYSQL_PWD')
MYSQL_PORT=os.getenv('MYSQL_PORT')
MYSQL_HOST=os.getenv('MYSQL_HOST')
CONNECTION_NAME = f'{GCP_PROJECT}:{GCP_REGION}:{GCP_CLOUD_SQL_INSTANCE_NAME}'
TRAINING_JOB_DIR = os.path.join(GCP_BUCKET, 'gcp_gcaip_training/staging')

TypeError: expected str, bytes or os.PathLike object, not NoneType

In [4]:
repo: Repository = Repository.get_instance()
    
artifact_store = ArtifactStore(os.path.join(GCP_BUCKET, 'all_feature_demo'))

# Create first pipeline

In [6]:
training_pipeline = TrainingPipeline(name='Experiment 1')

2021-04-27 10:46:30,928 — zenml.pipelines.base_pipeline — INFO — Pipeline Experiment 1 created.


#### Add a datasource. This will automatically track and version it.

In [7]:
try:
    ds = CSVDatasource(name='Pima Indians Diabetes', path='gs://zenml_quickstart/diabetes.csv')
except:
    repo: Repository = Repository.get_instance()
    ds = repo.get_datasource_by_name('Pima Indians Diabetes')
training_pipeline.add_datasource(ds)

2021-04-27 10:46:32,371 — zenml.datasources.base_datasource — INFO — Datasource Pima Indians Diabetes created.


#### Add a split step to partition data into train and eval

In [8]:
training_pipeline.add_split(RandomSplit(split_map={'train': 0.7, 'eval': 0.2, 'test':0.1}))

#### Add a preprocessing step to transform data to be ML-capable

In [9]:
training_pipeline.add_preprocesser(
    StandardPreprocesser(
        features=['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi',
                  'pedigree', 'age'],
        labels=['has_diabetes'],
        overwrite={'has_diabetes': {
            'transform': [{'method': 'no_transform', 'parameters': {}}]}}
    ))

#### Add a trainer which defines model and training

In [10]:
training_pipeline.add_trainer(TFFeedForwardTrainer(
    loss='binary_crossentropy',
    last_activation='sigmoid',
    output_units=1,
    metrics=['accuracy'],
    epochs=5))

#### Add an evaluator to calculate slicing metrics

In [11]:
training_pipeline.add_evaluator(
    TFMAEvaluator(slices=[['has_diabetes']],
                  metrics={transformed_label_name('has_diabetes'):
                     ['binary_crossentropy', 'binary_accuracy']}))

#### Run and evaluate

In [12]:
training_pipeline.run()

2021-04-27 10:46:38,771 — zenml.pipelines.training_pipeline — INFO — Datasource Pima Indians Diabetes has no commits. Creating the first one..
2021-04-27 10:46:38,774 — zenml.pipelines.base_pipeline — INFO — Pipeline 1619513198774 created.
2021-04-27 10:46:38,821 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component DataGen is running.
2021-04-27 10:46:39,401 — zenml.datasources.csv_datasource — INFO — Matched 1: ['gs://zenml_quickstart/diabetes.csv']
2021-04-27 10:46:39,406 — zenml.datasources.csv_datasource — INFO — Using header from file: gs://zenml_quickstart/diabetes.csv.
2021-04-27 10:46:39,797 — zenml.datasources.csv_datasource — INFO — Header: ['times_pregnant', 'pgc', 'dbp', 'tst', 'insulin', 'bmi', 'pedigree', 'age', 'has_diabetes'].


Connecting anonymously.




2021-04-27 10:46:45,473 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component DataGen is finished.
2021-04-27 10:46:45,481 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component DataStatistics is running.
2021-04-27 10:46:46,799 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component DataStatistics is finished.
2021-04-27 10:46:46,802 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component DataSchema is running.


Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


2021-04-27 10:46:46,857 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component DataSchema is finished.
2021-04-27 10:46:46,992 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is running.
2021-04-27 10:46:47,040 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is finished.
2021-04-27 10:46:47,042 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is running.
2021-04-27 10:46:47,073 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is finished.
2021-04-27 10:46:47,075 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataStatistics is running.
2021-04-27 10:46:47,100 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataStatistics is finished.
2021-04-27 10:46:47,101 — zenml.ba

Instructions for updating:
Schema is a deprecated, use schema_utils.schema_from_feature_spec to create a `Schema`
Instructions for updating:
Use ref() instead.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'


2021-04-27 10:47:05,610 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Transform is finished.
2021-04-27 10:47:05,611 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Trainer is running.
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
age_xf (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
bmi_xf (InputLayer)             [(None, 1)]          0                                            
__________________________________________________________________________________________________
dbp_xf (InputLayer)             [(None, 1)]          0                                            
____________________________________

  [n for n in tensors.keys() if n not in ref_input_names])


      1/Unknown - 0s 137us/step - loss: 0.5849 - accuracy: 0.7500

Instructions for updating:
use `tf.profiler.experimental.stop` instead.


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


  [n for n in tensors.keys() if n not in ref_input_names])
  [n for n in tensors.keys() if n not in ref_input_names])
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.


2021-04-27 10:47:11,798 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Trainer is finished.
2021-04-27 10:47:11,802 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Evaluator is running.
2021-04-27 10:47:16,256 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Evaluator is finished.


In [13]:
training_pipeline.view_statistics(magic=True)

2021-04-27 10:48:50,627 — zenml.pipelines.training_pipeline — INFO — Viewing statistics. If magic=False then a new window will open up with a notebook for evaluation. If magic=True, then an attempt will be made to append to the current notebook.


In [None]:
training_pipeline.evaluate(magic=True)

#### Inspect datasource

In [14]:
datasources = repo.get_datasources()
datasource = datasources[0]
print(datasource)

args:
  path: gs://zenml_quickstart/diabetes.csv
  schema: null
id: 9a9e2f4e-8b97-4fe0-bff4-3fc92b029ebb
name: Pima Indians Diabetes
source: zenml.datasources.csv_datasource.CSVDatasource@zenml_0.3.7



In [15]:
df = datasource.sample_data()
df.head()

2021-04-27 10:48:57,620 — zenml.utils.post_training.post_training_utils — INFO — Converting dataset to Pandas DataFrame..


Unnamed: 0,age,bmi,dbp,has_diabetes,insulin,pedigree,pgc,times_pregnant,tst
0,50,33.599998,72,1,0,0.627,148,6,35
1,31,26.6,66,0,0,0.351,85,1,29
2,32,23.299999,64,1,0,0.672,183,8,0
3,21,28.1,66,0,94,0.167,89,1,23
4,33,43.099998,40,1,168,2.288,137,0,35


In [16]:
df.shape

(768, 9)

In [17]:
df.columns

Index(['age', 'bmi', 'dbp', 'has_diabetes', 'insulin', 'pedigree', 'pgc',
       'times_pregnant', 'tst'],
      dtype='object')

## Skip preprocessing with your next (warm-starting) pipeline

#### Clone first experiment and only change one hyper-parameter

In [18]:
training_pipeline_2 = training_pipeline.copy('Experiment 2')
training_pipeline_2.add_trainer(TFFeedForwardTrainer(
    loss='binary_crossentropy',
    last_activation='sigmoid',
    output_units=1,
    metrics=['accuracy'],
    epochs=20))

2021-04-27 10:48:59,607 — zenml.pipelines.base_pipeline — INFO — Pipeline Experiment 2 created.


In [19]:
training_pipeline_2.run()

2021-04-27 10:49:00,340 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is running.
2021-04-27 10:49:00,375 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is finished.
2021-04-27 10:49:00,378 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is running.
2021-04-27 10:49:00,408 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is finished.
2021-04-27 10:49:00,410 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataStatistics is running.
2021-04-27 10:49:00,435 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataStatistics is finished.
2021-04-27 10:49:00,438 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component SplitGen is running.
2021-04-27 10:49:00,473 — zenml.backe

  [n for n in tensors.keys() if n not in ref_input_names])


      1/Unknown - 0s 81us/step - loss: 0.6498 - accuracy: 0.6250



Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


  [n for n in tensors.keys() if n not in ref_input_names])
  [n for n in tensors.keys() if n not in ref_input_names])


2021-04-27 10:49:07,145 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Trainer is finished.
2021-04-27 10:49:07,149 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Evaluator is running.




2021-04-27 10:49:11,245 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Evaluator is finished.


In [20]:
training_pipeline_2.evaluate(magic=True)

2021-04-27 10:49:11,279 — zenml.pipelines.training_pipeline — INFO — Evaluating pipeline. If magic=False then a new window will open up with a notebook for evaluation. If magic=True, then an attempt will be made to append to the current notebook.


## Post-training

#### Verify theres still only one datasource

In [None]:
datasources = repo.get_datasources()
print(f"We have {len(datasources)} datasources")

#### Compare pipelines

In [None]:
repo.compare_training_runs()

# Easily train on the cloud

In [7]:
training_pipeline_3 = training_pipeline.copy('Experiment 3')

# Add a trainer with a GCAIP backend
training_backend = SingleGPUTrainingGCAIPBackend(
    project=GCP_PROJECT,
    job_dir=TRAINING_JOB_DIR
)

training_pipeline.add_trainer(TFFeedForwardTrainer(
    loss='binary_crossentropy',
    last_activation='sigmoid',
    output_units=1,
    metrics=['accuracy'],
    epochs=20).with_backend(training_backend))

training_pipeline_3.run(artifact_store=artifact_store)

2021-04-27 10:58:22,184 — zenml.pipelines.base_pipeline — INFO — Pipeline Experiment 3 created.
2021-04-27 10:58:22,384 — zenml.backends.training.training_gcaip_backend — INFO — Launching GCAIP Trainer job train_1619513827
2021-04-27 10:58:22,386 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is running.
2021-04-27 10:58:22,440 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is finished.
2021-04-27 10:58:22,443 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is running.
2021-04-27 10:58:22,474 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is finished.
2021-04-27 10:58:22,477 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataStatistics is running.
2021-04-27 10:58:22,505 — zenml.backends.orchestrator.base.zenml_local_orchestrato

Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`




2021-04-27 10:58:33,267 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component SplitGen is finished.
2021-04-27 10:58:33,274 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component SplitStatistics is running.
2021-04-27 10:58:48,231 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component SplitStatistics is finished.
2021-04-27 10:58:48,233 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component SplitSchema is running.
2021-04-27 10:58:54,847 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component SplitSchema is finished.
2021-04-27 10:58:54,854 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Transform is running.


Instructions for updating:
Schema is a deprecated, use schema_utils.schema_from_feature_spec to create a `Schema`
Instructions for updating:
Use ref() instead.
Instructions for updating:
This function will only be available through the v1 compatibility library as tf.compat.v1.saved_model.utils.build_tensor_info or tf.compat.v1.saved_model.build_tensor_info.
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'
'Counter' object has no attribute 'name'


2021-04-27 11:00:46,466 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Transform is finished.
2021-04-27 11:00:46,473 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component Trainer is running.


Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.contrib.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 44, in autodetect
    from . import file_cache
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3

RuntimeError: Job 'projects/core-engine/jobs/ce_train_1619513827' did not succeed.  Detailed response {'jobId': 'ce_train_1619513827', 'trainingInput': {'scaleTier': 'CUSTOM', 'masterType': 'n1-standard-4', 'region': 'europe-west1', 'jobDir': 'gs://zenmlartifactstore/gcp_gcaip_training/staging', 'pythonVersion': '3.7', 'masterConfig': {'acceleratorConfig': {'count': '1', 'type': 'NVIDIA_TESLA_K80'}, 'imageUri': 'eu.gcr.io/maiot-zenml/zenml:cuda-0.3.7', 'containerCommand': ['python', '-m', 'tfx.scripts.run_executor', '--executor_class_path', 'tfx.components.trainer.executor.GenericExecutor', '--inputs', '{"examples": [{"artifact": {"id": "22", "type_id": "5", "uri": "gs://zenmlartifactstore/all_feature_demo/c23c99da5684ed052ebd33a635d37715/Transform/transformed_examples/36", "properties": {"split_names": {"string_value": "[\\"eval\\", \\"test\\", \\"train\\"]"}}, "custom_properties": {"name": {"string_value": "transformed_examples"}, "state": {"string_value": "published"}, "producer_component": {"string_value": "Transform"}}, "state": "LIVE", "create_time_since_epoch": "1619513942559", "last_update_time_since_epoch": "1619514046452"}, "artifact_type": {"id": "5", "name": "Examples", "properties": {"version": "INT", "span": "INT", "split_names": "STRING"}}, "__artifact_class_module__": "tfx.types.standard_artifacts", "__artifact_class_name__": "Examples"}], "schema": [{"artifact": {"id": "20", "type_id": "9", "uri": "gs://zenmlartifactstore/all_feature_demo/c23c99da5684ed052ebd33a635d37715/SplitSchema/schema/35", "custom_properties": {"producer_component": {"string_value": "SplitSchema"}, "name": {"string_value": "schema"}, "state": {"string_value": "published"}}, "state": "LIVE", "create_time_since_epoch": "1619513930926", "last_update_time_since_epoch": "1619513934834"}, "artifact_type": {"id": "9", "name": "Schema"}, "__artifact_class_module__": "tfx.types.standard_artifacts", "__artifact_class_name__": "Schema"}], "transform_graph": [{"artifact": {"id": "21", "type_id": "13", "uri": "gs://zenmlartifactstore/all_feature_demo/c23c99da5684ed052ebd33a635d37715/Transform/transform_graph/36", "custom_properties": {"name": {"string_value": "transform_graph"}, "state": {"string_value": "published"}, "producer_component": {"string_value": "Transform"}}, "state": "LIVE", "create_time_since_epoch": "1619513942557", "last_update_time_since_epoch": "1619514046451"}, "artifact_type": {"id": "13", "name": "TransformGraph"}, "__artifact_class_module__": "tfx.types.standard_artifacts", "__artifact_class_name__": "TransformGraph"}]}', '--outputs', '{"model": [{"artifact": {"id": "24", "type_id": "16", "uri": "gs://zenmlartifactstore/all_feature_demo/c23c99da5684ed052ebd33a635d37715/Trainer/model/37", "custom_properties": {"producer_component": {"string_value": "Trainer"}, "name": {"string_value": "model"}}}, "artifact_type": {"id": "16", "name": "Model"}, "__artifact_class_module__": "tfx.types.standard_artifacts", "__artifact_class_name__": "Model"}], "model_run": [{"artifact": {"id": "25", "type_id": "17", "uri": "gs://zenmlartifactstore/all_feature_demo/c23c99da5684ed052ebd33a635d37715/Trainer/model_run/37", "custom_properties": {"producer_component": {"string_value": "Trainer"}, "name": {"string_value": "model_run"}}}, "artifact_type": {"id": "17", "name": "ModelRun"}, "__artifact_class_module__": "tfx.types.standard_artifacts", "__artifact_class_name__": "ModelRun"}], "test_results": [{"artifact": {"id": "26", "type_id": "5", "uri": "gs://zenmlartifactstore/all_feature_demo/c23c99da5684ed052ebd33a635d37715/Trainer/test_results/37", "custom_properties": {"name": {"string_value": "test_results"}, "producer_component": {"string_value": "Trainer"}}}, "artifact_type": {"id": "5", "name": "Examples", "properties": {"version": "INT", "span": "INT", "split_names": "STRING"}}, "__artifact_class_module__": "tfx.types.standard_artifacts", "__artifact_class_name__": "Examples"}]}', '--exec-properties', '{"custom_config": "{\\"ai_platform_training_args\\": {\\"jobDir\\": \\"gs://zenmlartifactstore/gcp_gcaip_training/staging\\", \\"masterConfig\\": {\\"acceleratorConfig\\": {\\"count\\": 1, \\"type\\": \\"NVIDIA_TESLA_K80\\"}, \\"imageUri\\": \\"eu.gcr.io/maiot-zenml/zenml:cuda-0.3.7\\"}, \\"masterType\\": \\"n1-standard-4\\", \\"project\\": \\"core-engine\\", \\"pythonVersion\\": \\"3.7\\", \\"region\\": \\"europe-west1\\", \\"scaleTier\\": \\"CUSTOM\\", \\"scheduling\\": {\\"maxRunningTime\\": \\"7200s\\"}}, \\"ai_platform_training_job_id\\": \\"ce_train_1619513827\\", \\"args\\": {\\"batch_size\\": 8, \\"dropout_chance\\": 0.2, \\"epochs\\": 20, \\"hidden_activation\\": \\"relu\\", \\"hidden_layers\\": null, \\"last_activation\\": \\"sigmoid\\", \\"loss\\": \\"binary_crossentropy\\", \\"lr\\": 0.001, \\"metrics\\": [\\"accuracy\\"], \\"output_units\\": 1, \\"split_mapping\\": {\\"eval_splits\\": [\\"eval\\"], \\"test_splits\\": [\\"test\\"], \\"train_splits\\": [\\"train\\"]}}, \\"backend\\": {\\"args\\": {\\"gpu_type\\": \\"K80\\", \\"image\\": \\"eu.gcr.io/maiot-zenml/zenml:cuda-0.3.7\\", \\"job_dir\\": \\"gs://zenmlartifactstore/gcp_gcaip_training/staging\\", \\"job_name\\": \\"train_1619513827\\", \\"machine_type\\": \\"n1-standard-4\\", \\"max_running_time\\": 7200, \\"project\\": \\"core-engine\\", \\"python_version\\": \\"3.7\\", \\"region\\": \\"europe-west1\\"}, \\"source\\": \\"zenml.backends.training.training_gcaip_backend.SingleGPUTrainingGCAIPBackend@zenml_0.3.7\\", \\"type\\": \\"training\\"}, \\"source\\": \\"zenml.steps.trainer.tensorflow_trainers.tf_ff_trainer.FeedForwardTrainer@zenml_0.3.7\\"}", "run_fn": "zenml.components.trainer.trainer_module.run_fn"}']}, 'scheduling': {'maxRunningTime': '7200s'}}, 'createTime': '2021-04-27T09:00:57Z', 'startTime': '2021-04-27T09:07:18Z', 'endTime': '2021-04-27T09:09:51Z', 'state': 'FAILED', 'errorMessage': 'The replica master 0 exited with a non-zero status of 1. To find out more about why your job exited please check the logs: https://console.cloud.google.com/logs/viewer?project=973445798975&resource=ml_job%2Fjob_id%2Fce_train_1619513827&advancedFilter=resource.type%3D%22ml_job%22%0Aresource.labels.job_id%3D%22ce_train_1619513827%22', 'trainingOutput': {'consumedMLUnits': 0.22}, 'labels': {'tfx_executor': 'tfx-components-trainer-executor-genericexecutor', 'tfx_py_version': '3-6', 'tfx_version': '0-26-1'}, 'etag': 'L/bmhByB03A='}.

# Orchestrate every step on the Cloud

In [8]:
training_pipeline_4 = training_pipeline.copy('Experiment 4')

# Define the metadata store
metadata_store = MySQLMetadataStore(
    host=MYSQL_HOST,
    port=int(MYSQL_PORT),
    database=MYSQL_DB,
    username=MYSQL_USER,
    password=MYSQL_PWD,
)


# Define the orchestrator backend
orchestrator_backend = OrchestratorGCPBackend(
    cloudsql_connection_name=CONNECTION_NAME,
    project=GCP_PROJECT,
    preemptible=True,  # reduce costs by using preemptible instances
    machine_type='n1-standard-4',
    gpu='nvidia-tesla-k80',
    gpu_count=1,
)


# Run the pipeline
training_pipeline_4.run(
    backend=orchestrator_backend,
    metadata_store=metadata_store,
    artifact_store=artifact_store,
)

2021-04-27 12:22:19,311 — zenml.pipelines.base_pipeline — INFO — Pipeline Experiment 4 created.


Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.contrib.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 44, in autodetect
    from . import file_cache
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3

2021-04-27 12:22:20,914 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — Orchestrating pipeline on GCP..
2021-04-27 12:22:23,761 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — Created tar of current repository at: /home/hamza/workspace/maiot/github_temp/zenml/.zenml/zenml_working_1619518940.tar.gz
2021-04-27 12:22:55,459 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — Copied tar to artifact store at: gs://zenmlartifactstore/all_feature_demo/staging/zenml_working_1619518940.tar.gz
2021-04-27 12:22:55,468 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — Removed tar at: /home/hamza/workspace/maiot/github_temp/zenml/.zenml/zenml_working_1619518940.tar.gz
2021-04-27 12:22:55,479 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — Launching instance zenml-1619518939 of type n1-standard-4 in project: core-engine in zone europe-west1-b


Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.contrib.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 44, in autodetect
    from . import file_cache
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3

2021-04-27 12:22:58,294 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — Launched instance zenml-1619518939 with ID: 345204829920581871
2021-04-27 12:22:58,297 — zenml.backends.orchestrator.gcp.orchestrator_gcp_backend — INFO — View logs at: https://console.cloud.google.com/logs/query;query=logName%3D%22projects%2Fcore-engine%2Flogs%2Fgcplogs-docker-driver%22%0Aresource.labels.instance_id%3D%22345204829920581871%22?project=core-engine&folder=true&query=%0A


# Add a deployer step with different integrations

## Option 1: Deploy to Google Cloud AI Platform

In [12]:
training_pipeline_5 = training_pipeline.copy('Experiment 5')
training_pipeline_5.add_deployment(
    GCAIPDeployer(
        project_id=GCP_PROJECT,
        model_name=MODEL_NAME,
    )
)

training_pipeline_5.run(artifact_store=artifact_store)

2021-04-27 12:39:52,535 — zenml.pipelines.base_pipeline — INFO — Pipeline Experiment 11 created.
2021-04-27 12:39:52,799 — zenml.backends.training.training_gcaip_backend — INFO — Launching GCAIP Trainer job train_1619513827
2021-04-27 12:39:52,801 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is running.
2021-04-27 12:39:52,848 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataGen is finished.
2021-04-27 12:39:52,852 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is running.
2021-04-27 12:39:52,882 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataSchema is finished.
2021-04-27 12:39:52,884 — zenml.backends.orchestrator.base.zenml_local_orchestrator — INFO — Component ImporterNode.DataStatistics is running.
2021-04-27 12:39:52,918 — zenml.backends.orchestrator.base.zenml_local_orchestrat

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 33, in <module>
    from oauth2client.contrib.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.contrib.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/file_cache.py", line 37, in <module>
    from oauth2client.locked_file import LockedFile
ModuleNotFoundError: No module named 'oauth2client.locked_file'

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3.6/site-packages/googleapiclient/discovery_cache/__init__.py", line 44, in autodetect
    from . import file_cache
  File "/home/hamza/.virtualenvs/zenml_main/lib/python3

HttpError: <HttpError 409 when requesting https://ml.googleapis.com/v1/projects/core-engine/jobs?alt=json returned "Field: job.job_id Error: A job with this id already exists.". Details: "[{'@type': 'type.googleapis.com/google.rpc.BadRequest', 'fieldViolations': [{'field': 'job.job_id', 'description': 'A job with this id already exists.'}]}]">

## Option 2: Deploy to Kubernetes via Cortex

In [None]:
training_pipeline = repo.get_pipeline_by_name('Experiment 1')

In [None]:
training_pipeline_6 = training_pipeline.copy('Experiment 7')

# Add cortex deployer
api_config = {
    "name": MODEL_NAME,
    "kind": "RealtimeAPI",
    "predictor": {
        "type": "tensorflow",
        "models": {"signature_key": "serving_default"}}
}
training_pipeline.add_deployment(
    CortexDeployer(
        env=CORTEX_ENV,
        api_config=api_config,
        predictor=TensorFlowPredictor,
    )
)

training_pipeline_6.run(artifact_store=artifact_store)