In [None]:
import os
from datetime import datetime as dt
from typing import Optional

import click
from zenml.client import Client
from zenml.enums import ModelStages
from zenml.logger import get_logger
from zenml.model import ModelConfig

from pipelines import (
    sentinment_analysis_deploy_pipeline,
    sentinment_analysis_feature_engineering_pipeline,
    sentinment_analysis_promote_pipeline,
    sentinment_analysis_training_pipeline,
)

# Module-level logger; the pipeline cell further down uses it to report when
# each pipeline run has finished.
logger = get_logger(__name__)



   

In [4]:

from zenml.client import Client

# Id of the stored artifact to inspect; hard-coded for this exploratory cell.
INSPECTED_ARTIFACT_ID = '57be5ee8-5580-4821-801e-832e2a719dd1'

# Look the artifact up through the ZenML client and materialize it so the
# following cell can index into it.
artifact = Client().get_artifact(INSPECTED_ARTIFACT_ID)
loaded_artifact = artifact.load()



In [34]:
import pandas as pd

# Show the label/text columns of the 'train' split as a table
# (the frame is the cell's last expression, so the notebook renders it).
train_split = loaded_artifact['train']
pd.DataFrame({column: train_split[column] for column in ('label', 'text')})

Unnamed: 0,label,text
0,0,@USAirways Hooray! I've now been on hold for o...
1,0,@VirginAmerica Is there anything going on with...
2,0,@united kind of unnerving to watch the guy dei...
3,1,@United is the best way to re-unite me with my...
4,0,@united education of that staff is needed - he...
...,...,...
1610,0,@united One of your workers refused to give me...
1611,1,@USAirways thanks for giving away my seat. Ano...
1612,0,@VirginAmerica status match program. I applie...
1613,0,Made it to #Costa #Rica and back @JetBlue Miss...


In [None]:
# --- Pipeline selection -----------------------------------------------------
# Each flag enables the matching section of the pipeline-run cell below.
feature_pipeline: bool = False
training_pipeline: bool = False
promoting_pipeline: bool = False
deploying_pipeline: bool = False

# --- Run behaviour ----------------------------------------------------------
# When True, the run cell sets enable_cache=False in the pipeline options.
no_cache: bool = True
# Name given to the ModelConfig objects built in the run cell.
zenml_model_name: str = "distil_bert_sentiment_analysis"

# --- Training hyper-parameters (forwarded to the training pipeline) ---------
num_epochs: int = 3
train_batch_size: int = 8
eval_batch_size: int = 8
learning_rate: float = 2e-5
weight_decay: float = 0.01
# Forwarded to both the feature-engineering and the training pipeline.
max_seq_length: int = 512

# --- Input artifacts --------------------------------------------------------
# Explicit artifact ids handed to the training pipeline; the run cell replaces
# them with looked-up ids when dataset_version_name is set.
dataset_artifact_id: Optional[str] = None
tokenizer_artifact_id: Optional[str] = None
# Name/version pair used for that lookup.
dataset_name: str = "tokenized_dataset"
dataset_version_name: Optional[str] = None

In [None]:
# Run a pipeline with the required parameters. This executes
# all steps in the pipeline in the correct order using the orchestrator
# stack component that is configured in your active ZenML stack.
#
# Locate the "configs" folder that holds the per-pipeline YAML files.
# `__file__` is only defined when this code runs as a script/module; inside a
# notebook kernel it does not exist and referencing it raises NameError, so
# fall back to the current working directory in that case.
try:
    _base_dir = os.path.dirname(os.path.realpath(__file__))
except NameError:
    _base_dir = os.getcwd()

config_folder = os.path.join(_base_dir, "configs")
# Model configuration attached to the feature-engineering and training runs
# (the promoting and deploying sections build their own ModelConfig).
model_config_settings = dict(
    name=zenml_model_name,
    license="Apache 2.0",
    description="Show case Model Control Plane.",
    create_new_model_version=True,
    delete_new_version_on_failure=True,
    tags=["sentiment_analysis", "huggingface"],
)
model_config = ModelConfig(**model_config_settings)

# Options passed to `with_options` for every pipeline; each section below adds
# its own entries. Caching is switched off up front when no_cache is set.
pipeline_args = {"enable_cache": False} if no_cache else {}

# Execute Feature Engineering Pipeline
if feature_pipeline:
    # The run name is suffixed with the current timestamp (second resolution).
    feature_run_timestamp = dt.now().strftime('%Y_%m_%d_%H_%M_%S')
    pipeline_args.update(
        model_config=model_config,
        config_path=os.path.join(config_folder, "feature_engineering_config.yaml"),
        run_name=f"sentinment_analysis_feature_engineering_pipeline_run_{feature_run_timestamp}",
    )

    # Only the sequence length is forwarded as a pipeline parameter.
    run_args_feature = dict(max_seq_length=max_seq_length)

    configured_feature_pipeline = (
        sentinment_analysis_feature_engineering_pipeline.with_options(**pipeline_args)
    )
    configured_feature_pipeline(**run_args_feature)
    logger.info("Feature Engineering pipeline finished successfully!")

# Execute Training Pipeline
if training_pipeline:
    pipeline_args["config_path"] = os.path.join(config_folder, "trainer_config.yaml")

    # Start from the explicitly configured artifact ids.
    dataset_id = dataset_artifact_id
    tokenizer_id = tokenizer_artifact_id

    if dataset_version_name:
        # A dataset version was requested: look both artifacts up by name and
        # version and use their ids instead. The base tokenizer is fetched with
        # the same version as the dataset.
        client = Client()
        versioned_dataset = client.get_artifact(dataset_name, dataset_version_name)
        versioned_tokenizer = client.get_artifact("base_tokenizer", dataset_version_name)
        dataset_id = versioned_dataset.id
        tokenizer_id = versioned_tokenizer.id

    # Parameters forwarded to the training pipeline call.
    run_args_train = dict(
        num_epochs=num_epochs,
        train_batch_size=train_batch_size,
        eval_batch_size=eval_batch_size,
        learning_rate=learning_rate,
        weight_decay=weight_decay,
        max_seq_length=max_seq_length,
        dataset_artifact_id=dataset_id,
        tokenizer_artifact_id=tokenizer_id,
    )

    # The run name is suffixed with the current timestamp (second resolution).
    training_run_timestamp = dt.now().strftime('%Y_%m_%d_%H_%M_%S')
    pipeline_args.update(
        model_config=model_config,
        run_name=f"sentinment_analysis_training_run_{training_run_timestamp}",
    )

    configured_training_pipeline = sentinment_analysis_training_pipeline.with_options(
        **pipeline_args
    )
    configured_training_pipeline(**run_args_train)
    logger.info("Training pipeline finished successfully!")

# Execute Promoting Pipeline
if promoting_pipeline:
    # The promoting run uses a ModelConfig that carries only the model name;
    # note that this rebinds the notebook-level `model_config`.
    model_config = ModelConfig(name=zenml_model_name)

    # The run name is suffixed with the current timestamp (second resolution).
    promoting_run_timestamp = dt.now().strftime('%Y_%m_%d_%H_%M_%S')
    pipeline_args.update(
        config_path=os.path.join(config_folder, "promoting_config.yaml"),
        model_config=model_config,
        run_name=f"sentinment_analysis_promoting_pipeline_run_{promoting_run_timestamp}",
    )

    # This pipeline is called without any run parameters.
    run_args_promoting = {}
    configured_promote_pipeline = sentinment_analysis_promote_pipeline.with_options(
        **pipeline_args
    )
    configured_promote_pipeline(**run_args_promoting)
    logger.info("Promoting pipeline finished successfully!")

# Execute Deploying Pipeline
if deploying_pipeline:
    # Deploying pipeline has new ZenML model config: it is built with
    # version=ModelStages.PRODUCTION and rebinds the notebook-level `model_config`.
    model_config = ModelConfig(
        name=zenml_model_name,
        version=ModelStages.PRODUCTION,
    )

    # The run name is suffixed with the current timestamp (second resolution).
    deploy_run_timestamp = dt.now().strftime('%Y_%m_%d_%H_%M_%S')
    pipeline_args.update(
        config_path=os.path.join(config_folder, "deploying_config.yaml"),
        model_config=model_config,
        # Caching is always disabled for this run, regardless of no_cache.
        enable_cache=False,
        run_name=f"sentinment_analysis_deploy_pipeline_run_{deploy_run_timestamp}",
    )

    # This pipeline is called without any run parameters.
    run_args_deploying = {}
    configured_deploy_pipeline = sentinment_analysis_deploy_pipeline.with_options(
        **pipeline_args
    )
    configured_deploy_pipeline(**run_args_deploying)
    logger.info("Deploying pipeline finished successfully!")
