In [None]:
import json
from pydantic.schema import schema
from olive.workflows.run.run import RunConfig
# Generate the JSON schema for Olive's RunConfig model and write it to disk.
run_config_schema = schema([RunConfig])
# Use a context manager so the file handle is flushed and closed deterministically
# (the bare open(...) passed to json.dump was never closed).
with open("run_config_schema.json", "w") as schema_file:
    json.dump(run_config_schema, schema_file, indent=2)

This tutorial demonstrates how to use the Olive API to optimize the BERT model one step at a time.
## PyTorch model evaluation

In [16]:
from olive.engine import Engine
from olive.model import PyTorchModel

# Create a fresh Olive engine that requests the CUDA execution provider, then initialize it.
engine = Engine(execution_providers=["CUDAExecutionProvider"])
engine.initialize()

# Dataset settings attached to the model: GLUE/MRPC validation split, one sample per batch.
mrpc_dataset = {
    "data_name":"glue",
    "subset": "mrpc",
    "split": "validation",
    "input_cols": ["sentence1", "sentence2"],
    "label_cols": ["label"],
    "batch_size": 1
}

# Hugging Face settings describing the checkpoint, its task and its dataset.
hf_model_config = {
    "model_name": "Intel/bert-base-uncased-mrpc",
    "task": "text-classification",
    "dataset": mrpc_dataset,
}

# Wrap the Hugging Face checkpoint as an Olive PyTorch model.
olive_model = PyTorchModel(hf_config=hf_model_config)
# [!] Limitation: the same dataset settings have to be duplicated in the evaluation data configs.

In [14]:
from olive.data.template import huggingface_data_config_template
from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
from olive.hardware.accelerator import DEFAULT_CPU_ACCELERATOR


def _mrpc_validation_data_config():
    """Build the data config used by every metric below.

    Both the accuracy and the latency metric evaluate on the same data
    (GLUE/MRPC, validation split, batch size 1), so the settings live in one
    place instead of being copy-pasted per metric. A new config is created on
    every call, so the metrics do not share a single object.
    """
    return huggingface_data_config_template(
        model_name="bert-base-uncased",
        task="text-classification",
        data_name="glue",
        subset="mrpc",
        split="validation",
        input_cols=["sentence1", "sentence2"],
        label_cols=["label"],
        batch_size=1,
    )


# Raw (dict) description of the metrics; parsed into an OliveEvaluatorConfig below.
evaluation_config_dict = {
    "metrics":[
        {
            # Accuracy metric computed through the "huggingface_metrics" backend.
            "name": "accuracy",
            "type": "accuracy",
            "backend": "huggingface_metrics",
            "sub_types": [
                {"name": "accuracy", "priority": 1, "goal": {"type": "max-degradation", "value": 0.01}},
                {"name": "f1"}
            ],
            "data_config": _mrpc_validation_data_config(),
        },
        {
            # Latency metric reporting the avg / max / min sub-metrics.
            "name": "latency",
            "type": "latency",
            "sub_types": [
                {"name": "avg", "priority": 2, "goal": {"type": "percent-min-improvement", "value": 20}},
                {"name": "max"},
                {"name": "min"}
            ],
            "data_config": _mrpc_validation_data_config(),
        }
    ]
}

# Keep the name `evaluation_config` for the parsed object so later cells can rely on it.
evaluation_config = OliveEvaluatorConfig.parse_obj(evaluation_config_dict)

# Evaluate the original PyTorch model to obtain the baseline metrics.
# NOTE(review): `_init_input_model` and `_evaluate_model` are underscore-prefixed
# Engine methods, presumably private API — confirm whether a public equivalent exists.
torch_result = engine._evaluate_model(
    olive_model,
    model_id=engine._init_input_model(olive_model),
    data_root=None,
    evaluator_config=evaluation_config,
    accelerator_spec=DEFAULT_CPU_ACCELERATOR,
)

In [15]:
# Show the baseline metrics (accuracy/F1 and latency avg/max/min) measured on the PyTorch model.
print(torch_result)

{'accuracy-accuracy': 0.8602941176470589, 'accuracy-f1': 0.9042016806722689, 'latency-avg': 42.24228, 'latency-max': 43.59108, 'latency-min': 41.78174}


## Convert the torch model to ONNX

In [27]:
from olive.hardware.accelerator import DEFAULT_GPU_CUDA_ACCELERATOR
from olive.passes import OnnxConversion

# Settings for the conversion pass: export with opset 13 and no user script.
onnx_conversion_config = {
    "target_opset": 13,
    "user_script": None,
    "script_dir": None,
}
onnx_conversion_pass = OnnxConversion(
    DEFAULT_GPU_CUDA_ACCELERATOR,
    config=onnx_conversion_config,
)

# Run the pass on the wrapped PyTorch model; the result is written under ./output_models/.
output_model_1 = onnx_conversion_pass.run(
    olive_model,
    data_root=None,
    output_model_path="./output_models/",
    point=None,
)
# [!] For a pass that requires a user_script, both script_dir and user_script must be provided.

In [1]:
from olive.passes import OrtTransformersOptimization

# Settings for the transformers optimization pass: BERT model type with
# 12 heads and hidden size 768, float16 and GPU options enabled, opt level 99.
transformers_opt_config = {
    "model_type": "bert",
    "num_heads": 12,
    "hidden_size": 768,
    "float16": True,
    "use_gpu": True,
    "opt_level":99,
}
transformers_opt_pass = OrtTransformersOptimization(
    DEFAULT_GPU_CUDA_ACCELERATOR,
    config=transformers_opt_config,
)

# Run the pass on the ONNX model produced by the conversion step.
output_model_2 = transformers_opt_pass.run(
    output_model_1,
    data_root=None,
    output_model_path="./output_models/trans_opt/",
    point=None,
)
# [!] Limitations of running passes directly like this:
#   - search cannot be run along with the passes
#   - the cache mechanism cannot be leveraged
#   - the footprint of the model cannot be accessed

2023-08-21 13:15:19.223388: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Packaging the model