
Commit 4b48cf0

Update based on review feedback
1 parent d1b826f

File tree: 10 files changed, +294 -317 lines


examples/llama2/llama2_auto_opt.json

Lines changed: 1 addition & 0 deletions
@@ -71,6 +71,7 @@
     },
     "auto_optimizer_config": {
         "precision": "fp32",
+        "use_model_builder": true,
         "train_data_config": "wikitext2_train",
         "calibration_data_config": "transformer_token_dummy_data",
         "accelerator": { "accelerator_type": "cpu", "execution_provider": "CPUExecutionProvider" },

olive/auto_optimizer/auto_optimizer.py

Lines changed: 180 additions & 176 deletions
Large diffs are not rendered by default.

olive/cli/auto_opt.py

Lines changed: 11 additions & 1 deletion
@@ -157,7 +157,7 @@ def register_subcommand(parser: ArgumentParser):
             nargs="*",
             default=None,
             help=(
-                "Dictionary of name to precision. Has to be even number of entreis with even "
+                "Dictionary of name to precision. Has to be even number of entries with even "
                 "entries being the keys and odd entries being the values. "
                 'Required only when output precision is "fp16" and MixedPrecisionOverrides pass is enabled.'
             ),
@@ -167,6 +167,10 @@ def register_subcommand(parser: ArgumentParser):
             "--use_ort_genai", action="store_true", help="Use OnnxRuntime generate() API to run the model"
         )

+        sub_parser.add_argument(
+            "--surgeries", type=str, nargs="*", default=None, help="List of graph surgeries to apply."
+        )
+
         add_search_options(sub_parser)
         add_remote_options(sub_parser)
         add_shared_cache_options(sub_parser)
@@ -293,6 +297,7 @@ def _get_passes_config(self, config: dict[str, Any], olive_config: OlivePackageC
             (("to_fixed_shape", "dim_param"), self.args.dynamic_to_fixed_shape_dim_param),
             (("to_fixed_shape", "dim_value"), self.args.dynamic_to_fixed_shape_dim_value),
             (("mixed_precision_overrides", "overrides_config"), mixed_precision_overrides_config),
+            (("surgeries", "surgeries"), [{"surgeon": surgeon} for surgeon in (self.args.surgeries or [])]),
         ]
         for keys, value in to_replace:
             if value is not None:
@@ -343,6 +348,10 @@ def _get_passes_config(self, config: dict[str, Any], olive_config: OlivePackageC
             # Remove QDQ encoding pass if not required
             passes_to_remove.add("mnb_to_qdq")

+        if not self.args.surgeries:
+            # Remove the surgeries pass if the user hasn't provided any surgeries
+            passes_to_remove.add("surgeries")
+
         # remove passes that are incompatible with the selected precision, provider, or device
         for pass_name in list(passes_config.keys()):
             pass_run_config = passes_config[pass_name]
@@ -439,6 +448,7 @@ def _get_passes_config(self, config: dict[str, Any], olive_config: OlivePackageC
             ("bnb4", {"type": "OnnxBnb4Quantization", "precision": Precision.NF4}),
             # post processing passes
             ("mnb_to_qdq", {"type": "MatMulNBitsToQDQ"}),
+            ("surgeries", {"type": "GraphSurgeries", "surgeries": {}}),
             ("split_model", {"type": "SplitModel"}),
             ("extract_adapters", {"type": "ExtractAdapters"}),
         ]
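
The plumbing is straightforward: each --surgeries value becomes a {"surgeon": name} entry in the GraphSurgeries pass config, and the pass is dropped entirely when the flag is absent. A standalone sketch of that mapping (the surgeon names are placeholders, not necessarily registered surgeons):

    # e.g. olive auto-opt ... --surgeries SurgeonA SurgeonB   (names hypothetical)
    args_surgeries = ["SurgeonA", "SurgeonB"]

    # Same comprehension as in _get_passes_config above.
    surgeries_config = [{"surgeon": surgeon} for surgeon in (args_surgeries or [])]
    print(surgeries_config)  # [{'surgeon': 'SurgeonA'}, {'surgeon': 'SurgeonB'}]

    # With the default (None), the comprehension yields nothing to apply and
    # "surgeries" lands in passes_to_remove instead.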

olive/olive_config.json

Lines changed: 0 additions & 21 deletions
@@ -608,26 +608,5 @@
         "tf": [ "tensorflow==1.15.0" ],
         "torch-tensorrt": [ "torch-tensorrt" ],
         "tune-session-params": [ "psutil" ]
-    },
-    "auto_opt_passes": {
-        "capture": [ "CaptureSplitInfo" ],
-        "pt_finetune": [ "DoRA", "LoftQ", "LoHa", "LoKr", "LoRA", "QLoRA" ],
-        "pt_quantize": [ "QuaRot", "SpinQuant", "AutoAWQQuantizer", "GptqQuantizer" ],
-        "conversion": [ "OnnxConversion", "ModelBuilder" ],
-        "peephole": [ "OnnxPeepholeOptimizer" ],
-        "transformers": [ "OrtTransformersOptimization" ],
-        "io_converter": [ "OnnxIODataTypeConverter" ],
-        "prepare_qnn": [ "DynamicToFixedShape", "QNNPreprocess", "MixedPrecisionOverrides" ],
-        "onnx_quantize": [
-            "OnnxBnb4Quantization",
-            "OnnxMatMul4Quantizer",
-            "OnnxDynamicQuantization",
-            "OnnxStaticQuantization",
-            "NVModelOptQuantization",
-            "IncDynamicQuantization",
-            "IncStaticQuantization"
-        ],
-        "onnx_finetune": [ "OrtSessionParamsTuning" ],
-        "post_process": [ "MatMulNBitsToQDQ", "SplitModel", "ExtractAdapters" ]
     }
 }

olive/package_config.py

Lines changed: 16 additions & 0 deletions
@@ -64,3 +64,19 @@ def get_pass_module_config(self, pass_type: str) -> PassModuleConfig:
     def is_onnx_module(self, pass_type: str) -> bool:
         pass_module = self.get_pass_module_config(pass_type)
         return pass_module.module_path.startswith("olive.passes.onnx")
+
+    def is_openvino_module(self, pass_type: str) -> bool:
+        pass_module = self.get_pass_module_config(pass_type)
+        return pass_module.module_path.startswith("olive.passes.openvino")
+
+    def is_pytorch_module(self, pass_type: str) -> bool:
+        pass_module = self.get_pass_module_config(pass_type)
+        return pass_module.module_path.startswith("olive.passes.pytorch")
+
+    def is_qnn_module(self, pass_type: str) -> bool:
+        pass_module = self.get_pass_module_config(pass_type)
+        return pass_module.module_path.startswith("olive.passes.qnn")
+
+    def is_snpe_module(self, pass_type: str) -> bool:
+        pass_module = self.get_pass_module_config(pass_type)
+        return pass_module.module_path.startswith("olive.passes.snpe")

olive/passes/olive_pass.py

Lines changed: 1 addition & 5 deletions
@@ -194,11 +194,7 @@ def default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConf
         return config

     @classmethod
-    def validate_config(
-        cls,
-        config: type[BasePassConfig],
-        accelerator_spec: AcceleratorSpec,
-    ) -> bool:
+    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
         """Validate the input config for the pass."""
         return True

olive/passes/onnx/io_datatype_converter.py

Lines changed: 13 additions & 0 deletions
@@ -138,6 +138,19 @@ def _verify_elem_type(self, elem_type):
             "for details."
         )

+    @classmethod
+    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
+        if not super().validate_config(config, accelerator_spec):
+            return False
+
+        if config.target_dtype == onnx.TensorProto.FLOAT16 and (
+            accelerator_spec.execution_provider in {"JsExecutionProvider", "WebGpuExecutionProvider"}
+        ):
+            logger.info("Web execution providers don't support fp16.")
+            return False
+
+        return True
+
     def _run_for_config(
         self, model: ONNXModelHandler, config: type[BasePassConfig], output_model_path: str
     ) -> ONNXModelHandler:
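
The new gate is pure logic over the config and accelerator spec, so it can be exercised in isolation; a self-contained sketch (dtype and EP names copied from the diff, the surrounding Pass machinery stubbed out):

    import onnx

    WEB_EPS = {"JsExecutionProvider", "WebGpuExecutionProvider"}

    def io_converter_config_is_valid(target_dtype: int, execution_provider: str) -> bool:
        # Mirrors the validate_config above: fp16 conversion is rejected on web EPs.
        return not (target_dtype == onnx.TensorProto.FLOAT16 and execution_provider in WEB_EPS)

    assert not io_converter_config_is_valid(onnx.TensorProto.FLOAT16, "WebGpuExecutionProvider")
    assert io_converter_config_is_valid(onnx.TensorProto.FLOAT16, "CPUExecutionProvider")
    assert io_converter_config_is_valid(onnx.TensorProto.FLOAT, "JsExecutionProvider")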

olive/passes/onnx/peephole_optimizer.py

Lines changed: 11 additions & 0 deletions
@@ -266,6 +266,17 @@ class OnnxPeepholeOptimizer(Pass):
     def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassConfigParam]:
         return get_external_data_config()

+    @classmethod
+    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
+        if not super().validate_config(config, accelerator_spec):
+            return False
+
+        if accelerator_spec.execution_provider == "QNNExecutionProvider":
+            logger.info("QNNExecutionProvider doesn't support optimized model.")
+            return False
+
+        return True
+
     def _run_for_config(
         self, model: ONNXModelHandler, config: type[BasePassConfig], output_model_path: str
     ) -> ONNXModelHandler:

olive/passes/onnx/transformer_optimization.py

Lines changed: 5 additions & 5 deletions
@@ -136,11 +136,7 @@ def _default_config(cls, accelerator_spec: AcceleratorSpec) -> dict[str, PassCon
         return config

     @classmethod
-    def validate_config(
-        cls,
-        config: type[BasePassConfig],
-        accelerator_spec: AcceleratorSpec,
-    ) -> bool:
+    def validate_config(cls, config: type[BasePassConfig], accelerator_spec: AcceleratorSpec) -> bool:
         if not super().validate_config(config, accelerator_spec):
             return False

@@ -157,6 +153,10 @@ def validate_config(
         if accelerator_spec.execution_provider == "CPUExecutionProvider":
             logger.info("CPUExecutionProvider does not support float16 very well, please avoid to use float16.")
             return False
+
+        if accelerator_spec.execution_provider == "QNNExecutionProvider":
+            logger.info("QNNExecutionProvider doesn't support optimized model.")
+            return False
         if not config.float16 and config.use_gqa:
             logger.info("use_gqa is only supported when float16 is True.")
             return False
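
This is the same QNN gate added to peephole_optimizer.py above; both passes now bow out for QNNExecutionProvider. If more passes grow the check, it could live behind one shared predicate; a hypothetical sketch (not in this commit):

    # Hypothetical shared predicate for the gate duplicated across the two passes.
    EPS_WITHOUT_OPTIMIZED_MODEL_SUPPORT = {"QNNExecutionProvider"}

    def ep_supports_optimized_model(execution_provider: str) -> bool:
        return execution_provider not in EPS_WITHOUT_OPTIMIZED_MODEL_SUPPORT

    assert not ep_supports_optimized_model("QNNExecutionProvider")
    assert ep_supports_optimized_model("CUDAExecutionProvider")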

test/unit_test/auto_optimizer/test_auto_optimizer.py

Lines changed: 56 additions & 109 deletions
@@ -2,26 +2,21 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 # --------------------------------------------------------------------------
-from pathlib import Path

 import pytest
-import yaml

 from olive.auto_optimizer import AutoOptimizer, AutoOptimizerConfig
-from olive.auto_optimizer.template_mapping import get_pass_flows_by_accelerator_ep_precision
-from olive.evaluator.metric import AccuracySubType
-from olive.evaluator.olive_evaluator import OliveEvaluatorConfig
-from olive.hardware import DEFAULT_CPU_ACCELERATOR, DEFAULT_GPU_CUDA_ACCELERATOR, DEFAULT_GPU_TRT_ACCELERATOR
+from olive.constants import Precision
+from olive.hardware import DEFAULT_CPU_ACCELERATOR, DEFAULT_GPU_CUDA_ACCELERATOR
 from olive.model import ModelConfig
-from test.unit_test.utils import get_accuracy_metric, get_glue_huggingface_data_config

 # pylint: disable=attribute-defined-outside-init


 class TestAutoOptimizer:
     @pytest.fixture(autouse=True)
     def setup(self):
-        self.input_model_config = ModelConfig(
+        self.model_config = ModelConfig(
             type="PyTorchModel",
             config={
                 "hf_config": {
@@ -30,116 +25,68 @@ def setup(self):
                 }
             },
         )
-        self.data_configs = [get_glue_huggingface_data_config()]

     @pytest.mark.parametrize(
-        ("accelerator_spec", "auto_optimizer_config", "expected_cuda_fp16", "expected_trt_fp16"),
+        ("optimizer_config", "expected_pass_types"),
         [
             (
-                # running on gpu-cuda, enable cuda fp16, disable trt fp16
-                DEFAULT_GPU_CUDA_ACCELERATOR,
-                None,
-                True,
-                False,
+                AutoOptimizerConfig(
+                    precision=Precision.FP16,
+                    accelerator=DEFAULT_CPU_ACCELERATOR,
+                    finetune=False,
+                ),
+                {
+                    "CaptureSplitInfo",
+                    "ExtractAdapters",
+                    "MatMulNBitsToQDQ",
+                    "ModelBuilder",
+                    "OnnxIODataTypeConverter",
+                    "QuaRot",
+                    "SpinQuant",
+                    "SplitModel",
+                },
             ),
             (
-                # running on gpu-trt, disable cuda fp16, enable trt fp16
-                DEFAULT_GPU_TRT_ACCELERATOR,
-                None,
-                False,
-                True,
+                AutoOptimizerConfig(
+                    precision=Precision.FP32,
+                    accelerator=DEFAULT_CPU_ACCELERATOR,
+                    finetune=False,
+                ),
+                {
+                    "CaptureSplitInfo",
+                    "ExtractAdapters",
+                    "MatMulNBitsToQDQ",
+                    "ModelBuilder",
+                    "OnnxIODataTypeConverter",
+                    "QuaRot",
+                    "SpinQuant",
+                    "SplitModel",
+                },
             ),
-        ],
-    )
-    def test_regulate_fp16(self, accelerator_spec, auto_optimizer_config, expected_cuda_fp16, expected_trt_fp16):
-        metrics = [get_accuracy_metric(AccuracySubType.ACCURACY_SCORE, goal_type="max-degradation")]
-        for metric in metrics:
-            metric.data_config = self.data_configs[0]
-        evaluator_config = OliveEvaluatorConfig(metrics=metrics)
-        auto_optimizer = AutoOptimizer(
-            input_model_config=self.input_model_config,
-            evaluator_config=evaluator_config,
-            accelerator_spec=accelerator_spec,
-            auto_optimizer_config=auto_optimizer_config,
-            data_configs=self.data_configs,
-        )
-
-        pass_config, _ = auto_optimizer.suggest()
-        trans_opt_name = "OrtTransformerOptimization_cuda_fp16" if expected_cuda_fp16 else "OrtTransformersOptimization"
-        session_params_opt_name = "OrtSessionParamsTuning_trt_fp16" if expected_trt_fp16 else "OrtSessionParamsTuning"
-        assert pass_config[trans_opt_name]["config"]["float16"] == expected_cuda_fp16
-        assert pass_config[session_params_opt_name]["config"]["enable_cuda_graph"] == expected_cuda_fp16
-        assert pass_config[session_params_opt_name]["config"]["trt_fp16_enable"] == expected_trt_fp16
-
-    @pytest.mark.parametrize(
-        ("metrics_configs", "accelerator_spec", "auto_optimizer_config", "expected_pass_flows"),
-        [
             (
-                [{"args": [AccuracySubType.ACCURACY_SCORE], "kwargs": {"goal_type": "max-degradation"}}],
-                DEFAULT_CPU_ACCELERATOR,
-                None,
-                [
-                    ["OnnxConversion", "OrtTransformersOptimization", "OrtSessionParamsTuning"],
-                    ["OnnxConversion", "OrtTransformersOptimization", "OnnxQuantization", "OrtSessionParamsTuning"],
-                    ["OnnxConversion", "OrtTransformersOptimization", "IncQuantization", "OrtSessionParamsTuning"],
-                    ["OnnxConversion", "OrtTransformersOptimization", "OnnxMatMul4Quantizer", "OrtSessionParamsTuning"],
-                    ["ModelBuilder_fp32", "OrtSessionParamsTuning"],
-                    ["ModelBuilder_int4", "OrtSessionParamsTuning"],
-                    ["ModelBuilder_int8", "OrtSessionParamsTuning"],
-                    ["ModelBuilder_fp16", "OrtSessionParamsTuning"],
-                ],
-            ),
-            (
-                # cannot tolerate accuracy drop, then skip quantization
-                [
-                    {
-                        "args": [AccuracySubType.ACCURACY_SCORE],
-                        "kwargs": {"goal_type": "max-degradation", "goal_value": 0},
-                    }
-                ],
-                DEFAULT_CPU_ACCELERATOR,
-                AutoOptimizerConfig(precisions=["fp32"]),
-                [
-                    ["OnnxConversion", "OrtTransformersOptimization", "OrtSessionParamsTuning"],
-                    ["ModelBuilder_fp32", "OrtSessionParamsTuning"],
-                ],
-            ),
-            (
-                # running on gpu-cuda, skip quantization
-                [{"args": [AccuracySubType.ACCURACY_SCORE], "kwargs": {"goal_type": "max-degradation"}}],
-                DEFAULT_GPU_CUDA_ACCELERATOR,
-                AutoOptimizerConfig(precisions=["fp16"], excluded_passes=["ModelBuilder"]),
-                [
-                    ["OnnxConversion", "OrtTransformerOptimization_cuda_fp16", "OrtSessionParamsTuning"],
-                    ["OnnxConversion", "OrtTransformersOptimization", "OrtMixedPrecision", "OrtSessionParamsTuning"],
-                ],
+                AutoOptimizerConfig(
+                    precision=Precision.FP16,
+                    accelerator=DEFAULT_GPU_CUDA_ACCELERATOR,
+                    finetune=False,
+                    excluded_passes=["ModelBuilder"]
+                ),
+                {
+                    "CaptureSplitInfo",
+                    "ExtractAdapters",
+                    "MatMulNBitsToQDQ",
+                    "OnnxIODataTypeConverter",
+                    "QuaRot",
+                    "SpinQuant",
+                    "SplitModel",
+                },
             ),
         ],
    )
-    def test_regulate_pass(self, metrics_configs, accelerator_spec, auto_optimizer_config, expected_pass_flows):
-        metrics = [get_accuracy_metric(*mc["args"], **mc["kwargs"]) for mc in metrics_configs]
-        for metric in metrics:
-            metric.data_config = self.data_configs[0]
-        evaluator_config = OliveEvaluatorConfig(metrics=metrics)
-        auto_optimizer = AutoOptimizer(
-            input_model_config=self.input_model_config,
-            evaluator_config=evaluator_config,
-            accelerator_spec=accelerator_spec,
-            auto_optimizer_config=auto_optimizer_config,
-            data_configs=self.data_configs,
-        )
-
-        pass_config, pass_flows = auto_optimizer.suggest()
-        assert pass_config, "Expect pass_config to be populated by auto optimizer"
-        assert sorted(pass_flows) == sorted(expected_pass_flows)
-
-    def test_pass_flows_generation_opt_level_0(self):
-        pass_flows_map = Path(__file__).parent / "mock_data" / "available_pass_flows.yaml"
-        with pass_flows_map.open() as f:
-            pass_flows_map = yaml.safe_load(f)["mapping"]
+    def test_generate_run_passes_configs(self, optimizer_config, expected_pass_types):
+        auto_optimizer = AutoOptimizer(model_config=self.model_config, optimizer_config=optimizer_config)
+        pass_configs = auto_optimizer.generate_run_passes_configs()
+        assert pass_configs, "Expect pass_configs to be populated by auto optimizer"

-        for k, pf in pass_flows_map.items():
-            k_list = k.split("_")
-            accelerator, ep, precision = k_list[0], k_list[1], k_list[2]
-            rls_pf = get_pass_flows_by_accelerator_ep_precision(0, accelerator, ep, precision)
-            assert sorted(rls_pf) == sorted(pf)
+        actual_pass_types = {pc.type for _, pcs in pass_configs.items() for pc in pcs}
+        expected_pass_types = {pt.lower() for pt in expected_pass_types}
+        assert sorted(actual_pass_types) == sorted(expected_pass_types)
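
The rewritten test doubles as a usage example for the reworked AutoOptimizer API. A minimal driver in the same vein (a sketch only: it assumes the post-commit constructor and generate_run_passes_configs shown above, and the hf_config contents, elided in this diff, are a placeholder to fill in):

    from olive.auto_optimizer import AutoOptimizer, AutoOptimizerConfig
    from olive.constants import Precision
    from olive.hardware import DEFAULT_CPU_ACCELERATOR
    from olive.model import ModelConfig

    model_config = ModelConfig(
        type="PyTorchModel",
        config={"hf_config": {"model_name": "your-org/your-model"}},  # placeholder model
    )
    optimizer_config = AutoOptimizerConfig(
        precision=Precision.FP32,
        accelerator=DEFAULT_CPU_ACCELERATOR,
        finetune=False,
    )

    auto_optimizer = AutoOptimizer(model_config=model_config, optimizer_config=optimizer_config)
    pass_configs = auto_optimizer.generate_run_passes_configs()
    for group, configs in pass_configs.items():
        print(group, [pass_config.type for pass_config in configs])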
