2
2
# Copyright (c) Microsoft Corporation. All rights reserved.
3
3
# Licensed under the MIT License.
4
4
# --------------------------------------------------------------------------
5
- from pathlib import Path
6
5
7
6
import pytest
8
- import yaml
9
7
10
8
from olive .auto_optimizer import AutoOptimizer , AutoOptimizerConfig
11
- from olive .auto_optimizer .template_mapping import get_pass_flows_by_accelerator_ep_precision
12
- from olive .evaluator .metric import AccuracySubType
13
- from olive .evaluator .olive_evaluator import OliveEvaluatorConfig
14
- from olive .hardware import DEFAULT_CPU_ACCELERATOR , DEFAULT_GPU_CUDA_ACCELERATOR , DEFAULT_GPU_TRT_ACCELERATOR
9
+ from olive .constants import Precision
10
+ from olive .hardware import DEFAULT_CPU_ACCELERATOR , DEFAULT_GPU_CUDA_ACCELERATOR
15
11
from olive .model import ModelConfig
16
- from test .unit_test .utils import get_accuracy_metric , get_glue_huggingface_data_config
17
12
18
13
# pylint: disable=attribute-defined-outside-init
19
14
20
15
21
16
class TestAutoOptimizer :
22
17
@pytest .fixture (autouse = True )
23
18
def setup (self ):
24
- self .input_model_config = ModelConfig (
19
+ self .model_config = ModelConfig (
25
20
type = "PyTorchModel" ,
26
21
config = {
27
22
"hf_config" : {
@@ -30,116 +25,68 @@ def setup(self):
30
25
}
31
26
},
32
27
)
33
- self .data_configs = [get_glue_huggingface_data_config ()]
34
28
35
29
@pytest.mark.parametrize(
    ("optimizer_config", "expected_pass_types"),
    [
        # CPU accelerator, fp16 precision, no fine-tuning.
        (
            AutoOptimizerConfig(
                precision=Precision.FP16,
                accelerator=DEFAULT_CPU_ACCELERATOR,
                finetune=False,
            ),
            {
                "CaptureSplitInfo",
                "ExtractAdapters",
                "MatMulNBitsToQDQ",
                "ModelBuilder",
                "OnnxIODataTypeConverter",
                "QuaRot",
                "SpinQuant",
                "SplitModel",
            },
        ),
        # CPU accelerator, fp32 precision, no fine-tuning.
        (
            AutoOptimizerConfig(
                precision=Precision.FP32,
                accelerator=DEFAULT_CPU_ACCELERATOR,
                finetune=False,
            ),
            {
                "CaptureSplitInfo",
                "ExtractAdapters",
                "MatMulNBitsToQDQ",
                "ModelBuilder",
                "OnnxIODataTypeConverter",
                "QuaRot",
                "SpinQuant",
                "SplitModel",
            },
        ),
        # CUDA accelerator, fp16 precision, with ModelBuilder explicitly excluded:
        # the expected set is the same as above minus "ModelBuilder".
        (
            AutoOptimizerConfig(
                precision=Precision.FP16,
                accelerator=DEFAULT_GPU_CUDA_ACCELERATOR,
                finetune=False,
                excluded_passes=["ModelBuilder"],
            ),
            {
                "CaptureSplitInfo",
                "ExtractAdapters",
                "MatMulNBitsToQDQ",
                "OnnxIODataTypeConverter",
                "QuaRot",
                "SpinQuant",
                "SplitModel",
            },
        ),
    ],
)
def test_generate_run_passes_configs(self, optimizer_config, expected_pass_types):
    """Check which pass types the auto optimizer generates for a given config.

    Builds an ``AutoOptimizer`` from ``self.model_config`` and the parametrized
    ``optimizer_config``, calls ``generate_run_passes_configs()``, and asserts
    that the set of ``type`` values found across all generated pass configs
    equals ``expected_pass_types`` after lower-casing the expected names.

    Args:
        optimizer_config: The ``AutoOptimizerConfig`` under test.
        expected_pass_types: Set of pass type names (any casing) expected in
            the generated configs.
    """
    auto_optimizer = AutoOptimizer(model_config=self.model_config, optimizer_config=optimizer_config)
    pass_configs = auto_optimizer.generate_run_passes_configs()
    assert pass_configs, "Expect pass_configs to be populated by auto optimizer"

    # Only the grouped pass configs matter here, so iterate the values directly.
    actual_pass_types = {pc.type for pcs in pass_configs.values() for pc in pcs}
    # Expected names are lower-cased before comparing; presumably the generated
    # ``type`` values are already lower-case -- confirm against AutoOptimizer.
    # Comparing sets directly lets pytest report missing/extra items.
    assert actual_pass_types == {pt.lower() for pt in expected_pass_types}
0 commit comments