Refined QDQ recipes of BERT/CLIP/VIT for QC and AMD. #1797
@@ -0,0 +1 @@
3.12.9
@@ -0,0 +1,123 @@
{
  "input_model": {
    "type": "PytorchModel",
    "model_path": "google-bert/bert-base-multilingual-cased",
    "io_config": {
      "input_names": [ "input_ids", "attention_mask", "token_type_ids" ],
      "input_shapes": [ [ 1, 512 ], [ 1, 512 ], [ 1, 512 ] ],
      "input_types": [ "int32", "int32", "int32" ],
      "output_names": [ "logits" ]
    },
    "model_loader": "load_bert_nsp_model",
    "model_script": "google_bert_script.py"
  },
  "passes": {
    "conversion": { "type": "OnnxConversion", "target_opset": 20, "dynamic": true, "use_dynamo_exporter": false },
Review comment: the dynamic and use_dynamo_exporter options are not required.
"to_fixed_shape": { | ||
"type": "DynamicToFixedShape", | ||
"dim_param": [ "batch_size", "sequence_length" ], | ||
"dim_value": [ 1, 512 ] | ||
}, | ||
"surgery": { | ||
"type": "GraphSurgeries", | ||
"surgeries": [ | ||
{ "surgeon": "ReplaceAttentionMaskValue", "replacement": -100.0 }, | ||
{ "surgeon": "MatMulAddToGemm" } | ||
Review comment: this works well for QC; not sure if it is recommended for AMD.
      ]
    },
    "transformer_optimizer": {
Review comment: I think this might not be required? With opset 20 the model is exported with the LayerNorm operator, and quant_preprocess in OnnxStaticQuantization performs Gelu fusion automatically.
"type": "OrtTransformersOptimization", | ||
"model_type": "bert", | ||
"opt_level": 1, | ||
"optimization_options": { | ||
"enable_gelu": true, | ||
"enable_bias_gelu": false, | ||
"enable_layer_norm": true, | ||
"enable_skip_layer_norm": false, | ||
"enable_bias_skip_layer_norm": false, | ||
"enable_attention": false | ||
} | ||
}, | ||
"quantization": { | ||
"type": "OnnxStaticQuantization", | ||
"data_config": "calib_data", | ||
"quant_preprocess": true, | ||
"activation_type": "uint16", | ||
"precision": "uint8" | ||
}, | ||
"addmetadata": { | ||
"type": "VitisAIAddMetaData", | ||
"config_meta_data_keys": [ "architectures", "model_type" ], | ||
"activation_type": "uint16", | ||
"weight_type": "uint8", | ||
"quant_type": "OnnxStaticQuantization" | ||
} | ||
}, | ||
"data_configs": [ | ||
{ | ||
"name": "calib_data", | ||
"type": "HuggingfaceContainer", | ||
"load_dataset_config": { "data_name": "glue", "subset": "mrpc", "split": "train[:12]" }, | ||
"pre_process_data_config": { | ||
"model_name": "google-bert/bert-base-multilingual-cased", | ||
"input_cols": [ "sentence1", "sentence2" ], | ||
"max_length": 512, | ||
"padding": "max_length" | ||
}, | ||
"dataloader_config": { "batch_size": 1 } | ||
}, | ||
{ | ||
"name": "wiki_data", | ||
"type": "HuggingfaceContainer", | ||
"load_dataset_config": { | ||
"type": "dataset_to_nsp_dataset", | ||
"data_name": "wikitext", | ||
"subset": "wikitext-2-raw-v1", | ||
"split": "test", | ||
"input_cols": [ "sentence1", "sentence2" ], | ||
"label_col": "label" | ||
}, | ||
"pre_process_data_config": { | ||
"model_name": "google-bert/bert-base-multilingual-cased", | ||
"input_cols": [ "sentence1", "sentence2" ], | ||
"label_col": "label", | ||
"max_length": 512, | ||
"padding": "max_length" | ||
}, | ||
"post_process_data_config": { "type": "bert_scl_post_process" }, | ||
"dataloader_config": { "batch_size": 1 }, | ||
"user_script": "google_bert_script.py", | ||
"script_dir": "." | ||
} | ||
], | ||
"evaluators": { | ||
"nsp_evaluator": { | ||
"metrics": [ | ||
{ | ||
"name": "nsp", | ||
"type": "accuracy", | ||
"backend": "huggingface_metrics", | ||
"data_config": "wiki_data", | ||
"sub_types": [ { "name": "accuracy", "priority": 1 }, { "name": "f1" } ] | ||
}, | ||
{ "name": "latency", "type": "latency", "sub_types": [ { "name": "avg" } ] } | ||
] | ||
}, | ||
"performance": { | ||
"metrics": [ | ||
{ | ||
"name": "latency", | ||
"type": "latency", | ||
"sub_types": [ | ||
{ "name": "avg", "priority": 1, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }, | ||
{ "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } } | ||
] | ||
} | ||
] | ||
} | ||
}, | ||
"clean_cache": true, | ||
"clean_evaluation_cache": true, | ||
"evaluate_input_model": false, | ||
"output_dir": "models/google/bert_base_multilingual_cased" | ||
} |
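The recipe above loads the PyTorch model through a custom loader, load_bert_nsp_model in google_bert_script.py, whose contents are not shown in this diff. As a rough illustration of how such a loader plugs into Olive's PytorchModel handler, here is a minimal sketch; it assumes the loader receives the config's model_path and simply wraps the transformers next-sentence-prediction head, which may differ from the actual script:

# Sketch of the loader in google_bert_script.py -- hypothetical, for
# illustration only; the real script in this PR may do more (e.g. set up
# the dataset_to_nsp_dataset and bert_scl_post_process helpers).
import torch
from transformers import BertForNextSentencePrediction

def load_bert_nsp_model(model_path: str) -> torch.nn.Module:
    # Olive passes in the "model_path" from the input_model section, here
    # the Hugging Face hub id "google-bert/bert-base-multilingual-cased".
    model = BertForNextSentencePrediction.from_pretrained(model_path)
    model.eval()  # inference mode for export and calibration
    return model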
@@ -0,0 +1,109 @@
{
  "input_model": {
    "type": "HfModel",
    "model_path": "google-bert/bert-large-cased-whole-word-masking-finetuned-squad",
    "task": "question-answering",
    "io_config": {
      "input_names": [ "input_ids", "attention_mask" ],
      "input_shapes": [ [ 1, 512 ], [ 1, 512 ] ],
      "input_types": [ "int32", "int32" ],
      "output_names": [ "start_logits", "end_logits" ]
    }
  },
  "passes": {
    "conversion": { "type": "OnnxConversion", "target_opset": 20, "dynamic": true, "use_dynamo_exporter": false },
    "to_fixed_shape": {
      "type": "DynamicToFixedShape",
      "dim_param": [ "batch_size", "sequence_length" ],
      "dim_value": [ 1, 512 ]
    },
    "surgery": {
      "type": "GraphSurgeries",
      "surgeries": [
        { "surgeon": "ReplaceAttentionMaskValue", "replacement": -100.0 },
        { "surgeon": "MatMulAddToGemm" }
      ]
    },
    "transformer_optimizer": {
      "type": "OrtTransformersOptimization",
      "model_type": "bert",
      "opt_level": 1,
      "optimization_options": {
        "enable_gelu": true,
        "enable_bias_gelu": false,
        "enable_layer_norm": true,
        "enable_skip_layer_norm": false,
        "enable_bias_skip_layer_norm": false,
        "enable_attention": false
      }
    },
    "quantization": {
      "type": "OnnxStaticQuantization",
      "data_config": "calib_data",
      "quant_preprocess": true,
      "activation_type": "uint16",
      "precision": "uint8"
    },
    "addmetadata": {
      "type": "VitisAIAddMetaData",
      "config_meta_data_keys": [ "architectures", "model_type" ],
      "activation_type": "uint16",
      "weight_type": "uint8",
      "quant_type": "OnnxStaticQuantization"
    }
  },
  "data_configs": [
    {
      "name": "calib_data",
      "type": "HuggingfaceContainer",
      "load_dataset_config": { "data_name": "squad", "split": "train[:12]" },
      "pre_process_data_config": {
        "input_cols": [ "question", "context" ],
        "label_col": "id",
        "padding": "max_length",
        "max_length": 512
      },
      "dataloader_config": { "batch_size": 1 },
      "user_script": "google_bert_script.py"
    }
  ],
  "evaluators": {
    "squad_evaluator": {
      "metrics": [
        {
          "name": "squad",
          "type": "custom",
          "sub_types": [
            { "name": "exact_match", "priority": 1, "higher_is_better": true },
            { "name": "f1", "higher_is_better": true }
          ],
          "user_config": {
            "evaluate_func": "eval_squad",
            "evaluate_func_kwargs": {
              "model_name": "google-bert/bert-large-cased-whole-word-masking-finetuned-squad",
              "dataset_config": { "data_name": "squad", "split": "validation" }
            },
            "user_script": "google_bert_script.py"
          }
        },
        { "name": "latency", "type": "latency", "sub_types": [ { "name": "avg" } ] }
      ]
    },
    "performance": {
      "metrics": [
        {
          "name": "latency",
          "type": "latency",
          "sub_types": [
            { "name": "avg", "priority": 1, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } },
            { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }
          ]
        }
      ]
    }
  },
  "clean_cache": true,
  "clean_evaluation_cache": true,
  "evaluate_input_model": false,
  "output_dir": "models/google/bert_large_cased_qa"
}
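Both recipes are self-contained Olive workflow configs, so once the user scripts are in place they can be executed directly. A minimal sketch, assuming Olive's Python entry point olive.workflows.run and a placeholder config file name (the actual file paths are not shown in this diff):

# run_recipe.py -- sketch; "bert_qdq_config.json" is a placeholder name.
from olive.workflows import run as olive_run

# Runs the pass pipeline (conversion -> fixed shapes -> graph surgery ->
# transformer optimization -> static QDQ quantization -> VitisAI metadata)
# and then the configured evaluators, writing results under "output_dir".
olive_run("bert_qdq_config.json")

The equivalent CLI form, olive run --config bert_qdq_config.json, should behave the same way.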
Review comment: since these are all new files, are we still keeping the old QDQ config JSONs?
Review comment: bumping this again. Are we keeping both the old and new QDQ configs?
Review comment: @tezheng
Review comment: We have already released parts of these in AITK, and I created PR #1893 to align the recipes. It covers what is part of this PR: CLIP text + vision, BERT, and ViT QDQ. Please take a look @jambayk, CC @tezheng. I updated the previous QDQ configs directly.
Review comment: For the others, we can take time to merge if that feels needed.