Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/bert/qdq/.python-version
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
3.12.9
123 changes: 123 additions & 0 deletions examples/bert/qdq/google_bert_base_qdq.json
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since these are all new files, are we still keeping the old qdq config jsons?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

bumping this again. Are we keeping both the old and new qdq configs?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We have already released parts of these in AITK, and I created PR #1893 to align the recipes. It is part of this PR: CLIP text + vision, BERT, and ViT QDQ.
Please take a look @jambayk CC @tezheng

I updated the previous qdq configs directly

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For the others, we could take time to merge if we feel it's needed.

Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
{
  "input_model": {
    "type": "PytorchModel",
    "model_path": "google-bert/bert-base-multilingual-cased",
    "io_config": {
      "input_names": [ "input_ids", "attention_mask", "token_type_ids" ],
      "input_shapes": [ [ 1, 512 ], [ 1, 512 ], [ 1, 512 ] ],
      "input_types": [ "int32", "int32", "int32" ],
      "output_names": [ "logits" ]
    },
    "model_loader": "load_bert_nsp_model",
    "model_script": "google_bert_script.py"
  },
  "passes": {
    "conversion": { "type": "OnnxConversion", "target_opset": 20 },
    "to_fixed_shape": {
      "type": "DynamicToFixedShape",
      "dim_param": [ "batch_size", "sequence_length" ],
      "dim_value": [ 1, 512 ]
    },
    "surgery": {
      "type": "GraphSurgeries",
      "surgeries": [
        { "surgeon": "ReplaceAttentionMaskValue", "replacement": -100.0 },
        { "surgeon": "MatMulAddToGemm" }
      ]
    },
    "transformer_optimizer": {
      "type": "OrtTransformersOptimization",
      "model_type": "bert",
      "opt_level": 1,
      "optimization_options": {
        "enable_gelu": true,
        "enable_bias_gelu": false,
        "enable_layer_norm": true,
        "enable_skip_layer_norm": false,
        "enable_bias_skip_layer_norm": false,
        "enable_attention": false
      }
    },
    "quantization": {
      "type": "OnnxStaticQuantization",
      "data_config": "calib_data",
      "quant_preprocess": true,
      "activation_type": "uint16",
      "precision": "uint8"
    },
    "addmetadata": {
      "type": "VitisAIAddMetaData",
      "config_meta_data_keys": [ "architectures", "model_type" ],
      "activation_type": "uint16",
      "weight_type": "uint8",
      "quant_type": "OnnxStaticQuantization"
    }
  },
  "data_configs": [
    {
      "name": "calib_data",
      "type": "HuggingfaceContainer",
      "load_dataset_config": { "data_name": "glue", "subset": "mrpc", "split": "train[:12]" },
      "pre_process_data_config": {
        "model_name": "google-bert/bert-base-multilingual-cased",
        "input_cols": [ "sentence1", "sentence2" ],
        "max_length": 512,
        "padding": "max_length"
      },
      "dataloader_config": { "batch_size": 1 }
    },
    {
      "name": "wiki_data",
      "type": "HuggingfaceContainer",
      "load_dataset_config": {
        "type": "dataset_to_nsp_dataset",
        "data_name": "wikitext",
        "subset": "wikitext-2-raw-v1",
        "split": "test",
        "input_cols": [ "sentence1", "sentence2" ],
        "label_col": "label"
      },
      "pre_process_data_config": {
        "model_name": "google-bert/bert-base-multilingual-cased",
        "input_cols": [ "sentence1", "sentence2" ],
        "label_col": "label",
        "max_length": 512,
        "padding": "max_length"
      },
      "post_process_data_config": { "type": "bert_scl_post_process" },
      "dataloader_config": { "batch_size": 1 },
      "user_script": "google_bert_script.py",
      "script_dir": "."
    }
  ],
  "evaluators": {
    "nsp_evaluator": {
      "metrics": [
        {
          "name": "nsp",
          "type": "accuracy",
          "backend": "huggingface_metrics",
          "data_config": "wiki_data",
          "sub_types": [ { "name": "accuracy", "priority": 1 }, { "name": "f1" } ]
        },
        { "name": "latency", "type": "latency", "sub_types": [ { "name": "avg" } ] }
      ]
    },
    "performance": {
      "metrics": [
        {
          "name": "latency",
          "type": "latency",
          "sub_types": [
            { "name": "avg", "priority": 1, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } },
            { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }
          ]
        }
      ]
    }
  },
  "clean_cache": true,
  "clean_evaluation_cache": true,
  "evaluate_input_model": false,
  "output_dir": "models/google/bert_base_multilingual_cased"
}
109 changes: 109 additions & 0 deletions examples/bert/qdq/google_bert_large_qdq.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
{
  "input_model": {
    "type": "HfModel",
    "model_path": "google-bert/bert-large-cased-whole-word-masking-finetuned-squad",
    "task": "question-answering",
    "io_config": {
      "input_names": [ "input_ids", "attention_mask" ],
      "input_shapes": [ [ 1, 512 ], [ 1, 512 ] ],
      "input_types": [ "int32", "int32" ],
      "output_names": [ "start_logits", "end_logits" ]
    }
  },
  "passes": {
    "conversion": { "type": "OnnxConversion", "target_opset": 20 },
    "to_fixed_shape": {
      "type": "DynamicToFixedShape",
      "dim_param": [ "batch_size", "sequence_length" ],
      "dim_value": [ 1, 512 ]
    },
    "surgery": {
      "type": "GraphSurgeries",
      "surgeries": [
        { "surgeon": "ReplaceAttentionMaskValue", "replacement": -100.0 },
        { "surgeon": "MatMulAddToGemm" }
      ]
    },
    "transformer_optimizer": {
      "type": "OrtTransformersOptimization",
      "model_type": "bert",
      "opt_level": 1,
      "optimization_options": {
        "enable_gelu": true,
        "enable_bias_gelu": false,
        "enable_layer_norm": true,
        "enable_skip_layer_norm": false,
        "enable_bias_skip_layer_norm": false,
        "enable_attention": false
      }
    },
    "quantization": {
      "type": "OnnxStaticQuantization",
      "data_config": "calib_data",
      "quant_preprocess": true,
      "activation_type": "uint16",
      "precision": "uint8"
    },
    "addmetadata": {
      "type": "VitisAIAddMetaData",
      "config_meta_data_keys": [ "architectures", "model_type" ],
      "activation_type": "uint16",
      "weight_type": "uint8",
      "quant_type": "OnnxStaticQuantization"
    }
  },
  "data_configs": [
    {
      "name": "calib_data",
      "type": "HuggingfaceContainer",
      "load_dataset_config": { "data_name": "squad", "split": "train[:12]" },
      "pre_process_data_config": {
        "input_cols": [ "question", "context" ],
        "label_col": "id",
        "padding": "max_length",
        "max_length": 512
      },
      "dataloader_config": { "batch_size": 1 },
      "user_script": "google_bert_script.py"
    }
  ],
  "evaluators": {
    "squad_evaluator": {
      "metrics": [
        {
          "name": "squad",
          "type": "custom",
          "sub_types": [
            { "name": "exact_match", "priority": 1, "higher_is_better": true },
            { "name": "f1", "higher_is_better": true }
          ],
          "user_config": {
            "evaluate_func": "eval_squad",
            "evaluate_func_kwargs": {
              "model_name": "google-bert/bert-large-cased-whole-word-masking-finetuned-squad",
              "dataset_config": { "data_name": "squad", "split": "validation" }
            },
            "user_script": "google_bert_script.py"
          }
        },
        { "name": "latency", "type": "latency", "sub_types": [ { "name": "avg" } ] }
      ]
    },
    "performance": {
      "metrics": [
        {
          "name": "latency",
          "type": "latency",
          "sub_types": [
            { "name": "avg", "priority": 1, "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } },
            { "name": "p90", "metric_config": { "warmup_num": 20, "repeat_test_num": 100 } }
          ]
        }
      ]
    }
  },
  "clean_cache": true,
  "clean_evaluation_cache": true,
  "evaluate_input_model": false,
  "output_dir": "models/google/bert_large_cased_qa"
}
Loading
Oops, something went wrong.
Loading
Oops, something went wrong.