From 717e6e2586196624c95f044f866fd986ce74c95e Mon Sep 17 00:00:00 2001
From: vasiliy
Date: Mon, 6 Oct 2025 05:55:58 -0700
Subject: [PATCH] refactor torchao qwen module filtering

Summary:

Make the module filtering opt-out instead of opt-in, to more closely
match llm-compressor.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .../quantize_hf_model_with_torchao.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/hf_torchao_vllm/quantize_hf_model_with_torchao.py b/hf_torchao_vllm/quantize_hf_model_with_torchao.py
index 38114a9..777084f 100644
--- a/hf_torchao_vllm/quantize_hf_model_with_torchao.py
+++ b/hf_torchao_vllm/quantize_hf_model_with_torchao.py
@@ -73,14 +73,21 @@ def get_quantization_config(args):
     expert_fqn_to_config = {}
     # TODO(future PR): this is annoying, I should be able to use a regex here
     for layer_idx in range(24):
-        for expert_idx in range(60):
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.gate_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.up_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.down_proj"] = single_config
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.q_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.k_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.v_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.o_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.gate"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.gate_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.up_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.down_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert_gate"] = None
+    expert_fqn_to_config[f"lm_head"] = None
     module_fqn_to_config = ModuleFqnToConfig({
-        "_default": single_config,
+        "_default": single_config,
         **expert_fqn_to_config,
     })
+
     return TorchAoConfig(
         quant_type=module_fqn_to_config,
     )
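
Note for reviewers: below is a minimal sketch of the opt-out pattern this patch moves to. It assumes torchao's ModuleFqnToConfig semantics where the "_default" entry applies to every module not listed explicitly and a value of None exempts a module from quantization. The base config (Float8DynamicActivationFloat8WeightConfig) and the skip_fqns helper list are illustrative, not part of this patch.

```python
# Sketch of opt-out module filtering with torchao + transformers (assumed
# semantics: "_default" covers every module not listed; None means "do not
# quantize this module"). The chosen base config is illustrative only.
from torchao.quantization import (
    Float8DynamicActivationFloat8WeightConfig,
    ModuleFqnToConfig,
)
from transformers import TorchAoConfig

single_config = Float8DynamicActivationFloat8WeightConfig()

# Opt-out: everything is quantized via "_default" unless its fully
# qualified name (FQN) is explicitly mapped to None.
skip_fqns = ["lm_head"]
for layer_idx in range(24):  # 24 decoder layers in this Qwen variant
    # keep the MoE router in high precision
    skip_fqns.append(f"model.layers.{layer_idx}.mlp.gate")

module_fqn_to_config = ModuleFqnToConfig(
    {"_default": single_config, **{fqn: None for fqn in skip_fqns}}
)
quantization_config = TorchAoConfig(quant_type=module_fqn_to_config)
```

Compared to the old opt-in approach (enumerating every expert projection to quantize), opting out only requires listing the modules to keep in high precision, so newly added module types are quantized by default.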