From 717e6e2586196624c95f044f866fd986ce74c95e Mon Sep 17 00:00:00 2001
From: vasiliy
Date: Mon, 6 Oct 2025 05:55:58 -0700
Subject: [PATCH] refactor torchao qwen module filtering

Summary:

Make the module filtering opt-out instead of opt-in, to more closely
match llm-compressor.

Test Plan:

Reviewers:

Subscribers:

Tasks:

Tags:
---
 .../quantize_hf_model_with_torchao.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/hf_torchao_vllm/quantize_hf_model_with_torchao.py b/hf_torchao_vllm/quantize_hf_model_with_torchao.py
index 38114a9..777084f 100644
--- a/hf_torchao_vllm/quantize_hf_model_with_torchao.py
+++ b/hf_torchao_vllm/quantize_hf_model_with_torchao.py
@@ -73,14 +73,21 @@ def get_quantization_config(args):
     expert_fqn_to_config = {}
     # TODO(future PR): this is annoying, I should be able to use a regex here
     for layer_idx in range(24):
-        for expert_idx in range(60):
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.gate_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.up_proj"] = single_config
-            expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.experts.{expert_idx}.down_proj"] = single_config
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.q_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.k_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.v_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.self_attn.o_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.gate"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.gate_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.up_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert.down_proj"] = None
+        expert_fqn_to_config[f"model.layers.{layer_idx}.mlp.shared_expert_gate"] = None
+    expert_fqn_to_config[f"lm_head"] = None
     module_fqn_to_config = ModuleFqnToConfig({
-        "_default": single_config,
+        "_default": single_config,
         **expert_fqn_to_config,
     })
+
     return TorchAoConfig(
         quant_type=module_fqn_to_config,
     )
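
Note for reviewers: below is a minimal sketch of the opt-out pattern this patch moves to. It assumes torchao's ModuleFqnToConfig semantics where the "_default" entry applies to every module not listed explicitly and a value of None exempts a module from quantization. The base config (Float8DynamicActivationFloat8WeightConfig) and the skip_fqns helper list are illustrative, not part of this patch.

```python
# Sketch of opt-out module filtering with torchao + transformers (assumed
# semantics: "_default" covers every module not listed; None means "do not
# quantize this module"). The chosen base config is illustrative only.
from torchao.quantization import (
    Float8DynamicActivationFloat8WeightConfig,
    ModuleFqnToConfig,
)
from transformers import TorchAoConfig

single_config = Float8DynamicActivationFloat8WeightConfig()

# Opt-out: everything is quantized via "_default" unless its fully
# qualified name (FQN) is explicitly mapped to None.
skip_fqns = ["lm_head"]
for layer_idx in range(24):  # 24 decoder layers in this Qwen variant
    # keep the MoE router in high precision
    skip_fqns.append(f"model.layers.{layer_idx}.mlp.gate")

module_fqn_to_config = ModuleFqnToConfig(
    {"_default": single_config, **{fqn: None for fqn in skip_fqns}}
)
quantization_config = TorchAoConfig(quant_type=module_fqn_to_config)
```

Compared to the old opt-in approach (enumerating every expert projection to quantize), opting out only requires listing the modules to keep in high precision, so newly added module types are quantized by default.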