Skip to content

Commit 43ed468

Browse files
larryliu0820 authored and facebook-github-bot committed
Properly register quantized out ops into AOT export flow (#3449)
Summary: Currently are registering all ops specified in quantized.yaml into AOT, that causes double registration problem because some of them are already registered in https://github.com/pytorch/executorch/blob/main/exir/passes/_quant_patterns_and_replacements.py#L185-L188 This PR changes the way we do selective build for these quantized ops, by listing out individual ops that we want to register. We also updated the current CI job to be able to error out when this registration doesn't work. Pull Request resolved: #3449 Reviewed By: mergennachin Differential Revision: D56844556 Pulled By: larryliu0820 fbshipit-source-id: 7b77aa7e019543cbdcce02b6f6662acf31dbb3f4
1 parent 5d2a17b commit 43ed468

File tree

4 files changed

+25
-6
lines changed

4 files changed

+25
-6
lines changed

.github/workflows/pull.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,7 @@ jobs:
208208
209209
BUILD_TOOL=${{ matrix.build-tool }}
210210
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
211-
PYTHON_EXECUTABLE=python bash .ci/scripts/test_quantized_aot_lib.sh
211+
PYTHON_EXECUTABLE=python bash examples/xnnpack/quantization/test_quantize.sh "${BUILD_TOOL}" mv2
212212
213213
test-pybind-build-linux:
214214
name: test-pybind-build-linux

codegen/tools/gen_oplist.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,12 @@ def gen_oplist(
210210
source_name = None
211211
et_kernel_metadata = {}
212212
if root_ops:
213-
op_set.update(set(filter(lambda x: len(x) > 0, root_ops.split(","))))
213+
# decide delimiter
214+
delimiter = "," if "," in root_ops else " "
215+
print(root_ops)
216+
op_set.update(
217+
set(filter(lambda x: len(x) > 0, map(str.strip, root_ops.split(delimiter))))
218+
)
214219
et_kernel_metadata = merge_et_kernel_metadata(
215220
et_kernel_metadata, {op: ["default"] for op in op_set}
216221
)

examples/xnnpack/quantization/test_quantize.sh

+3-1
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,11 @@ test_cmake_quantization() {
5050
(rm -rf cmake-out \
5151
&& mkdir cmake-out \
5252
&& cd cmake-out \
53-
&& retry cmake -DBUCK2="$BUCK" \
53+
&& retry cmake \
5454
-DCMAKE_BUILD_TYPE=Release \
5555
-DEXECUTORCH_BUILD_XNNPACK="$EXECUTORCH_BUILD_XNNPACK" \
56+
-DEXECUTORCH_BUILD_QUANTIZED=ON \
57+
-DEXECUTORCH_BUILD_QUANTIZED_OPS_AOT=ON \
5658
-DCMAKE_PREFIX_PATH="$CMAKE_PREFIX_PATH" \
5759
-DPYTHON_EXECUTABLE="$PYTHON_EXECUTABLE" ..)
5860

kernels/quantized/CMakeLists.txt

+15-3
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,27 @@ message("Generated files ${gen_command_sources}")
5555
# dependency of the other(s). This is not allowed by the Xcode "new build
5656
# system".
5757
if(NOT CMAKE_GENERATOR STREQUAL "Xcode" AND EXECUTORCH_BUILD_QUANTIZED_OPS_AOT)
58-
gen_selected_ops(LIB_NAME "quantized_ops_aot_lib" OPS_SCHEMA_YAML
59-
"${_yaml_file}")
58+
set(_quantized_aot_ops
59+
"quantized_decomposed::add.out"
60+
"quantized_decomposed::choose_qparams.Tensor_out"
61+
"quantized_decomposed::dequantize_per_channel.out"
62+
"quantized_decomposed::dequantize_per_tensor.out"
63+
"quantized_decomposed::dequantize_per_tensor.Tensor_out"
64+
"quantized_decomposed::mixed_linear.out"
65+
"quantized_decomposed::mixed_mm.out"
66+
"quantized_decomposed::quantize_per_channel.out"
67+
"quantized_decomposed::quantize_per_tensor.out"
68+
"quantized_decomposed::quantize_per_tensor.Tensor_out")
69+
gen_selected_ops(LIB_NAME "quantized_ops_aot_lib" ROOT_OPS
70+
${_quantized_aot_ops})
6071
# Expect gen_selected_ops output file to be
61-
# quantized_aot_ops_lib/selected_operators.yaml
72+
# quantized_ops_aot_lib/selected_operators.yaml
6273
generate_bindings_for_kernels(LIB_NAME "quantized_ops_aot_lib"
6374
CUSTOM_OPS_YAML "${_yaml_file}")
6475
# Build a AOT library to register quantized ops into PyTorch. This is a hack.
6576
set(_quantized_sources
6677
${_quantized_kernels__srcs}
78+
${EXECUTORCH_ROOT}/kernels/portable/cpu/util/reduce_util.cpp
6779
${EXECUTORCH_ROOT}/runtime/core/exec_aten/util/tensor_util_aten.cpp)
6880
gen_custom_ops_aot_lib(LIB_NAME "quantized_ops_aot_lib" KERNEL_SOURCES
6981
"${_quantized_sources}")

0 commit comments

Comments
 (0)