Skip to content

Commit

Permalink
Int8 Quantization fixes for quantizer rebased on 67c478e
Browse files Browse the repository at this point in the history
This will probably break lots of things
  • Loading branch information
pranav-prakash committed Feb 19, 2021
1 parent 73899db commit 5458ce9
Show file tree
Hide file tree
Showing 26 changed files with 1,918 additions and 1,094 deletions.
4 changes: 2 additions & 2 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ option(onnxruntime_USE_FEATURIZERS "Build ML Featurizers support" OFF)
option(onnxruntime_USE_SYSTOLIC "Whether to use Systolic to accelerate matmul" OFF)
option(onnxruntime_USE_HWACHA "Whether to build with Hwacha support" OFF)
option(onnxruntime_FOR_FIRESIM "Whether to build for Firesim benchmarking" OFF)
option(onnxruntime_SYSTOLIC_INT8 "If Systolic is enabled, whether to use for int8 ops" OFF)
option(onnxruntime_SYSTOLIC_FP32 "If Systolic is enabled, whether to use for fp32 ops" ON)
option(onnxruntime_SYSTOLIC_INT8 "If Systolic is enabled, whether to use for int8 ops" ON)
option(onnxruntime_SYSTOLIC_FP32 "If Systolic is enabled, whether to use for fp32 ops" OFF)
if(onnxruntime_SYSTOLIC_FP32)
message(STATUS "BUILDING FOR FP32 SYSTOLIC")
endif()
Expand Down
393 changes: 55 additions & 338 deletions systolic_runner/quantization/calibrate.py

Large diffs are not rendered by default.

485 changes: 485 additions & 0 deletions systolic_runner/quantization/quantizer/calibrate.py

Large diffs are not rendered by default.

129 changes: 129 additions & 0 deletions systolic_runner/quantization/quantizer/onnx_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import onnx
from .quant_utils import find_by_name
from pathlib import Path


class ONNXModel:
Expand Down Expand Up @@ -49,6 +50,10 @@ def get_initializer(self, name):
def remove_initializer(self, tensor):
    """Delete *tensor* from the graph's initializer list and drop the
    graph input of the same name, if one exists (older ONNX models list
    initializers as graph inputs, so the two must stay in sync)."""
    initializers = self.model.graph.initializer
    if tensor not in initializers:
        return
    initializers.remove(tensor)
    # At most one graph input can share the initializer's name; stop at the first.
    matching = [gi for gi in self.model.graph.input if gi.name == tensor.name]
    if matching:
        self.model.graph.input.remove(matching[0])

def remove_initializers(self, init_to_remove):
for initializer in init_to_remove:
Expand Down Expand Up @@ -125,3 +130,127 @@ def find_nodes_by_initializer(self, graph, initializer):
if node_input == initializer.name:
nodes.append(node)
return nodes

def replace_gemm_with_matmul(self):
    """Rewrite every eligible Gemm node as MatMul (+ Add when a bias input
    is present), so downstream quantization only has to handle MatMul.

    Only Gemm nodes with alpha == 1.0, beta == 1.0 and transA == 0 are
    converted; any other Gemm is kept unchanged. A transB == 1 weight is
    folded by transposing its initializer in place, or — when B is not an
    initializer — by inserting an explicit Transpose node.
    """
    new_nodes = []

    for node in self.nodes():
        if node.op_type == 'Gemm':
            # Attribute defaults per the ONNX Gemm spec.
            alpha = 1.0
            beta = 1.0
            transA = 0
            transB = 0
            for attr in node.attribute:
                if attr.name == 'alpha':
                    alpha = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'beta':
                    beta = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'transA':
                    transA = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'transB':
                    transB = onnx.helper.get_attribute_value(attr)
            if alpha == 1.0 and beta == 1.0 and transA == 0:
                inputB = node.input[1]
                if transB == 1:
                    B = self.get_initializer(node.input[1])
                    if B:
                        # assume B is not used by any other node
                        # Fold the transpose into the weight itself; the new
                        # initializer keeps the old name so node inputs still match.
                        B_array = onnx.numpy_helper.to_array(B)
                        B_trans = onnx.numpy_helper.from_array(B_array.T)
                        B_trans.name = B.name
                        self.remove_initializer(B)
                        self.add_initializer(B_trans)
                    else:
                        # B is produced at runtime: insert an explicit Transpose node.
                        inputB += '_Transposed'
                        transpose_node = onnx.helper.make_node('Transpose',
                                                               inputs=[node.input[1]],
                                                               outputs=[inputB],
                                                               name=node.name + '_Transpose')
                        new_nodes.append(transpose_node)

                # If a bias (third input) exists, MatMul writes to an
                # intermediate name and the Add below restores the original
                # Gemm output name so consumers are unaffected.
                matmul_node = onnx.helper.make_node(
                    'MatMul',
                    inputs=[node.input[0], inputB],
                    outputs=[node.output[0] + ('_MatMul' if len(node.input) > 2 else '')],
                    name=node.name + '_MatMul')
                new_nodes.append(matmul_node)

                if len(node.input) > 2:
                    add_node = onnx.helper.make_node('Add',
                                                     inputs=[node.output[0] + '_MatMul', node.input[2]],
                                                     outputs=node.output,
                                                     name=node.name + '_Add')
                    new_nodes.append(add_node)

            # unsupported
            else:
                new_nodes.append(node)

        # not GEMM
        else:
            new_nodes.append(node)

    # Rebuild the node list in one shot to preserve topological order.
    self.graph().ClearField('node')
    self.graph().node.extend(new_nodes)

def save_model_to_file(self, output_path, use_external_data_format=False):
    """Serialize the model to *output_path*.

    When *use_external_data_format* is True, all tensors are first moved
    into a single sidecar file named '<output_path basename>.data' — this
    is required for models above the 2GB protobuf limit.
    """
    if use_external_data_format:
        sidecar = Path(output_path).name + ".data"
        onnx.external_data_helper.convert_model_to_external_data(
            self.model, all_tensors_to_one_file=True, location=sidecar)
    onnx.save_model(self.model, output_path)

@staticmethod
def replace_node_input(node, old_input_name, new_input_name):
assert isinstance(old_input_name, str) and isinstance(new_input_name, str)
for j in range(len(node.input)):
if node.input[j] == old_input_name:
node.input[j] = new_input_name

def replace_input_of_all_nodes(self, old_input_name, new_input_name):
    """Apply replace_node_input to every node in the graph, renaming all
    uses of *old_input_name* to *new_input_name*."""
    for graph_node in self.model.graph.node:
        ONNXModel.replace_node_input(graph_node, old_input_name, new_input_name)

@staticmethod
def replace_node_output(node, old_output_name, new_output_name):
assert isinstance(old_output_name, str) and isinstance(new_output_name, str)
for j in range(len(node.output)):
if node.output[j] == old_output_name:
node.output[j] = new_output_name

def replace_output_of_all_nodes(self, old_output_name, new_output_name):
    """Apply replace_node_output to every node in the graph, renaming all
    productions of *old_output_name* to *new_output_name*."""
    for graph_node in self.model.graph.node:
        ONNXModel.replace_node_output(graph_node, old_output_name, new_output_name)

def remove_unused_constant(self):
    """Remove Constant nodes and initializers that feed no node and are
    not graph outputs, along with any matching graph inputs.

    Bug fix: the original removed entries from self.graph().input while
    iterating it with no break, which can skip elements of the repeated
    field after a removal. A name matches at most one graph input, so we
    now stop scanning as soon as the match is removed.
    """
    input_name_to_nodes = self.input_name_to_nodes()

    # Drop Constant nodes whose output nothing consumes.
    unused_nodes = []
    nodes = self.nodes()
    for node in nodes:
        if node.op_type == "Constant" and not self.is_graph_output(
                node.output[0]) and node.output[0] not in input_name_to_nodes:
            unused_nodes.append(node)

    self.remove_nodes(unused_nodes)

    # Drop initializers (weights) nothing consumes; also remove the graph
    # input an older-opset model may declare for each initializer.
    unused_weights = []
    for w in self.initializer():
        if w.name not in input_name_to_nodes and not self.is_graph_output(w.name):
            unused_weights.append(w)
            for graph_input in self.graph().input:
                if graph_input.name == w.name:
                    self.graph().input.remove(graph_input)
                    break  # don't keep iterating a field we just mutated

    self.remove_initializers(unused_weights)

def is_graph_output(self, output_name):
    """Return True if *output_name* names one of the model graph's outputs."""
    return any(out.name == output_name for out in self.model.graph.output)
Loading

0 comments on commit 5458ce9

Please sign in to comment.