Skip to content

Commit

Permalink
Int8 Quantization fixes for quantizer rebased on 67c478e
Browse files Browse the repository at this point in the history
This will probably break lots of things
  • Loading branch information
pranav-prakash committed Feb 19, 2021
1 parent 73899db commit 5458ce9
Show file tree
Hide file tree
Showing 26 changed files with 1,918 additions and 1,094 deletions.
4 changes: 2 additions & 2 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,8 @@ option(onnxruntime_USE_FEATURIZERS "Build ML Featurizers support" OFF)
option(onnxruntime_USE_SYSTOLIC "Whether to use Systolic to accelerate matmul" OFF)
option(onnxruntime_USE_HWACHA "Whether to build with Hwacha support" OFF)
option(onnxruntime_FOR_FIRESIM "Whether to build for Firesim benchmarking" OFF)
option(onnxruntime_SYSTOLIC_INT8 "If Systolic is enabled, whether to use for int8 ops" OFF)
option(onnxruntime_SYSTOLIC_FP32 "If Systolic is enabled, whether to use for fp32 ops" ON)
option(onnxruntime_SYSTOLIC_INT8 "If Systolic is enabled, whether to use for int8 ops" ON)
option(onnxruntime_SYSTOLIC_FP32 "If Systolic is enabled, whether to use for fp32 ops" OFF)
if(onnxruntime_SYSTOLIC_FP32)
message(STATUS "BUILDING FOR FP32 SYSTOLIC")
endif()
Expand Down
393 changes: 55 additions & 338 deletions systolic_runner/quantization/calibrate.py

Large diffs are not rendered by default.

485 changes: 485 additions & 0 deletions systolic_runner/quantization/quantizer/calibrate.py

Large diffs are not rendered by default.

129 changes: 129 additions & 0 deletions systolic_runner/quantization/quantizer/onnx_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import onnx
from .quant_utils import find_by_name
from pathlib import Path


class ONNXModel:
Expand Down Expand Up @@ -49,6 +50,10 @@ def get_initializer(self, name):
def remove_initializer(self, tensor):
    """Delete *tensor* from the graph's initializer list and drop the
    graph input of the same name, if one exists (older ONNX models list
    initializers as graph inputs, so the two must stay in sync)."""
    initializers = self.model.graph.initializer
    if tensor not in initializers:
        return
    initializers.remove(tensor)
    # At most one graph input can share the initializer's name; stop at the first.
    matching = [gi for gi in self.model.graph.input if gi.name == tensor.name]
    if matching:
        self.model.graph.input.remove(matching[0])

def remove_initializers(self, init_to_remove):
for initializer in init_to_remove:
Expand Down Expand Up @@ -125,3 +130,127 @@ def find_nodes_by_initializer(self, graph, initializer):
if node_input == initializer.name:
nodes.append(node)
return nodes

def replace_gemm_with_matmul(self):
    """Rewrite every eligible Gemm node as MatMul (+ Add when a bias input
    is present), so downstream quantization only has to handle MatMul.

    Only Gemm nodes with alpha == 1.0, beta == 1.0 and transA == 0 are
    converted; any other Gemm is kept unchanged. A transB == 1 weight is
    folded by transposing its initializer in place, or — when B is not an
    initializer — by inserting an explicit Transpose node.
    """
    new_nodes = []

    for node in self.nodes():
        if node.op_type == 'Gemm':
            # Attribute defaults per the ONNX Gemm spec.
            alpha = 1.0
            beta = 1.0
            transA = 0
            transB = 0
            for attr in node.attribute:
                if attr.name == 'alpha':
                    alpha = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'beta':
                    beta = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'transA':
                    transA = onnx.helper.get_attribute_value(attr)
                elif attr.name == 'transB':
                    transB = onnx.helper.get_attribute_value(attr)
            if alpha == 1.0 and beta == 1.0 and transA == 0:
                inputB = node.input[1]
                if transB == 1:
                    B = self.get_initializer(node.input[1])
                    if B:
                        # assume B is not used by any other node
                        # Fold the transpose into the weight itself; the new
                        # initializer keeps the old name so node inputs still match.
                        B_array = onnx.numpy_helper.to_array(B)
                        B_trans = onnx.numpy_helper.from_array(B_array.T)
                        B_trans.name = B.name
                        self.remove_initializer(B)
                        self.add_initializer(B_trans)
                    else:
                        # B is produced at runtime: insert an explicit Transpose node.
                        inputB += '_Transposed'
                        transpose_node = onnx.helper.make_node('Transpose',
                                                               inputs=[node.input[1]],
                                                               outputs=[inputB],
                                                               name=node.name + '_Transpose')
                        new_nodes.append(transpose_node)

                # If a bias (third input) exists, MatMul writes to an
                # intermediate name and the Add below restores the original
                # Gemm output name so consumers are unaffected.
                matmul_node = onnx.helper.make_node(
                    'MatMul',
                    inputs=[node.input[0], inputB],
                    outputs=[node.output[0] + ('_MatMul' if len(node.input) > 2 else '')],
                    name=node.name + '_MatMul')
                new_nodes.append(matmul_node)

                if len(node.input) > 2:
                    add_node = onnx.helper.make_node('Add',
                                                     inputs=[node.output[0] + '_MatMul', node.input[2]],
                                                     outputs=node.output,
                                                     name=node.name + '_Add')
                    new_nodes.append(add_node)

            # unsupported
            else:
                new_nodes.append(node)

        # not GEMM
        else:
            new_nodes.append(node)

    # Rebuild the node list in one shot to preserve topological order.
    self.graph().ClearField('node')
    self.graph().node.extend(new_nodes)

def save_model_to_file(self, output_path, use_external_data_format=False):
    """Serialize the model to *output_path*.

    When *use_external_data_format* is True, all tensors are first moved
    into a single sidecar file named '<output_path basename>.data' — this
    is required for models above the 2GB protobuf limit.
    """
    if use_external_data_format:
        sidecar = Path(output_path).name + ".data"
        onnx.external_data_helper.convert_model_to_external_data(
            self.model, all_tensors_to_one_file=True, location=sidecar)
    onnx.save_model(self.model, output_path)

@staticmethod
def replace_node_input(node, old_input_name, new_input_name):
assert isinstance(old_input_name, str) and isinstance(new_input_name, str)
for j in range(len(node.input)):
if node.input[j] == old_input_name:
node.input[j] = new_input_name

def replace_input_of_all_nodes(self, old_input_name, new_input_name):
    """Apply replace_node_input to every node in the graph, renaming all
    uses of *old_input_name* to *new_input_name*."""
    for graph_node in self.model.graph.node:
        ONNXModel.replace_node_input(graph_node, old_input_name, new_input_name)

@staticmethod
def replace_node_output(node, old_output_name, new_output_name):
assert isinstance(old_output_name, str) and isinstance(new_output_name, str)
for j in range(len(node.output)):
if node.output[j] == old_output_name:
node.output[j] = new_output_name

def replace_output_of_all_nodes(self, old_output_name, new_output_name):
    """Apply replace_node_output to every node in the graph, renaming all
    productions of *old_output_name* to *new_output_name*."""
    for graph_node in self.model.graph.node:
        ONNXModel.replace_node_output(graph_node, old_output_name, new_output_name)

def remove_unused_constant(self):
    """Remove Constant nodes and initializers that feed no node and are
    not graph outputs, along with any matching graph inputs.

    Bug fix: the original removed entries from self.graph().input while
    iterating it with no break, which can skip elements of the repeated
    field after a removal. A name matches at most one graph input, so we
    now stop scanning as soon as the match is removed.
    """
    input_name_to_nodes = self.input_name_to_nodes()

    # Drop Constant nodes whose output nothing consumes.
    unused_nodes = []
    nodes = self.nodes()
    for node in nodes:
        if node.op_type == "Constant" and not self.is_graph_output(
                node.output[0]) and node.output[0] not in input_name_to_nodes:
            unused_nodes.append(node)

    self.remove_nodes(unused_nodes)

    # Drop initializers (weights) nothing consumes; also remove the graph
    # input an older-opset model may declare for each initializer.
    unused_weights = []
    for w in self.initializer():
        if w.name not in input_name_to_nodes and not self.is_graph_output(w.name):
            unused_weights.append(w)
            for graph_input in self.graph().input:
                if graph_input.name == w.name:
                    self.graph().input.remove(graph_input)
                    break  # don't keep iterating a field we just mutated

    self.remove_initializers(unused_weights)

def is_graph_output(self, output_name):
    """Return True if *output_name* names one of the model graph's outputs."""
    return any(out.name == output_name for out in self.model.graph.output)
Loading

0 comments on commit 5458ce9

Please sign in to comment.