
Commit 6fa8ba1

Remove neural_compressor dependency in MatMulNBits (#24627)
### Description
As titled: removes the neural_compressor dependency from the MatMulNBits quantizer.

### Motivation and Context
The dependency is no longer needed.
1 parent c51d67b commit 6fa8ba1
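
For context, a minimal usage sketch of the RTN weight-only path this commit touches. It assumes the MatMulNBitsQuantizer and RTNWeightOnlyQuantConfig names defined in matmul_nbits_quantizer.py and that quantizer.model exposes ONNXModel.save_model_to_file after process(); the file paths are placeholders, not from this commit:

import onnx

from onnxruntime.quantization.matmul_nbits_quantizer import (
    MatMulNBitsQuantizer,
    RTNWeightOnlyQuantConfig,
)

# Load the float model to be quantized ("model.onnx" is a placeholder path).
model = onnx.load("model.onnx")

# Configure 4-bit weight-only quantization of MatMul weights using RTN.
quantizer = MatMulNBitsQuantizer(
    model,
    block_size=128,       # per-block quantization granularity
    is_symmetric=True,    # symmetric 4-bit quantization
    algo_config=RTNWeightOnlyQuantConfig(),  # RTN algorithm config
)

# process() dispatches RTN/GPTQ configs to int4_quant_algo(); after this
# commit that path no longer probes for the neural_compressor package.
quantizer.process()
quantizer.model.save_model_to_file("model_int4.onnx", use_external_data_format=True)
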

File tree: 1 file changed (+1, -17 lines)

onnxruntime/python/tools/quantization/matmul_nbits_quantizer.py

Lines changed: 1 addition & 17 deletions
@@ -8,15 +8,13 @@
 
 import argparse
 import copy
-import importlib
 import logging
 import os
 
 import numpy as np
 import numpy.typing as npt
 import onnx
 from onnx.onnx_pb import GraphProto, ModelProto, NodeProto, TensorProto
-from packaging import version
 
 from onnxruntime.capi._pybind_state import quantize_matmul_4bits, quantize_matmul_8bits, quantize_qdq_matmul_4bits
 
@@ -1356,21 +1354,7 @@ def process(self):
             self.model = ONNXModel(self.model)  # Ensure the model is wrapped back into ONNXModel
             self.model.clean_initializers()
         else:
-            # use Intel® Neural Compressor for RTN or GPTQ weight-only quantize algorithm
-            try:
-                importlib.import_module("neural_compressor")
-            except Exception as e:
-                logging.error(f"{e}.")
-                raise RuntimeError(
-                    "neural-compressor is not correctly installed. Please check your environment."
-                ) from e
-
-            import neural_compressor
-
-            assert version.parse(neural_compressor.__version__) >= version.parse("2.3.2"), (
-                "Require neural-compressor >= 2.3.2 to support weight only quantization!"
-            )
-
+            # RTN or GPTQ weight-only quantize algorithm
             self.int4_quant_algo()
 