Skip to content

Commit

Permalink
Add support for CPU Inference
Browse files (browse the repository at this point in the history)
Refer: AutoGPTQ/AutoGPTQ#385

Signed-off-by: Vivek Khandelwal <vivek@nod-labs.com>
  • Loading branch information
vivekkhandelwal1 committed Oct 31, 2023
1 parent 8e7588b commit 78342d1
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 3 deletions.
4 changes: 2 additions & 2 deletions optimum/gptq/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,10 +240,10 @@ def _replace_by_quant_layers(self, module: nn.Module, names: List[str], name: st
out_features = layer.weight.shape[1]
if not (self.desc_act) or self.group_size == -1:
new_layer = QuantLinear(
- self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16
+ self.bits, self.group_size, in_features, out_features, True, use_cuda_fp16=self.use_cuda_fp16, weight_dtype=layer.weight.dtype
)
else:
- new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True)
+ new_layer = QuantLinear(self.bits, self.group_size, in_features, out_features, True, weight_dtype=layer.weight.dtype)
new_layer.device = device
setattr(module, attr, new_layer.to(device))
for name1, child in module.named_children():
Expand Down
2 changes: 1 addition & 1 deletion optimum/utils/import_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
TORCH_MINIMUM_VERSION = packaging.version.parse("1.11.0")
TRANSFORMERS_MINIMUM_VERSION = packaging.version.parse("4.25.0")
DIFFUSERS_MINIMUM_VERSION = packaging.version.parse("0.18.0")
- AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.4.2")
+ AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.5.0")


# This is the minimal required version to support some ONNX Runtime features
Expand Down

0 comments on commit 78342d1

Please sign in to comment.