diff --git a/hf_torchao_vllm/torchao_hf_script.py b/hf_torchao_vllm/quantize_hf_model_with_torchao.py similarity index 99% rename from hf_torchao_vllm/torchao_hf_script.py rename to hf_torchao_vllm/quantize_hf_model_with_torchao.py index ef8f78d..f41f9a8 100644 --- a/hf_torchao_vllm/torchao_hf_script.py +++ b/hf_torchao_vllm/quantize_hf_model_with_torchao.py @@ -2,10 +2,8 @@ # SPDX-License-Identifier: Apache-2.0 """ -Script for quantizing LLM models with TorchAO. +Script for quantizing HuggingFace models with TorchAO. Supports various quantization configurations and model types. - -Copy of Driss's https://www.internalfb.com/phabricator/paste/view/P1838316614 """ diff --git a/hf_torchao_vllm/run_vllm.py b/hf_torchao_vllm/run_quantized_model_in_vllm.py similarity index 97% rename from hf_torchao_vllm/run_vllm.py rename to hf_torchao_vllm/run_quantized_model_in_vllm.py index 97e292b..5712bcb 100644 --- a/hf_torchao_vllm/run_vllm.py +++ b/hf_torchao_vllm/run_quantized_model_in_vllm.py @@ -1,7 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 -# copied from Driss's https://www.internalfb.com/phabricator/paste/view/P1858050926 - import os import random diff --git a/hf_torchao_vllm/inspect_llm_compressor_output.py b/hf_torchao_vllm/utils/inspect_llm_compressor_output.py similarity index 100% rename from hf_torchao_vllm/inspect_llm_compressor_output.py rename to hf_torchao_vllm/utils/inspect_llm_compressor_output.py diff --git a/hf_torchao_vllm/inspect_torchao_output.py b/hf_torchao_vllm/utils/inspect_torchao_output.py similarity index 100% rename from hf_torchao_vllm/inspect_torchao_output.py rename to hf_torchao_vllm/utils/inspect_torchao_output.py diff --git a/hf_torchao_vllm/run_llm_compressor.py b/hf_torchao_vllm/utils/quantize_hf_model_with_llm_compressor.py similarity index 100% rename from hf_torchao_vllm/run_llm_compressor.py rename to hf_torchao_vllm/utils/quantize_hf_model_with_llm_compressor.py