diff --git a/vllm/model_executor/weight_utils.py b/vllm/model_executor/weight_utils.py
index a9d899ad29e1..3127a36098e2 100644
--- a/vllm/model_executor/weight_utils.py
+++ b/vllm/model_executor/weight_utils.py
@@ -76,6 +76,8 @@ def hf_model_weights_iterator(
             state = torch.load(bin_file, map_location="cpu")
             for name, param in state.items():
                 yield name, param
+            del state
+            torch.cuda.empty_cache()
 
 
 def load_tensor_parallel_weights(