diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 40d13ab061d7..b467a0d28adb 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -322,6 +322,10 @@ def load_weights(self, model_name_or_path, cache_dir, load_format, revision): if "rotary_emb.inv_freq" in name: continue + if "rotary_emb.cos_cached" in name: + continue + if "rotary_emb.sin_cached" in name: + continue for (param_name, weight_name, shard_id) in stacked_params_mapping: if weight_name not in name: continue