Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions vllm/v1/worker/lora_model_runner_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,7 @@ def load_lora_model(self, model: nn.Module, model_config: ModelConfig,
def _set_active_loras(self, prompt_lora_mapping: tuple[int, ...],
token_lora_mapping: tuple[int, ...],
lora_requests: set[LoRARequest]) -> None:
if not self.lora_manager:
raise RuntimeError("LoRA is not enabled.")
self._ensure_lora_enabled()

# Set is_prefill to True, so we always use the SGMV kernels on
# non-cuda platforms.
Expand All @@ -75,6 +74,11 @@ def _set_active_loras(self, prompt_lora_mapping: tuple[int, ...],
is_prefill=True)
self.lora_manager.set_active_adapters(lora_requests, lora_mapping)

def _ensure_lora_enabled(self) -> None:
if not hasattr(self, "lora_manager"):
raise RuntimeError(
"LoRA is not enabled. Use --enable-lora to enable LoRA.")

def set_active_loras(self, input_batch: InputBatch,
num_scheduled_tokens: np.ndarray) -> None:

Expand Down Expand Up @@ -172,21 +176,17 @@ def maybe_remove_all_loras(self, lora_config: Optional[LoRAConfig]):
self.lora_manager.remove_all_adapters()

def add_lora(self, lora_request: LoRARequest) -> bool:
    """Register a LoRA adapter with the runner's LoRA manager.

    Args:
        lora_request: descriptor of the adapter to load.

    Returns:
        Whatever ``lora_manager.add_adapter`` reports (True on success).

    Raises:
        RuntimeError: if LoRA was not enabled (no ``lora_manager``).
    """
    # Fail fast with a uniform message when --enable-lora was not passed.
    self._ensure_lora_enabled()
    return self.lora_manager.add_adapter(lora_request)

def remove_lora(self, lora_id: int) -> bool:
    """Unload the LoRA adapter with the given id.

    Args:
        lora_id: id of the adapter to remove.

    Returns:
        Whatever ``lora_manager.remove_adapter`` reports (True if the
        adapter was found and removed).

    Raises:
        RuntimeError: if LoRA was not enabled (no ``lora_manager``).
    """
    # Fail fast with a uniform message when --enable-lora was not passed.
    self._ensure_lora_enabled()
    return self.lora_manager.remove_adapter(lora_id)

def pin_lora(self, lora_id: int) -> bool:
    """Pin the LoRA adapter with the given id so it is not evicted.

    Args:
        lora_id: id of the adapter to pin.

    Returns:
        Whatever ``lora_manager.pin_adapter`` reports (True on success).

    Raises:
        RuntimeError: if LoRA was not enabled (no ``lora_manager``).
    """
    # Fail fast with a uniform message when --enable-lora was not passed.
    self._ensure_lora_enabled()
    return self.lora_manager.pin_adapter(lora_id)

def list_loras(self) -> set[int]:
    """Return the ids of all currently loaded LoRA adapters.

    Returns:
        The set of adapter ids reported by ``lora_manager.list_adapters``.

    Raises:
        RuntimeError: if LoRA was not enabled (no ``lora_manager``).
    """
    # Fail fast with a uniform message when --enable-lora was not passed.
    self._ensure_lora_enabled()
    return self.lora_manager.list_adapters()
Loading