When using vLLM with MPT-30b-chat on a single A100 80GB, the server eventually hits the error below, after which requests no longer complete and are instead aborted:
ERROR: Exception in ASGI application
Traceback (most recent call last):
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/uvicorn/protocols/http/httptools_impl.py", line 436, in run_asgi
    result = await app( # type: ignore[func-returns-value]
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/uvicorn/middleware/proxy_headers.py", line 78, in __call__
    return await self.app(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/fastapi/applications.py", line 276, in __call__
    await super().__call__(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/applications.py", line 122, in __call__
    await self.middleware_stack(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/middleware/errors.py", line 184, in __call__
    raise exc
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/middleware/errors.py", line 162, in __call__
    await self.app(scope, receive, _send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/middleware/cors.py", line 84, in __call__
    await self.app(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 79, in __call__
    raise exc
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/middleware/exceptions.py", line 68, in __call__
    await self.app(scope, receive, sender)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 21, in __call__
    raise e
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/fastapi/middleware/asyncexitstack.py", line 18, in __call__
    await self.app(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/routing.py", line 718, in __call__
    await route.handle(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/routing.py", line 276, in handle
    await self.app(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/routing.py", line 69, in app
    await response(scope, receive, send)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/responses.py", line 270, in __call__
    async with anyio.create_task_group() as task_group:
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 662, in __aexit__
    raise exceptions[0]
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/responses.py", line 273, in wrap
    await func()
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/starlette/responses.py", line 262, in stream_response
    async for chunk in self.body_iterator:
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/vllm/entrypoints/openai/api_server.py", line 438, in completion_stream_generator
    async for res in result_generator:
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 148, in generate
    await self.engine_step(request_id)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/vllm/engine/async_llm_engine.py", line 74, in engine_step
    request_outputs = self.engine.step()
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 253, in step
    self._decode_sequences(seq_groups)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/vllm/engine/llm_engine.py", line 270, in _decode_sequences
    new_token, new_output_text = detokenize_incrementally(
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/vllm/transformers_utils/tokenizer.py", line 73, in detokenize_incrementally
    output_text = tokenizer.convert_tokens_to_string(output_tokens)
  File "/home/ubuntu/miniconda3/envs/h2ollm/lib/python3.10/site-packages/transformers/tokenization_utils_fast.py", line 533, in convert_tokens_to_string
    return self.backend_tokenizer.decoder.decode(tokens)
TypeError: argument 'tokens': 'NoneType' object cannot be converted to 'PyString'
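The final frame is the telling one: convert_tokens_to_string received a list containing None. With a fast (Rust-backed) tokenizer, convert_ids_to_tokens maps any id outside the vocabulary to None instead of raising, so if the engine ever samples an out-of-range token id (my assumption about the trigger here; the traceback itself does not show the offending id), the None passes through detokenize_incrementally untouched and only blows up inside the Rust decoder. A minimal sketch that reproduces the same TypeError outside vLLM, using gpt2 purely as a small stand-in tokenizer:

```python
from transformers import AutoTokenizer

# Any fast (Rust-backed) tokenizer behaves the same; gpt2 is just small.
tok = AutoTokenizer.from_pretrained("gpt2")

# convert_ids_to_tokens silently maps an out-of-vocab id to None...
bad_id = tok.vocab_size + 123  # no token has this id
tokens = tok.convert_ids_to_tokens([bad_id])
print(tokens)  # -> [None]

# ...and convert_tokens_to_string hands that None straight to the Rust
# decoder, which raises the exact error from the traceback above:
tok.convert_tokens_to_string(tokens)
# TypeError: argument 'tokens': 'NoneType' object cannot be converted to 'PyString'
```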
I got the same error when I execute: python benchmark_latency.py --input-len 32 --output-len 32 --batch-size 4 --n 1 --tensor-parallel-size 4 --model facebook/opt-66b --tokenizer facebook/opt-66b
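Hitting it with facebook/opt-66b at --tensor-parallel-size 4 as well suggests the trigger is not model-specific: any run that generates enough tokens and produces one unmappable id dies the same way. Until the detokenizer is hardened upstream, one stopgap is to drop None entries before decoding. The helper below is a sketch under that assumption; safe_convert_tokens_to_string is my hypothetical name, not part of vLLM or transformers:

```python
from typing import List, Optional

from transformers import PreTrainedTokenizerBase


def safe_convert_tokens_to_string(
    tokenizer: PreTrainedTokenizerBase,
    tokens: List[Optional[str]],
) -> str:
    """Stopgap sketch (hypothetical helper, not vLLM's actual fix):
    drop None entries (ids outside the vocab) so the Rust decoder
    only ever sees strings."""
    return tokenizer.convert_tokens_to_string(
        [t for t in tokens if t is not None]
    )


# Usage, e.g. in place of the failing call in detokenize_incrementally:
# output_text = safe_convert_tokens_to_string(tokenizer, output_tokens)
```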