Skip to content

Commit a286abc

Browse files
authored
Fix serving allocate block bug (#10101)
1 parent 9ce8c6f commit a286abc

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

llm/server/server/server/engine/infer.py

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,12 +32,12 @@
 32  32    from server.engine.config import Config
 33  33    from server.utils import get_logger
 34  34    from task_queue_manager import TaskQueueManager
 35      - from paddlenlp.trl import llm_utils
 36  35
 37  36    from paddlenlp.experimental.transformers import (
 38  37        EagleProposer,
 39  38        InferenceWithReferenceProposer,
 40  39    )
     40  + from paddlenlp.trl import llm_utils
 41  41    from paddlenlp.trl.llm_utils import get_rotary_position_embedding
 42  42
 43  43    File_Path = os.path.realpath(sys.argv[0])
File_Path = os.path.realpath(sys.argv[0])
@@ -338,8 +338,12 @@ def init_inputs(self):
338 338            self.share_inputs["need_block_len"] = paddle.full(shape=[1], fill_value=0, dtype="int32")
339 339            self.share_inputs["used_list_len"] = paddle.full(shape=[self.args.max_batch_size], fill_value=0, dtype="int32")
340 340            self.share_inputs["infer_seed"] = paddle.full(shape=[self.args.max_batch_size, 1], fill_value=0, dtype="int64")
341      -         free_list = list(range(int(self.args.max_block_num * self.args.block_ratio)))
    341  +
    342  +         free_list = list(
    343  +             range(self.args.max_block_num - 1, int(self.args.max_block_num * self.args.block_ratio) - 1, -1)
    344  +         )
342 345            self.free_list_len = len(free_list)
    346  +
343 347            self.share_inputs["free_list"] = paddle.to_tensor(free_list, dtype="int32")
344 348            self.share_inputs["free_list_len"] = paddle.full(shape=[1], fill_value=self.free_list_len, dtype="int32")
345 349
0 commit comments

Comments
 (0)