2 files changed: +8 −3 lines.

File 1 (LMDeploy backend):

@@ -29,14 +29,21 @@ def __init__(self, model_path) -> None:
         backend = backend_map[os.getenv("backend")]
         enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
         max_model_len = os.getenv("max_model_len", None)
+        dtype = os.getenv("dtype", "auto")
         logger.info(f"后端 {backend}")
         if backend == "pytorch":
-            backend_config = PytorchEngineConfig(tp=int(os.getenv("num_gpus", "1")))
+            backend_config = PytorchEngineConfig(
+                tp=int(os.getenv("num_gpus", "1")),
+                dtype=dtype,
+                session_len=int(max_model_len) if max_model_len else None,
+                enable_prefix_caching=enable_prefix_caching,
+            )
         if backend == "turbomind":
             backend_config = TurbomindEngineConfig(
                 tp=int(os.getenv("num_gpus", "1")),
                 enable_prefix_caching=enable_prefix_caching,
                 session_len=int(max_model_len) if max_model_len else None,
+                dtype=dtype,
             )
         pipeline_type, pipeline_class = get_task(model_path)
         logger.info(f"模型架构：{pipeline_type}")
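The net effect of this hunk is that dtype, max_model_len, and enable_prefix_caching now reach the PyTorch engine as well, instead of only TurboMind. Below is a minimal sketch of the resulting configuration logic, assuming PytorchEngineConfig and TurbomindEngineConfig are importable from lmdeploy's top level as in its public API; the build_backend_config helper is hypothetical, and the keyword arguments are taken from the diff. Note that bool(os.getenv(...)) is True for any non-empty string, including "0" and "false".

```python
import os

# Assumed import path: lmdeploy exposes both engine configs at the top level.
from lmdeploy import PytorchEngineConfig, TurbomindEngineConfig


def build_backend_config(backend: str):
    """Hypothetical helper mirroring the diff: every knob comes from the environment."""
    # Caution: bool() on a non-empty string is always True, so
    # enable_prefix_caching="false" or "0" still enables the feature.
    enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
    max_model_len = os.getenv("max_model_len", None)
    dtype = os.getenv("dtype", "auto")
    tp = int(os.getenv("num_gpus", "1"))
    session_len = int(max_model_len) if max_model_len else None

    if backend == "pytorch":
        return PytorchEngineConfig(
            tp=tp,
            dtype=dtype,
            session_len=session_len,
            enable_prefix_caching=enable_prefix_caching,
        )
    return TurbomindEngineConfig(
        tp=tp,
        dtype=dtype,
        session_len=session_len,
        enable_prefix_caching=enable_prefix_caching,
    )
```

With backend=pytorch, num_gpus=2, and dtype=bfloat16 in the environment, this yields a PytorchEngineConfig with tp=2 and dtype="bfloat16", matching what the patched __init__ now builds.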
File 2 (vLLM backend):

@@ -27,9 +27,7 @@ class VllmBackend(ModelBackend):
     def __init__(self, model_path) -> None:
         lora = os.getenv("lora", None)
         enable_prefix_caching = bool(os.getenv("enable_prefix_caching", False))
-
         max_model_len = os.getenv("max_model_len", None)
-
         tensor_parallel_size = int(os.getenv("num_gpus", "1"))
         dtype = os.getenv("dtype", "auto")
         max_loras = 1
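The second file only drops two blank lines; the surrounding context shows the vLLM backend reading the same environment variables (lora, enable_prefix_caching, max_model_len, num_gpus, dtype). A hedged sketch of how these values would typically feed vLLM engine arguments follows; the mapping is an assumption about the unshown remainder of __init__, the field names are vLLM's EngineArgs fields, and parse_bool is a hypothetical, stricter alternative to bool(os.getenv(...)).

```python
import os

# Assumed import: EngineArgs is part of vLLM's public API.
from vllm import EngineArgs


def parse_bool(name: str, default: bool = False) -> bool:
    """Hypothetical stricter parse; bool(os.getenv(...)) is True for any non-empty string."""
    raw = os.getenv(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("1", "true", "yes", "on")


lora = os.getenv("lora", None)
max_model_len = os.getenv("max_model_len", None)

engine_args = EngineArgs(
    model="path/to/model",  # placeholder; the real backend receives model_path
    tensor_parallel_size=int(os.getenv("num_gpus", "1")),
    dtype=os.getenv("dtype", "auto"),
    max_model_len=int(max_model_len) if max_model_len else None,
    enable_prefix_caching=parse_bool("enable_prefix_caching"),
    enable_lora=lora is not None,
    max_loras=1,
)
```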