
Commit fa4b95b

Merge pull request #85 from microsoft/user/vriveras/removing-dataparallel
Remove DataParallel for preview version
2 parents 8f6f68d + c872e81 commit fa4b95b
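
Context for the change (assumed rationale, not stated in the commit message): torch.nn.DataParallel proxies attribute lookup through its .module field, so inference code that calls model.generate(...) on the wrapper fails, and the wrapper adds nothing for single-stream generation. A minimal sketch of that attribute-hiding behavior:

```python
# Sketch of why a DataParallel wrapper gets in the way at inference time
# (assumed rationale; the commit only says it is removed for the preview).
import torch

inner = torch.nn.Linear(4, 4)
wrapped = torch.nn.DataParallel(inner)  # constructs fine even on CPU-only hosts

print(hasattr(inner, "weight"))           # True
print(hasattr(wrapped, "weight"))         # False: attributes hide behind .module
print(hasattr(wrapped.module, "weight"))  # True: explicit indirection required
```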

File tree: 5 files changed, +65 -20 lines

configs/llama-v2-7b/inference/utils.py

Lines changed: 13 additions & 4 deletions
@@ -7,6 +7,16 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
 from peft import PeftModel
 
+def get_device_map():
+    num_gpus = torch.cuda.device_count()
+
+    if num_gpus > 1:
+        print("More than one GPU found. Setting device_map to use CUDA device 0.")
+        return 'cuda:0'
+    else:
+        print("Using the available device (CUDA device 0).")
+        return 'cuda'
+
 def check_adapter_path(adapters_name):
     """
     Checks if the adapter path is correctly set and not a placeholder.
@@ -26,7 +36,7 @@ def load_tokenizer(model_name):
     Returns:
         AutoTokenizer: The loaded tokenizer with special tokens added and padding side set.
     """
-    tok = AutoTokenizer.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
+    tok = AutoTokenizer.from_pretrained(model_name, device_map=get_device_map(), trust_remote_code=True)
     tok.add_special_tokens({'pad_token': '[PAD]'})
     tok.padding_side = 'right'  # TRL requires right padding
     return tok
@@ -46,7 +56,7 @@ def load_model(model_name, torch_dtype, quant_type):
         model = AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path=model_name,
             trust_remote_code=True,
-            device_map='auto',
+            device_map=get_device_map(),
             torch_dtype=torch_dtype,
             quantization_config=BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -55,8 +65,7 @@ def load_model(model_name, torch_dtype, quant_type):
                 bnb_4bit_quant_type=quant_type
             ),
         )
-        if torch.cuda.device_count() > 1:
-            model = torch.nn.DataParallel(model)
+
         return model
     except Exception as e:
         raise RuntimeError(f"Error loading model: {e}")
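
Taken together, the new flow loads everything onto one CUDA device. A minimal usage sketch under assumed names (model_name is a placeholder; the repo's actual inference driver may wire this differently):

```python
# Hypothetical driver for the updated utils.py (names are illustrative).
# The returned model is a plain AutoModelForCausalLM pinned to one device,
# so .generate() is called directly, with no DataParallel .module indirection.
import torch
from utils import get_device_map, load_tokenizer, load_model

model_name = "meta-llama/Llama-2-7b-hf"  # placeholder checkpoint

print(get_device_map())  # 'cuda:0' on multi-GPU hosts, 'cuda' otherwise
tok = load_tokenizer(model_name)
model = load_model(model_name, torch_dtype=torch.bfloat16, quant_type="nf4")

inputs = tok("Hello, world", return_tensors="pt").to(model.device)
out = model.generate(**inputs, max_new_tokens=20)
print(tok.decode(out[0], skip_special_tokens=True))
```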

configs/mistral-7b/inference/utils.py

Lines changed: 13 additions & 4 deletions
@@ -7,6 +7,16 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
 from peft import PeftModel
 
+def get_device_map():
+    num_gpus = torch.cuda.device_count()
+
+    if num_gpus > 1:
+        print("More than one GPU found. Setting device_map to use CUDA device 0.")
+        return 'cuda:0'
+    else:
+        print("Using the available device (CUDA device 0).")
+        return 'cuda'
+
 def check_adapter_path(adapters_name):
     """
     Checks if the adapter path is correctly set and not a placeholder.
@@ -26,7 +36,7 @@ def load_tokenizer(model_name):
     Returns:
         AutoTokenizer: The loaded tokenizer with special tokens added and padding side set.
     """
-    tok = AutoTokenizer.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
+    tok = AutoTokenizer.from_pretrained(model_name, device_map=get_device_map(), trust_remote_code=True)
     tok.add_special_tokens({'pad_token': '[PAD]'})
     tok.padding_side = 'right'  # TRL requires right padding
     return tok
@@ -46,7 +56,7 @@ def load_model(model_name, torch_dtype, quant_type):
         model = AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path=model_name,
             trust_remote_code=True,
-            device_map='auto',
+            device_map=get_device_map(),
             torch_dtype=torch_dtype,
             quantization_config=BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -55,8 +65,7 @@ def load_model(model_name, torch_dtype, quant_type):
                 bnb_4bit_quant_type=quant_type
             ),
         )
-        if torch.cuda.device_count() > 1:
-            model = torch.nn.DataParallel(model)
+
         return model
     except Exception as e:
         raise RuntimeError(f"Error loading model: {e}")

configs/phi-1_5/inference/utils.py

Lines changed: 13 additions & 4 deletions
@@ -7,6 +7,16 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
 from peft import PeftModel
 
+def get_device_map():
+    num_gpus = torch.cuda.device_count()
+
+    if num_gpus > 1:
+        print("More than one GPU found. Setting device_map to use CUDA device 0.")
+        return 'cuda:0'
+    else:
+        print("Using the available device (CUDA device 0).")
+        return 'cuda'
+
 def check_adapter_path(adapters_name):
     """
     Checks if the adapter path is correctly set and not a placeholder.
@@ -26,7 +36,7 @@ def load_tokenizer(model_name):
     Returns:
         AutoTokenizer: The loaded tokenizer with special tokens added and padding side set.
     """
-    tok = AutoTokenizer.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
+    tok = AutoTokenizer.from_pretrained(model_name, device_map=get_device_map(), trust_remote_code=True)
     tok.add_special_tokens({'pad_token': '[PAD]'})
     tok.padding_side = 'right'  # TRL requires right padding
     return tok
@@ -46,7 +56,7 @@ def load_model(model_name, torch_dtype, quant_type):
         model = AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path=model_name,
             trust_remote_code=True,
-            device_map='auto',
+            device_map=get_device_map(),
             torch_dtype=torch_dtype,
             quantization_config=BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -55,8 +65,7 @@ def load_model(model_name, torch_dtype, quant_type):
                 bnb_4bit_quant_type=quant_type
             ),
         )
-        if torch.cuda.device_count() > 1:
-            model = torch.nn.DataParallel(model)
+
         return model
     except Exception as e:
         raise RuntimeError(f"Error loading model: {e}")

configs/phi-2/inference/utils.py

Lines changed: 13 additions & 4 deletions
@@ -7,6 +7,16 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
 from peft import PeftModel
 
+def get_device_map():
+    num_gpus = torch.cuda.device_count()
+
+    if num_gpus > 1:
+        print("More than one GPU found. Setting device_map to use CUDA device 0.")
+        return 'cuda:0'
+    else:
+        print("Using the available device (CUDA device 0).")
+        return 'cuda'
+
 def check_adapter_path(adapters_name):
     """
     Checks if the adapter path is correctly set and not a placeholder.
@@ -26,7 +36,7 @@ def load_tokenizer(model_name):
     Returns:
         AutoTokenizer: The loaded tokenizer with special tokens added and padding side set.
     """
-    tok = AutoTokenizer.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
+    tok = AutoTokenizer.from_pretrained(model_name, device_map=get_device_map(), trust_remote_code=True)
     tok.add_special_tokens({'pad_token': '[PAD]'})
     tok.padding_side = 'right'  # TRL requires right padding
     return tok
@@ -46,7 +56,7 @@ def load_model(model_name, torch_dtype, quant_type):
         model = AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path=model_name,
             trust_remote_code=True,
-            device_map='auto',
+            device_map=get_device_map(),
             torch_dtype=torch_dtype,
             quantization_config=BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -55,8 +65,7 @@ def load_model(model_name, torch_dtype, quant_type):
                 bnb_4bit_quant_type=quant_type
             ),
         )
-        if torch.cuda.device_count() > 1:
-            model = torch.nn.DataParallel(model)
+
         return model
     except Exception as e:
         raise RuntimeError(f"Error loading model: {e}")

configs/zephyr-7b-beta/inference/utils.py

Lines changed: 13 additions & 4 deletions
@@ -7,6 +7,16 @@
 from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TextStreamer
 from peft import PeftModel
 
+def get_device_map():
+    num_gpus = torch.cuda.device_count()
+
+    if num_gpus > 1:
+        print("More than one GPU found. Setting device_map to use CUDA device 0.")
+        return 'cuda:0'
+    else:
+        print("Using the available device (CUDA device 0).")
+        return 'cuda'
+
 def check_adapter_path(adapters_name):
     """
     Checks if the adapter path is correctly set and not a placeholder.
@@ -26,7 +36,7 @@ def load_tokenizer(model_name):
     Returns:
         AutoTokenizer: The loaded tokenizer with special tokens added and padding side set.
     """
-    tok = AutoTokenizer.from_pretrained(model_name, device_map='auto', trust_remote_code=True)
+    tok = AutoTokenizer.from_pretrained(model_name, device_map=get_device_map(), trust_remote_code=True)
     tok.add_special_tokens({'pad_token': '[PAD]'})
     tok.padding_side = 'right'  # TRL requires right padding
     return tok
@@ -46,7 +56,7 @@ def load_model(model_name, torch_dtype, quant_type):
         model = AutoModelForCausalLM.from_pretrained(
             pretrained_model_name_or_path=model_name,
             trust_remote_code=True,
-            device_map='auto',
+            device_map=get_device_map(),
             torch_dtype=torch_dtype,
             quantization_config=BitsAndBytesConfig(
                 load_in_4bit=True,
@@ -55,8 +65,7 @@ def load_model(model_name, torch_dtype, quant_type):
                 bnb_4bit_quant_type=quant_type
             ),
        )
-        if torch.cuda.device_count() > 1:
-            model = torch.nn.DataParallel(model)
+
         return model
     except Exception as e:
         raise RuntimeError(f"Error loading model: {e}")
