Merge pull request PaddlePaddle#6 from zhaoyinglia/AutoParallel/new_api
[AutoParallel] Update to the new strategy impl
aoyulong committed Sep 13, 2022
2 parents 23f6539 + d148cb8 commit e94e263
Showing 42 changed files with 1,349 additions and 770 deletions.
4 changes: 2 additions & 2 deletions python/paddle/distributed/auto_parallel/completion.py
@@ -1037,7 +1037,7 @@ def _get_op_by_id(ops, id):
                     grad_op_dist_attr.set_output_dims_mapping(
                         output_name, ref_fwd_dims_mapping)
 
-            elif grad_op.type == 'fill_zeros_like':
+            elif grad_op.type == 'fill_any_like':
                 ref_var_name = grad_op.input_arg_names[0]
                 ref_var = vars[ref_var_name]
                 ref_dist_attr = self._dist_context.get_tensor_dist_attr_for_program(
@@ -1274,7 +1274,7 @@ def _get_op_by_id(ops, id):
                 grad_op_dist_attr.impl_type = "default"
                 grad_op_dist_attr.impl_idx = 0
 
-            elif grad_op.type == 'fill_zeros_like':
+            elif grad_op.type == 'fill_any_like':
                 ref_var_name = grad_op.input_arg_names[0]
                 ref_var = vars[ref_var_name]
                 ref_dist_attr = self._dist_context.get_tensor_dist_attr_for_program(
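Background for the rename above: newer Paddle versions lower zeros-like tensors in the static graph through the generic `fill_any_like` op (with a fill value of 0) instead of the older dedicated `fill_zeros_like` op, so the completion pass must match the new op type. A minimal sketch of this behavior, assuming a static-graph build of Paddle where `zeros_like` lowers this way:

    # Sketch (assumption): paddle.zeros_like emits a `fill_any_like` op in a
    # static-graph program, which is the op type the completion pass now matches.
    import paddle

    paddle.enable_static()
    main_prog = paddle.static.Program()
    with paddle.static.program_guard(main_prog):
        x = paddle.static.data(name="x", shape=[2, 3], dtype="float32")
        y = paddle.zeros_like(x)

    # Expected to contain 'fill_any_like' rather than 'fill_zeros_like'.
    print([op.type for op in main_prog.global_block().ops])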
45 changes: 38 additions & 7 deletions python/paddle/distributed/auto_parallel/constants.py
@@ -41,28 +41,35 @@ def set_field_default_config(category, field, default_value):
 #########################################
 BASE = "base"
 set_field_default_config(BASE, "auto_mode", "semi")
+set_field_default_config(BASE, "gradient_scale", True)
+set_field_default_config(BASE, "use_cache", True)
+set_field_default_config(BASE, "return_numpy", True)
+set_field_default_config(BASE, "all_ranks", False)
+set_field_default_config(BASE, "split_data", False)
+set_field_default_config(BASE, "seed", None)
 
 #########################################
 # recompute configuration
 #########################################
 RECOMPUTE = "recompute"
-set_field_default_config(RECOMPUTE, "enabled", False)
+set_field_default_config(RECOMPUTE, "enable", False)
 set_field_default_config(RECOMPUTE, "checkpoints", None)
+set_field_default_config(RECOMPUTE, "enable_tuning", False)
 
 #########################################
 # AMP configuration
 #########################################
 AMP = "amp"
-set_field_default_config(AMP, "enabled", False)
+set_field_default_config(AMP, "enable", False)
 set_field_default_config(AMP, "init_loss_scaling", 32768.0)
 set_field_default_config(AMP, "incr_every_n_steps", 1000)
 set_field_default_config(AMP, "decr_every_n_nan_or_inf", 2)
 set_field_default_config(AMP, "incr_ratio", 2.0)
 set_field_default_config(AMP, "decr_ratio", 0.8)
 set_field_default_config(AMP, "use_dynamic_loss_scaling", True)
-set_field_default_config(AMP, "custom_white_list", None)
-set_field_default_config(AMP, "custom_black_list", None)
-set_field_default_config(AMP, "custom_black_varnames", None)
+set_field_default_config(AMP, "custom_white_list", [])
+set_field_default_config(AMP, "custom_black_list", [])
+set_field_default_config(AMP, "custom_black_varnames", [])
 set_field_default_config(AMP, "use_pure_fp16", False)
 set_field_default_config(AMP, "use_fp16_guard", True)
 set_field_default_config(AMP, "use_optimizer_fp16", False)
@@ -71,16 +78,40 @@ def set_field_default_config(category, field, default_value):
 # sharding configuration
 #########################################
 SHARDING = "sharding"
-set_field_default_config(SHARDING, "enabled", False)
+set_field_default_config(SHARDING, "enable", False)
 set_field_default_config(SHARDING, "stage", 1)
 set_field_default_config(SHARDING, "sharding_degree", 8)
 set_field_default_config(SHARDING, "segment_broadcast_MB", 32.0)
+set_field_default_config(SHARDING, "enable_tuning", False)
+set_field_default_config(SHARDING, "tuning_range", [])
 
 #########################################
 # gradient merge configuration
 #########################################
 GRADIENT_MERGE = "gradient_merge"
-set_field_default_config(GRADIENT_MERGE, "enabled", False)
+set_field_default_config(GRADIENT_MERGE, "enable", False)
 set_field_default_config(GRADIENT_MERGE, "k_steps", 1)
 set_field_default_config(GRADIENT_MERGE, "avg", True)
 
+#########################################
+# quantization configuration
+#########################################
+QAT = "qat"
+set_field_default_config(QAT, "enable", False)
+set_field_default_config(QAT, "channel_wise_abs_max", True)
+set_field_default_config(QAT, "weight_bits", 8)
+set_field_default_config(QAT, "activation_bits", 8)
+set_field_default_config(QAT, "not_quant_pattern", ['skip_quant'])
+set_field_default_config(QAT, "algo", None)
+
+# #########################################
+# auto tuning configuration
+# #########################################
+TUNING = "tuning"
+set_field_default_config(TUNING, "enable", False)
+set_field_default_config(TUNING, "batch_size", 1)
+set_field_default_config(TUNING, "dataset", None)
+set_field_default_config(TUNING, "profile_start_step", 1)
+set_field_default_config(TUNING, "profile_end_step", 1)
+set_field_default_config(TUNING, "run_after_tuning", True)
+set_field_default_config(TUNING, "verbose", True)
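For readers skimming this file: `set_field_default_config`, whose definition sits just above these hunks, registers a per-category default for each strategy field, and the new strategy implementation reads that table back when building config objects. A minimal sketch of the registry pattern, under the assumption of a module-level dict (the actual implementation may differ):

    # Sketch (assumption) of the default-config registry that constants.py
    # populates: one nested mapping of category -> field -> default value.
    from collections import defaultdict

    _g_default_config = defaultdict(dict)

    def set_field_default_config(category, field, default_value):
        _g_default_config[category][field] = default_value

    def get_category_default_config(category):
        # Hand back a copy so callers cannot mutate the registered defaults.
        return dict(_g_default_config[category])

    set_field_default_config("amp", "enable", False)
    assert get_category_default_config("amp") == {"enable": False}

Note that every category renames its switch from `enabled` to `enable`, and the AMP custom lists now default to empty lists instead of None; user configs written against the old field names need the same rename.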
5 changes: 1 addition & 4 deletions python/paddle/distributed/auto_parallel/dist_context.py
@@ -120,15 +120,12 @@ def __init__(self,
         self._backup_serial_main_program_stack = []
         self._backup_serial_startup_program_stack = []
 
-        # flag whether scale gradient with dp size
+        # # flag whether scale gradient with dp size
         self._gradient_scale = True
 
         # A flag indicates whether the used parallelism is data parallel
         self._data_parallel = False
 
-        # flag whether using `to_static`
-        self._dygraph_mode = False
-
     @property
     def serial_main_program(self):
         return self._serial_main_program
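The `_gradient_scale` flag retained here controls whether data-parallel gradients, which arrive as an allreduce sum across ranks, are divided by the data-parallel degree so that ranks effectively average them. A toy illustration of that semantics (not code from this diff):

    # Toy illustration (assumption) of "scale gradient with dp size": when the
    # flag is on, the allreduce-summed gradient is divided by the number of
    # data-parallel ranks, turning the sum into an average.
    def scale_gradient(grad_sum, dp_degree, gradient_scale=True):
        return grad_sum / dp_degree if gradient_scale else grad_sum

    # Two ranks each contribute a gradient of 1.0; the averaged result is 1.0.
    assert scale_gradient(2.0, dp_degree=2) == 1.0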