From 9c3a35b9a92919a06c25eaf30d21872e80bba83d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:11:29 +0800 Subject: [PATCH 01/89] rm flags retain grad in pybind (#49888) * rm flags_retain grad in pybind * retain grads for xpu test * set retain grad for xpu * rm flag * lint --------- Co-authored-by: wanghuancoder --- .../eager_manual/forwards/add_n_fwd_func.cc | 1 - .../forwards/conv2d_fwd_function.cc | 1 - .../manual/eager_manual/nodes/conv2d_nodes.cc | 2 - .../forwards/fused_attention_fwd_func.cc | 14 ------ ...as_dropout_residual_layer_norm_fwd_func.cc | 1 - .../forwards/fused_feedforward_fwd_func.cc | 1 - .../forwards/fused_gate_attention_fwd_func.cc | 1 - .../forwards/fused_gemm_epilogue_fwd_func.cc | 1 - .../auto_code_generator/eager_generator.cc | 18 ------- .../generator/eager_gen.py | 14 +----- .../custom_operator/custom_operator_node.cc | 1 - .../eager/to_static/run_program_op_func.h | 1 - paddle/fluid/eager/utils.cc | 33 ------------ paddle/fluid/eager/utils.h | 8 --- paddle/fluid/pybind/eager_functions.cc | 1 - paddle/fluid/pybind/eager_py_layer.cc | 2 - .../paddle/fluid/tests/unittests/test_flip.py | 2 - .../unittests/xpu/test_zero_dim_tensor_xpu.py | 50 ++++++++++++++++++- 18 files changed, 51 insertions(+), 101 deletions(-) diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc index a6bc082715a39..a6f1b99e1f022 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc @@ -99,7 +99,6 @@ paddle::experimental::Tensor add_n_ad_func( egr::EagerUtils::SetHistory(out_autograd_meta, grad_node); } grad_node->SetGradInMeta(out, 0); - egr::EagerUtils::CheckAndRetainGrad(out); // Set TensorWrappers for Forward Outputs if needed } diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc index 52a2bd12f0a05..df5feab911f4f 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc @@ -162,7 +162,6 @@ paddle::experimental::Tensor conv2d_ad_func( egr::EagerUtils::SetHistory(out_autograd_meta, grad_node); } grad_node->SetGradInMeta(out, 0); - egr::EagerUtils::CheckAndRetainGrad(out); // Set TensorWrappers for Forward Outputs if needed } diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc index 647f6768bc6b1..8ba19d99cf458 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc @@ -159,8 +159,6 @@ Conv2dGradNodeFinal::operator()( } grad_node->SetGradInMeta(grad_input, 0); grad_node->SetGradInMeta(grad_filter, 1); - egr::EagerUtils::CheckAndRetainGrad(grad_input); - egr::EagerUtils::CheckAndRetainGrad(grad_filter); // Set TensorWrappers for Forward Outputs if needed } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc index a84c53e33a106..db1d6c1d409d7 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc +++ 
b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_attention_fwd_func.cc @@ -432,7 +432,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_QKVBiasOut, QKVBiasOut_accumulation_node); QKVBiasOut_accumulation_node->SetGradInMeta(QKVBiasOut, 0); - egr::EagerUtils::CheckAndRetainGrad(QKVBiasOut); grad_node->SetGradOutMeta(QKVBiasOut, 11); } @@ -446,7 +445,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_SrcMaskOut, SrcMaskOut_accumulation_node); SrcMaskOut_accumulation_node->SetGradInMeta(SrcMaskOut, 0); - egr::EagerUtils::CheckAndRetainGrad(SrcMaskOut); grad_node->SetGradOutMeta(SrcMaskOut, 12); } @@ -473,7 +471,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_LnOut, LnOut_accumulation_node); LnOut_accumulation_node->SetGradInMeta(LnOut, 0); - egr::EagerUtils::CheckAndRetainGrad(LnOut); grad_node->SetGradOutMeta(LnOut, 13); } if (LnMean.initialized()) { @@ -505,7 +502,6 @@ fused_attention_dygraph_function( BiasDropoutResidualOut_accumulation_node); BiasDropoutResidualOut_accumulation_node->SetGradInMeta( BiasDropoutResidualOut, 0); - egr::EagerUtils::CheckAndRetainGrad(BiasDropoutResidualOut); grad_node->SetGradOutMeta(BiasDropoutResidualOut, 14); } @@ -524,17 +520,14 @@ fused_attention_dygraph_function( egr::EagerUtils::SetOutRankWithSlot(p_autograd_CacheKVOut, 18); egr::EagerUtils::SetHistory(p_autograd_CacheKVOut, grad_node); grad_node->SetGradInMeta(CacheKVOut, 18); - egr::EagerUtils::CheckAndRetainGrad(CacheKVOut); egr::EagerUtils::SetOutRankWithSlot(p_autograd_Y, 19); egr::EagerUtils::SetHistory(p_autograd_Y, grad_node); grad_node->SetGradInMeta(Y, 19); - egr::EagerUtils::CheckAndRetainGrad(Y); auto QKVOut_accumulation_node = std::make_shared(p_autograd_QKVOut); egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKVOut, 0); egr::EagerUtils::SetHistory(p_autograd_QKVOut, QKVOut_accumulation_node); QKVOut_accumulation_node->SetGradInMeta(QKVOut, 0); - egr::EagerUtils::CheckAndRetainGrad(QKVOut); grad_node->SetGradOutMeta(QKVOut, 15); auto QKTVOut_accumulation_node = @@ -543,7 +536,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_QKTVOut, QKTVOut_accumulation_node); QKTVOut_accumulation_node->SetGradInMeta(QKTVOut, 0); - egr::EagerUtils::CheckAndRetainGrad(QKTVOut); grad_node->SetGradOutMeta(QKTVOut, 16); auto TransposeOut2_accumulation_node = @@ -552,7 +544,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_TransposeOut2, TransposeOut2_accumulation_node); TransposeOut2_accumulation_node->SetGradInMeta(TransposeOut2, 0); - egr::EagerUtils::CheckAndRetainGrad(TransposeOut2); grad_node->SetGradOutMeta(TransposeOut2, 17); auto QKOut_accumulation_node = @@ -560,7 +551,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetOutRankWithSlot(p_autograd_QKOut, 0); egr::EagerUtils::SetHistory(p_autograd_QKOut, QKOut_accumulation_node); QKOut_accumulation_node->SetGradInMeta(QKOut, 0); - egr::EagerUtils::CheckAndRetainGrad(QKOut); grad_node->SetGradOutMeta(QKOut, 18); auto SoftmaxOut_accumulation_node = @@ -569,7 +559,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_SoftmaxOut, SoftmaxOut_accumulation_node); SoftmaxOut_accumulation_node->SetGradInMeta(SoftmaxOut, 0); - egr::EagerUtils::CheckAndRetainGrad(SoftmaxOut); grad_node->SetGradOutMeta(SoftmaxOut, 19); if (AttnDropoutOut.initialized()) { @@ -580,7 +569,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_AttnDropoutOut, 
AttnDropoutOut_accumulation_node); AttnDropoutOut_accumulation_node->SetGradInMeta(AttnDropoutOut, 0); - egr::EagerUtils::CheckAndRetainGrad(AttnDropoutOut); grad_node->SetGradOutMeta(AttnDropoutOut, 20); } @@ -590,7 +578,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_FMHAOut, FMHAOut_accumulation_node); FMHAOut_accumulation_node->SetGradInMeta(FMHAOut, 0); - egr::EagerUtils::CheckAndRetainGrad(FMHAOut); grad_node->SetGradOutMeta(FMHAOut, 21); auto OutLinearOut_accumulation_node = @@ -599,7 +586,6 @@ fused_attention_dygraph_function( egr::EagerUtils::SetHistory(p_autograd_OutLinearOut, OutLinearOut_accumulation_node); OutLinearOut_accumulation_node->SetGradInMeta(OutLinearOut, 0); - egr::EagerUtils::CheckAndRetainGrad(OutLinearOut); grad_node->SetGradOutMeta(OutLinearOut, 22); } } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc index 2e87d0b740cc7..2544ad7b6e2da 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_bias_dropout_residual_layer_norm_fwd_func.cc @@ -221,7 +221,6 @@ fused_bias_dropout_residual_layer_norm_dygraph_function( egr::EagerUtils::SetOutRankWithSlot(p_autograd_Y, 4); egr::EagerUtils::SetHistory(p_autograd_Y, grad_node); grad_node->SetGradInMeta(Y, 4); - egr::EagerUtils::CheckAndRetainGrad(Y); } } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc index 5b66eea7abb62..dce620fd32a4a 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_feedforward_fwd_func.cc @@ -363,7 +363,6 @@ fused_feedforward_dygraph_function( egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out, 0); egr::EagerUtils::SetHistory(p_autograd_Out, grad_node); grad_node->SetGradInMeta(Out, 0); - egr::EagerUtils::CheckAndRetainGrad(Out); egr::EagerUtils::SetOutRankWithSlot(p_autograd_Dropout1Mask, 1); grad_node->SetGradInMeta(Dropout1Mask, 1); egr::EagerUtils::SetOutRankWithSlot(p_autograd_Dropout2Mask, 2); diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc index 641d62a58e864..1ad201a8f81ac 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gate_attention_fwd_func.cc @@ -372,7 +372,6 @@ fused_gate_attention_dygraph_function( egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out, 7); egr::EagerUtils::SetHistory(p_autograd_Out, grad_node); grad_node->SetGradInMeta(Out, 7); - egr::EagerUtils::CheckAndRetainGrad(Out); } } diff --git a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc index 8d9f6d769a37c..72dccb3bb0d15 100644 --- a/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc +++ b/paddle/fluid/eager/api/manual/fluid_manual/forwards/fused_gemm_epilogue_fwd_func.cc @@ -120,7 +120,6 @@ paddle::experimental::Tensor fused_gemm_epilogue_dygraph_function( 
egr::EagerUtils::SetOutRankWithSlot(p_autograd_Out, 0); egr::EagerUtils::SetHistory(p_autograd_Out, grad_node); grad_node->SetGradInMeta(Out, 0); - egr::EagerUtils::CheckAndRetainGrad(Out); } } diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 5305f4f984f62..5915494ebc3cd 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1305,15 +1305,6 @@ static std::string GenerateGradNodeCreationContent( paddle::string::Sprintf(SET_GRAD_IN_META_TEMPLATE, LegalizeVarName(inplace_input_name), output_position); - - // Intermediate Tensor does not require CheckAndRetainGrad - if (!output.intermediate()) { - VLOG(6) << "Generated Call RetainGradForTensor"; - const char* RETAIN_GRAD_TEMPLATE = - " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; - grad_node_creation_str += paddle::string::Sprintf( - RETAIN_GRAD_TEMPLATE, LegalizeVarName(inplace_input_name)); - } } else { const std::string& output_autograd_name = "p_autograd_" + LegalizeVarName(output_name); @@ -1363,15 +1354,6 @@ static std::string GenerateGradNodeCreationContent( LegalizeVarName(output_name), output_position); } - - // Intermediate Tensor does not require CheckAndRetainGrad - if (!output.intermediate()) { - VLOG(6) << "Generated Call RetainGradForTensor"; - const char* RETAIN_GRAD_TEMPLATE = - " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; - grad_node_creation_str += paddle::string::Sprintf( - RETAIN_GRAD_TEMPLATE, LegalizeVarName(output_name)); - } } } VLOG(6) << "Generated SetGradIn/OutMeta"; diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 499eb42ea5ca3..650bf0626f1ad 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -280,8 +280,7 @@ class {} : public egr::GradNodeBase {{ {} // SetGradOutMeta & SetEdges {} - // SetOutRank & SetHistory & SetGradInMeta & RetainGrad -{} + // SetOutRank & SetHistory & SetGradInMeta {} {} {} @@ -300,8 +299,7 @@ class {} : public egr::GradNodeBase {{ {} // SetGradOutMeta & SetEdges {} - // SetOutRank & SetHistory & SetGradInMeta & RetainGrad -{} + // SetOutRank & SetHistory & SetGradInMeta {} {} {} @@ -987,7 +985,6 @@ def GenerateNodeCreationCodes(self, for_backward=False): set_out_rank_list = [] set_history_list = [] set_grad_in_meta_list = [] - set_retain_grad_list = [] num_outputs = len(forward_outputs_position_map.keys()) for name, (_, pos) in forward_outputs_position_map.items(): output_autograd_meta_name = GetAutoGradMetaName(name) @@ -1002,19 +999,14 @@ def GenerateNodeCreationCodes(self, for_backward=False): set_grad_in_meta = ( f"{indent}grad_node->SetGradInMeta({name}, {pos});" ) - set_retain_grad = ( - f"{indent}egr::EagerUtils::CheckAndRetainGrad({name});" - ) set_out_rank_list.append(set_out_rank) set_history_list.append(set_history) set_grad_in_meta_list.append(set_grad_in_meta) - set_retain_grad_list.append(set_retain_grad) set_out_rank_str = "\n".join(set_out_rank_list) set_history_str = "\n".join(set_history_list) set_grad_in_meta_str = "\n".join(set_grad_in_meta_list) - set_retain_grad_str = "\n".join(set_retain_grad_list) node_event_name = forward_api_name + " node_creation" node_creation_event_str = f"{indent}paddle::platform::RecordEvent node_creation_record_event(\"{node_event_name}\", 
paddle::platform::TracerEventType::OperatorInner, 1);\n" @@ -1029,7 +1021,6 @@ def GenerateNodeCreationCodes(self, for_backward=False): set_out_rank_str, set_history_str, set_grad_in_meta_str, - set_retain_grad_str, set_output_tensor_wrappers_str, ) else: @@ -1043,7 +1034,6 @@ def GenerateNodeCreationCodes(self, for_backward=False): set_out_rank_str, set_history_str, set_grad_in_meta_str, - set_retain_grad_str, set_output_tensor_wrappers_str, ) ) diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index 57932ec4c1e69..f70b402b566dd 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -310,7 +310,6 @@ RunCustomOpNode::operator()( egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i); egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node); grad_node->SetGradInMeta(out_tensors, i); - egr::EagerUtils::CheckAndRetainGrad(out_tensors); } // Prepare Grad inputs with fwd outputs diff --git a/paddle/fluid/eager/to_static/run_program_op_func.h b/paddle/fluid/eager/to_static/run_program_op_func.h index e58c9bd0c4e07..7305e79cd73fb 100644 --- a/paddle/fluid/eager/to_static/run_program_op_func.h +++ b/paddle/fluid/eager/to_static/run_program_op_func.h @@ -122,6 +122,5 @@ inline void run_program_ad_func( // Set History for output set current Grad Node for egr::EagerUtils::SetHistory(&p_autograd_outs, grad_node); - egr::EagerUtils::CheckAndRetainGrad(deref_out); } } diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 777929bbc7536..4b992f5acaaa7 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -27,10 +27,6 @@ #include "paddle/fluid/framework/phi_utils.h" #include "paddle/fluid/framework/variable.h" -PADDLE_DEFINE_EXPORTED_bool(retain_grad_for_all_tensor, - false, - "retain grad for all tensor"); - namespace egr { /** * Implementation of Eager Utils. @@ -409,35 +405,6 @@ std::vector EagerUtils::RecoverTensorWrapper( } return ret; } -// TODO(jiabin): remove all this when we fix all test using tmp grad -void EagerUtils::CheckAndRetainGrad( - const paddle::experimental::Tensor& tensor) { - VLOG(6) << "Check RetainGradForTensor: " << tensor.name(); - if (FLAGS_retain_grad_for_all_tensor) { - VLOG(6) << "RetainGradForTensor: " << tensor.name(); - egr::egr_utils_api::RetainGradForTensor(tensor); - } -} - -void EagerUtils::CheckAndRetainGrad( - const std::vector& tensors) { - if (FLAGS_retain_grad_for_all_tensor) { - for (auto& tensor : tensors) { - VLOG(6) << "RetainGradForTensor: " << tensor.name(); - egr::egr_utils_api::RetainGradForTensor(tensor); - } - } -} - -void EagerUtils::CheckAndRetainGrad( - const std::vector& tensors) { - if (FLAGS_retain_grad_for_all_tensor) { - for (auto& tensor : tensors) { - VLOG(6) << "RetainGradForTensor: " << tensor->name(); - egr::egr_utils_api::RetainGradForTensor(*tensor); - } - } -} std::shared_ptr EagerUtils::GetGradAccumulationNode( const paddle::experimental::Tensor& tensor) { diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index 339f7af80364b..a726528f53d05 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -223,14 +223,6 @@ class EagerUtils { const std::vector& out_var, std::vector* result); - // end Intermidate needed. 
- - static void CheckAndRetainGrad(const paddle::experimental::Tensor& tensor); - static void CheckAndRetainGrad( - const std::vector& tensors); - static void CheckAndRetainGrad( - const std::vector& tensors); - static std::shared_ptr GetGradAccumulationNode( const paddle::experimental::Tensor& tensor); diff --git a/paddle/fluid/pybind/eager_functions.cc b/paddle/fluid/pybind/eager_functions.cc index 2874c7b90f437..cc5a8d64e1234 100644 --- a/paddle/fluid/pybind/eager_functions.cc +++ b/paddle/fluid/pybind/eager_functions.cc @@ -575,7 +575,6 @@ static PyObject* eager_api_run_custom_op(PyObject* self, egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i); egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node); grad_node->SetGradInMeta(out_tensors, i); - egr::EagerUtils::CheckAndRetainGrad(out_tensors); } // Prepare Grad inputs with fwd outputs diff --git a/paddle/fluid/pybind/eager_py_layer.cc b/paddle/fluid/pybind/eager_py_layer.cc index de3cf80cef6cc..89bad2bfc924d 100644 --- a/paddle/fluid/pybind/eager_py_layer.cc +++ b/paddle/fluid/pybind/eager_py_layer.cc @@ -432,12 +432,10 @@ PyObject* pylayer_method_apply(PyObject* cls, for (auto t : outputs_tensor[i]) { grad_node->SetGradInMeta(*t, i); } - egr::EagerUtils::CheckAndRetainGrad(outputs_tensor[i]); } else { egr::EagerUtils::SetOutRankWithSlot(outputs_autograd_meta[i][0], i); egr::EagerUtils::SetHistory(outputs_autograd_meta[i][0], grad_node); grad_node->SetGradInMeta(*outputs_tensor[i][0], i); - egr::EagerUtils::CheckAndRetainGrad(*outputs_tensor[i][0]); } } VLOG(6) << "PyLayer construct backward node finish..."; diff --git a/python/paddle/fluid/tests/unittests/test_flip.py b/python/paddle/fluid/tests/unittests/test_flip.py index 1807199821eb7..4f095493f007b 100644 --- a/python/paddle/fluid/tests/unittests/test_flip.py +++ b/python/paddle/fluid/tests/unittests/test_flip.py @@ -152,7 +152,6 @@ def func(self, place): gradient_checker.double_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.double_grad_check_for_dygraph( self.flip_wrapper, [data], out, x_init=[data_arr], place=place ) @@ -184,7 +183,6 @@ def func(self, place): gradient_checker.triple_grad_check( [data], out, x_init=[data_arr], place=place, eps=eps ) - fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) gradient_checker.triple_grad_check_for_dygraph( self.flip_wrapper, [data], out, x_init=[data_arr], place=place ) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index 43b44a07dcbb8..c0597d0ad53ea 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -20,7 +20,6 @@ import paddle.nn.functional as F paddle.set_device('xpu') -paddle.set_flags({"FLAGS_retain_grad_for_all_tensor": True}) unary_api_list = [ paddle.nn.functional.elu, @@ -102,6 +101,7 @@ def test_dygraph_unary(self): x = paddle.rand([]) x.stop_gradient = False out = api(x) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) @@ -147,6 +147,7 @@ def test_dygraph_reduce(self): x = paddle.rand([]) x.stop_gradient = False out = api(x, None) + out.retain_grads() out.backward() @@ -201,12 +202,15 @@ def test_dygraph_binary(self): y = paddle.rand([]) x.stop_gradient = False y.stop_gradient = False + x.retain_grads() + y.retain_grads() if isinstance(api, dict): out = api['func'](x, 
y) out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y) np.testing.assert_array_equal(out_cls.numpy(), out.numpy()) else: out = api(x, y) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) @@ -228,6 +232,7 @@ def test_dygraph_binary(self): np.testing.assert_array_equal(out_cls.numpy(), out.numpy()) else: out = api(x, y) + out.retain_grads() out.backward() self.assertEqual(x.shape, [2, 3, 4]) @@ -243,12 +248,15 @@ def test_dygraph_binary(self): y = paddle.rand([2, 3, 4]) x.stop_gradient = False y.stop_gradient = False + x.retain_grads() + y.retain_grads() if isinstance(api, dict): out = api['func'](x, y) out_cls = getattr(paddle.Tensor, api['cls_method'])(x, y) np.testing.assert_array_equal(out_cls.numpy(), out.numpy()) else: out = api(x, y) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) @@ -265,6 +273,7 @@ def test_dygraph_binary(self): y = 0.5 if isinstance(api, dict): out = getattr(paddle.Tensor, api['cls_method'])(x, y) + out.retain_grads() out.backward() self.assertEqual(x.shape, []) @@ -381,7 +390,9 @@ def test_shape(self): def test_pow_factor(self): x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = paddle.pow(x, 2.0) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -391,7 +402,9 @@ def test_pow_factor(self): def test_cast(self): x = paddle.full([], 1.0, 'float32') x.stop_gradient = False + x.retain_grads() out = paddle.cast(x, 'int32') + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -401,7 +414,9 @@ def test_cast(self): def test_clip(self): x = paddle.uniform([], None, -10, 10) x.stop_gradient = False + x.retain_grads() out = paddle.clip(x, -5, 5) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -446,6 +461,7 @@ def test_transpose(self): x = paddle.rand([]) x.stop_gradient = False out = paddle.transpose(x, []) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -461,6 +477,7 @@ def test_moveaxis(self): x = paddle.rand([]) x.stop_gradient = False out = paddle.moveaxis(x, [], []) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -476,6 +493,7 @@ def test_gather_1D(self): x = paddle.to_tensor([1.0, 3.0, 5.0, 7.0, 9.0], stop_gradient=False) index = paddle.full([], 2, 'int64') out = paddle.gather(x, index) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -489,6 +507,7 @@ def test_gather_xD_axis_0(self): ) index = paddle.full([], 1, 'int64') out = paddle.gather(x, index) + out.retain_grads() out.backward() self.assertEqual(out.shape, [3]) @@ -541,10 +560,18 @@ def test_diagflat(self): x2.stop_gradient = False x3.stop_gradient = False + x1.retain_grads() + x2.retain_grads() + x3.retain_grads() + out1 = paddle.diagflat(x1, 1) out2 = paddle.diagflat(x2, -1) out3 = paddle.diagflat(x3, 0) + out1.retain_grads() + out2.retain_grads() + out3.retain_grads() + out1.backward() out2.backward() out3.backward() @@ -592,7 +619,9 @@ def test_flatten(self): def test_scale(self): x = paddle.rand([]) x.stop_gradient = False + x.retain_grads() out = paddle.scale(x, scale=2.0, bias=1.0) + out.retain_grads() out.backward() self.assertEqual(out.shape, []) @@ -674,24 +703,28 @@ def test_reshape_list(self): x.stop_gradient = False out = paddle.reshape(x, []) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, []) self.assertEqual(out.grad.shape, []) out = paddle.reshape(x, [1]) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, [1]) 
self.assertEqual(out.grad.shape, [1]) out = paddle.reshape(x, [-1]) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, [1]) self.assertEqual(out.grad.shape, [1]) out = paddle.reshape(x, [-1, 1]) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, []) self.assertEqual(out.shape, [1, 1]) @@ -702,6 +735,7 @@ def test_reshape_tensor(self): x.stop_gradient = False out = paddle.reshape(x, []) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, []) @@ -709,6 +743,7 @@ def test_reshape_tensor(self): new_shape = paddle.to_tensor([1, 1, 1], "int32") out = paddle.reshape(x, new_shape) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, [1, 1, 1]) @@ -716,6 +751,7 @@ def test_reshape_tensor(self): new_shape = paddle.to_tensor([-1], "int32") out = paddle.reshape(x, new_shape) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, [1]) @@ -723,6 +759,7 @@ def test_reshape_tensor(self): new_shape = [paddle.full([], -1, "int32"), paddle.full([], 1, "int32")] out = paddle.reshape(x, new_shape) + out.retain_grads() out.backward() self.assertEqual(x.grad.shape, [1, 1]) self.assertEqual(out.shape, [1, 1]) @@ -765,9 +802,15 @@ def test_sort(self): x1.stop_gradient = False x2.stop_gradient = False + x1.retain_grads() + x2.retain_grads() + out1 = paddle.sort(x1, axis=-1) out2 = paddle.sort(x2, axis=0) + out1.retain_grads() + out2.retain_grads() + out1.backward() out2.backward() @@ -787,10 +830,15 @@ def test_argsort(self): x2 = paddle.rand([]) x1.stop_gradient = False x2.stop_gradient = False + x1.retain_grads() + x2.retain_grads() out1 = paddle.argsort(x1, axis=-1) out2 = paddle.argsort(x2, axis=0) + out1.retain_grads() + out2.retain_grads() + out1.backward() out2.backward() From db83b53ae14f44654e74350f35a34a35746c1557 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:12:14 +0800 Subject: [PATCH 02/89] update erf gumbel_softmax ..ops (#50077) * update erf gumbel_softmax ..ops * lint * reset sequence_conv * reset exponetial&interp nearest --- python/paddle/fluid/tests/unittests/test_erf_op.py | 3 ++- .../fluid/tests/unittests/test_expand_v2_op.py | 11 ++++++++--- .../tests/unittests/test_gumbel_softmax_op.py | 6 +++++- .../fluid/tests/unittests/test_is_empty_op.py | 4 +++- .../fluid/tests/unittests/test_multiplex_op.py | 3 ++- .../tests/unittests/test_transfer_layout_op.py | 7 ++++++- .../fluid/tests/unittests/test_transpose_op.py | 14 ++++++++------ 7 files changed, 34 insertions(+), 14 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_erf_op.py b/python/paddle/fluid/tests/unittests/test_erf_op.py index 06b7f55069fb9..db5c48151c505 100644 --- a/python/paddle/fluid/tests/unittests/test_erf_op.py +++ b/python/paddle/fluid/tests/unittests/test_erf_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest from scipy.special import erf import paddle @@ -26,6 +26,7 @@ class TestErfOp(OpTest): def setUp(self): self.op_type = "erf" + self.python_api = paddle.erf self.dtype = self._init_dtype() self.x_shape = [11, 17] x = np.random.uniform(-1, 1, size=self.x_shape).astype(self.dtype) diff --git a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py index 
0a5eda417e95c..0565be630a942 100644 --- a/python/paddle/fluid/tests/unittests/test_expand_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_expand_v2_op.py @@ -17,7 +17,7 @@ import gradient_checker import numpy as np from decorator_helper import prog_scope -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -42,10 +42,10 @@ def init_data(self): self.expand_times = [1] def test_check_output(self): - self.check_output(check_eager=True) + self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out') class TestExpandV2OpRank2_DimExpanding(TestExpandV2OpRank1): @@ -80,6 +80,7 @@ def init_data(self): class TestExpandV2OpRank1_tensor_attr(OpTest): def setUp(self): self.op_type = "expand_v2" + self.python_api = paddle.expand self.init_data() expand_shapes_tensor = [] for index, ele in enumerate(self.expand_shape): @@ -120,6 +121,7 @@ def init_data(self): class TestExpandV2OpRank1_tensor(OpTest): def setUp(self): self.op_type = "expand_v2" + self.python_api = paddle.expand self.init_data() self.inputs = { @@ -146,6 +148,7 @@ def test_check_grad(self): class TestExpandV2OpInteger(OpTest): def setUp(self): self.op_type = "expand_v2" + self.python_api = paddle.expand self.inputs = { 'X': np.random.randint(10, size=(2, 4, 5)).astype("int32") } @@ -161,6 +164,7 @@ def test_check_output(self): class TestExpandV2OpBoolean(OpTest): def setUp(self): self.op_type = "expand_v2" + self.python_api = paddle.expand self.inputs = {'X': np.random.randint(2, size=(2, 4, 5)).astype("bool")} self.attrs = {'shape': [2, 4, 5]} output = np.tile(self.inputs['X'], (1, 1, 1)) @@ -174,6 +178,7 @@ def test_check_output(self): class TestExpandV2OpInt64_t(OpTest): def setUp(self): self.op_type = "expand_v2" + self.python_api = paddle.expand self.inputs = { 'X': np.random.randint(10, size=(2, 4, 5)).astype("int64") } diff --git a/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py b/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py index 60b4cd5783347..30e4d7943ff24 100644 --- a/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_gumbel_softmax_op.py @@ -13,10 +13,11 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid +import paddle.nn.functional as F paddle.enable_static() @@ -36,6 +37,7 @@ def verify_output(self, outs): def setUp(self): self.op_type = "gumbel_softmax" + self.python_api = F.gumbel_softmax self.init_attrs() np.random.seed(0) x = np.random.uniform(0.1, 1, self.shape).astype(self.dtype) @@ -53,6 +55,7 @@ def test_check_grad(self): class TestGumbelSoftmax_ZeroDim(OpTest): def setUp(self): self.op_type = "gumbel_softmax" + self.python_api = F.gumbel_softmax self.dtype = "float64" x = np.random.uniform(0.1, 1, []).astype(self.dtype) out = np.array(1.0).astype(self.dtype) @@ -123,6 +126,7 @@ def accumulate_output(self, outs): def setUp(self): self.op_type = "gumbel_softmax" + self.python_api = F.gumbel_softmax self.init_attrs() single_x = np.array([0.2, 0.3, 0.5]) batch_x = np.ones(self.shape) * single_x diff --git a/python/paddle/fluid/tests/unittests/test_is_empty_op.py b/python/paddle/fluid/tests/unittests/test_is_empty_op.py index 6cf410eaede84..f771c33cb67e6 100644 --- a/python/paddle/fluid/tests/unittests/test_is_empty_op.py +++ b/python/paddle/fluid/tests/unittests/test_is_empty_op.py @@ -15,7 +15,7 @@ import unittest 
import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle @@ -23,6 +23,7 @@ class TestEmpty(OpTest): def setUp(self): self.op_type = "is_empty" + self.python_api = paddle.is_empty self.inputs = {'X': np.array([1, 2, 3])} self.outputs = {'Out': np.array([False])} @@ -33,6 +34,7 @@ def test_check_output(self): class TestNotEmpty(TestEmpty): def setUp(self): self.op_type = "is_empty" + self.python_api = paddle.is_empty self.inputs = {'X': np.array([])} self.outputs = {'Out': np.array([True])} diff --git a/python/paddle/fluid/tests/unittests/test_multiplex_op.py b/python/paddle/fluid/tests/unittests/test_multiplex_op.py index 563a9fdb34b34..a0f8932ba23ab 100644 --- a/python/paddle/fluid/tests/unittests/test_multiplex_op.py +++ b/python/paddle/fluid/tests/unittests/test_multiplex_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -24,6 +24,7 @@ class TestMultiplexOp(OpTest): def setUp(self): self.op_type = "multiplex" + self.python_api = paddle.tensor.multiplex rows = 4 index = np.arange(0, rows).astype('int32') np.random.shuffle(index) diff --git a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py index 6c26a70694ac2..fd65ddfa48527 100644 --- a/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py +++ b/python/paddle/fluid/tests/unittests/test_transfer_layout_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -24,6 +24,10 @@ from paddle.fluid.layer_helper import LayerHelper +def transpose_layout(x, src_layout, dst_layout): + return x.transpose([0, 2, 3, 1]) + + # default kNCHW class TestTransferLayoutOpkNCHWTokNHWC(OpTest): def setUp(self): @@ -31,6 +35,7 @@ def setUp(self): self.inputs = {'X': ipt.astype('float32')} self.outputs = {'Out': ipt.transpose([0, 2, 3, 1])} self.attrs = {'src_layout': 0, 'dst_layout': 1} # kNHWC + self.python_api = transpose_layout self.op_type = 'transfer_layout' def test_check_output(self): diff --git a/python/paddle/fluid/tests/unittests/test_transpose_op.py b/python/paddle/fluid/tests/unittests/test_transpose_op.py index a2f922dcd8db4..a05b2a5c0a30f 100644 --- a/python/paddle/fluid/tests/unittests/test_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_transpose_op.py @@ -17,12 +17,12 @@ import gradient_checker import numpy as np from decorator_helper import prog_scope +from eager_op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid import paddle.fluid.core as core from paddle.fluid import Program, program_guard -from paddle.fluid.tests.unittests.op_test import OpTest, convert_float_to_uint16 paddle.enable_static() @@ -47,10 +47,10 @@ def init_op_type(self): self.use_mkldnn = False def test_check_output(self): - self.check_output(no_check_set=['XShape'], check_eager=True) + self.check_output(no_check_set=['XShape']) def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad(['X'], 'Out') def initTestCase(self): self.shape = (3, 40) @@ -150,11 +150,11 @@ def init_op_type(self): self.use_mkldnn = False def test_check_output(self): - self.check_output(no_check_set=['XShape'], check_eager=True) + self.check_output(no_check_set=['XShape']) fluid.core.disable_autotune() def test_check_grad(self): - self.check_grad(['X'], 'Out', 
check_eager=True) + self.check_grad(['X'], 'Out') class TestTransposeBF16Op(OpTest): @@ -162,6 +162,7 @@ def setUp(self): self.init_op_type() self.initTestCase() self.dtype = np.uint16 + self.python_api = paddle.transpose x = np.random.random(self.shape).astype("float32") self.inputs = {'X': convert_float_to_uint16(x)} @@ -580,7 +581,8 @@ def test_dygraph(self): x = paddle.rand([]) x.stop_gradient = False out = paddle.transpose(x, []) - out.retain_grads() + if hasattr(out, 'retain_grads'): + out.retain_grads() out.backward() self.assertEqual(out.shape, []) From a8078bbd7e00733cd8bfd7b553c78288f3469a26 Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:31:06 +0800 Subject: [PATCH 03/89] add multi fetch (#50070) --- .../distributed/fleet_executor/carrier.cc | 25 ++++-- .../distributed/fleet_executor/carrier.h | 4 +- .../fleet_executor/fleet_executor.cc | 14 +++- .../fleet_executor/fleet_executor.h | 7 +- python/paddle/fluid/executor.py | 24 ++++++ .../test_fleet_executor_cond_interceptor.py | 79 ++++++++++++++----- 6 files changed, 120 insertions(+), 33 deletions(-) diff --git a/paddle/fluid/distributed/fleet_executor/carrier.cc b/paddle/fluid/distributed/fleet_executor/carrier.cc index 094afff577a9e..2b75c3ba066ec 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.cc +++ b/paddle/fluid/distributed/fleet_executor/carrier.cc @@ -15,6 +15,7 @@ #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include +#include #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/interceptor.h" @@ -24,6 +25,7 @@ #include "paddle/fluid/framework/garbage_collector.h" #include "paddle/fluid/framework/program_desc.h" #include "paddle/fluid/framework/scope.h" +#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/framework/variable_helper.h" namespace paddle { @@ -55,23 +57,34 @@ void Carrier::Init( framework::Scope* scope, int64_t num_micro_batches, const platform::Place& place, - const std::vector& inference_root_scope_vars) { + const std::vector& inference_root_scope_vars, + const std::vector& micro_scope_list) { rank_ = rank; interceptor_id_to_rank_ = interceptor_id_to_rank; interceptor_id_to_node_ = interceptor_id_to_node; place_ = place; root_scope_ = scope; dev_ctx_ = platform::DeviceContextPool::Instance().Get(place_); + bool need_create_scope = micro_scope_list.empty(); PADDLE_ENFORCE_NOT_NULL( root_scope_, platform::errors::InvalidArgument("root_scope can not be nullptr")); - minibatch_scope_ = &root_scope_->NewScope(); - microbatch_scopes_.resize(num_micro_batches); - for (int i = 0; i < num_micro_batches; ++i) { - microbatch_scopes_[i] = &minibatch_scope_->NewScope(); - CopyParameters(i, program, inference_root_scope_vars); + + if (need_create_scope) { + minibatch_scope_ = &root_scope_->NewScope(); + microbatch_scopes_.resize(num_micro_batches); + for (int i = 0; i < num_micro_batches; ++i) { + microbatch_scopes_[i] = &minibatch_scope_->NewScope(); + CopyParameters(i, program, inference_root_scope_vars); + } + } else { + microbatch_scopes_ = micro_scope_list; + for (int i = 0; i < num_micro_batches; ++i) { + CopyParameters(i, program, inference_root_scope_vars); + } } + // Add source and sink interceptor id to rank interceptor_id_to_rank_.emplace(SOURCE_ID, rank); interceptor_id_to_rank_.emplace(SINK_ID, rank); diff --git a/paddle/fluid/distributed/fleet_executor/carrier.h b/paddle/fluid/distributed/fleet_executor/carrier.h index 
2523942e06223..8e7fad3e892d8 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.h +++ b/paddle/fluid/distributed/fleet_executor/carrier.h @@ -25,6 +25,7 @@ #include "paddle/fluid/distributed/fleet_executor/interceptor.h" #include "paddle/fluid/distributed/fleet_executor/interceptor_message.pb.h" #include "paddle/fluid/distributed/fleet_executor/task_loop_thread_pool.h" +#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/device_context.h" #include "paddle/fluid/platform/enforce.h" #include "paddle/fluid/platform/errors.h" @@ -60,7 +61,8 @@ class Carrier final { framework::Scope* scope, int64_t num_micro_batches, const platform::Place& place, - const std::vector& inference_root_scope_vars = {}); + const std::vector& inference_root_scope_vars = {}, + const std::vector& micro_scope_list = {}); void CopyParameters( int microbatch_id, diff --git a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc index 1f397a91746b9..88363696ede25 100644 --- a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc +++ b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc @@ -14,6 +14,7 @@ #include "paddle/fluid/distributed/fleet_executor/fleet_executor.h" #include +#include #include "paddle/fluid/distributed/fleet_executor/global.h" #include "paddle/fluid/distributed/fleet_executor/message_bus.h" @@ -24,6 +25,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/program_desc.h" +#include "paddle/fluid/framework/variable.h" namespace paddle { namespace distributed { @@ -59,7 +61,8 @@ void FleetExecutor::Init( int64_t num_micro_batches, const std::vector& task_nodes, const std::unordered_map& task_id_to_rank, - const std::vector& inference_root_scope_vars) { + const std::vector& inference_root_scope_vars, + const std::vector& micro_scope_list) { PADDLE_ENFORCE_GT(task_nodes.size(), 0, platform::errors::InvalidArgument( @@ -144,7 +147,8 @@ void FleetExecutor::Init( place, num_micro_batches, program_desc, - inference_root_scope_vars); + inference_root_scope_vars, + micro_scope_list); GlobalVal::Get()->Barrier(); } @@ -154,7 +158,8 @@ void FleetExecutor::InitCarrier( const platform::Place& place, int64_t num_micro_batches, const framework::ProgramDesc& program_desc, - const std::vector& inference_root_scope_vars) { + const std::vector& inference_root_scope_vars, + const std::vector& micro_scope_list) { carrier->Init(exe_desc_.cur_rank(), runtime_graph_->interceptor_id_to_rank(), runtime_graph_->interceptor_id_to_node(), @@ -162,7 +167,8 @@ void FleetExecutor::InitCarrier( scope, num_micro_batches, place, - inference_root_scope_vars); + inference_root_scope_vars, + micro_scope_list); } void FleetExecutor::InitMessageBus() { diff --git a/paddle/fluid/distributed/fleet_executor/fleet_executor.h b/paddle/fluid/distributed/fleet_executor/fleet_executor.h index f633dbbc3600f..e8123bea1e19f 100644 --- a/paddle/fluid/distributed/fleet_executor/fleet_executor.h +++ b/paddle/fluid/distributed/fleet_executor/fleet_executor.h @@ -18,6 +18,7 @@ #include "paddle/fluid/distributed/fleet_executor/carrier.h" #include "paddle/fluid/distributed/fleet_executor/fleet_executor_desc.pb.h" +#include "paddle/fluid/framework/variable.h" #include "paddle/fluid/platform/macros.h" #include "paddle/fluid/platform/place.h" @@ -45,7 +46,8 @@ class FleetExecutor final { int64_t num_micro_batches, const std::vector& task_nodes, const std::unordered_map& 
task_id_to_rank, - const std::vector& inference_root_scope_vars = {}); + const std::vector& inference_root_scope_vars = {}, + const std::vector& micro_scope_list = {}); void Run(const std::string& carrier_id); private: @@ -57,7 +59,8 @@ class FleetExecutor final { const platform::Place& place, int64_t num_micro_batches, const framework::ProgramDesc& program_desc, - const std::vector& inference_root_scope_vars = {}); + const std::vector& inference_root_scope_vars = {}, + const std::vector& micro_scope_list = {}); FleetExecutorDesc exe_desc_; std::shared_ptr runtime_graph_; std::unordered_set carrier_ids_; diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index 2822a87a02172..da9d12802434f 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -2464,6 +2464,7 @@ def _prepare_fleet_executor_carrier( program=None, scope=None, fleet_opt=None, + micro_scope_list=[], with_standalone_executor=False, ): num_micro_batches = ( @@ -2532,6 +2533,7 @@ def _prepare_fleet_executor_carrier( fleet_opt['task_id_to_rank'] = task_id_to_rank place = core.Place() place.set_place(self.place) + # NOTE: the last argument is used to force create some vars in root scope, # won't be used during train. self._fleet_executor.init( @@ -2543,6 +2545,7 @@ def _prepare_fleet_executor_carrier( tasks, task_id_to_rank, [], + micro_scope_list, ) def _run_using_fleet_executor( @@ -2624,11 +2627,20 @@ def _run_using_fleet_executor( ) fetch_task.set_program(fetch_program) + micro_scope_list = [] + if ( + "inference_generation" in fleet_opt + and fleet_opt["inference_generation"] + ): + for i in range(int(fleet_opt["num_micro_batches"])): + micro_scope_list.append(cached_scope.new_scope()) + self._prepare_fleet_executor_carrier( cache_key, program=cached_program, scope=cached_scope, fleet_opt=fleet_opt, + micro_scope_list=micro_scope_list, with_standalone_executor=with_standalone_executor, ) @@ -2653,6 +2665,18 @@ def _run_using_fleet_executor( self._fleet_executor.run(cache_key) + if "fetch_var" in fleet_opt: + # If we speed up the generation in evaluation, we need to generate + # multiple queries at the same time. Each query will in separate scope in order + # not mix up. It indicate that final result will in multiple scopes and need to + # fetch each. 
+ result_list = [] + for scope in micro_scope_list: + for var in fleet_opt["fetch_var"]: + tensor = core.get_variable_tensor(scope, var) + result_list.append(as_numpy(tensor)) + return result_list + if fetch_list: arr = cached_scope.find_var(fetch_var_name).get_fetch_list() tensors = arr._move_to_list() diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py index d3a57898a0dce..1ca8c869a96bd 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py @@ -14,6 +14,8 @@ import unittest +import numpy as np + import paddle import paddle.fluid.core as core from paddle.distributed.fleet.fleet_executor_utils import TaskNode @@ -21,13 +23,26 @@ paddle.enable_static() -def cond(i, ten): +def cond(i, ten, data): return i < ten -def body(i, ten): +def body(i, ten, data): i = i + 1 - return [i, ten] + data = data + 1 + return [i, ten, data] + + +num_micro_batches = 3 + + +def batch_generator_creator(): + def __reader__(): + for i in range(num_micro_batches): + data = np.full(shape=[1, 1], fill_value=i, dtype=np.float32) + yield data + + return __reader__ class TestFleetExecutor(unittest.TestCase): @@ -41,7 +56,16 @@ def test_cond_interceptor(self): ten = paddle.full( shape=[1], fill_value=10, dtype='int64' ) # loop length - i, ten = paddle.static.nn.while_loop(cond, body, [i, ten]) + data = paddle.static.data(name='x', shape=[1]) + + loader = paddle.fluid.io.DataLoader.from_generator( + feed_list=[data], capacity=num_micro_batches * 4, iterable=False + ) + loader.set_batch_generator( + batch_generator_creator(), paddle.CUDAPlace(0) + ) + + paddle.static.nn.while_loop(cond, body, [i, ten, data]) program_a = paddle.static.Program() program_b = paddle.static.Program() @@ -49,18 +73,27 @@ def test_cond_interceptor(self): for var_name in main_program.block(0).vars: if var_name != "_generated_var_0": var = main_program.block(0).var(var_name) - program_a.block(0).create_var( - name=var_name, - shape=var.shape, - dtype=var.dtype, - stop_gradient=var.stop_gradient, - ) - program_b.block(0).create_var( - name=var_name, - shape=var.shape, - dtype=var.dtype, - stop_gradient=var.stop_gradient, - ) + if ( + var_name == "create_py_reader_0" + or var_name == "double_buffer_0" + ): + program_a.block(0).create_var( + name=var_name, + persistable=var.persistable, + ) + else: + program_a.block(0).create_var( + name=var_name, + shape=var.shape, + dtype=var.dtype, + stop_gradient=var.stop_gradient, + ) + program_b.block(0).create_var( + name=var_name, + shape=var.shape, + dtype=var.dtype, + stop_gradient=var.stop_gradient, + ) for op in main_program.block(0).ops: if op.type != "while": @@ -89,7 +122,6 @@ def test_cond_interceptor(self): ) cond_var_name = "tmp_0" - num_micro_batches = 3 task_a = TaskNode( 0, @@ -159,12 +191,19 @@ def test_cond_interceptor(self): task_e.task_id(): 0, }, 'num_micro_batches': num_micro_batches, + 'inference_generation': True, + 'fetch_var': ['x'], }, } - place = paddle.fluid.CUDAPlace(0) - exe = paddle.fluid.Executor(place) - exe.run(main_program) + place = paddle.CUDAPlace(0) + exe = paddle.static.Executor(place) + loader.start() + res = exe.run(main_program) + ref_res = np.full([1], 10, dtype="float32") + for data in res: + np.testing.assert_allclose(data, ref_res, rtol=1e-05) + ref_res = ref_res + 1 if __name__ == "__main__": From a34d85d9be073648c4e34ff8aba507203d09c2ee 
Mon Sep 17 00:00:00 2001 From: mjxs <52824616+kk-2000@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:31:30 +0800 Subject: [PATCH 04/89] np.unicode_ => np.str_ (#49975) --- .../fluid/tests/unittests/test_egr_string_tensor_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py b/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py index 3032dc5810dd6..0c2ad7517edda 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_string_tensor_api.py @@ -41,7 +41,7 @@ def test_constructor_with_args(self): self.assertEqual(ST2.name, "ST2") self.assertEqual(ST2.shape, shape) np.testing.assert_array_equal( - ST2.numpy(), np.empty(shape, dtype=np.unicode_) + ST2.numpy(), np.empty(shape, dtype=np.str_) ) ST3 = core.eager.StringTensor(self.str_arr, "ST3") # constructor 3 @@ -74,7 +74,7 @@ def test_constructor_with_kwargs(self): self.assertEqual(ST1.name, "ST1") self.assertEqual(ST1.shape, shape) np.testing.assert_array_equal( - ST1.numpy(), np.empty(shape, dtype=np.unicode_) + ST1.numpy(), np.empty(shape, dtype=np.str_) ) ST2 = core.eager.StringTensor(self.str_arr, name="ST2") # constructor 3 From e7deae2129583d5eb19d975a07904a6bfce7026f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:33:39 +0800 Subject: [PATCH 05/89] modify np.int with np.int64 (#49967) * change int -> int64 * Update python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_one_hot.py --- .../tests/unittests/ir/inference/test_trt_convert_one_hot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_one_hot.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_one_hot.py index 53574a3fd27dc..30446265a431d 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_one_hot.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_one_hot.py @@ -59,7 +59,7 @@ def generate_depth(dims, batch): }, "op_outputs": {"Out": ["output_data"]}, "op_attrs": dics[0], - "outputs_dtype": {"output_data": np.int64}, + "outputs_dtype": {"output_data": np.int_}, }, ] ops = self.generate_op_config(ops_config) From 3586e856c581f8e1ee1d924152a037357e3ccfb8 Mon Sep 17 00:00:00 2001 From: Yiqun Liu Date: Tue, 31 Jan 2023 10:35:17 +0800 Subject: [PATCH 06/89] Unify the gpu implementation of stack and unstack to reuse the optimization. (#49748) * Unify the gpu implementation of stack and unstack to reuse the optimization. * Optimize the cuda implementation of unstack. * Use GpuMemcpyAsync instead of memory::Copy. * Fix error of calculating the index. * Use FastDivMod to further imporve the performance of unstack. 
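The speedup described above comes from replacing per-element integer division with a precomputed fast division: the divisor (for example the per-input column count in the stack kernel, or the output column count in unstack) is fixed for a whole launch, so the GeneralDivMod/FastDivMod helpers from paddle/phi/kernels/funcs/fast_divmod.h compute a multiplier and shift once and each thread then splits its flat index into (input index, column offset) without a hardware divide. The sketch below is a simplified, host-only illustration of that trick, not the Paddle device implementation; FastDivModSketch and the sizes in main() are invented for the demo, and the demo verifies itself against plain / and %.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Host-side sketch of the multiply-and-shift division trick. The divisor is
// known once per kernel launch, so the multiplier/shift pair is computed once
// and every element index is split with a multiply-high, an add and a shift
// instead of a per-element `/` and `%`.
struct FastDivModSketch {
  explicit FastDivModSketch(uint32_t d) : divisor(d) {
    assert(d >= 1);
    uint64_t one = 1;
    for (shift = 0; (one << shift) < d; ++shift) {
    }
    multiplier =
        static_cast<uint32_t>((one << 32) * ((one << shift) - d) / d + 1);
  }

  uint32_t Div(uint32_t n) const {
    // Done in 64 bits here for clarity; a device version would typically use
    // __umulhi and keep everything in 32-bit registers.
    uint64_t hi = (static_cast<uint64_t>(n) * multiplier) >> 32;
    return static_cast<uint32_t>((hi + n) >> shift);
  }

  void DivMod(uint32_t n, uint32_t* q, uint32_t* r) const {
    *q = Div(n);
    *r = n - *q * divisor;
  }

  uint32_t divisor;
  uint32_t multiplier;
  uint32_t shift;
};

int main() {
  // The stack kernel lays num_splits inputs of split_size columns side by
  // side, so each thread must turn a flat output column index into
  // (which input, which column inside it). Check the fast path against the
  // plain operators for every flat index. The sizes are arbitrary demo values.
  const uint32_t split_size = 7;
  const uint32_t num_splits = 5;
  FastDivModSketch divmod(split_size);
  for (uint32_t grid_x = 0; grid_x < split_size * num_splits; ++grid_x) {
    uint32_t split = 0;
    uint32_t col_offset = 0;
    divmod.DivMod(grid_x, &split, &col_offset);
    assert(split == grid_x / split_size);
    assert(col_offset == grid_x % split_size);
  }
  std::printf("fast divmod matches / and %% for all flat column indices\n");
  return 0;
}
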
--- paddle/phi/kernels/funcs/segmented_array.h | 13 +- paddle/phi/kernels/funcs/stack_and_unstack.h | 276 ++++++++++++++++++ paddle/phi/kernels/gpu/stack_grad_kernel.cu | 153 +--------- paddle/phi/kernels/gpu/stack_kernel.cu | 78 +---- paddle/phi/kernels/gpu/unstack_grad_kernel.cu | 17 +- paddle/phi/kernels/gpu/unstack_kernel.cu | 31 +- paddle/phi/kernels/stack_grad_kernel.h | 2 +- paddle/phi/kernels/unstack_grad_kernel.h | 2 +- 8 files changed, 338 insertions(+), 234 deletions(-) create mode 100644 paddle/phi/kernels/funcs/stack_and_unstack.h diff --git a/paddle/phi/kernels/funcs/segmented_array.h b/paddle/phi/kernels/funcs/segmented_array.h index 0f03dbac591ec..aa03eb4e9fcd2 100644 --- a/paddle/phi/kernels/funcs/segmented_array.h +++ b/paddle/phi/kernels/funcs/segmented_array.h @@ -14,7 +14,7 @@ #pragma once -#include "paddle/phi/kernels/funcs/fast_divmod.h" +#include "paddle/phi/core/dense_tensor.h" namespace phi { namespace funcs { @@ -89,12 +89,11 @@ struct ArraySetterBase { ctx.GetPlace(), num_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); - paddle::memory::Copy(ctx.GetPlace(), - allocation->ptr(), - phi::CPUPlace(), - src, - num_bytes, - ctx.stream()); + phi::backends::gpu::GpuMemcpyAsync(allocation->ptr(), + src, + num_bytes, + phi::gpuMemcpyHostToDevice, + ctx.stream()); return allocation->ptr(); } diff --git a/paddle/phi/kernels/funcs/stack_and_unstack.h b/paddle/phi/kernels/funcs/stack_and_unstack.h new file mode 100644 index 0000000000000..c516d4892bf62 --- /dev/null +++ b/paddle/phi/kernels/funcs/stack_and_unstack.h @@ -0,0 +1,276 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/kernels/funcs/fast_divmod.h" +#include "paddle/phi/kernels/funcs/segmented_array.h" + +namespace phi { +namespace funcs { + +template +__global__ void StackCudaKernel(ArrayT array, + GeneralDivMod divmoder, + IndexT split_size, + IndexT rows, + IndexT cols, + T* __restrict__ output) { + IndexT grid_x = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; + IndexT grid_x_stride = static_cast(blockDim.x) * gridDim.x; + IndexT grid_y_stride = static_cast(blockDim.y) * gridDim.y; + + for (; grid_x < cols; grid_x += grid_x_stride) { + IndexT grid_y = static_cast(blockIdx.y) * blockDim.y + threadIdx.y; + + auto divmod_rslt = divmoder.div_mod(grid_x); + IndexT split = divmod_rslt[0]; // grid_x / split_size + IndexT col_offset = divmod_rslt[1]; // grid_x % split_size + const T* input_ptr = array.data[split]; +#pragma unroll + for (; grid_y < rows; grid_y += grid_y_stride) { + output[grid_y * cols + grid_x] = + input_ptr[grid_y * split_size + col_offset]; + } + } +} + +template +void LaunchStackKernel(const Context& ctx, + const IndexT x_col, + const IndexT x_row, + const IndexT out_col, + const std::vector& x, + DenseTensor* out) { + T* out_ptr = ctx.template Alloc(out); + auto config = phi::backends::gpu::GetGpuLaunchConfig2D(ctx, out_col, x_row); + + ConstPointerArraySetter setter(ctx, x); + GeneralDivMod divmoder(x_col); + StackCudaKernel + <<>>( + setter.array, divmoder, x_col, x_row, out_col, out_ptr); +} + +template +void StackRawKernel(const Context& ctx, + const std::vector& x, + int axis, + DenseTensor* out) { + if (axis < 0) axis += (x[0]->dims().size() + 1); + int num = static_cast(x.size()); + + // Split x dim from axis to matrix of shape [x_row, x_col], and the output + // tensor's shape is [x_row, out_col]. 
+ int64_t x_row = 1; + for (int i = 0; i < axis; ++i) { + x_row *= x[0]->dims()[i]; + } + int64_t x_col = x[0]->numel() / x_row; + int64_t out_col = x_col * num; + + if (out->numel() < std::numeric_limits::max()) { + switch (CalcArraySize(num)) { + SEGMENTED_ARRAY_KERNEL_HELPER( + LaunchStackKernel( + ctx, x_col, x_row, out_col, x, out)); + } + } else { + switch (CalcArraySize(num)) { + SEGMENTED_ARRAY_KERNEL_HELPER( + LaunchStackKernel( + ctx, x_col, x_row, out_col, x, out)); + } + } +} + +template +__global__ void UnStackCudaKernel(const T* __restrict__ input, + IndexT out_row, + IndexT split_dim, + IndexT out_col, + IndexT num_splits, + GeneralDivMod col_divmoder, + ArrayT array) { + assert(blockDim.y == 1); + assert(blockDim.z == 1); + // In this case they are equal + assert(split_dim % num_splits == 0); + + IndexT numel = out_row * split_dim * out_col; + IndexT each_dim_size = split_dim / num_splits; + IndexT split_dim_with_out_col = split_dim * out_col; + + IndexT offset = blockIdx.x * blockDim.x + threadIdx.x; + if (each_dim_size == 1) { + for (; offset < numel; offset += blockDim.x * gridDim.x) { + auto col_divmod_rslt = col_divmoder.div_mod(offset); + + IndexT i = offset / split_dim_with_out_col; + IndexT j = col_divmod_rslt[0] - i * split_dim; + IndexT k = col_divmod_rslt[1]; // offset % out_col + + T* output = array.data[j]; + if (output) { + IndexT output_idx = i * out_col + k; + *(output + output_idx) = input[offset]; + } + } + } else { + for (; offset < numel; offset += blockDim.x * gridDim.x) { + auto col_divmod_rslt = col_divmoder.div_mod(offset); + + IndexT i = offset / split_dim_with_out_col; + IndexT j = col_divmod_rslt[0] - i * split_dim; + IndexT k = col_divmod_rslt[1]; // offset % out_col + + T* output = array.data[j / each_dim_size]; + if (output) { + IndexT output_idx = (i + j % each_dim_size) * out_col + k; + *(output + output_idx) = input[offset]; + } + } + } +} + +template +__global__ void UnStackCudaKernelForLastDim(const T* __restrict__ in_data, + const IndexT cols, + const IndexT rows, + const IndexT tile_x_num, + ArrayT array) { + constexpr int buffer_size = 512; + __shared__ T s_buf[buffer_size]; + + for (IndexT tile_x = blockIdx.x; tile_x < tile_x_num; tile_x += gridDim.x) { + IndexT row_idx = tile_x * blockDim.x + threadIdx.x; + IndexT col_idx = blockIdx.y * blockDim.y + threadIdx.y; + int s_idx = threadIdx.y * blockDim.x + threadIdx.x; + bool is_valid = (col_idx < cols && row_idx < rows); + + if (is_valid) { + T data = in_data[row_idx * cols + col_idx]; + s_buf[s_idx] = data; + } + __syncthreads(); + if (is_valid) { + if (array.data[col_idx]) { + array.data[col_idx][row_idx] = s_buf[s_idx]; + } + } + } +} + +template +void LaunchUnStackKernel(const Context& ctx, + const IndexT out_row, + const IndexT split_dim, + const IndexT out_col, + const IndexT num_splits, + const DenseTensor& x, + std::vector* outs) { + // each tensor in outs should have same shape. 
+ VLOG(6) << "out_row=" << out_row << ", split_dim=" << split_dim + << ", out_col=" << out_col << ", num_splits=" << num_splits; + + auto x_ptr = x.data(); + PointerArraySetter setter(ctx, outs); + + if (out_col == 1) { + // For the case axis == (x.dims().size() - 1) + constexpr int kThreads = 512; + constexpr int kWarpSize = 32; + constexpr int kMaxOut = 16; + + int tid_x = 0, tid_y = 0, bid_x = 0, bid_y = 1; + if (split_dim < kMaxOut) { + tid_y = split_dim; + tid_x = + std::min(backends::gpu::RoundToNextHighPowOfTwo(out_row, kWarpSize), + kThreads / backends::gpu::RoundToNextHighPowOfTwo(tid_y)); + } else { + tid_y = kMaxOut; + tid_x = kWarpSize; + bid_y = backends::gpu::DivUp(split_dim, kMaxOut); + } + int tile_x_num = backends::gpu::DivUp(out_row, tid_x); + bid_x = std::min(tile_x_num, backends::gpu::kMultiDimslimit); + dim3 blocks(tid_x, tid_y, 1); + dim3 grids(bid_x, bid_y, 1); + + UnStackCudaKernelForLastDim + <<>>( + x_ptr, split_dim, out_row, tile_x_num, setter.array); + } else { + GeneralDivMod col_divmoder(out_col); + auto config = phi::backends::gpu::GetGpuLaunchConfig1D( + ctx, out_row * split_dim * out_col); + + UnStackCudaKernel + <<>>(x_ptr, + out_row, + split_dim, + out_col, + num_splits, + col_divmoder, + setter.array); + } +} + +template +void UnStackRawKernel(const Context& ctx, + const DenseTensor& x, + int axis, + std::vector* outs) { + auto x_dims = x.dims(); + + // Input tensor is splited to split_dim tensors along split_dim dimension. + int64_t split_dim = x_dims[axis]; + + // Treat outs[i] as [out_row, out_col], and x as [out_row, split_dim, + // out_col]. + int64_t out_row = 1; + for (int i = 0; i < axis; ++i) { + out_row *= x_dims[i]; + } + + int64_t out_col = x.numel() / (split_dim * out_row); + + if (x.numel() < std::numeric_limits::max()) { + switch (CalcArraySize(split_dim)) { + SEGMENTED_ARRAY_KERNEL_HELPER( + LaunchUnStackKernel( + ctx, out_row, split_dim, out_col, split_dim, x, outs)); + } + } else { + switch (CalcArraySize(split_dim)) { + SEGMENTED_ARRAY_KERNEL_HELPER( + LaunchUnStackKernel( + ctx, out_row, split_dim, out_col, split_dim, x, outs)); + } + } +} + +} // namespace funcs +} // namespace phi diff --git a/paddle/phi/kernels/gpu/stack_grad_kernel.cu b/paddle/phi/kernels/gpu/stack_grad_kernel.cu index 572ed4a361b4e..6c72a3562e6a7 100644 --- a/paddle/phi/kernels/gpu/stack_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/stack_grad_kernel.cu @@ -13,125 +13,13 @@ // limitations under the License. 
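The rewritten StackGradKernel below simply forwards to funcs::UnStackRawKernel, since the gradient of stack is an unstack of out_grad along the same axis. A small dygraph sketch of that identity (illustrative only):

import paddle

xs = [paddle.rand([2, 3]) for _ in range(4)]
for t in xs:
    t.stop_gradient = False
y = paddle.stack(xs, axis=1)        # shape [2, 4, 3]
y.sum().backward()
# d(sum)/dy is all ones, and each xs[i].grad is exactly the i-th slice of
# that gradient along axis 1, i.e. a [2, 3] tensor of ones.
print(xs[0].grad.shape)             # [2, 3]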
#include "paddle/phi/kernels/stack_grad_kernel.h" -#include "paddle/fluid/memory/memory.h" -#include "paddle/phi/backends/gpu/gpu_launch_config.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/segmented_array.h" +#include "paddle/phi/kernels/funcs/stack_and_unstack.h" namespace phi { -template -__global__ void UnStackCudaKernel(const T* __restrict__ input, - IndexT pre_dim_size, - IndexT split_dim_size, - IndexT suf_dim_size, - IndexT num_split, - ArrayT array) { - assert(blockDim.y == 1); - assert(blockDim.z == 1); - // In this case they are equal - assert(split_dim_size % num_split == 0); - - IndexT size = pre_dim_size * split_dim_size * suf_dim_size; - IndexT each_dim_size = split_dim_size / num_split; - - for (IndexT offset = blockIdx.x * blockDim.x + threadIdx.x; offset < size; - offset += blockDim.x * gridDim.x) { - IndexT i = offset / (split_dim_size * suf_dim_size); - IndexT j = (offset % (split_dim_size * suf_dim_size)) / suf_dim_size; - IndexT k = offset % suf_dim_size; - - T* output = array.data[j / each_dim_size]; - if (output == nullptr) { - return; - } - IndexT output_ind = i * each_dim_size * suf_dim_size + - (j % each_dim_size) * suf_dim_size + k; - *(output + output_ind) = input[offset]; - } -} - -template -__global__ void UnStackCudaKernelForLastDim(const T* __restrict__ in_data, - const IndexT cols, - const IndexT rows, - const IndexT tile_x_num, - ArrayT array) { - constexpr int buffer_size = 512; - __shared__ T s_buf[buffer_size]; - - for (IndexT tile_x = blockIdx.x; tile_x < tile_x_num; tile_x += gridDim.x) { - IndexT row_idx = tile_x * blockDim.x + threadIdx.x; - IndexT col_idx = blockIdx.y * blockDim.y + threadIdx.y; - int s_idx = threadIdx.y * blockDim.x + threadIdx.x; - bool is_valid = (col_idx < cols && row_idx < rows); - - if (is_valid) { - T data = in_data[row_idx * cols + col_idx]; - s_buf[s_idx] = data; - } - __syncthreads(); - if (is_valid) { - if (array.data[col_idx]) { - array.data[col_idx][row_idx] = s_buf[s_idx]; - } - } - } -} - -template -void LaunchUnStackKernel(const Context& ctx, - const IndexT pre_dim, - const IndexT split_dim, - const IndexT suf_dim, - const IndexT num_splits, - const DenseTensor& out_grad, - std::vector* x_grad) { - // each x_grad should have same shape - auto dout_ptr = out_grad.data(); - funcs::PointerArraySetter setter(ctx, x_grad); - - if (suf_dim == 1) { - // For the case axis == (out_grad.dims().size() - 1) - constexpr int kThreads = 512; - constexpr int kWarpSize = 32; - constexpr int kMaxOut = 16; - - int tid_x = 0, tid_y = 0, bid_x = 0, bid_y = 1; - if (split_dim < kMaxOut) { - tid_y = split_dim; - tid_x = - std::min(backends::gpu::RoundToNextHighPowOfTwo(pre_dim, kWarpSize), - kThreads / backends::gpu::RoundToNextHighPowOfTwo(tid_y)); - } else { - tid_y = kMaxOut; - tid_x = kWarpSize; - bid_y = backends::gpu::DivUp(split_dim, kMaxOut); - } - int tile_x_num = backends::gpu::DivUp(pre_dim, tid_x); - bid_x = std::min(tile_x_num, backends::gpu::kMultiDimslimit); - dim3 blocks(tid_x, tid_y, 1); - dim3 grids(bid_x, bid_y, 1); - - UnStackCudaKernelForLastDim - <<>>( - dout_ptr, split_dim, pre_dim, tile_x_num, setter.array); - } else { - auto config = phi::backends::gpu::GetGpuLaunchConfig1D( - ctx, pre_dim * split_dim * suf_dim); - - UnStackCudaKernel - <<>>( - dout_ptr, pre_dim, split_dim, suf_dim, num_splits, setter.array); - } -} - template void StackGradKernel(const Context& ctx, const DenseTensor& out_grad, @@ -144,41 +32,12 @@ void 
StackGradKernel(const Context& ctx, split_dim, x_grad.size(), phi::errors::InvalidArgument( - "Output x_grad size should be equal to the split_dim, but" - " received split_dim is:%d x_grad size is:%d.", + "Output x_grad's size should be equal to the split_dim, but" + " received split_dim is:%d x_grad's size is:%d.", split_dim, x_grad.size())); - auto dout_dims = out_grad.dims(); - int64_t dout_pre = 1; - for (int i = 0; i < axis; ++i) { - dout_pre *= dout_dims[i]; - } - int64_t dout_suf = out_grad.numel() / (split_dim * dout_pre); - - if (out_grad.numel() < std::numeric_limits::max()) { - switch (funcs::CalcArraySize(split_dim)) { - SEGMENTED_ARRAY_KERNEL_HELPER( - LaunchUnStackKernel(ctx, - dout_pre, - split_dim, - dout_suf, - split_dim, - out_grad, - &x_grad)); - } - } else { - switch (funcs::CalcArraySize(split_dim)) { - SEGMENTED_ARRAY_KERNEL_HELPER( - LaunchUnStackKernel(ctx, - dout_pre, - split_dim, - dout_suf, - split_dim, - out_grad, - &x_grad)); - } - } + funcs::UnStackRawKernel(ctx, out_grad, axis, &x_grad); } } // namespace phi diff --git a/paddle/phi/kernels/gpu/stack_kernel.cu b/paddle/phi/kernels/gpu/stack_kernel.cu index a50396e7c9729..e1d7d4e6f389c 100644 --- a/paddle/phi/kernels/gpu/stack_kernel.cu +++ b/paddle/phi/kernels/gpu/stack_kernel.cu @@ -13,89 +13,19 @@ // limitations under the License. #include "paddle/phi/kernels/stack_kernel.h" -#include "paddle/fluid/memory/memory.h" -#include "paddle/phi/backends/gpu/gpu_launch_config.h" -#include "paddle/phi/core/dense_tensor.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/funcs/segmented_array.h" +#include "paddle/phi/kernels/funcs/stack_and_unstack.h" namespace phi { -template -__global__ void StackCUDAKernel(ArrayT array, - funcs::GeneralDivMod divmoder, - IndexT split_size, - IndexT rows, - IndexT cols, - T* __restrict__ output) { - IndexT grid_x = static_cast(blockIdx.x) * blockDim.x + threadIdx.x; - IndexT grid_x_stride = static_cast(blockDim.x) * gridDim.x; - IndexT grid_y_stride = static_cast(blockDim.y) * gridDim.y; - - for (; grid_x < cols; grid_x += grid_x_stride) { - IndexT grid_y = static_cast(blockIdx.y) * blockDim.y + threadIdx.y; - - auto divmod_rslt = divmoder.div_mod(grid_x); - IndexT split = divmod_rslt[0]; // grid_x / split_size - IndexT col_offset = divmod_rslt[1]; // grid_x % split_size - const T* input_ptr = array.data[split]; -#pragma unroll - for (; grid_y < rows; grid_y += grid_y_stride) { - output[grid_y * cols + grid_x] = - input_ptr[grid_y * split_size + col_offset]; - } - } -} - -template -void LaunchStackKernel(const Context& ctx, - const IndexT x_col, - const IndexT x_row, - const IndexT out_col, - const std::vector& x, - DenseTensor* out) { - T* out_ptr = ctx.template Alloc(out); - auto config = phi::backends::gpu::GetGpuLaunchConfig2D(ctx, out_col, x_row); - - funcs::ConstPointerArraySetter setter(ctx, x); - funcs::GeneralDivMod divmoder(x_col); - StackCUDAKernel - <<>>( - setter.array, divmoder, x_col, x_row, out_col, out_ptr); -} - template void StackKernel(const Context& ctx, const std::vector& x, int axis, DenseTensor* out) { - if (axis < 0) axis += (x[0]->dims().size() + 1); - int num = static_cast(x.size()); - - // Split x dim from axis to matrix - int64_t x_row = 1; - for (int i = 0; i < axis; ++i) { - x_row *= x[0]->dims()[i]; - } - int64_t x_col = x[0]->numel() / x_row; - int64_t out_col = x_col * num; - - if (out->numel() < std::numeric_limits::max()) { - switch (funcs::CalcArraySize(num)) { - 
SEGMENTED_ARRAY_KERNEL_HELPER( - LaunchStackKernel( - ctx, x_col, x_row, out_col, x, out)); - } - } else { - switch (funcs::CalcArraySize(num)) { - SEGMENTED_ARRAY_KERNEL_HELPER( - LaunchStackKernel( - ctx, x_col, x_row, out_col, x, out)); - } - } + funcs::StackRawKernel(ctx, x, axis, out); } } // namespace phi diff --git a/paddle/phi/kernels/gpu/unstack_grad_kernel.cu b/paddle/phi/kernels/gpu/unstack_grad_kernel.cu index b7c349de0df32..88bf155606c1b 100644 --- a/paddle/phi/kernels/gpu/unstack_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/unstack_grad_kernel.cu @@ -16,7 +16,19 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/unstack_grad_kernel_impl.h" +#include "paddle/phi/kernels/funcs/stack_and_unstack.h" + +namespace phi { + +template +void UnStackGradKernel(const Context& ctx, + const std::vector& out_grad, + int axis, + DenseTensor* x_grad) { + funcs::StackRawKernel(ctx, out_grad, axis, x_grad); +} + +} // namespace phi PD_REGISTER_KERNEL(unstack_grad, GPU, @@ -26,4 +38,5 @@ PD_REGISTER_KERNEL(unstack_grad, double, int64_t, int, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/unstack_kernel.cu b/paddle/phi/kernels/gpu/unstack_kernel.cu index f147f4c0f0edf..4331322bdc202 100644 --- a/paddle/phi/kernels/gpu/unstack_kernel.cu +++ b/paddle/phi/kernels/gpu/unstack_kernel.cu @@ -16,7 +16,33 @@ #include "paddle/phi/backends/gpu/gpu_context.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/unstack_kernel_impl.h" +#include "paddle/phi/kernels/funcs/stack_and_unstack.h" + +namespace phi { + +template +void UnStackKernel(const Context& ctx, + const DenseTensor& x, + int axis, + int num, + std::vector outs) { + if (x.numel() == 0) return; + if (axis < 0) axis += x.dims().size(); + + int64_t split_dim = x.dims()[axis]; + PADDLE_ENFORCE_EQ( + split_dim, + outs.size(), + phi::errors::InvalidArgument( + "Output outs's size should be equal to the split_dim, but" + " received split_dim is:%d outs's size is:%d.", + split_dim, + outs.size())); + + funcs::UnStackRawKernel(ctx, x, axis, &outs); +} + +} // namespace phi PD_REGISTER_KERNEL(unstack, GPU, @@ -26,4 +52,5 @@ PD_REGISTER_KERNEL(unstack, double, int64_t, int, - phi::dtype::float16) {} + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/stack_grad_kernel.h b/paddle/phi/kernels/stack_grad_kernel.h index 32451e606f26a..1e8f2d68399f8 100644 --- a/paddle/phi/kernels/stack_grad_kernel.h +++ b/paddle/phi/kernels/stack_grad_kernel.h @@ -20,7 +20,7 @@ namespace phi { template void StackGradKernel(const Context& dev_ctx, - const DenseTensor& out, + const DenseTensor& out_grad, int axis, std::vector x_grad); diff --git a/paddle/phi/kernels/unstack_grad_kernel.h b/paddle/phi/kernels/unstack_grad_kernel.h index de0e3004d8038..cb50f5ec9240c 100644 --- a/paddle/phi/kernels/unstack_grad_kernel.h +++ b/paddle/phi/kernels/unstack_grad_kernel.h @@ -20,7 +20,7 @@ namespace phi { template void UnStackGradKernel(const Context& dev_ctx, - const std::vector& x, + const std::vector& out_grad, int axis, DenseTensor* x_grad); From 4976153dd53605e0be3fecc4fec393396ab181c2 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 31 Jan 2023 10:38:32 +0800 Subject: [PATCH 07/89] add dims check for nms_kernel (#49993) --- paddle/phi/kernels/cpu/nms_kernel.cc | 12 ++++++++++++ paddle/phi/kernels/gpu/nms_kernel.cu | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git 
a/paddle/phi/kernels/cpu/nms_kernel.cc b/paddle/phi/kernels/cpu/nms_kernel.cc index 4b56f6bb95105..6743f13fff719 100644 --- a/paddle/phi/kernels/cpu/nms_kernel.cc +++ b/paddle/phi/kernels/cpu/nms_kernel.cc @@ -69,6 +69,18 @@ void NMSKernel(const Context& dev_ctx, const DenseTensor& boxes, float threshold, DenseTensor* output) { + PADDLE_ENFORCE_EQ( + boxes.dims().size(), + 2, + phi::errors::InvalidArgument("The shape [%s] of boxes must be (N, 4).", + boxes.dims())); + + PADDLE_ENFORCE_EQ( + boxes.dims()[1], + 4, + phi::errors::InvalidArgument("The shape [%s] of boxes must be (N, 4).", + boxes.dims())); + int64_t num_boxes = boxes.dims()[0]; DenseTensor output_tmp; output_tmp.Resize(phi::make_ddim({num_boxes})); diff --git a/paddle/phi/kernels/gpu/nms_kernel.cu b/paddle/phi/kernels/gpu/nms_kernel.cu index 79b0b8dfb1825..81f5ca8d1619f 100644 --- a/paddle/phi/kernels/gpu/nms_kernel.cu +++ b/paddle/phi/kernels/gpu/nms_kernel.cu @@ -59,6 +59,18 @@ void NMSKernel(const Context& dev_ctx, const DenseTensor& boxes, float threshold, DenseTensor* output) { + PADDLE_ENFORCE_EQ( + boxes.dims().size(), + 2, + phi::errors::InvalidArgument("The shape [%s] of boxes must be (N, 4).", + boxes.dims())); + + PADDLE_ENFORCE_EQ( + boxes.dims()[1], + 4, + phi::errors::InvalidArgument("The shape [%s] of boxes must be (N, 4).", + boxes.dims())); + const int64_t num_boxes = boxes.dims()[0]; const auto blocks_per_line = CeilDivide(num_boxes, threadsPerBlock); dim3 block(threadsPerBlock); From baf96a123b09a4755a3b4c787efaf256bf1f4cb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:42:40 +0800 Subject: [PATCH 08/89] fix the div 0 error of pixel_shuffle (#49996) --- paddle/phi/infermeta/unary.cc | 4 ++++ python/paddle/fluid/tests/unittests/test_pixel_shuffle.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 55e895c6622a6..8cea16f770631 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -2533,6 +2533,10 @@ void PixelShuffleInferMeta(const MetaTensor& x, "Input should be a 4-D tensor of format [N, C, H, W] " "or [N, H, W, C], but got %u.", input_dims.size())); + PADDLE_ENFORCE_NE( + upscale_factor, + 0, + phi::errors::InvalidArgument("upscale_factor should not be 0.")); const bool channel_last = (data_format == "NHWC"); diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py index 196a4ddbd4005..9600f5a872c56 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py @@ -227,6 +227,13 @@ def error_upscale_factor(): self.assertRaises(TypeError, error_upscale_factor) + def error_0_upscale_factor(): + with paddle.fluid.dygraph.guard(): + x = paddle.uniform([1, 1, 1, 1], dtype='float64') + pixel_shuffle = F.pixel_shuffle(x, 0) + + self.assertRaises(ValueError, error_0_upscale_factor) + def error_data_format(): with paddle.fluid.dygraph.guard(): x = np.random.random([2, 9, 4, 4]).astype("float64") From 66682be0c0f0ca80f115f053977a971951e736d3 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 31 Jan 2023 10:43:40 +0800 Subject: [PATCH 09/89] =?UTF-8?q?Fix=20=E5=A0=86=E6=A0=88=E6=BA=A2?= =?UTF-8?q?=E5=87=BA=20(stack=20overflow)=20of=20case9:=20paddle.repeat=5F?= =?UTF-8?q?interleave=20(#49982)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit * support negative index in repeat_interleave * add unittest --- paddle/phi/infermeta/unary.cc | 39 ++++++++++++------- .../unittests/test_repeat_interleave_op.py | 20 ++++++++++ 2 files changed, 46 insertions(+), 13 deletions(-) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 8cea16f770631..e08f1769bef48 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3075,27 +3075,40 @@ void RepeatInterleaveInferMeta(const MetaTensor& x, MetaTensor* out) { const auto& input_dim = x.dims(); auto output_dim = phi::vectorize(input_dim); + auto n_dim = dim; - PADDLE_ENFORCE_EQ( - dim < input_dim.size() && dim >= (0 - input_dim.size()), - true, + if (n_dim < 0) n_dim += input_dim.size(); + + PADDLE_ENFORCE_LT( + dim, + input_dim.size(), phi::errors::OutOfRange( "Attr(dim) is out of range, It's expected " - "to be in range of [-%d, %d]. But received Attr(dim) = %d.", - input_dim.size(), + "to be in range of [%d, %d]. But received Attr(dim) = %d.", + -input_dim.size(), input_dim.size() - 1, dim)); - PADDLE_ENFORCE_EQ( - repeats > 0, - true, + PADDLE_ENFORCE_GE( + dim, + (0 - input_dim.size()), + phi::errors::OutOfRange( + "Attr(dim) is out of range, It's expected " + "to be in range of [%d, %d]. But received Attr(dim) = %d.", + -input_dim.size(), + input_dim.size() - 1, + dim)); + + PADDLE_ENFORCE_GT( + repeats, + 0, phi::errors::InvalidArgument("repeats should be larger than zero")); - PADDLE_ENFORCE_NE(out, - nullptr, - phi::errors::InvalidArgument( - "repeat_interleave's output tensor can't be nullptr")); + PADDLE_ENFORCE_NOT_NULL( + out, + phi::errors::InvalidArgument( + "repeat_interleave's output tensor can't be nullptr")); - output_dim[dim] = input_dim[dim] * repeats; + output_dim[n_dim] = input_dim[n_dim] * repeats; out->set_dims(phi::make_ddim(output_dim)); out->share_lod(x); out->set_dtype(x.dtype()); diff --git a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py index 90877a3047e2c..4b5272c5a4bdf 100644 --- a/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py +++ b/python/paddle/fluid/tests/unittests/test_repeat_interleave_op.py @@ -188,6 +188,26 @@ def test_repeat_interleave_api(self): expect_out = np.repeat(self.data_zero_dim_x, repeats) np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05) + # case 4 negative axis: + with program_guard(Program(), Program()): + x = paddle.static.data(name='x', shape=[-1, 4], dtype='float32') + x.desc.set_need_check_feed(False) + index = paddle.static.data( + name='repeats_', + shape=[4], + dtype='int32', + ) + index.desc.set_need_check_feed(False) + z = paddle.repeat_interleave(x, index, axis=-1) + exe = fluid.Executor(fluid.CPUPlace()) + (res,) = exe.run( + feed={'x': self.data_x, 'repeats_': self.data_index}, + fetch_list=[z.name], + return_numpy=False, + ) + expect_out = np.repeat(self.data_x, self.data_index, axis=-1) + np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05) + def test_dygraph_api(self): self.input_data() # case axis none From fb74147c6aa3ae8a1256f8e84f46af3632190f44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:44:57 +0800 Subject: [PATCH 10/89] Fix the div 0 error of matrix_power (#49942) * add zero size check in matrix_power_kernel_impl.h * add zero size check in matrix_power_kernel_impl.h * add zero size check in unittest * 
bug_fix * bug_fix * bug_fix * bug_fix * bug_fix * bug fix * bug_fix * bug_fix * add static check * delete the dy codes --- paddle/phi/infermeta/unary.cc | 5 +++++ python/paddle/fluid/tests/unittests/test_matrix_power_op.py | 4 ++++ 2 files changed, 9 insertions(+) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index e08f1769bef48..8a3c33a4d6c72 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -1889,6 +1889,11 @@ void MatrixPowerInferMeta(const MetaTensor& x, int n, MetaTensor* out) { "The Input(X) should have at least 2 dimensions. But " "received a %d dimension tensor.", n_dim)); + for (int i = 0; i < n_dim; ++i) + PADDLE_ENFORCE_NE( + dims[i], + 0, + phi::errors::InvalidArgument("The size of Input(X) should not be 0.")); PADDLE_ENFORCE_EQ(dims[n_dim - 2], dims[n_dim - 1], phi::errors::InvalidArgument( diff --git a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py index 29f82b0350d65..7f26a7170191f 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py @@ -312,6 +312,10 @@ def test_errors(self): input = fluid.data(name="input_3", shape=[4, 5], dtype="float32") self.assertRaises(ValueError, paddle.linalg.matrix_power, input, 2) + # The size of input should not be 0 + input = fluid.data(name="input_4", shape=[1, 1, 0, 0], dtype="float32") + self.assertRaises(ValueError, paddle.linalg.matrix_power, input, 2) + class TestMatrixPowerSingularAPI(unittest.TestCase): def setUp(self): From 7bb67db3d53297df8cce4b30992bb1035ba3bf62 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 10:48:58 +0800 Subject: [PATCH 11/89] fix the div 0 errors in psroi_pool (#49965) * fix the div 0 errors in psroi_pool * fix case 7 * rool back sth. 
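The check added in python/paddle/vision/ops.py below rejects an output_size containing 0, because output_channels is inferred as C / (pooled_height * pooled_width). A dygraph sketch of the guarded call, similar to the new unit test (error text matches the added check):

import paddle

x = paddle.uniform([2, 490, 28, 28], dtype='float32')
boxes = paddle.to_tensor([[1, 5, 8, 10], [4, 2, 6, 7]], dtype='float32')
boxes_num = paddle.to_tensor([1, 1], dtype='int32')
try:
    paddle.vision.ops.psroi_pool(x, boxes, boxes_num, output_size=0)
except ValueError as e:
    print(e)    # "output_size should not contain 0."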
--- .../fluid/tests/unittests/test_psroi_pool_op.py | 16 ++++++++++++++++ python/paddle/vision/ops.py | 2 ++ 2 files changed, 18 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py index 40f3c52d4fc03..c33d218cd8621 100644 --- a/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py +++ b/python/paddle/fluid/tests/unittests/test_psroi_pool_op.py @@ -339,6 +339,22 @@ def test_channel_error(): self.assertRaises(ValueError, test_channel_error) +class TestPSROIPoolZeroDivError(unittest.TestCase): + def setUp(self): + paddle.disable_static() + self.x = paddle.uniform([2, 490, 28, 28], dtype='float32') + self.boxes = paddle.to_tensor( + [[1, 5, 8, 10], [4, 2, 6, 7], [12, 12, 19, 21]], dtype='float32' + ) + self.boxes_num = paddle.to_tensor([1, 2], dtype='int32') + + def test_errors(self): + def test_zero_div_error(): + paddle.vision.ops.psroi_pool(self.x, self.boxes, self.boxes_num, 0) + + self.assertRaises(ValueError, test_zero_div_error) + + class TestPSROIPoolStaticAPI(unittest.TestCase): def setUp(self): paddle.enable_static() diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 0696b5f7cc6a1..0d43bd0fc54ce 100755 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -1424,6 +1424,8 @@ def psroi_pool(x, boxes, boxes_num, output_size, spatial_scale=1.0, name=None): output_size = (output_size, output_size) pooled_height, pooled_width = output_size assert len(x.shape) == 4, "Input features with shape should be (N, C, H, W)" + if pooled_height * pooled_width == 0: + raise ValueError('output_size should not contain 0.') output_channels = int(x.shape[1] / (pooled_height * pooled_width)) if in_dygraph_mode(): return _C_ops.psroi_pool( From 82edc65ba2e533c25cf6cd34117f43268043ba44 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 31 Jan 2023 10:50:16 +0800 Subject: [PATCH 12/89] =?UTF-8?q?Fix=20=E7=A9=BA=E6=8C=87=E9=92=88=20(Null?= =?UTF-8?q?=20pointer)=20of=20case=2014=20paddle.atan2=20(#49973)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add elements count check in atan2 * add unittest and pre-check in inferMeta * add dimension check --- paddle/phi/infermeta/binary.cc | 20 +++++++++++++++++++ paddle/phi/kernels/impl/atan2_kernel_impl.h | 8 ++++++++ .../fluid/tests/unittests/test_atan2_op.py | 12 +++++++++++ 3 files changed, 40 insertions(+) diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc index 561938adca80a..3ca56e0602c1d 100644 --- a/paddle/phi/infermeta/binary.cc +++ b/paddle/phi/infermeta/binary.cc @@ -142,6 +142,26 @@ void KLDivInferMeta(const MetaTensor& x, } void Atan2InferMeta(const MetaTensor& x, const MetaTensor& y, MetaTensor* out) { + auto x_dims = x.dims(); + auto y_dims = y.dims(); + + PADDLE_ENFORCE_EQ( + x_dims.size(), + y_dims.size(), + phi::errors::InvalidArgument("The rank (%d) of X shall be same as " + "rank (%d) of Y.", + x_dims.size(), + y_dims.size())); + + if (x_dims.size() > 0) + PADDLE_ENFORCE_LE(x_dims[0], + y_dims[0], + phi::errors::InvalidArgument( + "The count (%d) of elements of X shall not " + "greater than count (%d) of elements of Y.", + x_dims[0], + y_dims[0])); + out->share_meta(x); if (x.dtype() == DataType::INT32 || x.dtype() == DataType::INT64 || y.dtype() == DataType::INT32 || y.dtype() == DataType::INT64) { diff --git a/paddle/phi/kernels/impl/atan2_kernel_impl.h b/paddle/phi/kernels/impl/atan2_kernel_impl.h index 2cae914e2f615..b7799a777046f 
100644 --- a/paddle/phi/kernels/impl/atan2_kernel_impl.h +++ b/paddle/phi/kernels/impl/atan2_kernel_impl.h @@ -77,6 +77,14 @@ void Atan2Kernel(const Context& ctx, auto x_data = x.data(); auto y_data = y.data(); + PADDLE_ENFORCE_LE( + numel, + y.numel(), + phi::errors::InvalidArgument("The count (%d) of elements of X shall not " + "greater than count (%d) of elements of Y.", + numel, + y.numel())); + auto* out_data = ctx.template Alloc::type>( out, size_t(x.numel() * sizeof(typename Atan2Out::type))); diff --git a/python/paddle/fluid/tests/unittests/test_atan2_op.py b/python/paddle/fluid/tests/unittests/test_atan2_op.py index 77ad77e3252b8..6b62b25ac5d8a 100644 --- a/python/paddle/fluid/tests/unittests/test_atan2_op.py +++ b/python/paddle/fluid/tests/unittests/test_atan2_op.py @@ -130,6 +130,18 @@ def run(place): run(place) +class TestAtan2Error(unittest.TestCase): + def test_mismatch(self): + paddle.enable_static() + + def test_mismatch_numel(): + X = paddle.fluid.data('X', (1,), dtype=np.float64) + Y = paddle.fluid.data('Y', (0,), dtype=np.float64) + out = paddle.atan2(X, Y) + + self.assertRaises(ValueError, test_mismatch_numel) + + if __name__ == '__main__': paddle.enable_static() unittest.main() From dbfdefa750bb7f8148b6b59277310888df89447a Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 31 Jan 2023 10:51:23 +0800 Subject: [PATCH 13/89] =?UTF-8?q?Fix=20=E5=A0=86=E6=A0=88=E6=BA=A2?= =?UTF-8?q?=E5=87=BA=20(stack=20overflow)=20of=20case10:=20paddle.unique?= =?UTF-8?q?=20(#49981)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add axis check in UniqueRawInferMeta * add unittest for negative axis * simplify check for unique --- paddle/phi/infermeta/unary.cc | 9 +++++++ .../fluid/tests/unittests/test_unique.py | 26 +++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 8a3c33a4d6c72..3b3202c291725 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -4648,6 +4648,7 @@ void UniqueRawInferMeta(const MetaTensor& x, if (axis_value < 0) { axis_value += x.dims().size(); } + PADDLE_ENFORCE_LT( axis_value, x.dims().size(), @@ -4655,6 +4656,14 @@ void UniqueRawInferMeta(const MetaTensor& x, "the dimension size(%d) of x.", axis_value, x.dims().size())); + PADDLE_ENFORCE_GE( + axis_value, + 0, + phi::errors::InvalidArgument( + "The axis(%d) + rank(x) (%d) should be greater than or equal to 0.", + axis_value, + -x.dims().size())); + auto out_dims = x.dims(); out_dims[axis_value] = -1; out->set_dims(out_dims); diff --git a/python/paddle/fluid/tests/unittests/test_unique.py b/python/paddle/fluid/tests/unittests/test_unique.py index 9183c1bd676bb..b3ae10a6c335e 100644 --- a/python/paddle/fluid/tests/unittests/test_unique.py +++ b/python/paddle/fluid/tests/unittests/test_unique.py @@ -190,6 +190,32 @@ def init_config(self): } +class TestUniqueOpAxisNeg(TestUniqueOp): + def init_config(self): + self.inputs = {'X': np.random.random((6, 1, 8)).astype('float64')} + unique, indices, inverse, counts = np.unique( + self.inputs['X'], + return_index=True, + return_inverse=True, + return_counts=True, + axis=-1, + ) + self.attrs = { + 'dtype': int(core.VarDesc.VarType.INT32), + "return_index": True, + "return_inverse": True, + "return_counts": True, + "axis": [-1], + "is_sorted": True, + } + self.outputs = { + 'Out': unique, + 'Indices': indices, + "Index": inverse, + "Counts": counts, + } + + class TestUniqueOpAxis1(TestUniqueOp): def init_config(self): 
self.inputs = {'X': np.random.random((3, 8, 8)).astype('float64')} From 52672ea5eeee47e57d1389528c48e2aae3c159ae Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 31 Jan 2023 10:52:06 +0800 Subject: [PATCH 14/89] Fix Python IndexError of case17: paddle.nn.functional.interpolate (#49992) * add dimension check for interpolate * modify dimension check for interpolate * add unittest to size check for interpolate * fix incorrect shape check for interpolate * split size check and add unittests --- .../tests/unittests/test_bicubic_interp_op.py | 7 +++++++ .../unittests/test_bicubic_interp_v2_op.py | 17 +++++++++++++++++ python/paddle/nn/functional/common.py | 17 +++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py b/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py index b5d1a7d0dfd5c..6482a5fddf9a8 100644 --- a/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py +++ b/python/paddle/fluid/tests/unittests/test_bicubic_interp_op.py @@ -390,6 +390,12 @@ def test_input_shape(): x, size=[12, 12], mode='BICUBIC', align_corners=False ) + def test_size_shape(): + x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") + out = interpolate( + x, size=[12], mode='BICUBIC', align_corners=False + ) + def test_align_corcers(): x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") interpolate(x, size=[12, 12], mode='BICUBIC', align_corners=3) @@ -481,6 +487,7 @@ def test_outshape_and_scale(): self.assertRaises(ValueError, test_mode_type) self.assertRaises(ValueError, test_input_shape) + self.assertRaises(ValueError, test_size_shape) self.assertRaises(TypeError, test_align_corcers) self.assertRaises(ValueError, test_attr_data_format) self.assertRaises(TypeError, test_actual_shape) diff --git a/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py index fed25ad18d258..a52a5b3f36d13 100644 --- a/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bicubic_interp_v2_op.py @@ -610,6 +610,20 @@ def test_size_type(): x, size={2, 2}, mode='bicubic', align_corners=False ) + def test_size_length(): + x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") + out = interpolate(x, size=[2], mode='bicubic', align_corners=False) + + def test_size_tensor_ndim(): + x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") + size = paddle.to_tensor(np.array([[2, 2]])) + out = interpolate(x, size=size, mode='bicubic', align_corners=False) + + def test_size_tensor_length(): + x = fluid.data(name="x", shape=[2, 3, 6, 6], dtype="float32") + size = paddle.to_tensor(np.array([2])) + out = interpolate(x, size=size, mode='bicubic', align_corners=False) + def test_input_shape_1(): x = fluid.data(name="x", shape=[2, 1, 0, 0], dtype="float32") out = interpolate( @@ -633,6 +647,9 @@ def test_input_shape_1(): self.assertRaises(ValueError, test_size_and_scale) self.assertRaises(ValueError, test_size_and_scale2) self.assertRaises(TypeError, test_size_type) + self.assertRaises(ValueError, test_size_length) + self.assertRaises(ValueError, test_size_tensor_ndim) + self.assertRaises(ValueError, test_size_tensor_length) self.assertRaises(ValueError, test_input_shape_1) def test_errors(self): diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 6631682d0e949..d9f5b0b160dc0 100644 --- a/python/paddle/nn/functional/common.py +++ 
b/python/paddle/nn/functional/common.py @@ -397,6 +397,23 @@ def interpolate( if size is None and scale_factor is None: raise ValueError("One of size and scale_factor must not be None.") + if (isinstance(size, list) or isinstance(size, tuple)) and len( + size + ) != x.ndim - 2: + raise ValueError( + 'The x and size should satisfy rank(x) - 2 == len(size).' + ) + + if isinstance(size, Variable): + if size.ndim != 1: + raise ValueError( + f"If size is a tensor, it's rank must be 1, but received {size.ndim}." + ) + if size.shape[0] != x.ndim - 2: + raise ValueError( + 'The x and size should satisfy rank(x) - 2 == size.shape[0].' + ) + if not isinstance(align_corners, bool): raise TypeError("Attr align_corners should be a bool value") From 1755a1549987601af10a3e6228bfbe41b796ff2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 11:00:05 +0800 Subject: [PATCH 15/89] fix div 0 error in conv1_transpose (#50000) --- paddle/phi/kernels/funcs/concat_and_split_functor.cc | 5 +++++ .../unittests/test_functional_conv1d_transpose.py | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cc b/paddle/phi/kernels/funcs/concat_and_split_functor.cc index aa73ba5f68990..fd61484eb8526 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.cc +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cc @@ -37,6 +37,11 @@ struct ConcatFunctor { } int64_t out_rows = rows, out_cols = 0; + PADDLE_ENFORCE_NE( + rows, + 0, + phi::errors::InvalidArgument("The input size should not be 0.")); + std::vector input_cols(input.size()); for (size_t i = 0; i < num; ++i) { int64_t t_cols = input[i].numel() / rows; diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py index 1d4e079f9f84a..865c848f8ba1d 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv1d_transpose.py @@ -82,5 +82,17 @@ def setUp(self): self.data_format = "NCL" +class TestFunctionalConv1DErrorCase3(TestFunctionalConv1DError): + def setUp(self): + self.input = np.random.randn(6, 0, 6) + self.filter = np.random.randn(6, 0, 0) + self.bias = None + self.padding = 0 + self.stride = 1 + self.dilation = 1 + self.groups = 1 + self.data_format = "NCL" + + if __name__ == "__main__": unittest.main() From 0d32f554c17a97aa534b4ff9901dcfa9a9c77f97 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 11:01:20 +0800 Subject: [PATCH 16/89] fix the indexerror of conv2d_transpose (#50005) --- .../fluid/tests/unittests/test_conv2d_transpose_op.py | 11 +++++++++++ python/paddle/static/nn/common.py | 3 +++ 2 files changed, 14 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py index afbce517f6243..89339303567f2 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_op.py @@ -989,6 +989,17 @@ def error_groups(): self.assertRaises(ValueError, error_groups) + def error_0_filter_number(): + out = paddle.static.nn.conv2d_transpose( + input=data, + groups=1, + num_filters=0, + filter_size=3, + data_format='NCHW', + ) + + self.assertRaises(ValueError, 
error_0_filter_number) + class TestConv2DTransposeRepr(unittest.TestCase): def test_case(self): diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 5da81feb3369d..3b40153cbb797 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -1542,6 +1542,9 @@ def conv2d_transpose( "but received {}".format(len(input.shape)) ) + if num_filters == 0: + raise ValueError("num of filters should not be 0.") + if data_format not in ['NCHW', 'NHWC']: raise ValueError( "Attr(data_format) of Op(paddle.static.nn.layers.conv2d_transpose) got wrong value: received " From da11aa40efa0e6ef4bfbcd72c9e3f8f86c39cd06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=AD=A3=E6=B5=B7?= <65210872+ccsuzzh@users.noreply.github.com> Date: Tue, 31 Jan 2023 11:02:11 +0800 Subject: [PATCH 17/89] Fix Python IndexError of case13: paddle.static.nn.batch_norm (#50011) * add channel_num check for paddle.static.nn.batch_norm * fix bugs * fix bugs --- python/paddle/fluid/tests/unittests/test_batch_norm_op.py | 4 ++++ python/paddle/fluid/tests/unittests/test_fold_op.py | 2 +- python/paddle/static/nn/common.py | 6 ++++++ 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py index c2a6c468e5c8f..02171db3fca75 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op.py @@ -768,6 +768,10 @@ def test_errors(self): ) self.assertRaises(TypeError, paddle.static.nn.batch_norm, x2) + # the first dimension of input for batch_norm must between [2d, 5d]. + x3 = paddle.static.data("", shape=[0], dtype="float32") + self.assertRaises(ValueError, paddle.static.nn.batch_norm, x3) + class TestDygraphBatchNormAPIError(unittest.TestCase): def test_errors(self): diff --git a/python/paddle/fluid/tests/unittests/test_fold_op.py b/python/paddle/fluid/tests/unittests/test_fold_op.py index 1f3193fa1fd49..a86161cc45023 100644 --- a/python/paddle/fluid/tests/unittests/test_fold_op.py +++ b/python/paddle/fluid/tests/unittests/test_fold_op.py @@ -179,7 +179,7 @@ def test_errors(self): with program_guard(Program(), Program()): def test_input_shape(): - # input_shpae must be 3-D + # input_shape must be 3-D x = paddle.randn(shape=[2, 3, 6, 7], dtype="float32") out = fold(x, output_sizes=[2, 3], kernel_sizes=[2, 2]) diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 3b40153cbb797..c43385a8e9140 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -2731,6 +2731,12 @@ def batch_norm( dtype = core.VarDesc.VarType.FP32 input_shape = input.shape + if len(input.shape) < 2 or len(input.shape) > 5: + raise ValueError( + 'expected 2D or 3D or 4D or 5D input (got {}D input, input shape is: {})'.format( + len(input.shape), input_shape + ) + ) if data_layout == 'NCHW': channel_num = input_shape[1] else: From a48ef36002c9f52bb7b4b6f6c3426cc913433ce1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 11:04:07 +0800 Subject: [PATCH 18/89] fix the NullPointerError of median (#50017) --- python/paddle/fluid/tests/unittests/test_median.py | 1 + python/paddle/tensor/stat.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_median.py b/python/paddle/fluid/tests/unittests/test_median.py index 
a62e722dd0496..1f90faeac018b 100644 --- a/python/paddle/fluid/tests/unittests/test_median.py +++ b/python/paddle/fluid/tests/unittests/test_median.py @@ -86,6 +86,7 @@ def test_median_exception(self): x = paddle.arange(12).reshape([3, 4]) self.assertRaises(ValueError, paddle.median, x, 1.0) self.assertRaises(ValueError, paddle.median, x, 2) + self.assertRaises(ValueError, paddle.median, paddle.to_tensor([])) if __name__ == '__main__': diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index e23f28aa76b1b..cc94aee415541 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -406,6 +406,9 @@ def median(x, axis=None, keepdim=False, name=None): if not isinstance(x, Variable): raise TypeError("In median, the input x should be a Tensor.") + if x.size == 0: + raise ValueError("In median, the size of input x should not be 0.") + if len(x.shape) == 0: return x.clone() From 48b3e86956fd8b25f11be60e04df0b63df857d4c Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 31 Jan 2023 11:27:46 +0800 Subject: [PATCH 19/89] [Decouple phi] Decouple custom_op in fluid and phi (#49866) * decouple phi custom_op * decouple phi custom_op, remove codes * delete custom symbol of inference --- paddle/fluid/inference/paddle_inference.map | 3 --- .../inference/paddle_inference_custom_device.map | 3 --- paddle/phi/api/ext/op_meta_info.h | 10 ---------- paddle/phi/api/lib/op_meta_info.cc | 12 ------------ 4 files changed, 28 deletions(-) diff --git a/paddle/fluid/inference/paddle_inference.map b/paddle/fluid/inference/paddle_inference.map index acbdcf5d78358..3d2dc85cb8368 100644 --- a/paddle/fluid/inference/paddle_inference.map +++ b/paddle/fluid/inference/paddle_inference.map @@ -39,9 +39,6 @@ *paddle::GPUContextResource*; *paddle::CPUContextResource*; - *paddle::LoadCustomOperatorLib*; - *paddle::RegisterAllCustomOperator*; - /* ut needs the following symbol, we need to modify all the ut to hidden such symbols */ /* Another question: the ut size will grow from 50M to 80M, why? */ diff --git a/paddle/fluid/inference/paddle_inference_custom_device.map b/paddle/fluid/inference/paddle_inference_custom_device.map index 83de7d9a77566..7434678d8679a 100644 --- a/paddle/fluid/inference/paddle_inference_custom_device.map +++ b/paddle/fluid/inference/paddle_inference_custom_device.map @@ -39,9 +39,6 @@ *paddle::GPUContextResource*; *paddle::CPUContextResource*; - *paddle::LoadCustomOperatorLib*; - *paddle::RegisterAllCustomOperator*; - /* ut needs the following symbol, we need to modify all the ut to hidden such symbols */ /* Another question: the ut size will grow from 50M to 80M, why? */ diff --git a/paddle/phi/api/ext/op_meta_info.h b/paddle/phi/api/ext/op_meta_info.h index 7d2be9c90d79e..978599c3bc299 100644 --- a/paddle/phi/api/ext/op_meta_info.h +++ b/paddle/phi/api/ext/op_meta_info.h @@ -619,16 +619,6 @@ class PADDLE_API OpMetaInfoBuilder { size_t index_; }; -/////////////////////// Op register API ///////////////////////// - -// For inference: compile directly with framework -// Call after PD_BUILD_OP(...) 
-void RegisterAllCustomOperator(); - -// Using this api to load compiled custom operator's dynamic library and -// register Custom Operator into it -void LoadCustomOperatorLib(const std::string& dso_name); - /////////////////////// Op register Macro ///////////////////////// #define PD_BUILD_OP(op_name) \ diff --git a/paddle/phi/api/lib/op_meta_info.cc b/paddle/phi/api/lib/op_meta_info.cc index 8d64246bdb69f..a6b7921c30c61 100644 --- a/paddle/phi/api/lib/op_meta_info.cc +++ b/paddle/phi/api/lib/op_meta_info.cc @@ -19,7 +19,6 @@ limitations under the License. */ #include #include "glog/logging.h" -#include "paddle/fluid/framework/custom_operator.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/enforce.h" @@ -244,17 +243,6 @@ OpMetaInfoBuilder& OpMetaInfoBuilder::SetInferDtypeFn(InferDtypeFunc func) { info_ptr_->SetInferDtypeFn(std::forward(func)); return *this; } - -/////////////////////// Op register API ///////////////////////// - -void RegisterAllCustomOperator() { - auto& op_meta_info_map = OpMetaInfoMap::Instance(); - framework::RegisterOperatorWithMetaInfoMap(op_meta_info_map); -} - -void LoadCustomOperatorLib(const std::string& dso_name) { - paddle::framework::LoadOpMetaInfoAndRegisterOp(dso_name); -} } // namespace paddle #ifdef __cplusplus From eba7b584fed4a1f55eb9422b390f2d90b8b505e1 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Tue, 31 Jan 2023 12:02:38 +0800 Subject: [PATCH 20/89] change no_event GC to fast GC for xpu (#49871) --- .../garbage_collector/garbage_collector.cc | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc index 73e6664f66f1e..8ff8b9528322f 100644 --- a/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc +++ b/paddle/fluid/framework/new_executor/garbage_collector/garbage_collector.cc @@ -60,7 +60,16 @@ CreateInterpreterCoreGarbageCollector( return std::unique_ptr( new InterpreterCoreEventGarbageCollector(vec_instruction)); } - } else if (platform::is_xpu_place(place) || platform::is_ipu_place(place)) { + } else if (platform::is_xpu_place(place)) { + // Because there is no multi-stream on XPU device, fast GC can + // be used. + // Previously, XPU used no_event GC. But `Wait` in no_event GC + // may cause GC delayed, causing no enough memory problem. + // TODO(pangyoki): Multi-stream allocator and multi-stream GC + // are needed to be adapted for XPU. 
+ return std::unique_ptr( + new InterpreterCoreFastGarbageCollector()); + } else if (platform::is_ipu_place(place)) { return std::unique_ptr( new InterpreterCoreNoEventGarbageCollector()); } else { From 7122760a5506fa1844893c0c3e97a23376ff855b Mon Sep 17 00:00:00 2001 From: LoneRanger <836253168@qq.com> Date: Tue, 31 Jan 2023 12:06:19 +0800 Subject: [PATCH 21/89] [Fix deprecation warning in test] np.float => np.float64 (#49970) --- python/paddle/optimizer/lr.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 258e69978a2ec..07420be8915d3 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -1406,7 +1406,7 @@ def step(self, metrics, epoch=None): metrics, (int, float, numpy.float32, numpy.float64) ): raise TypeError( - "metrics must be 'int', 'float', 'np.float', 'numpy.ndarray' or 'paddle.Tensor', but receive {}".format( + "metrics must be 'int', 'float', 'np.float64', 'numpy.ndarray' or 'paddle.Tensor', but receive {}".format( type(metrics) ) ) From 754ab7050282606b861fa27291561c3de12af2fd Mon Sep 17 00:00:00 2001 From: TeFeng Chen Date: Tue, 31 Jan 2023 12:38:07 +0800 Subject: [PATCH 22/89] support inplaced variable in cinn_launch (#49912) * support inplaced variable in cinn_launch * fix error hint when compiling * fix inplaced output variable of the subgraph * skip CinnCompiler check * using existed definition * fix namespace reference error * modify error message * update cinn tage * fix namespace * skip enforce check * fix unittest attribute throw --- cmake/external/cinn.cmake | 4 +- .../framework/paddle2cinn/cinn_compiler.cc | 10 ++- .../operators/cinn/cinn_instruction_run_op.h | 3 +- .../operators/cinn/cinn_launch_context.cc | 82 ++++++++++--------- .../operators/cinn/cinn_launch_context.h | 11 ++- .../cinn/cinn_launch_context_test.cc | 2 + paddle/fluid/operators/cinn/test_helper.h | 2 + 7 files changed, 69 insertions(+), 45 deletions(-) diff --git a/cmake/external/cinn.cmake b/cmake/external/cinn.cmake index 742219b53f19e..3ec194a6bfb37 100644 --- a/cmake/external/cinn.cmake +++ b/cmake/external/cinn.cmake @@ -17,8 +17,8 @@ if(NOT WITH_CINN) endif() if(NOT CINN_GIT_TAG) - # 2023.01.12 commit - set(CINN_GIT_TAG 5d1ae0f4b8e3f7cd5b16dfc76d2161bf77e938ac) + # 2023.01.28 commit + set(CINN_GIT_TAG 1449890f7724babf2a343c6f8073bd28a7bbc683) endif() message(STATUS "CINN version: " ${CINN_GIT_TAG}) diff --git a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc index 899d4177271c6..c01624a554961 100644 --- a/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc +++ b/paddle/fluid/framework/paddle2cinn/cinn_compiler.cc @@ -26,6 +26,7 @@ #include "cinn/auto_schedule/tuning.h" #include "cinn/common/target.h" #include "cinn/common/type.h" +#include "cinn/frontend/op_mapper_registry.h" #include "cinn/frontend/optimize.h" #include "cinn/frontend/syntax.h" #include "cinn/hlir/framework/graph.h" @@ -54,6 +55,7 @@ namespace paddle2cinn { using ::cinn::auto_schedule::AutoTuner; using ::cinn::common::Target; using ::cinn::frontend::Optimize; +using ::cinn::frontend::paddle::InplaceOutSuffix; using ::cinn::hlir::framework::BuildScope; using ::cinn::hlir::framework::GraphCompiler; using inference::analysis::Dot; @@ -239,11 +241,17 @@ void CinnCompiler::CheckCompiledValid( const std::map &input_tensors, const CinnCompiledObject &compiled_obj) const { const auto &input_var_names = graph.Get>(kInputVars); + const auto &inplace_var_names = + 
graph.Get>(kInplaceVarNames); const auto &output_var_names = graph.Get>(kOutputVars); auto *launch_context = compiled_obj.launch_context.get(); // 1. check all of the output variables will be assigned by compiled program - for (auto &&var_name : output_var_names) { + for (auto var_name : output_var_names) { + // inplace variables are renamed with a specified suffix + if (inplace_var_names.count(var_name)) { + var_name += InplaceOutSuffix; + } PADDLE_ENFORCE_EQ(launch_context->IsVariableUsed(var_name), true, platform::errors::PreconditionNotMet( diff --git a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h index d0011eec0d68f..f9d0002883ae0 100644 --- a/paddle/fluid/operators/cinn/cinn_instruction_run_op.h +++ b/paddle/fluid/operators/cinn/cinn_instruction_run_op.h @@ -59,7 +59,8 @@ class CinnInstructionRunOpKernel : public framework::OpKernel { auto share_argument_buffer_fn = [launch_context, &ctx](const std::string& var_name) { cinn_buffer_t* buffer = launch_context->GetCinnBufferOfVar(var_name); - framework::Variable* var = ctx.scope().GetVar(var_name); + std::string revise_var_name = launch_context->RedirectVarName(var_name); + framework::Variable* var = ctx.scope().GetVar(revise_var_name); auto* tensor = var->template GetMutable(); buffer->memory = reinterpret_cast(tensor->mutable_data( ctx.GetPlace(), diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index 2aa8c1b8b89ba..af429e0f01e33 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -19,6 +19,7 @@ #include #include +#include "cinn/frontend/op_mapper_registry.h" #include "cinn/hlir/framework/graph_compiler.h" #include "cinn/hlir/framework/instruction.h" #include "cinn/hlir/framework/scope.h" @@ -50,6 +51,8 @@ using framework::ParallelExecutor; using framework::Scope; using CinnInstruction = ::cinn::hlir::framework::Instruction; using CinnRuntimeProgram = ::cinn::hlir::framework::Program; +using ::cinn::frontend::paddle::InplaceOutSuffix; +using framework::paddle2cinn::kInplaceVarNames; using framework::paddle2cinn::kMemOptVarInfoFromMainGraph; using framework::paddle2cinn::kSkipGcVarNames; using framework::paddle2cinn::Name2VarInfoMap; @@ -72,6 +75,8 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, graph.Get>(framework::paddle2cinn::kInputVars); const auto& output_var_names = graph.Get>(framework::paddle2cinn::kOutputVars); + inplace_var_names_ = + graph.Get>(kInplaceVarNames); internal_var_names_ = ExtractInternalVarNames(input_var_names, output_var_names); // initialize all execution arguments @@ -83,7 +88,13 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, } } for (auto&& var_name : output_var_names) { - AssignExternalVariable(var_name); + if (inplace_var_names_.count(var_name)) { + VLOG(4) << "Inplaced variable:" << var_name << " -> " + << var_name + InplaceOutSuffix << " as paddle2cinn varmap key"; + AssignExternalVariable(var_name + InplaceOutSuffix); + } else { + AssignExternalVariable(var_name); + } } for (auto&& var_name : internal_var_names_) { AssignInternalVariable(var_name); @@ -124,14 +135,13 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, "Distribution of variables in the graph compiled:" "input[%lu],internal[%lu],output[%lu]," "outer_eager_deletion[%lu],skip_eager_deletion[%lu]," - "skip_gc_vars_[%lu],initialized_beforehand[%lu]", + 
"skip_gc_vars_[%lu]", input_var_names.size(), internal_var_names_.size(), output_var_names.size(), outer_varinfo.size(), skip_eager_vars_.size(), - skip_gc_vars_.size(), - initialized_beforehand_vars_.size()); + skip_gc_vars_.size()); } void CinnLaunchContext::BuildVarNameMap( @@ -214,8 +224,12 @@ std::unordered_set CinnLaunchContext::ExtractInternalVarNames( [](const auto& name_pair) { return name_pair.first; }); // exclude the input variables and output variables - auto exclude_names_fn = [&remain_var_names](const std::string& var_name) { + auto exclude_names_fn = [this, + &remain_var_names](const std::string& var_name) { remain_var_names.erase(var_name); + if (inplace_var_names_.count(var_name)) { + remain_var_names.erase(var_name + InplaceOutSuffix); + } }; std::for_each( input_var_names.begin(), input_var_names.end(), exclude_names_fn); @@ -281,11 +295,12 @@ void CinnLaunchContext::AssignExternalVariable(const std::string& var_name) { platform::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); auto* cinn_buffer = GetCinnBufferOfVar(var_name); + std::string revise_var_name = RedirectVarName(var_name); // assign external malloc/free callbacks of cinn_buffer_t cinn_buffer->external_malloc = new std::function( - [this, var_name](void* ctx, cinn_buffer_t* buffer) { - auto* tensor = - cached_scope_->GetVar(var_name)->GetMutable(); + [this, revise_var_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = cached_scope_->GetVar(revise_var_name) + ->GetMutable(); tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions)); buffer->memory = reinterpret_cast(tensor->mutable_data( *cached_place_, @@ -307,11 +322,12 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) { platform::errors::InvalidArgument( "Variable(%s) not applied in cinn", var_name)); auto* cinn_buffer = GetCinnBufferOfVar(var_name); + std::string revise_var_name = RedirectVarName(var_name); // assign external malloc/free callbacks of cinn_buffer_t cinn_buffer->external_malloc = new std::function( - [this, var_name](void* ctx, cinn_buffer_t* buffer) { - auto* tensor = - cached_temp_scope_->Var(var_name)->GetMutable(); + [this, revise_var_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = cached_temp_scope_->Var(revise_var_name) + ->GetMutable(); tensor->Resize(framework::DDim(buffer->dims, buffer->dimensions)); buffer->memory = reinterpret_cast(tensor->mutable_data( *cached_place_, @@ -322,8 +338,8 @@ void CinnLaunchContext::AssignInternalVariable(const std::string& var_name) { // internal variables should release its buffer immediately // if no instruction use it cinn_buffer->external_free = new std::function( - [this, var_name](void* ctx, cinn_buffer_t* buffer) { - auto* tensor = cached_temp_scope_->GetVar(var_name) + [this, revise_var_name](void* ctx, cinn_buffer_t* buffer) { + auto* tensor = cached_temp_scope_->GetVar(revise_var_name) ->GetMutable(); tensor->clear(); return 0; @@ -359,7 +375,6 @@ std::unique_ptr CinnLaunchContext::BuildCompiledProgram( // are set by values of the corresponding compiled tensors, // including the in/out variables where the equiality between their tensors // and the CINN compiled ones is verified in corresponding cinn_launch_op. 
- std::unordered_set has_refer_vars; for (auto&& arg : cinn_argument_names_) { const std::string& var_name = cinn2paddle_varmap_.at(arg); framework::VarDesc* var_desc = block->Var(var_name); @@ -370,7 +385,6 @@ std::unique_ptr CinnLaunchContext::BuildCompiledProgram( auto* ori_desc = res->second; var_desc->SetPersistable(ori_desc->Persistable()); var_desc->SetIsParameter(ori_desc->IsParameter()); - has_refer_vars.insert(var_name); } auto cinn_tensor = GetCinnTensorOfVar(var_name); @@ -404,13 +418,6 @@ std::unique_ptr CinnLaunchContext::BuildCompiledProgram( auto* ins = instructions.at(ins_idx).get(); auto in_args = trans_and_pack_args_fn(ins->GetInArgs()); auto out_args = trans_and_pack_args_fn(ins->GetOutArgs()); - for (auto&& var_name : in_args) { - if (!has_refer_vars.count(var_name)) { - initialized_beforehand_vars_.emplace_back(var_name); - } - } - has_refer_vars.insert(out_args.begin(), out_args.end()); - auto* op_desc = block->AppendOp(); op_desc->SetType("cinn_instruction_run"); op_desc->SetInput(kX, in_args); @@ -453,14 +460,6 @@ ParallelExecutor* CinnLaunchContext::InitializePE(const platform::Place& place, framework::proto::VarType::LOD_TENSOR); } - for (auto&& var_name : initialized_beforehand_vars_) { - auto* var = scope->GetVar(var_name); - auto* buffer = GetCinnBufferOfVar(var_name); - auto dim = framework::DDim(buffer->dims, buffer->dimensions); - var->GetMutable()->Resize(dim); - var->GetMutable()->mutable_data( - place, framework::paddle2cinn::TransToPaddleDataType(buffer->type)); - } return parallel_executor_.get(); } @@ -493,17 +492,24 @@ framework::InterpreterCore* CinnLaunchContext::InitializeInterpreterCore( } UpdateCapturedEnv(*scope, place); } - for (auto&& var_name : initialized_beforehand_vars_) { - auto* var = scope->GetVar(var_name); - auto* buffer = GetCinnBufferOfVar(var_name); - auto dim = framework::DDim(buffer->dims, buffer->dimensions); - var->GetMutable()->Resize(dim); - var->GetMutable()->mutable_data( - place, framework::paddle2cinn::TransToPaddleDataType(buffer->type)); - } return interpreter_core_.get(); } +std::string CinnLaunchContext::RedirectVarName(const std::string& var_name) { + auto pos = var_name.find(InplaceOutSuffix); + if (pos == std::string::npos) { + return var_name; + } + std::string remove_suffix_name = var_name.substr(0, pos); + if (!inplace_var_names_.count(remove_suffix_name)) { + LOG(WARNING) << "Variable:" << remove_suffix_name + << " was not marked as inplaced by Paddle, but CINN does"; + } + VLOG(4) << "Inplaced variable:" << var_name << " redirect to " + << remove_suffix_name; + return remove_suffix_name; +} + cinn_buffer_t* CinnLaunchContext::GetCinnBufferOfVar( const std::string& var_name) { auto res = paddle2argument_.find(var_name); diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.h b/paddle/fluid/operators/cinn/cinn_launch_context.h index f4794e6335bb6..97016cc7f56f3 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.h +++ b/paddle/fluid/operators/cinn/cinn_launch_context.h @@ -96,6 +96,9 @@ class CinnLaunchContext { return skip_eager_vars_; } + // Redirect the name of a Paddle variable to the orignal if it was inplaced + std::string RedirectVarName(const std::string& var_name); + // Return internal variable names list const std::unordered_set& GetInternalVarNames() const { return internal_var_names_; @@ -151,11 +154,13 @@ class CinnLaunchContext { std::unordered_map cinn2paddle_varmap_; // a list of internal variable names in Paddle std::unordered_set internal_var_names_; + // In CINN, there are two 
variables(in/out) mapped to the one inplaced + // variable of Paddle. To resovle this conflict, we add a output counterpart + // in Paddle with the name suffixed by @InplaceOut. + // This set stores which Paddle variable names are inplaced. + std::unordered_set inplace_var_names_; // the names of the cinn arguments used in compiled executable program std::unordered_set cinn_argument_names_; - // TODO(CtfGo): remove this list after fixing batch_norm bug - // due to duplicate association in the same variable. - std::vector initialized_beforehand_vars_; // the variable scope compiled from cinn const std::shared_ptr cinn_scope_; diff --git a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc index 2b4bc9acf1284..c362650c15d71 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context_test.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context_test.cc @@ -90,6 +90,8 @@ const Graph& InitDefaultSubgraph() { new std::vector({"var5"})); graph->GetOrInit( framework::paddle2cinn::kMemOptVarInfoFromMainGraph); + graph->GetOrInit>( + framework::paddle2cinn::kInplaceVarNames); }); return *graph.get(); } diff --git a/paddle/fluid/operators/cinn/test_helper.h b/paddle/fluid/operators/cinn/test_helper.h index d35996771b4d9..040a185810136 100644 --- a/paddle/fluid/operators/cinn/test_helper.h +++ b/paddle/fluid/operators/cinn/test_helper.h @@ -84,6 +84,8 @@ std::unique_ptr CreateOnlyElementwiseAddGraph( new std::vector({out_name})); g->GetOrInit( framework::paddle2cinn::kMemOptVarInfoFromMainGraph); + g->GetOrInit>( + framework::paddle2cinn::kInplaceVarNames); return g; } From e24745957735baf64d6d03230e86b304da11fcbf Mon Sep 17 00:00:00 2001 From: Leo Guo <58431564+ZibinGuo@users.noreply.github.com> Date: Tue, 31 Jan 2023 12:55:24 +0800 Subject: [PATCH 23/89] [XPU] Add unitest for set_value_grad. (#50049) --- .../unittests/xpu/test_set_value_op_xpu.py | 563 +++++++++++++++++- 1 file changed, 558 insertions(+), 5 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py index cb9bacb48d7c0..72bb45da7ec9d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_set_value_op_xpu.py @@ -16,12 +16,10 @@ import sys import unittest +from functools import reduce import numpy as np -# from functools import reduce - - sys.path.append("../") from op_test_xpu import XPUOpTest from xpu.get_test_cover_info import ( @@ -31,8 +29,7 @@ ) import paddle - -# from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.layer_helper import LayerHelper class XPUTestSetValueOp(XPUOpTestWrapper): @@ -927,6 +924,562 @@ def test_error(self): self._bool_tensor_error() self._broadcast_mismatch() + # 5. 
Test backward + class XPUTestBackward(XPUOpTest): + def setUp(self): + self.__class__.op_type = "set_value" + self.__class__.no_need_check_grad = True + self.place = paddle.XPUPlace(0) + + def test_static(self): + paddle.enable_static() + main_program = paddle.static.Program() + startup_program = paddle.static.Program() + + x_np = np.random.random(size=(4, 4)).astype('float32') + y_np = np.random.random(size=(4, 4)).astype('float32') + label_np = np.random.randint(2, size=(4, 1)).astype('int64') + + with paddle.static.program_guard(main_program, startup_program): + x = paddle.static.data(name="x", shape=[4, 4], dtype='float32') + y = paddle.static.data(name="y", shape=[4, 4], dtype='float32') + + label = paddle.static.data( + name="label", shape=[4, 1], dtype='int64' + ) + + z = paddle.add(x, y) + var = y[0, :] + z[0, :] = var + + prediction = paddle.static.nn.fc( + x=z, size=2, activation='softmax' + ) + + cost = paddle.nn.functional.cross_entropy( + input=prediction, label=label + ) + loss = paddle.mean(cost) + sgd = paddle.optimizer.SGD(learning_rate=0.01) + sgd.minimize(loss) + + exe = paddle.static.Executor(self.place) + exe.run(startup_program) + + var_grad, z_grad = exe.run( + main_program, + feed={"x": x_np, "y": y_np, "label": label_np}, + fetch_list=[var.name + "@GRAD", z.name + "@GRAD"], + ) + + self.assertTrue((var_grad == z_grad[0, :]).all()) + paddle.disable_static() + + class XPUTestGradientTruncated(XPUOpTest): + def setUp(self): + self.__class__.op_type = "set_value" + self.__class__.no_need_check_grad = True + self.place = paddle.XPUPlace(0) + + def test_consistent_with_competitor(self): + paddle.disable_static() + + def set_value(t, value): + a = t * t + a[0, 1] = value + y = a * a + return y.sum() + + # case 1 + array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape( + [1, 2, 1, 3, 1, 4] + ) + value = np.arange(100, 104, dtype="float32").reshape(1, 4) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value(inps, value) + loss.backward() + + value_grad = np.array([[600.0, 606.0, 612.0, 618.0]]) + input_grad = np.array( + [ + [ + [ + [ + [[4.0, 32.0, 108.0, 256.0]], + [[500.0, 864.0, 1372.0, 2048.0]], + [[2916.0, 4000.0, 5324.0, 6912.0]], + ] + ], + [ + [ + [[0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0]], + [[0.0, 0.0, 0.0, 0.0]], + ] + ], + ] + ] + ) + np.testing.assert_array_equal( + inps.grad.numpy(), + input_grad, + err_msg='The gradient of value should be \n{},\n but reveived {}'.format( + input_grad, inps.grad.numpy() + ), + ) + np.testing.assert_array_equal( + value.grad.numpy(), + value_grad, + err_msg='The gradient of input should be \n{},\n but reveived {}'.format( + value_grad, value.grad.numpy() + ), + ) + + # case 2 + array = np.arange(1, 2 * 3 * 4 + 1, dtype="float32").reshape( + [4, 2, 3] + ) + value = np.arange(100, 100 + 1, dtype="float32") + + inps2 = paddle.to_tensor(array, stop_gradient=False) + value2 = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value(inps2, value2) + loss.backward() + + value_grad2 = np.array([600.0]) + input_grad2 = np.array( + [ + [[4.0, 32.0, 108.0], [0.0, 0.0, 0.0]], + [[1372.0, 2048.0, 2916.0], [4000.0, 5324.0, 6912.0]], + [[8788.0, 10976.0, 13500.0], [16384.0, 19652.0, 23328.0]], + [[27436.0, 32000.0, 37044.0], [42592.0, 48668.0, 55296.0]], + ] + ) + np.testing.assert_array_equal( + inps2.grad.numpy(), + input_grad2, + err_msg='The gradient of value should be \n{},\n but reveived {}'.format( + input_grad, inps2.grad.numpy() + ), 
+ ) + np.testing.assert_array_equal( + value2.grad.numpy(), + value_grad2, + err_msg='The gradient of input should be \n{},\n but reveived {}'.format( + value_grad, value2.grad.numpy() + ), + ) + + # case 3 + def set_value3(t, value): + a = t * t + a[0, :, 0, :] = value + y = a * a + return y.sum() + + array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape( + [4, 3, 1, 1, 2, 1] + ) + value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value3(inps, value) + loss.backward() + + value_grad = np.array([[[600.0], [606.0]]]) + input_grad = np.array( + [ + [ + [[[[0.0], [0.0]]]], + [[[[0.0], [0.0]]]], + [[[[0.0], [0.0]]]], + ], + [ + [[[[1372.0], [2048.0]]]], + [[[[2916.0], [4000.0]]]], + [[[[5324.0], [6912.0]]]], + ], + [ + [[[[8788.0], [10976.0]]]], + [[[[13500.0], [16384.0]]]], + [[[[19652.0], [23328.0]]]], + ], + [ + [[[[27436.0], [32000.0]]]], + [[[[37044.0], [42592.0]]]], + [[[[48668.0], [55296.0]]]], + ], + ] + ) + np.testing.assert_array_equal( + inps.grad.numpy(), + input_grad, + err_msg='The gradient of value should be \n{},\n but reveived {}'.format( + input_grad, inps.grad.numpy() + ), + ) + np.testing.assert_array_equal( + value.grad.numpy(), + value_grad, + err_msg='The gradient of input should be \n{},\n but reveived {}'.format( + value_grad, value.grad.numpy() + ), + ) + + # case 4: step >0 + def set_value4(t, value): + a = t * t + a[0, :, 0, ::3] = value + y = a * a + return y.sum() + + array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape( + [2, 3, 1, 4, 1] + ) + value = np.arange(100, 100 + 2, dtype="float32").reshape(1, 2, 1) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value4(inps, value) + loss.backward() + + value_grad = np.array([[[600.0], [606.0]]]) + input_grad = np.array( + [ + [ + [[[0.0], [32.0], [108.0], [0.0]]], + [[[0.0], [864.0], [1372.0], [0.0]]], + [[[0.0], [4000.0], [5324.0], [0.0]]], + ], + [ + [[[8788.0], [10976.0], [13500.0], [16384.0]]], + [[[19652.0], [23328.0], [27436.0], [32000.0]]], + [[[37044.0], [42592.0], [48668.0], [55296.0]]], + ], + ] + ) + np.testing.assert_array_equal( + inps.grad.numpy(), + input_grad, + err_msg='The gradient of value should be \n{},\n but reveived {}'.format( + input_grad, inps.grad.numpy() + ), + ) + np.testing.assert_array_equal( + value.grad.numpy(), + value_grad, + err_msg='The gradient of input should be \n{},\n but reveived {}'.format( + value_grad, value.grad.numpy() + ), + ) + + # case 5:a[0].shape==value.shape + def set_value5(t, value): + a = t * t + a[0] = value + y = a * a + return y.sum() + + array = np.arange(1, 1 + 2 * 3 * 4, dtype="float32").reshape( + [2, 3, 4] + ) + value = np.arange(100, 100 + 12, dtype="float32").reshape(3, 4) + + inps = paddle.to_tensor(array, stop_gradient=False) + value = paddle.to_tensor(value, stop_gradient=False) + + loss = set_value5(inps, value) + loss.backward() + + value_grad = np.array( + [ + [200.0, 202.0, 204.0, 206.0], + [208.0, 210.0, 212.0, 214.0], + [216.0, 218.0, 220.0, 222.0], + ] + ) + input_grad = np.array( + [ + [ + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + [0.0, 0.0, 0.0, 0.0], + ], + [ + [8788.0, 10976.0, 13500.0, 16384.0], + [19652.0, 23328.0, 27436.0, 32000.0], + [37044.0, 42592.0, 48668.0, 55296.0], + ], + ] + ) + np.testing.assert_array_equal( + inps.grad.numpy(), + input_grad, + err_msg='The gradient of value should be \n{},\n 
but reveived {}'.format( + input_grad, inps.grad.numpy() + ), + ) + np.testing.assert_array_equal( + value.grad.numpy(), + value_grad, + err_msg='The gradient of input should be \n{},\n but reveived {}'.format( + value_grad, value.grad.numpy() + ), + ) + + # case 6: pass stop_gradient from value to x + x = paddle.zeros([8, 8], dtype='float32') + value = paddle.to_tensor([10], dtype='float32', stop_gradient=False) + + self.assertTrue(x.stop_gradient) + self.assertTrue(x.is_leaf) + + x[0, :] = value + + self.assertTrue(not x.stop_gradient) + self.assertTrue(not x.is_leaf) + + def test_static_graph(self): + paddle.enable_static() + + to_string = lambda x, i: x + '_' + str(i) + numel = lambda input_shape: reduce(lambda x, y: x * y, input_shape) + + def op1(x): + value = paddle.fluid.layers.fill_constant([1], "float32", 1) + # test stop_gradient + value.stop_gradient = True + x.stop_gradient = False + start = paddle.fluid.layers.fill_constant( + [1], "int32", 5, force_cpu=True + ) + end = paddle.fluid.layers.fill_constant( + [1], "int32", 0, force_cpu=True + ) + step = paddle.fluid.layers.fill_constant( + [1], "int32", -2, force_cpu=True + ) + + inputs = { + 'Input': x, + 'ValueTensor': value, + 'StartsTensorList': [ + start, + ], + 'EndsTensorList': [ + end, + ], + 'StepsTensorList': [ + step, + ], + } + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs={'axes': [0]}, + ) + + return y, value + + def op2(x): + value = paddle.fluid.layers.fill_constant( + [1, 3, 2], "float32", 1 + ) + # test stop_gradient + value.stop_gradient = False + x.stop_gradient = False + attrs = { + 'axes': [0], + 'starts': [6], + 'ends': [0], + 'steps': [-4], + 'decrease_axes': [], + 'none_axes': [], + 'dtype': paddle.float32, + } + inputs = {'Input': x, 'ValueTensor': value} + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs=attrs, + ) + + return y, value + + def op3(x): + value = paddle.fluid.layers.fill_constant([1], "float32", 1) + x.stop_gradient = True + value.stop_gradient = False + start = paddle.fluid.layers.fill_constant( + [1], "int32", 0, force_cpu=True + ) + end = paddle.fluid.layers.fill_constant( + [1], "int32", 5, force_cpu=True + ) + step = paddle.fluid.layers.fill_constant( + [1], "int32", 3, force_cpu=True + ) + + inputs = { + 'Input': x, + 'ValueTensor': value, + 'StartsTensorList': [ + start, + ], + 'EndsTensorList': [ + end, + ], + 'StepsTensorList': [ + step, + ], + } + + helper = LayerHelper("set_value") + y = helper.create_variable_for_type_inference(dtype=x.dtype) + + helper.append_op( + type="set_value", + inputs=inputs, + outputs={'Out': y}, + attrs={'axes': [0]}, + ) + + return y, value + + def set_value(array, i, op): + name_x = to_string('x', i) + x = paddle.static.data( + name=name_x, shape=array.shape, dtype='float32' + ) + + # set_value_op in __get/setitem__ is an inplace operation. + # When `input.stop_gradient = True` and `value.stop_gradient = False`, + # set_value_grad_op will not be run during backward. 
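[Editorial illustration, not part of the patch] The gradient-truncation cases above all check the same chain-rule behaviour: writing a tensor into a slice of an intermediate result cuts the gradient of the overwritten elements and routes it to the written value instead. A minimal dygraph sketch of what these cases verify (run on the default place rather than an XPUPlace; shapes and constants are chosen only for illustration):

    import numpy as np
    import paddle

    t = paddle.to_tensor(
        np.arange(1.0, 7.0, dtype="float32").reshape([2, 3]), stop_gradient=False
    )
    v = paddle.to_tensor(np.full([3], 100.0, dtype="float32"), stop_gradient=False)

    a = t * t            # a = [[1, 4, 9], [16, 25, 36]]
    a[0] = v             # in-place set_value on the intermediate tensor
    loss = (a * a).sum()
    loss.backward()

    print(v.grad.numpy())     # [200. 200. 200.]  -> 2 * a[0] after the write
    print(t.grad.numpy()[0])  # [0. 0. 0.]        -> the overwritten row gets no gradient
    print(t.grad.numpy()[1])  # [256. 500. 864.]  -> 4 * t**3 for t = 4, 5, 6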
+ y, value = op(x) + y2 = y + 1 + loss = paddle.sum(y2) + sgd = paddle.optimizer.Adam() + sgd.minimize(loss) + place = self.place + + prog = paddle.static.default_main_program() + exe = paddle.static.Executor(place) + exe.run(paddle.static.default_startup_program()) + fetch_list = [] + if not x.stop_gradient: + fetch_list.append(x.grad_name) + if not value.stop_gradient: + fetch_list.append(value.grad_name) + out = exe.run(prog, feed={x.name: array}, fetch_list=fetch_list) + return out + + input_shape = [7, 6, 5, 4, 3, 2] + + array = np.arange(0, numel(input_shape), dtype="float32").reshape( + input_shape + ) + + for i in range(len(input_shape)): + program = paddle.static.Program() + with paddle.static.program_guard(program): + out1 = set_value(array, i, op1) + self.assertTrue((out1[0][5:0:-2] == 0).all()) + + if len(array.shape) > 2: + program2 = paddle.static.Program() + with paddle.static.program_guard(program2): + out2 = set_value(array, i, op2) + self.assertTrue((out2[0][6:0:-4] == 0).all()) + + program3 = paddle.static.Program() + with paddle.static.program_guard(program3): + out3 = set_value(array, i, op3) + self.assertTrue( + (numel(out1[0][0:5:3].shape) == out3[0]).all() + ) + + array = array[0] + paddle.disable_static() + + class XPUTestSetValueInplace(XPUOpTest): + def setUp(self): + self.__class__.op_type = "set_value" + self.__class__.no_need_check_grad = True + self.place = paddle.XPUPlace(0) + + def test_inplace(self): + paddle.disable_static() + with paddle.fluid.dygraph.guard(): + paddle.seed(100) + a = paddle.rand(shape=[1, 4]) + a.stop_gradient = False + b = a[:] + c = b + b[paddle.to_tensor(0)] = 1.0 + + self.assertTrue(id(b) == id(c)) + np.testing.assert_array_equal(b.numpy(), c.numpy()) + self.assertEqual(b.inplace_version, 0) + + paddle.enable_static() + + class XPUTestSetValueInplaceLeafVar(XPUOpTest): + def setUp(self): + self.__class__.op_type = "set_value" + self.__class__.no_need_check_grad = True + self.place = paddle.XPUPlace(0) + + def test_inplace_var_become_leaf_var(self): + paddle.disable_static() + + a_grad_1, b_grad_1, a_grad_2, b_grad_2 = 0, 1, 2, 3 + with paddle.fluid.dygraph.guard(): + paddle.seed(100) + a = paddle.rand(shape=[1, 4]) + b = paddle.rand(shape=[1, 4]) + a.stop_gradient = False + b.stop_gradient = False + c = a / b + c.sum().backward() + a_grad_1 = a.grad.numpy() + b_grad_1 = b.grad.numpy() + + with paddle.fluid.dygraph.guard(): + paddle.seed(100) + a = paddle.rand(shape=[1, 4]) + b = paddle.rand(shape=[1, 4]) + a.stop_gradient = False + b.stop_gradient = False + c = a / b + d = paddle.zeros((4, 4)) + self.assertTrue(d.stop_gradient) + d[0, :] = c + self.assertFalse(d.stop_gradient) + d[0, :].sum().backward() + a_grad_2 = a.grad.numpy() + b_grad_2 = b.grad.numpy() + + np.testing.assert_array_equal(a_grad_1, a_grad_2) + np.testing.assert_array_equal(b_grad_1, b_grad_2) + paddle.enable_static() + support_types = get_xpu_op_support_types('set_value') for stype in support_types: From 2e156ac8e9b7ab58580b2b60360bfc59f4ea2e39 Mon Sep 17 00:00:00 2001 From: xiaoting <31891223+tink2123@users.noreply.github.com> Date: Tue, 31 Jan 2023 13:15:28 +0800 Subject: [PATCH 24/89] support 0d tensor for interpolate (#49929) * support 0d tensor for interpolate * support 0d tensor for interpolate * add xpu unittest for interp * update unittest for interpolate * fix coverage * fix code style * fix for coverage * fix coverage --- paddle/phi/infermeta/multiary.cc | 46 ++++---- .../phi/kernels/funcs/interpolate_function.h | 14 +-- 
.../unittests/test_bilinear_interp_v2_op.py | 75 +++++++++++++ .../tests/unittests/test_zero_dim_tensor.py | 101 ++++++++++++++++++ .../unittests/xpu/test_zero_dim_tensor_xpu.py | 55 ++++++++++ python/paddle/nn/functional/common.py | 34 ++++-- 6 files changed, 291 insertions(+), 34 deletions(-) diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 6b238209d4ac2..ef94266b4ebe1 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1424,16 +1424,18 @@ static void Interpolate1DInferShapeCheck( if (scale_tensor) { auto scale_tensor_dim = scale_tensor.dims(); PADDLE_ENFORCE_EQ( - scale_tensor_dim.size(), - 1, + scale_tensor_dim.size() == 1 || scale_tensor_dim.size() == 0, + true, phi::errors::InvalidArgument( - "Scale's dimension size must be 1, but got dimension = %d .", + "Scale's dimension size must be 1 or 0, but got dimension = %d .", scale_tensor_dim.size())); - PADDLE_ENFORCE_EQ(scale_tensor_dim[0], - 1, - phi::errors::InvalidArgument( - "Scale's shape must be 1, but got shape = %d .", - scale_tensor_dim[0])); + if (scale_tensor_dim.size() == 1) { + PADDLE_ENFORCE_EQ(scale_tensor_dim[0], + 1, + phi::errors::InvalidArgument( + "Scale's shape must be 1, but got shape = %d .", + scale_tensor_dim[0])); + } out_w_tmp = -1; } else { if (scale.size() > 0) { @@ -1550,19 +1552,25 @@ static void Interpolate2DInferShapeCheck( } int out_h_tmp, out_w_tmp; + if (scale_tensor) { auto scale_tensor_dim = scale_tensor.dims(); PADDLE_ENFORCE_EQ( - scale_tensor_dim.size(), - 1, + scale_tensor_dim.size() == 1 || scale_tensor_dim.size() == 0, + true, phi::errors::InvalidArgument( - "Scale's dimension size must be 1, but got dimension = %d .", + "Scale's dimension size must be 1 or 0, but got dimension = %d .", scale_tensor_dim.size())); - PADDLE_ENFORCE_EQ(scale_tensor_dim[0] == 2 || scale_tensor_dim[0] == 1, - true, - phi::errors::InvalidArgument( - "Scale's shape must be 2 or 1, but got shape = %d .", - scale_tensor_dim[0])); + + if (scale_tensor_dim.size() == 1) { + PADDLE_ENFORCE_EQ( + scale_tensor_dim[0] == 2 || scale_tensor_dim[0] == 1, + true, + phi::errors::InvalidArgument( + "Scale's shape must be 2 or 1, but got shape = %d .", + scale_tensor_dim[0])); + } + out_h_tmp = -1; out_w_tmp = -1; } else { @@ -1695,10 +1703,10 @@ static void Interpolate3DInferShapeCheck( if (scale_tensor) { auto scale_tensor_dim = scale_tensor.dims(); PADDLE_ENFORCE_EQ( - scale_tensor_dim.size(), - 1, + scale_tensor_dim.size() == 1 || scale_tensor_dim.size() == 0, + true, phi::errors::InvalidArgument( - "Scale's dimension size must be 1, but got size = %d .", + "Scale's dimension size must be 1 or 0, but got size = %d .", scale_tensor_dim.size())); PADDLE_ENFORCE_EQ(scale_tensor_dim[0] == 3 || scale_tensor_dim[0] == 1, true, diff --git a/paddle/phi/kernels/funcs/interpolate_function.h b/paddle/phi/kernels/funcs/interpolate_function.h index 89b02317f3e95..53b0577fc29d7 100644 --- a/paddle/phi/kernels/funcs/interpolate_function.h +++ b/paddle/phi/kernels/funcs/interpolate_function.h @@ -85,12 +85,14 @@ inline std::vector get_new_shape( std::vector vec_new_shape; for (size_t i = 0; i < list_new_shape_tensor.size(); ++i) { auto tensor = list_new_shape_tensor[i]; - PADDLE_ENFORCE_EQ( - tensor->dims(), - phi::make_ddim({1}), - errors::InvalidArgument("The shape of dimension tensor should be [1]," - "but received d%.", - tensor->dims())); + PADDLE_ENFORCE_EQ(tensor->dims() == phi::make_ddim({1}) || + tensor->dims() == phi::make_ddim({}), + true, + 
errors::InvalidArgument( + "The shape of dimension tensor should be [1] or []," + "but received d%.", + tensor->dims())); + #ifdef PADDLE_WITH_XPU if (tensor->place().GetType() == phi::AllocationType::XPU) { DenseTensor temp; diff --git a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py index ed7b1375e54aa..f274752c1c875 100755 --- a/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_bilinear_interp_v2_op.py @@ -816,5 +816,80 @@ def test_main(self): np.testing.assert_allclose(x_g_np_1, x_g_np_2, atol=1e-2, rtol=1e-2) +class TestBilinearInterpOpAPI_0DTensorScale(unittest.TestCase): + def test_case(self): + import paddle + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with fluid.dygraph.guard(place): + input_data = np.random.random((2, 3, 6, 6)).astype("float32") + input_x = paddle.to_tensor(input_data) + expect_res = bilinear_interp_np( + input_data, out_h=12, out_w=12, align_corners=False + ) + scale_0d = paddle.full([], 2) + out = interpolate( + x=input_x, + scale_factor=scale_0d, + mode="bilinear", + align_corners=False, + ) + np.testing.assert_allclose(out.numpy(), expect_res, rtol=1e-05) + + +class TestBilinearInterpOpAPI_0DTensorScale2(unittest.TestCase): + def test_case(self): + import paddle + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with fluid.dygraph.guard(place): + input_data = np.random.random((2, 3, 6, 6)).astype("float32") + input_x = paddle.to_tensor(input_data) + expect_res = bilinear_interp_np( + input_data, out_h=12, out_w=12, align_corners=False + ) + scale_0d = [paddle.full([], 2), paddle.full([], 2)] + out = interpolate( + x=input_x, + scale_factor=scale_0d, + mode="bilinear", + align_corners=False, + ) + np.testing.assert_allclose(out.numpy(), expect_res, rtol=1e-05) + + +class TestBilinearInterpOpAPI_0DTensorOutSize(unittest.TestCase): + def test_case(self): + import paddle + + if core.is_compiled_with_cuda(): + place = core.CUDAPlace(0) + else: + place = core.CPUPlace() + with fluid.dygraph.guard(place): + input_data = np.random.random((2, 3, 6, 6)).astype("float32") + input_x = paddle.to_tensor(input_data) + expect_res = bilinear_interp_np( + input_data, out_h=12, out_w=12, align_corners=False + ) + output_size = [ + paddle.full([], 12, dtype="int32"), + paddle.full([], 12, dtype="int32"), + ] + out = interpolate( + x=input_x, + size=output_size, + mode="bilinear", + align_corners=False, + ) + np.testing.assert_allclose(out.numpy(), expect_res, rtol=1e-05) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index 11d85b52446b2..2d07ab31334df 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -1388,6 +1388,72 @@ def test_atan2(self): self.assertEqual(x1.grad.numpy(), 0.5) self.assertEqual(x2.grad.numpy(), 0) + def test_interpolate(self): + from paddle.nn.functional import interpolate + + input_x = paddle.rand([2, 3, 6, 6]) + input_x.stop_gradient = False + origin_result = interpolate( + x=input_x, size=[12, 12], mode="bilinear", align_corners=False + ) + + output_size = [ + paddle.full([], 12, dtype="int32"), + paddle.full([], 12, dtype="int32"), + ] + out1 = interpolate( + x=input_x, 
size=output_size, mode="bilinear", align_corners=False + ) + out1.backward() + + self.assertEqual(out1.shape, [2, 3, 12, 12]) + self.assertEqual(input_x.grad.shape, [2, 3, 6, 6]) + + scale_1 = [paddle.full([], 2), paddle.full([], 2)] + out2 = interpolate( + x=input_x, + scale_factor=scale_1, + mode="bilinear", + align_corners=False, + ) + out2.backward() + + self.assertEqual(out2.shape, [2, 3, 12, 12]) + self.assertEqual(input_x.grad.shape, [2, 3, 6, 6]) + + scale_2 = paddle.full([], 2) + out3 = interpolate( + x=input_x, + scale_factor=scale_2, + mode="bilinear", + align_corners=False, + ) + out3.backward() + + # for coverage + scale_3 = paddle.full([1], 2) + input_3d = paddle.rand([2, 3, 6]) + out4 = interpolate( + x=input_3d, + scale_factor=scale_3, + mode="LINEAR", + align_corners=False, + data_format="NCW", + ) + + self.assertEqual(out3.shape, [2, 3, 12, 12]) + self.assertEqual(input_x.grad.shape, [2, 3, 6, 6]) + + np.testing.assert_allclose( + origin_result.numpy(), out1.numpy(), rtol=1e-05 + ) + np.testing.assert_allclose( + origin_result.numpy(), out2.numpy(), rtol=1e-05 + ) + np.testing.assert_allclose( + origin_result.numpy(), out3.numpy(), rtol=1e-05 + ) + def test_maseked_select(self): x = paddle.rand([]) x.stop_gradient = False @@ -2223,6 +2289,41 @@ def test_atan2(self): self.assertEqual(res[0].shape, ()) + @prog_scope() + def test_interpolate(self): + from paddle.nn.functional import interpolate + + input_x = paddle.rand([2, 3, 6, 6]) + input_x.stop_gradient = False + + output_size = [ + paddle.full([], 12, dtype="int32"), + paddle.full([], 12, dtype="int32"), + ] + + out1 = interpolate( + x=input_x, size=output_size, mode="bilinear", align_corners=False + ) + paddle.static.append_backward(out1.sum()) + prog = paddle.static.default_main_program() + res1 = self.exe.run(prog, feed={}, fetch_list=[out1, input_x.grad_name]) + + scale_1 = paddle.full([], 2) + out2 = interpolate( + x=input_x, + scale_factor=scale_1, + mode="bilinear", + align_corners=False, + ) + paddle.static.append_backward(out2.sum()) + prog = paddle.static.default_main_program() + res2 = self.exe.run(prog, feed={}, fetch_list=[out2, input_x.grad_name]) + + self.assertEqual(res1[0].shape, (2, 3, 12, 12)) + self.assertEqual(res1[1].shape, (2, 3, 6, 6)) + self.assertEqual(res2[0].shape, (2, 3, 12, 12)) + self.assertEqual(res2[1].shape, (2, 3, 6, 6)) + @prog_scope() def test_maseked_select(self): x = paddle.rand([]) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index c0597d0ad53ea..f6f64aefe9db7 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -883,6 +883,61 @@ def test_allclose(self): y = paddle.full([], 0.6) self.assertFalse(paddle.allclose(x, y)) + def test_interpolate(self): + from paddle.nn.functional import interpolate + + input_x = paddle.rand([2, 3, 6, 6]) + input_x.stop_gradient = False + origin_result = interpolate( + x=input_x, size=[12, 12], mode="bilinear", align_corners=False + ) + + output_size = [ + paddle.full([], 12, dtype="int32"), + paddle.full([], 12, dtype="int32"), + ] + out1 = interpolate( + x=input_x, size=output_size, mode="bilinear", align_corners=False + ) + out1.backward() + + self.assertEqual(out1.shape, [2, 3, 12, 12]) + self.assertEqual(input_x.grad.shape, [2, 3, 6, 6]) + + scale_1 = [paddle.full([], 2), paddle.full([], 2)] + out2 = interpolate( + x=input_x, + 
scale_factor=scale_1, + mode="bilinear", + align_corners=False, + ) + out2.backward() + + self.assertEqual(out2.shape, [2, 3, 12, 12]) + self.assertEqual(input_x.grad.shape, [2, 3, 6, 6]) + + scale_2 = paddle.full([], 2) + out3 = interpolate( + x=input_x, + scale_factor=scale_2, + mode="bilinear", + align_corners=False, + ) + out3.backward() + + self.assertEqual(out3.shape, [2, 3, 12, 12]) + self.assertEqual(input_x.grad.shape, [2, 3, 6, 6]) + + np.testing.assert_allclose( + origin_result.numpy(), out1.numpy(), rtol=1e-05 + ) + np.testing.assert_allclose( + origin_result.numpy(), out2.numpy(), rtol=1e-05 + ) + np.testing.assert_allclose( + origin_result.numpy(), out3.numpy(), rtol=1e-05 + ) + def test_equalall(self): x = paddle.full([], 0.5) y = paddle.full([], 0.6) diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index d9f5b0b160dc0..57a1e0023d4fc 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -12,6 +12,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy + import paddle from paddle import _C_ops, _legacy_C_ops from paddle.fluid.layer_helper import LayerHelper @@ -102,6 +104,10 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): y = F.unfold(x, [3, 3], 1, 1, 1) """ + helper = LayerHelper("unfold", **locals()) + + check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'unfold') + assert len(x.shape) == 4, "input should be the format of [N, C, H, W]" if isinstance(kernel_sizes, int): @@ -145,9 +151,6 @@ def unfold(x, kernel_sizes, strides=1, paddings=0, dilations=1, name=None): if in_dygraph_mode(): return _C_ops.unfold(x, kernel_sizes, strides, paddings, dilations) - helper = LayerHelper("unfold", **locals()) - - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'unfold') out = helper.create_variable_for_type_inference(dtype=x.dtype) helper.append_op( type="unfold", @@ -432,9 +435,12 @@ def interpolate( ): if len(size) == 0: raise ValueError("output size can not be empty") + if size is None: + raise ValueError("output size can not be None in AREA mode") if len(x.shape) == 3: return paddle.nn.functional.adaptive_avg_pool1d(x, size) elif len(x.shape) == 4: + print("size :", size) return paddle.nn.functional.adaptive_avg_pool2d(x, size) elif len(x.shape) == 5: return paddle.nn.functional.adaptive_avg_pool3d(x, size) @@ -494,9 +500,10 @@ def _is_list_or_turple_(data): out_shape = list(out_shape.numpy()) else: out_shape = list(out_shape) + for i, dim in enumerate(out_shape): if isinstance(dim, Variable): - out_shape[i] = dim.numpy()[0] + out_shape[i] = dim.numpy().item() if not (_is_list_or_turple_(out_shape)): raise TypeError("size should be a list or tuple or Variable.") # Validate the shape @@ -568,11 +575,18 @@ def _is_list_or_turple_(data): else: if in_dynamic_mode() and isinstance(scale, Variable): - scale = list(scale.numpy()) + if scale.shape == []: + scale = float(scale) + else: + scale = list(scale.numpy()) if isinstance(scale, Variable): scale.stop_gradient = True inputs["Scale"] = scale - elif isinstance(scale, float) or isinstance(scale, int): + elif ( + isinstance(scale, float) + or isinstance(scale, int) + or isinstance(scale, numpy.ndarray) + ): if scale <= 0: raise ValueError("Attr(scale) should be greater than zero.") scale_list = [] @@ -2253,6 +2267,11 @@ def fold( # y.shape = [2,3,4,5] """ + + helper = LayerHelper("fold", **locals()) + + check_variable_and_dtype(x, 'x', 
['float32', 'float64'], 'fold') + assert len(x.shape) == 3, "input should be the format of [N, C, L]" def _is_list_or_turple_(data): @@ -2322,9 +2341,6 @@ def _is_list_or_turple_(data): dilations, ) else: - helper = LayerHelper("fold", **locals()) - - check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'fold') out = helper.create_variable_for_type_inference(dtype=x.dtype) helper.append_op( type="fold", From ce4637c1d9acba3356a3730d258aba121f3d79f8 Mon Sep 17 00:00:00 2001 From: 201716010711 <87008376+201716010711@users.noreply.github.com> Date: Mon, 30 Jan 2023 21:42:39 -0800 Subject: [PATCH 25/89] support fp16 squaredl2norm (#48315) --- .../gpu/squared_l2_norm_grad_kernel.cu | 38 +++++++++++++++++- .../phi/kernels/gpu/squared_l2_norm_kernel.cu | 29 ++++++++++++-- .../tests/unittests/test_gradient_clip.py | 6 +-- .../unittests/test_squared_l2_norm_op.py | 40 +++++++++++++++++++ python/paddle/nn/clip.py | 9 ++--- 5 files changed, 106 insertions(+), 16 deletions(-) mode change 100644 => 100755 python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py diff --git a/paddle/phi/kernels/gpu/squared_l2_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/squared_l2_norm_grad_kernel.cu index 908a7557d1b48..7fc355b51ac32 100644 --- a/paddle/phi/kernels/gpu/squared_l2_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/squared_l2_norm_grad_kernel.cu @@ -15,12 +15,46 @@ #include "paddle/phi/kernels/squared_l2_norm_grad_kernel.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/squared_l2_norm_grad_kernel_impl.h" +#include "paddle/phi/kernels/funcs/broadcast_function.h" + +namespace phi { +/** + * x*y*2.0 + */ +template +struct DoubleMulFunctor { + __device__ __forceinline__ T operator()(const T a, const T b) const { + return b * a * static_cast(2.0f); + } +}; + +template +void SquaredL2NormGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& dout, + DenseTensor* dx) { + dev_ctx.template Alloc(dx); + + PADDLE_ENFORCE_EQ( + dout.numel(), + 1, + phi::errors::InvalidArgument( + "Input(GRAD@Out) of SquaredL2NormGradOP should be a scalar.")); + std::vector ins{&x, &dout}; + std::vector outs{dx}; + + funcs::BroadcastKernel( + dev_ctx, ins, &outs, -1, phi::DoubleMulFunctor()); +} +} // namespace phi PD_REGISTER_KERNEL(squared_l2_norm_grad, GPU, ALL_LAYOUT, phi::SquaredL2NormGradKernel, float, - double) {} + double, + phi::dtype::float16) {} diff --git a/paddle/phi/kernels/gpu/squared_l2_norm_kernel.cu b/paddle/phi/kernels/gpu/squared_l2_norm_kernel.cu index d585d209b42ca..81108145653e1 100644 --- a/paddle/phi/kernels/gpu/squared_l2_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/squared_l2_norm_kernel.cu @@ -15,9 +15,30 @@ #include "paddle/phi/kernels/squared_l2_norm_kernel.h" #include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/float16.h" +#include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/kernels/impl/squared_l2_norm_kernel_impl.h" - -PD_REGISTER_KERNEL( - squared_l2_norm, GPU, ALL_LAYOUT, phi::SquaredL2NormKernel, float, double) { +#include "paddle/phi/kernels/funcs/reduce_function.h" +namespace phi { +template +void SquaredL2NormKernel(const Context& dev_ctx, + const DenseTensor& x, + DenseTensor* out) { + dev_ctx.template Alloc(out); + std::vector origin_reduce_dims; + for (size_t i = 0; i < x.dims().size(); i++) { + 
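    // (editor's note, not a line of the patch) collecting every axis index here
    // makes the ReduceKernel call below reduce over all dimensions, so the kernel
    // writes sum(x * x) into a single-element output tensor.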
origin_reduce_dims.push_back(i); + } + phi::funcs::ReduceKernel>( + dev_ctx, x, out, kps::SquareFunctor(), origin_reduce_dims, false); } + +} // namespace phi + +PD_REGISTER_KERNEL(squared_l2_norm, + GPU, + ALL_LAYOUT, + phi::SquaredL2NormKernel, + float, + double, + phi::dtype::float16) {} diff --git a/python/paddle/fluid/tests/unittests/test_gradient_clip.py b/python/paddle/fluid/tests/unittests/test_gradient_clip.py index c74917c2a076a..66fe40bf8ab6d 100644 --- a/python/paddle/fluid/tests/unittests/test_gradient_clip.py +++ b/python/paddle/fluid/tests/unittests/test_gradient_clip.py @@ -254,10 +254,8 @@ def test_none_grad_fp16(self): self.assertListEqual( ops, [ - 'square', - 'reduce_sum', - 'square', - 'reduce_sum', + 'squared_l2_norm', + 'squared_l2_norm', 'sum', 'cast', 'sqrt', diff --git a/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py b/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py old mode 100644 new mode 100755 index 8124254e7b2cc..a7076e18a5821 --- a/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_squared_l2_norm_op.py @@ -30,6 +30,46 @@ def test_squared_l2_norm(x): return _legacy_C_ops.squared_l2_norm(x) +class TestSquaredL2NormF16Op(unittest.TestCase): + def init_test_case(self): + X = np.random.uniform(-0.1, 0.1, (8, 5, 10)).astype('float32') + return X + + def check_main(self, x_np, dtype): + paddle.disable_static() + x = paddle.to_tensor(x_np) + + x.stop_gradient = False + y = test_squared_l2_norm(x) + x_g = paddle.grad(y, [x]) + + paddle.enable_static() + return y, x_g + + def test_main(self): + x_np = self.init_test_case() + y_np_1, x_g_np_1 = self.check_main(x_np, 'float32') + y_np_2, x_g_np_2 = self.check_main(x_np, 'float16') + + def assert_equal(x, y): + np.testing.assert_allclose(x, y, rtol=1e-05, atol=0.0) + + assert_equal(y_np_1, y_np_2) + assert_equal(x_g_np_1, x_g_np_2) + + +class TestSquaredL2NormF16Op1(TestSquaredL2NormF16Op): + def init_test_case(self): + X = np.random.uniform(-2.0, 2.0, (30, 10)).astype('float32') + return X + + +class TestSquaredL2NormF16Op2(TestSquaredL2NormF16Op): + def init_test_case(self): + X = np.random.uniform(-5.0, 5.0, (20, 10, 20)).astype('float32') + return X + + class TestL2LossOp(OpTest): """Test squared_l2_norm""" diff --git a/python/paddle/nn/clip.py b/python/paddle/nn/clip.py index 10eeb6319063c..53eed3cae5802 100644 --- a/python/paddle/nn/clip.py +++ b/python/paddle/nn/clip.py @@ -207,11 +207,8 @@ def _squared_l2_norm(x): """ x = _cast_to_mp_type_if_enabled(x) - if ( - core.is_compiled_with_xpu() - or x.dtype == core.VarDesc.VarType.FP16 - or x.dtype == core.VarDesc.VarType.BF16 - ): + + if core.is_compiled_with_xpu(): square = paddle.square(x) sum_square = paddle.sum(square) return sum_square @@ -220,7 +217,7 @@ def _squared_l2_norm(x): return _C_ops.squared_l2_norm(x) op_type = 'squared_l2_norm' - check_variable_and_dtype(x, 'x', ['float32', 'float64'], op_type) + check_variable_and_dtype(x, 'x', ['float32', 'float64', 'float16'], op_type) helper = LayerHelper(op_type, **locals()) out = helper.create_variable_for_type_inference(x.dtype) From 0f173d5a62b41355b457c6434f6c1c1bf1ae4a79 Mon Sep 17 00:00:00 2001 From: wangxiaoning <71813629+wangxn12138@users.noreply.github.com> Date: Tue, 31 Jan 2023 14:11:37 +0800 Subject: [PATCH 26/89] support fp16 index_select (#50101) --- python/paddle/tensor/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py 
index 9b1dce1df867d..3ec79b55b0bbe 100755 --- a/python/paddle/tensor/search.py +++ b/python/paddle/tensor/search.py @@ -326,7 +326,7 @@ def index_select(x, index, axis=0, name=None): check_variable_and_dtype( x, 'x', - ['float32', 'float64', 'int32', 'int64'], + ['float16', 'float32', 'float64', 'int32', 'int64'], 'paddle.tensor.search.index_select', ) check_variable_and_dtype( From 3a7e470b3e1166a663e52081c797c2c12897723b Mon Sep 17 00:00:00 2001 From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com> Date: Tue, 31 Jan 2023 14:19:51 +0800 Subject: [PATCH 27/89] remove fluid.ir.RegisterPassHelper PassDesc and RegisterPass (#49578) * remove fluid.ir.RegisterPassHelper PassDesc and RegisterPass * proto import problems * change import way of pass_desc_pb2 * change sys.path * change the way of import framwork_pb2 * add fluid_path directory from path.dirname * fluid_path changed --- python/paddle/fluid/ir.py | 466 +---------------- .../unittests/ir/test_ir_generate_pass.py | 3 +- .../incubate/passes/fuse_resnet_unit_pass.py | 2 +- python/paddle/incubate/passes/ir.py | 483 ++++++++++++++++++ 4 files changed, 488 insertions(+), 466 deletions(-) create mode 100644 python/paddle/incubate/passes/ir.py diff --git a/python/paddle/fluid/ir.py b/python/paddle/fluid/ir.py index fb077ed8b5f0d..c444b4cedaafa 100644 --- a/python/paddle/fluid/ir.py +++ b/python/paddle/fluid/ir.py @@ -16,18 +16,8 @@ import inspect from os import path import paddle -from . import core, unique_name -from .framework import _apply_pass, OpProtoHolder - -from .proto import framework_pb2 - -try: - from .proto import pass_desc_pb2 -except ModuleNotFoundError: - import sys - - sys.path.append(path.join(path.dirname(__file__), 'proto')) - from .proto import pass_desc_pb2 +from . import core +from .framework import _apply_pass def get_data_vars(program): @@ -138,455 +128,3 @@ def apply_pass(name): build_strategy.enable_inplace = False build_strategy._clear_finalized() return build_strategy - - -class RegisterPassHelper: - _register_helpers = list() - - def __init__(self, pass_pairs, pass_type=str(), input_specs=dict()): - self._pass_type = pass_type - self._pass_pairs = pass_pairs - self._input_specs = input_specs - RegisterPassHelper._register_helpers.append(self) - - def _get_args_from_func(self, func): - args = list() - arg_specs = inspect.getfullargspec(func) - for arg_name in arg_specs.args: - input_spec = self._input_specs.get(arg_name) - if isinstance(input_spec, paddle.static.InputSpec): - args.append( - PassDesc.VarHelper( - arg_name, input_spec.shape, input_spec.dtype - ) - ) - elif isinstance(input_spec, paddle.ParamAttr): - args.append(paddle.ParamAttr(arg_name)) - else: - args.append(PassDesc.VarHelper(arg_name, [-1])) - return args - - def _prune_program_desc(self, ops): - for op_desc in ops: - default_attrs = core.get_op_attrs_default_value( - op_desc.type.encode() - ) - remove_attrs = list() - for attr in op_desc.attrs: - # attr must not in - if attr.name not in [ - "op_namescope", - "op_callstack", - "op_device", - ]: - attr_list_fields = attr.ListFields() - # attr format must be: name, type, value - if len(attr_list_fields) == 3: - attr_value = attr.ListFields()[-1][-1] - default_attr_value = default_attrs.get(attr.name) - # value must not default - if default_attr_value != attr_value: - continue - remove_attrs.append(attr) - for attr in remove_attrs: - op_desc.attrs.remove(attr) - - def _func_to_program_desc(self, func, ops): - vars = list() - program = paddle.static.Program() - startup_program = 
paddle.static.Program() - with paddle.static.program_guard(program, startup_program): - args = self._get_args_from_func(func) - vars.extend(args) - outs = func(*args) - if not isinstance(outs, (list, tuple)): - outs = [outs] - for out in outs: - if isinstance(out, PassDesc.OpHelper): - op_outs = out.Outputs() - if len(op_outs) != 1: - raise ValueError( - "Operator '{}' has multiple outputs, please specify one output variable.".format( - out._type - ) - ) - for op_out in op_outs.values(): - vars.extend(op_out) - else: - vars.append(out) - block_desc = program.current_block().desc - for i in range(block_desc.op_size()): - ops.add().ParseFromString(block_desc.op(i).serialize_to_string()) - self._prune_program_desc(ops) - return vars, program.current_block().ops - - def _convert_vars_to_pass_desc(self, patterns, replaces, desc): - def _add_element_conditions(conditions, elements): - for element in elements: - if element._condition: - conditions.append(element._condition) - _add_element_conditions(conditions, element._elements) - - for (pattern, replace) in zip(patterns, replaces): - # Convert maps of inputs and outputs. - var_map = desc.var_maps.add() - var_map.pattern_var = pattern.name - var_map.replace_var = replace.name - conditions = desc.var_attr_conditions - # Convert shape condition. - if pattern.name in self._input_specs: - condition = conditions.add() - pattern.Attr("shape")._to_pass_desc_attr(condition.attr) - condition.condition_value.name = "" - condition.condition_value.type = framework_pb2.AttrType.LONGS - condition.condition_value.longs.extend(pattern.shape) - condition.type = pass_desc_pb2.PassDesc.ConditionType.kEQ - # Convert attr conditions. - if PassDesc.VarHelper == pattern.__class__: - for attr in pattern._attrs.values(): - _add_element_conditions(conditions, [attr]) - - def _convert_ops_to_pass_desc(self, patterns, replaces, desc): - for replace in replaces: - if isinstance(replace, PassDesc.OpHelper): - for attr in replace._attrs.values(): - # Convert attr maps. - mapped = attr._mapped - if inspect.isfunction(mapped): - mapped = mapped(patterns) - attr_map = desc.op_attr_maps.add() - mapped._to_pass_desc_attr(attr_map.pattern_attr) - attr._to_pass_desc_attr(attr_map.replace_attr) - if mapped._operation is not None: - attr_map.operation.CopyFrom(mapped._operation) - - def SerializeMultiPassDesc(self): - switch_static_mode = paddle.in_dynamic_mode() - if switch_static_mode: - paddle.enable_static() - multi_pass_desc = pass_desc_pb2.MultiPassDesc() - multi_pass_desc.pass_type = self._pass_type - # Traverse all pass pairs and convert them to PassDesc data. - # Here need to add cache in the future. - for (pattern, replace) in self._pass_pairs: - pass_desc = multi_pass_desc.pass_descs.add() - # Convert ProgramDescs of pattern and replace subgraphs. 
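# (editor's note) The RegisterPassHelper / PassDesc / RegisterPass code being
# deleted from python/paddle/fluid/ir.py in this hunk is re-added essentially
# verbatim as the new module python/paddle/incubate/passes/ir.py further down
# in this patch; only the proto imports and module paths are adjusted.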
- pattern_vars, pattern_ops = self._func_to_program_desc( - pattern, pass_desc.pattern - ) - replace_vars, replace_ops = self._func_to_program_desc( - replace, pass_desc.replace - ) - self._convert_vars_to_pass_desc( - pattern_vars, replace_vars, pass_desc - ) - self._convert_ops_to_pass_desc(pattern_ops, replace_ops, pass_desc) - if switch_static_mode: - paddle.disable_static() - return multi_pass_desc.SerializeToString() - - -class PassDesc: - class AttrHelper: - def __init__(self, obj, name, element_index=None): - self._obj = obj - self._name = name - self._operation_type = None - self._element_index = element_index - self._elements = list() - self._operation = None - self._condition = None - self._mapped = None - - def __getitem__(self, index): - element = PassDesc.AttrHelper( - self._obj, self._name, element_index=index - ) - self._elements.append(element) - return element - - def _to_pass_desc_attr(self, pass_desc_attr): - if isinstance(self._obj, PassDesc.VarHelper): - pass_desc_attr.role = pass_desc_pb2.PassDesc.RoleType.kVariable - pass_desc_attr.var_name = self._obj.name - else: - pass_desc_attr.role = pass_desc_pb2.PassDesc.RoleType.kOperator - pass_desc_attr.op_index = self._obj._index - pass_desc_attr.name = self._name - if self._operation_type is not None: - pass_desc_attr.operation = self._operation_type - if self._element_index is not None: - pass_desc_attr.element_index = self._element_index - - def _to_op_desc_attr(self, value, op_desc_attr): - op_desc_attr.name = "" - if isinstance(value, int): - op_desc_attr.type = framework_pb2.AttrType.INT - op_desc_attr.i = value - else: - raise NotImplementedError("Unimplemented transform operation.") - - def _clone_with_operation(self, type, value=None): - attr = PassDesc.AttrHelper( - self._obj, self._name, self._element_index - ) - self._elements.append(attr) - if value is None: - attr._operation_type = type - return attr - operation = pass_desc_pb2.PassDesc.Operation() - operation.type = type - if isinstance(value, PassDesc.AttrHelper): - value._to_pass_desc_attr(operation.attr) - else: - self._to_op_desc_attr(value, operation.value) - attr._operation = operation - attr._operation_type = self._operation_type - return attr - - def __sub__(self, value): - return self._clone_with_operation( - pass_desc_pb2.PassDesc.OperationType.kSub, value - ) - - def __add__(self, value): - return self._clone_with_operation( - pass_desc_pb2.PassDesc.OperationType.kAdd, value - ) - - def Mod(self, value): - return self._clone_with_operation( - pass_desc_pb2.PassDesc.OperationType.kMod, value - ) - - def Size(self): - return self._clone_with_operation( - pass_desc_pb2.PassDesc.OperationType.kSize - ) - - def _set_with_condition(self, type, value): - condition = pass_desc_pb2.PassDesc.AttrCondition() - self._to_pass_desc_attr(condition.attr) - condition.type = type - if isinstance(value, PassDesc.AttrHelper): - value._to_pass_desc_attr(condition.condition_attr) - else: - self._to_op_desc_attr(value, condition.condition_value) - if self._operation: - condition.operation.CopyFrom(self._operation) - self._condition = condition - - def EQ(self, value): - self._set_with_condition( - pass_desc_pb2.PassDesc.ConditionType.kEQ, value - ) - - def MappedPattern( - self, var=None, op=None, index=0, name=None, element_index=None - ): - if all([var, op]): - raise ValueError("Only mapped one of which var or op.") - - def mapped_var(pattern_ops): - raise NotImplementedError( - "Mapping to variable is not implemented." 
- ) - - def mapped_op(pattern_ops): - ops = [o for o in pattern_ops if o._type == op] - if len(ops) <= index: - raise ValueError( - "Index '{}' of operator '{}' is incorrect.".format( - index, op - ) - ) - return PassDesc.AttrHelper( - ops[index], name, element_index=element_index - ) - - self._mapped = mapped_op if var is None else mapped_var - - class VarHelper(paddle.static.Variable): - def __init__(self, *args, **kwargs): - block = paddle.static.default_main_program().current_block() - self._var = paddle.static.data(*args, **kwargs) - self._attrs = dict() - - def __getattr__(self, name): - return getattr(self._var, name) - - def Attr(self, name): - attr = self._attrs.get(name) - if attr is None: - attr = PassDesc.AttrHelper(self, name) - self._attrs[name] = attr - return attr - - class OpHelper: - def __init__(self, type=None): - self._type = type - - def __getattr__(self, name): - op = PassDesc.OpHelper(name) - op.Init() - return op - - def __call__(self, *args, **kwargs): - if len(args) > 0: - raise ValueError( - "Each input argument needs to specify a parameter name." - ) - for (in_name, in_args) in kwargs.items(): - op_input = self._inputs.get(in_name) - if op_input is None: - raise ValueError( - "Operator '{}' does not have input named '{}'.".format( - self._type, in_name - ) - ) - if isinstance(in_args, (list, tuple)): - if len(in_args) == 0: - raise ValueError( - "Input '{}' of operator '{}' cannot be empty.".format( - in_name, self._type - ) - ) - else: - in_args = [in_args] - for in_arg in in_args: - if isinstance(in_arg, PassDesc.OpHelper): - op_outs = in_arg.Outputs() - if len(op_outs) != 1: - raise ValueError( - "The size of outputs of operator '{}' is not equal 1, please specify one output variable.".format( - in_arg._type - ) - ) - for op_out in op_outs.values(): - op_input.extend(op_out) - else: - op_input.append(in_arg) - self._desc.set_input(in_name, [i.name for i in op_input]) - block = paddle.static.default_main_program().current_block() - for out_name, op_output in self._outputs.items(): - op_output_name = unique_name.generate(self._type) - op_output.append(block.create_var(name=op_output_name)) - self._desc.set_output(out_name, [op_output_name]) - return self - - def Init(self): - block = paddle.static.default_main_program().current_block() - self._proto = OpProtoHolder.instance().op_proto_map.get(self._type) - if self._proto is None: - raise AttributeError( - "type object 'OpHelper' has no attribute '{}'".format( - self._type - ) - ) - self._index = len(block.ops) - self._desc = block.desc.append_op() - self._desc.set_type(self._type) - self._attrs = dict() - self._inputs = {i.name: list() for i in self._proto.inputs} - self._outputs = {o.name: list() for o in self._proto.outputs} - block.ops.append(self) - - def Attr(self, name): - attr = self._attrs.get(name) - if attr is None: - attr = PassDesc.AttrHelper(self, name) - self._attrs[name] = attr - return attr - - def SetAttr(self, name, value): - if isinstance(value, PassDesc.AttrHelper): - self.Attr(name)._mapped = value - else: - self._desc._set_attr(name, value) - - def Output(self, name): - output = self._outputs.get(name) - if output is None: - raise ValueError( - "Operator '{}' does not have output named '{}'.".format( - self._type, name - ) - ) - return output - - def Outputs(self): - return self._outputs - - def SetOutputs(self, **kwargs): - for param, arg in kwargs.items(): - if arg is None: - self._desc.remove_output(param) - else: - self._desc.set_output(param, [arg.name]) - - OP = OpHelper() - - -def 
RegisterPass(function=None, input_specs=dict()): - """ - The function decorator of Register Pass. Decorator @RegisterPass handles - the function and register it into a core.Pass instance. Use name of function - as Pass type. - - Args: - function (callable): The function with return of callable pair(s) that - represents the pattern subgraph and the replace subgraph. - input_specs (dict[str, InputSpec]): Dict of InputSpec to specific the shape/dtype - information of Tensor. Some operators limit the shape and dtype of datas when - create subgraph with Paddle APIs. So user need specify InputSpec of data to - ensure create a correctly subgraph. Of course, this argument is not limited to - matching subgraph. The default is dict(). - - Returns: - callables: Callable pair(s). - - Examples: - .. code-block:: python - - import paddle - from paddle.fluid.ir import RegisterPass - - @RegisterPass - def multi_add_to_addn(): - def pattern(x, y, z): - return paddle.add(paddle.add(x, y), z) - def replace(x, y, z): - return paddle.add_n([x, y, z]) - return pattern, replace - """ - - def _is_pass_pair(check_pair): - if isinstance(check_pair, (list, tuple)): - if len(check_pair) == 2: - if all(map(inspect.isfunction, check_pair)): - return True - return False - - def decorated(python_func): - pass_type = python_func.__name__ - signature = inspect.signature(python_func) - if len(signature.parameters) > 0: - raise NotImplementedError( - "Pass function with parameter is not supported now." - ) - elif len(signature.parameters) == 0: - pass_pairs = python_func() - if _is_pass_pair(pass_pairs): - pass_pairs = [pass_pairs] - elif not all(map(_is_pass_pair, pass_pairs)): - raise ValueError( - "Return value of Pass function must be (callable, callable)." - ) - helper = RegisterPassHelper(pass_pairs, pass_type, input_specs) - core.register_pass(pass_type, helper.SerializeMultiPassDesc) - return python_func - - if inspect.isfunction(function): - return decorated(function) - - return decorated diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py index 2025f94ffd439..2f3a2f2d771c2 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_generate_pass.py @@ -17,7 +17,8 @@ import numpy as np import paddle -from paddle.fluid import core, ir +from paddle.fluid import core +from paddle.incubate.passes import ir from paddle.static import InputSpec diff --git a/python/paddle/incubate/passes/fuse_resnet_unit_pass.py b/python/paddle/incubate/passes/fuse_resnet_unit_pass.py index 6441427f469d6..7acf28eecb334 100644 --- a/python/paddle/incubate/passes/fuse_resnet_unit_pass.py +++ b/python/paddle/incubate/passes/fuse_resnet_unit_pass.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -import paddle.fluid.ir as ir +import paddle.incubate.passes.ir as ir def set_resnet_unit_attrs(resnet_unit, has_shortcut): diff --git a/python/paddle/incubate/passes/ir.py b/python/paddle/incubate/passes/ir.py new file mode 100644 index 0000000000000..cf6568a545f39 --- /dev/null +++ b/python/paddle/incubate/passes/ir.py @@ -0,0 +1,483 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +from os import path + +import paddle +from paddle.fluid.proto import framework_pb2 + +from ...fluid import core, unique_name +from ...fluid.framework import OpProtoHolder + +try: + from paddle.fluid.proto import pass_desc_pb2 +except ModuleNotFoundError: + import sys + + fluid_path = path.dirname(__file__) + '/../../fluid' + sys.path.append(path.join(fluid_path, 'proto')) + from paddle.fluid.proto import pass_desc_pb2 + + +class RegisterPassHelper: + _register_helpers = list() + + def __init__(self, pass_pairs, pass_type=str(), input_specs=dict()): + self._pass_type = pass_type + self._pass_pairs = pass_pairs + self._input_specs = input_specs + RegisterPassHelper._register_helpers.append(self) + + def _get_args_from_func(self, func): + args = list() + arg_specs = inspect.getfullargspec(func) + for arg_name in arg_specs.args: + input_spec = self._input_specs.get(arg_name) + if isinstance(input_spec, paddle.static.InputSpec): + args.append( + PassDesc.VarHelper( + arg_name, input_spec.shape, input_spec.dtype + ) + ) + elif isinstance(input_spec, paddle.ParamAttr): + args.append(paddle.ParamAttr(arg_name)) + else: + args.append(PassDesc.VarHelper(arg_name, [-1])) + return args + + def _prune_program_desc(self, ops): + for op_desc in ops: + default_attrs = core.get_op_attrs_default_value( + op_desc.type.encode() + ) + remove_attrs = list() + for attr in op_desc.attrs: + # attr must not in + if attr.name not in [ + "op_namescope", + "op_callstack", + "op_device", + ]: + attr_list_fields = attr.ListFields() + # attr format must be: name, type, value + if len(attr_list_fields) == 3: + attr_value = attr.ListFields()[-1][-1] + default_attr_value = default_attrs.get(attr.name) + # value must not default + if default_attr_value != attr_value: + continue + remove_attrs.append(attr) + for attr in remove_attrs: + op_desc.attrs.remove(attr) + + def _func_to_program_desc(self, func, ops): + vars = list() + program = paddle.static.Program() + startup_program = paddle.static.Program() + with paddle.static.program_guard(program, startup_program): + args = self._get_args_from_func(func) + vars.extend(args) + outs = func(*args) + if not isinstance(outs, (list, tuple)): + outs = [outs] + for out in outs: + if isinstance(out, PassDesc.OpHelper): + op_outs = out.Outputs() + if len(op_outs) != 1: + raise ValueError( + "Operator '{}' has multiple outputs, please specify one output variable.".format( + out._type + ) + ) + for op_out in op_outs.values(): + vars.extend(op_out) + else: + vars.append(out) + block_desc = program.current_block().desc + for i in range(block_desc.op_size()): + ops.add().ParseFromString(block_desc.op(i).serialize_to_string()) + self._prune_program_desc(ops) + return vars, program.current_block().ops + + def _convert_vars_to_pass_desc(self, patterns, replaces, desc): + def _add_element_conditions(conditions, elements): + for element in elements: + if element._condition: + conditions.append(element._condition) + _add_element_conditions(conditions, element._elements) + + for (pattern, replace) in zip(patterns, replaces): + # Convert maps of inputs and outputs. 
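[Editorial illustration, not part of the patch] With the helpers relocated here, existing passes only need the new import path; the diff above already switches the unit test to `from paddle.incubate.passes import ir`. A usage sketch adapted from the docstring removed from fluid/ir.py, assuming `RegisterPass` is re-exported unchanged by this new module, as the file listing indicates:

    import paddle
    from paddle.incubate.passes.ir import RegisterPass

    @RegisterPass
    def multi_add_to_addn():
        def pattern(x, y, z):
            return paddle.add(paddle.add(x, y), z)

        def replace(x, y, z):
            return paddle.add_n([x, y, z])

        return pattern, replace

As in the removed implementation, the decorator serializes the pattern/replace pair and registers it under the function name through `core.register_pass`.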
+ var_map = desc.var_maps.add() + var_map.pattern_var = pattern.name + var_map.replace_var = replace.name + conditions = desc.var_attr_conditions + # Convert shape condition. + if pattern.name in self._input_specs: + condition = conditions.add() + pattern.Attr("shape")._to_pass_desc_attr(condition.attr) + condition.condition_value.name = "" + condition.condition_value.type = framework_pb2.AttrType.LONGS + condition.condition_value.longs.extend(pattern.shape) + condition.type = pass_desc_pb2.PassDesc.ConditionType.kEQ + # Convert attr conditions. + if PassDesc.VarHelper == pattern.__class__: + for attr in pattern._attrs.values(): + _add_element_conditions(conditions, [attr]) + + def _convert_ops_to_pass_desc(self, patterns, replaces, desc): + for replace in replaces: + if isinstance(replace, PassDesc.OpHelper): + for attr in replace._attrs.values(): + # Convert attr maps. + mapped = attr._mapped + if inspect.isfunction(mapped): + mapped = mapped(patterns) + attr_map = desc.op_attr_maps.add() + mapped._to_pass_desc_attr(attr_map.pattern_attr) + attr._to_pass_desc_attr(attr_map.replace_attr) + if mapped._operation is not None: + attr_map.operation.CopyFrom(mapped._operation) + + def SerializeMultiPassDesc(self): + switch_static_mode = paddle.in_dynamic_mode() + if switch_static_mode: + paddle.enable_static() + multi_pass_desc = pass_desc_pb2.MultiPassDesc() + multi_pass_desc.pass_type = self._pass_type + # Traverse all pass pairs and convert them to PassDesc data. + # Here need to add cache in the future. + for (pattern, replace) in self._pass_pairs: + pass_desc = multi_pass_desc.pass_descs.add() + # Convert ProgramDescs of pattern and replace subgraphs. + pattern_vars, pattern_ops = self._func_to_program_desc( + pattern, pass_desc.pattern + ) + replace_vars, replace_ops = self._func_to_program_desc( + replace, pass_desc.replace + ) + self._convert_vars_to_pass_desc( + pattern_vars, replace_vars, pass_desc + ) + self._convert_ops_to_pass_desc(pattern_ops, replace_ops, pass_desc) + if switch_static_mode: + paddle.disable_static() + return multi_pass_desc.SerializeToString() + + +class PassDesc: + class AttrHelper: + def __init__(self, obj, name, element_index=None): + self._obj = obj + self._name = name + self._operation_type = None + self._element_index = element_index + self._elements = list() + self._operation = None + self._condition = None + self._mapped = None + + def __getitem__(self, index): + element = PassDesc.AttrHelper( + self._obj, self._name, element_index=index + ) + self._elements.append(element) + return element + + def _to_pass_desc_attr(self, pass_desc_attr): + if isinstance(self._obj, PassDesc.VarHelper): + pass_desc_attr.role = pass_desc_pb2.PassDesc.RoleType.kVariable + pass_desc_attr.var_name = self._obj.name + else: + pass_desc_attr.role = pass_desc_pb2.PassDesc.RoleType.kOperator + pass_desc_attr.op_index = self._obj._index + pass_desc_attr.name = self._name + if self._operation_type is not None: + pass_desc_attr.operation = self._operation_type + if self._element_index is not None: + pass_desc_attr.element_index = self._element_index + + def _to_op_desc_attr(self, value, op_desc_attr): + op_desc_attr.name = "" + if isinstance(value, int): + op_desc_attr.type = framework_pb2.AttrType.INT + op_desc_attr.i = value + else: + raise NotImplementedError("Unimplemented transform operation.") + + def _clone_with_operation(self, type, value=None): + attr = PassDesc.AttrHelper( + self._obj, self._name, self._element_index + ) + self._elements.append(attr) + if value is None: + 
attr._operation_type = type + return attr + operation = pass_desc_pb2.PassDesc.Operation() + operation.type = type + if isinstance(value, PassDesc.AttrHelper): + value._to_pass_desc_attr(operation.attr) + else: + self._to_op_desc_attr(value, operation.value) + attr._operation = operation + attr._operation_type = self._operation_type + return attr + + def __sub__(self, value): + return self._clone_with_operation( + pass_desc_pb2.PassDesc.OperationType.kSub, value + ) + + def __add__(self, value): + return self._clone_with_operation( + pass_desc_pb2.PassDesc.OperationType.kAdd, value + ) + + def Mod(self, value): + return self._clone_with_operation( + pass_desc_pb2.PassDesc.OperationType.kMod, value + ) + + def Size(self): + return self._clone_with_operation( + pass_desc_pb2.PassDesc.OperationType.kSize + ) + + def _set_with_condition(self, type, value): + condition = pass_desc_pb2.PassDesc.AttrCondition() + self._to_pass_desc_attr(condition.attr) + condition.type = type + if isinstance(value, PassDesc.AttrHelper): + value._to_pass_desc_attr(condition.condition_attr) + else: + self._to_op_desc_attr(value, condition.condition_value) + if self._operation: + condition.operation.CopyFrom(self._operation) + self._condition = condition + + def EQ(self, value): + self._set_with_condition( + pass_desc_pb2.PassDesc.ConditionType.kEQ, value + ) + + def MappedPattern( + self, var=None, op=None, index=0, name=None, element_index=None + ): + if all([var, op]): + raise ValueError("Only mapped one of which var or op.") + + def mapped_var(pattern_ops): + raise NotImplementedError( + "Mapping to variable is not implemented." + ) + + def mapped_op(pattern_ops): + ops = [o for o in pattern_ops if o._type == op] + if len(ops) <= index: + raise ValueError( + "Index '{}' of operator '{}' is incorrect.".format( + index, op + ) + ) + return PassDesc.AttrHelper( + ops[index], name, element_index=element_index + ) + + self._mapped = mapped_op if var is None else mapped_var + + class VarHelper(paddle.static.Variable): + def __init__(self, *args, **kwargs): + block = paddle.static.default_main_program().current_block() + self._var = paddle.static.data(*args, **kwargs) + self._attrs = dict() + + def __getattr__(self, name): + return getattr(self._var, name) + + def Attr(self, name): + attr = self._attrs.get(name) + if attr is None: + attr = PassDesc.AttrHelper(self, name) + self._attrs[name] = attr + return attr + + class OpHelper: + def __init__(self, type=None): + self._type = type + + def __getattr__(self, name): + op = PassDesc.OpHelper(name) + op.Init() + return op + + def __call__(self, *args, **kwargs): + if len(args) > 0: + raise ValueError( + "Each input argument needs to specify a parameter name." 
+ ) + for (in_name, in_args) in kwargs.items(): + op_input = self._inputs.get(in_name) + if op_input is None: + raise ValueError( + "Operator '{}' does not have input named '{}'.".format( + self._type, in_name + ) + ) + if isinstance(in_args, (list, tuple)): + if len(in_args) == 0: + raise ValueError( + "Input '{}' of operator '{}' cannot be empty.".format( + in_name, self._type + ) + ) + else: + in_args = [in_args] + for in_arg in in_args: + if isinstance(in_arg, PassDesc.OpHelper): + op_outs = in_arg.Outputs() + if len(op_outs) != 1: + raise ValueError( + "The size of outputs of operator '{}' is not equal 1, please specify one output variable.".format( + in_arg._type + ) + ) + for op_out in op_outs.values(): + op_input.extend(op_out) + else: + op_input.append(in_arg) + self._desc.set_input(in_name, [i.name for i in op_input]) + block = paddle.static.default_main_program().current_block() + for out_name, op_output in self._outputs.items(): + op_output_name = unique_name.generate(self._type) + op_output.append(block.create_var(name=op_output_name)) + self._desc.set_output(out_name, [op_output_name]) + return self + + def Init(self): + block = paddle.static.default_main_program().current_block() + self._proto = OpProtoHolder.instance().op_proto_map.get(self._type) + if self._proto is None: + raise AttributeError( + "type object 'OpHelper' has no attribute '{}'".format( + self._type + ) + ) + self._index = len(block.ops) + self._desc = block.desc.append_op() + self._desc.set_type(self._type) + self._attrs = dict() + self._inputs = {i.name: list() for i in self._proto.inputs} + self._outputs = {o.name: list() for o in self._proto.outputs} + block.ops.append(self) + + def Attr(self, name): + attr = self._attrs.get(name) + if attr is None: + attr = PassDesc.AttrHelper(self, name) + self._attrs[name] = attr + return attr + + def SetAttr(self, name, value): + if isinstance(value, PassDesc.AttrHelper): + self.Attr(name)._mapped = value + else: + self._desc._set_attr(name, value) + + def Output(self, name): + output = self._outputs.get(name) + if output is None: + raise ValueError( + "Operator '{}' does not have output named '{}'.".format( + self._type, name + ) + ) + return output + + def Outputs(self): + return self._outputs + + def SetOutputs(self, **kwargs): + for param, arg in kwargs.items(): + if arg is None: + self._desc.remove_output(param) + else: + self._desc.set_output(param, [arg.name]) + + OP = OpHelper() + + +def RegisterPass(function=None, input_specs=dict()): + """ + The function decorator of Register Pass. Decorator @RegisterPass handles + the function and register it into a core.Pass instance. Use name of function + as Pass type. + + Args: + function (callable): The function with return of callable pair(s) that + represents the pattern subgraph and the replace subgraph. + input_specs (dict[str, InputSpec]): Dict of InputSpec to specific the shape/dtype + information of Tensor. Some operators limit the shape and dtype of datas when + create subgraph with Paddle APIs. So user need specify InputSpec of data to + ensure create a correctly subgraph. Of course, this argument is not limited to + matching subgraph. The default is dict(). + + Returns: + callables: Callable pair(s). + + Examples: + .. 
code-block:: python + + import paddle + from paddle.fluid.ir import RegisterPass + + @RegisterPass + def multi_add_to_addn(): + def pattern(x, y, z): + return paddle.add(paddle.add(x, y), z) + def replace(x, y, z): + return paddle.add_n([x, y, z]) + return pattern, replace + """ + + def _is_pass_pair(check_pair): + if isinstance(check_pair, (list, tuple)): + if len(check_pair) == 2: + if all(map(inspect.isfunction, check_pair)): + return True + return False + + def decorated(python_func): + pass_type = python_func.__name__ + signature = inspect.signature(python_func) + if len(signature.parameters) > 0: + raise NotImplementedError( + "Pass function with parameter is not supported now." + ) + elif len(signature.parameters) == 0: + pass_pairs = python_func() + if _is_pass_pair(pass_pairs): + pass_pairs = [pass_pairs] + elif not all(map(_is_pass_pair, pass_pairs)): + raise ValueError( + "Return value of Pass function must be (callable, callable)." + ) + helper = RegisterPassHelper(pass_pairs, pass_type, input_specs) + core.register_pass(pass_type, helper.SerializeMultiPassDesc) + return python_func + + if inspect.isfunction(function): + return decorated(function) + + return decorated From a1f28a48951f6c6541cd107382ccf08317bb4e76 Mon Sep 17 00:00:00 2001 From: Yuanle Liu Date: Tue, 31 Jan 2023 14:29:11 +0800 Subject: [PATCH 28/89] [Paddle Inference] change the default values of some gflags (#50074) --- .../fluid/inference/api/analysis_predictor.cc | 177 ++++++++++-------- 1 file changed, 100 insertions(+), 77 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 6ccad994b06a8..e89bcfa2c6a99 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -1384,13 +1385,6 @@ template <> std::unique_ptr CreatePaddlePredictor( const AnalysisConfig &config) { - // TODO(NHZlX): Should add the link to the doc of - // paddle_infer::CreatePredictor - if (config.glog_info_disabled()) { - FLAGS_logtostderr = 1; - FLAGS_minloglevel = 2; // GLOG_ERROR - } - VLOG(3) << "create AnalysisConfig"; PADDLE_ENFORCE_EQ( config.is_valid(), true, @@ -1403,83 +1397,112 @@ CreatePaddlePredictor( std::call_once(custom_operators_registered, []() { inference::RegisterAllCustomOperator(); }); - if (config.use_gpu()) { - static std::once_flag gflags_initialized; - static bool process_level_allocator_enabled; - - std::call_once(gflags_initialized, [&]() { - std::vector gflags; - PADDLE_ENFORCE_GE( - config.memory_pool_init_size_mb(), - 0.f, + auto SetGflags = [](const AnalysisConfig &config) { + auto SetGflag = [](const char *name, const char *value) { + std::string ret = ::GFLAGS_NAMESPACE::SetCommandLineOption(name, value); + PADDLE_ENFORCE_EQ( + ret.empty(), + false, platform::errors::InvalidArgument( - "The size of memory pool should be greater than 0.")); - PADDLE_ENFORCE_GE( - config.gpu_device_id(), - 0, - platform::errors::InvalidArgument( - "Invalid device id (%d). 
The device id should be greater than 0.", - config.gpu_device_id())); - gflags.push_back("dummy"); - - float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool(); - if (fraction_of_gpu_memory > 0.95f) { - LOG(ERROR) - << "Allocate too much memory for the GPU memory pool, assigned " - << config.memory_pool_init_size_mb() << " MB"; - LOG(ERROR) << "Try to shink the value by setting " - "AnalysisConfig::EnableGpu(...)"; - } + "Fail to set gflag: %s, please make sure the gflag exists.", + name)); + VLOG(3) << "set gflag: --" << name << "=" << value; + }; + // TODO(NHZlX): Should add the link to the doc of + // paddle_infer::CreatePredictor + if (config.glog_info_disabled()) { + FLAGS_logtostderr = 1; + FLAGS_minloglevel = 2; // GLOG_ERROR + } - if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) { - std::string flag = "--fraction_of_gpu_memory_to_use=" + - std::to_string(fraction_of_gpu_memory); - VLOG(3) << "set flag: " << flag; - gflags.push_back(flag); - } + if (config.use_gpu()) { + static std::once_flag gflags_initialized; + static bool process_level_allocator_enabled; + + std::call_once(gflags_initialized, [&]() { + PADDLE_ENFORCE_GE( + config.memory_pool_init_size_mb(), + 0.f, + platform::errors::InvalidArgument( + "The size of memory pool should be greater than 0.")); + PADDLE_ENFORCE_GE(config.gpu_device_id(), + 0, + platform::errors::InvalidArgument( + "Invalid device id (%d). The device id should be " + "greater than 0.", + config.gpu_device_id())); + + float fraction_of_gpu_memory = config.fraction_of_gpu_memory_for_pool(); + if (fraction_of_gpu_memory > 0.95f) { + LOG(ERROR) + << "Allocate too much memory for the GPU memory pool, assigned " + << config.memory_pool_init_size_mb() << " MB"; + LOG(ERROR) << "Try to shink the value by setting " + "AnalysisConfig::EnableUseGpu(...)"; + } + if (fraction_of_gpu_memory >= 0.0f || fraction_of_gpu_memory <= 0.95f) { + std::string value = std::to_string(fraction_of_gpu_memory); + SetGflag("fraction_of_gpu_memory_to_use", value.data()); + } - // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local - // allocator when multi-stream is enabled. - if (config.thread_local_stream_enabled()) { - gflags.push_back("--allocator_strategy=thread_local"); - process_level_allocator_enabled = false; - } else { - process_level_allocator_enabled = true; - } + // TODO(Shixiaowei02): Add a mandatory scheme to use the thread local + // allocator when multi-stream is enabled. + if (config.thread_local_stream_enabled()) { + SetGflag("allocator_strategy", "thread_local"); + process_level_allocator_enabled = false; + } else { + process_level_allocator_enabled = true; + } - // support set flags from enviorment. - const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap(); - std::ostringstream os; - os << "--tryfromenv="; - for (auto &pair : env_map) { - os << pair.second.name << ","; - } - auto tryfromenv_str = os.str(); - gflags.push_back(os.str().substr(0, tryfromenv_str.size() - 1)); - - if (framework::InitGflags(gflags)) { - VLOG(3) << "The following gpu analysis configurations only take effect " - "for the first predictor: "; - for (size_t i = 1; i < gflags.size(); ++i) { - VLOG(3) << gflags[i]; + // for inference, the following default values are better. 
+ if (std::getenv("FLAGS_conv_workspace_size_limit") == nullptr) { + SetGflag("conv_workspace_size_limit", "32"); } - } else { - LOG(WARNING) << "The one-time configuration of analysis predictor " - "failed, which may be due to native predictor called " - "first and its configurations taken effect."; - } - }); + if (std::getenv("FLAGS_initial_cpu_memory_in_mb") == nullptr) { + SetGflag("initial_cpu_memory_in_mb", "0"); + } + + // support set gflags from environment. + std::vector gflags; + const phi::ExportedFlagInfoMap &env_map = phi::GetExportedFlagInfoMap(); + std::ostringstream os; + for (auto &pair : env_map) { + os << pair.second.name << ","; + } + std::string tryfromenv_str = os.str(); + if (!tryfromenv_str.empty()) { + tryfromenv_str.pop_back(); + tryfromenv_str = "--tryfromenv=" + tryfromenv_str; + gflags.push_back(tryfromenv_str); + } + if (framework::InitGflags(gflags)) { + VLOG(3) + << "The following gpu analysis configurations only take effect " + "for the first predictor: "; + for (const auto &gflag : gflags) { + VLOG(3) << gflag; + } + } else { + LOG(WARNING) << "The one-time configuration of analysis predictor " + "failed, which may be due to native predictor called " + "first and its configurations taken effect."; + } + }); - if (config.thread_local_stream_enabled() && - process_level_allocator_enabled) { - PADDLE_THROW(platform::errors::Fatal( - "When binding threads and streams, the use of " - "process-level allocators will result in undefined result " - "errors due to memory asynchronous operations." - "The thread and stream binding configuration of all " - "predictors should be the same in a single process.")); + if (config.thread_local_stream_enabled() && + process_level_allocator_enabled) { + PADDLE_THROW(platform::errors::Fatal( + "When binding threads and streams, the use of " + "process-level allocators will result in undefined result " + "errors due to memory asynchronous operations." + "The thread and stream binding configuration of all " + "predictors should be the same in a single process.")); + } } - } + }; + SetGflags(config); + + VLOG(3) << "create AnalysisPredictor"; std::unique_ptr predictor(new AnalysisPredictor(config)); // Each config can only be used for one predictor. 
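Editor's note on the gflag change above: the refactored setup only falls back to the new inference defaults (a 32 MB conv workspace and 0 MB initial CPU memory) when the corresponding `FLAGS_*` variables are absent from the process environment, and it runs on the GPU path once, for the first predictor created in a process. A minimal sketch of overriding those defaults from user code follows; it assumes the standard `paddle.inference` Python API, and the model path and flag values are illustrative placeholders, not part of this patch.

    import os

    # Export the FLAGS_* variables before the first predictor is constructed;
    # the patch applies its defaults only when these variables are unset.
    os.environ["FLAGS_conv_workspace_size_limit"] = "512"  # placeholder value, MB
    os.environ["FLAGS_initial_cpu_memory_in_mb"] = "100"   # placeholder value, MB

    import paddle.inference as paddle_infer

    # "./inference_model" is a placeholder path to a saved inference model.
    config = paddle_infer.Config("./inference_model")
    config.enable_use_gpu(256, 0)  # the gflag setup above runs on the GPU path
    predictor = paddle_infer.create_predictor(config)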
From 26bdea0fd12ce2f3b2ed8c0f104c1d8621eeda4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 14:40:01 +0800 Subject: [PATCH 29/89] fix div 0 error in floormod (#49997) * fix mod 0 error * fix div 0 error in floormod --- .../phi/kernels/funcs/elementwise_functor.h | 1 + .../unittests/test_elementwise_floormod_op.py | 35 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_elementwise_floormod_op.py diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h index b98247fdf0c67..2636e9814dd2f 100644 --- a/paddle/phi/kernels/funcs/elementwise_functor.h +++ b/paddle/phi/kernels/funcs/elementwise_functor.h @@ -501,6 +501,7 @@ struct MinGradXYFunctor { template struct RemainderFunctor { inline HOSTDEVICE T operator()(const T a, const T b) const { + PADDLE_ENFORCE(b != 0, DIV_ERROR_INFO); T res = a % b; // Accoding to #PR26732: in dividen % divsor diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_floormod_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_floormod_op.py new file mode 100644 index 0000000000000..33e6fc2c47d45 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_elementwise_floormod_op.py @@ -0,0 +1,35 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import unittest + +import paddle +import paddle.fluid as fluid + + +class TestFloorModOp(unittest.TestCase): + def test_dygraph(self): + with fluid.dygraph.guard(fluid.CPUPlace()): + # mod by zero + x = paddle.to_tensor([59], dtype='int32') + y = paddle.to_tensor([0], dtype='int32') + try: + paddle.floor_mod(x, y) + except Exception as e: + print("Error: Mod by zero encounter in floor_mod\n") + + +if __name__ == '__main__': + unittest.main() From c64296bf36d4b3f8902b2281969b9512fb1ff472 Mon Sep 17 00:00:00 2001 From: ZZK <359521840@qq.com> Date: Tue, 31 Jan 2023 14:44:30 +0800 Subject: [PATCH 30/89] Bump Cutlass version to 2.11.0 (#50073) --- cmake/external/cutlass.cmake | 2 +- .../cutlass/{ => moe}/default_moe_fc_traits.h | 0 .../{ => moe}/linear_combination_ft_gelu.h | 0 .../fusion/cutlass/{ => moe}/moe_cutlass_kernel.h | 15 +++++++++------ .../fusion/cutlass/{ => moe}/moe_kernel_impl.h | 0 paddle/phi/kernels/fusion/cutlass/moe_kernel.cu | 10 ++++++---- 6 files changed, 16 insertions(+), 11 deletions(-) rename paddle/phi/kernels/fusion/cutlass/{ => moe}/default_moe_fc_traits.h (100%) rename paddle/phi/kernels/fusion/cutlass/{ => moe}/linear_combination_ft_gelu.h (100%) rename paddle/phi/kernels/fusion/cutlass/{ => moe}/moe_cutlass_kernel.h (98%) rename paddle/phi/kernels/fusion/cutlass/{ => moe}/moe_kernel_impl.h (100%) diff --git a/cmake/external/cutlass.cmake b/cmake/external/cutlass.cmake index c96631206dfd7..eee868900b585 100644 --- a/cmake/external/cutlass.cmake +++ b/cmake/external/cutlass.cmake @@ -17,7 +17,7 @@ include(ExternalProject) set(CUTLASS_PREFIX_DIR ${THIRD_PARTY_PATH}/cutlass) set(CUTLASS_REPOSITORY https://github.com/NVIDIA/cutlass.git) -set(CUTLASS_TAG v2.10.0) +set(CUTLASS_TAG v2.11.0) include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/") include_directories("${THIRD_PARTY_PATH}/cutlass/src/extern_cutlass/include/") diff --git a/paddle/phi/kernels/fusion/cutlass/default_moe_fc_traits.h b/paddle/phi/kernels/fusion/cutlass/moe/default_moe_fc_traits.h similarity index 100% rename from paddle/phi/kernels/fusion/cutlass/default_moe_fc_traits.h rename to paddle/phi/kernels/fusion/cutlass/moe/default_moe_fc_traits.h diff --git a/paddle/phi/kernels/fusion/cutlass/linear_combination_ft_gelu.h b/paddle/phi/kernels/fusion/cutlass/moe/linear_combination_ft_gelu.h similarity index 100% rename from paddle/phi/kernels/fusion/cutlass/linear_combination_ft_gelu.h rename to paddle/phi/kernels/fusion/cutlass/moe/linear_combination_ft_gelu.h diff --git a/paddle/phi/kernels/fusion/cutlass/moe_cutlass_kernel.h b/paddle/phi/kernels/fusion/cutlass/moe/moe_cutlass_kernel.h similarity index 98% rename from paddle/phi/kernels/fusion/cutlass/moe_cutlass_kernel.h rename to paddle/phi/kernels/fusion/cutlass/moe/moe_cutlass_kernel.h index f037f4e01b143..f0fcafba453c4 100644 --- a/paddle/phi/kernels/fusion/cutlass/moe_cutlass_kernel.h +++ b/paddle/phi/kernels/fusion/cutlass/moe/moe_cutlass_kernel.h @@ -42,6 +42,7 @@ #include "cutlass/gemm/kernel/grouped_problem_visitor.h" #include "cutlass/layout/matrix.h" #include "cutlass/trace.h" + ///////////////////////////////////////////////////////////////////////////////////////////////// namespace cutlass { @@ -350,14 +351,16 @@ template struct GemmMoeProblemVisitor - : public MoeProblemVisitor, - ThreadblockShape, - GroupScheduleMode_, - PrefetchTileCount, - ThreadCount> { + : public MoeProblemVisitor< + detail::GemmGroupedProblemSizeHelper, + ThreadblockShape, + GroupScheduleMode_, + PrefetchTileCount, + ThreadCount> { static bool 
const kTransposed = Transposed; - using ProblemSizeHelper = detail::GemmGroupedProblemSizeHelper; + using ProblemSizeHelper = + detail::GemmGroupedProblemSizeHelper; using Base = MoeProblemVisitor Date: Tue, 31 Jan 2023 14:51:46 +0800 Subject: [PATCH 31/89] not use shm cache default (#50089) --- paddle/phi/core/flags.cc | 4 ++-- .../fluid/dataloader/dataloader_iter.py | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/paddle/phi/core/flags.cc b/paddle/phi/core/flags.cc index 526457499c884..680661c890519 100644 --- a/paddle/phi/core/flags.cc +++ b/paddle/phi/core/flags.cc @@ -1198,11 +1198,11 @@ PADDLE_DEFINE_EXPORTED_bool(trt_ibuilder_cache, * mmap_allocator related FLAG * Name: use_shm_cache * Since Version: 2.5.0 - * Value Range: bool, default=true + * Value Range: bool, default=false * Example: * Note: . If True, mmap_allocator will cache shm file to decrease munmap * operation. */ PADDLE_DEFINE_EXPORTED_bool(use_shm_cache, - true, + false, "Use shm cache in mmap_allocator."); diff --git a/python/paddle/fluid/dataloader/dataloader_iter.py b/python/paddle/fluid/dataloader/dataloader_iter.py index c7c49c794a101..66c6dff6c1913 100644 --- a/python/paddle/fluid/dataloader/dataloader_iter.py +++ b/python/paddle/fluid/dataloader/dataloader_iter.py @@ -410,13 +410,22 @@ def __init__(self, loader): # Note(zhangbo): shm_buffer_size is used for MemoryMapAllocationPool. # MemoryMapAllocationPool is used to cache and reuse shm, thus reducing munmap in dataloader. # For more details, please see: paddle/fluid/memory/allocation/mmap_allocator.h - try: - self._worker_shm_buffer_size = (2 + 1) * len(self._dataset[0]) - except: + if os.environ.get('FLAGS_use_shm_cache', False) in [ + 1, + '1', + True, + 'True', + 'true', + ]: + try: + self._worker_shm_buffer_size = (2 + 1) * len(self._dataset[0]) + except: + self._worker_shm_buffer_size = 0 + warnings.warn( + "Setting the shm cache buffer size to 0, equivalent to not using the shm cache policy." + ) + else: self._worker_shm_buffer_size = 0 - warnings.warn( - "Setting the shm cache buffer size to 0, equivalent to not using the shm cache policy." 
- ) self._main_thread_shm_buffer_size = ( (self._worker_shm_buffer_size) * 2 * self._num_workers ) From 9a4acfee2fb1e90ded399511cf0f8ee1def0229f Mon Sep 17 00:00:00 2001 From: zhangkaihuo Date: Tue, 31 Jan 2023 14:56:13 +0800 Subject: [PATCH 32/89] optimize 2D sync_batch_norm (#49663) --- paddle/phi/kernels/funcs/norm_utils.cu.h | 120 ++++++++- .../phi/kernels/gpu/batch_norm_grad_kernel.cu | 229 ++++-------------- paddle/phi/kernels/gpu/batch_norm_kernel.cu | 86 ++----- .../phi/kernels/gpu/sync_batch_norm_utils.h | 206 +++++++++++++++- 4 files changed, 388 insertions(+), 253 deletions(-) diff --git a/paddle/phi/kernels/funcs/norm_utils.cu.h b/paddle/phi/kernels/funcs/norm_utils.cu.h index 0971db10529a9..80f37750adcf9 100644 --- a/paddle/phi/kernels/funcs/norm_utils.cu.h +++ b/paddle/phi/kernels/funcs/norm_utils.cu.h @@ -26,6 +26,7 @@ namespace cub = hipcub; #endif #include "paddle/phi/common/layout.h" #include "paddle/phi/kernels/funcs/math_function.h" +#include "paddle/phi/kernels/funcs/reduce_function.h" #ifdef __HIPCC__ #define LAUNCH_BOUNDS(BlockDim) __launch_bounds__(BlockDim) @@ -36,8 +37,6 @@ namespace cub = hipcub; namespace phi { namespace funcs { -using DataLayout = phi::DataLayout; - // math: dx = scale * ((x - mean) * inv_var / NxHxW * (np.mean(ddx, // axis=(n,h,w)) * // np.sum(dy, axis=(n,h,w)) - @@ -670,5 +669,122 @@ void NormDoubleGradFunctor(const DeviceContext &ctx, } } } + +template +__device__ __forceinline__ void BlockReduceByVetical(BnT x_sum, + BnT x_square_sum, + BnT *smem_sum, + BnT *smem_square_sum, + BnT *x_sum_out, + BnT *x_square_sum_out) { + int tid = threadIdx.x + threadIdx.y * blockDim.x; +#pragma unroll + for (int offset = blockDim.y / 2; offset > 0; offset >>= 1) { + if (threadIdx.y < offset * 2) { + smem_sum[tid] = x_sum; + smem_square_sum[tid] = x_square_sum; + } + __syncthreads(); + if (threadIdx.y < offset) { + int pair_tid = tid + offset * blockDim.x; + x_sum += smem_sum[pair_tid]; + x_square_sum += smem_square_sum[pair_tid]; + } + } + if (threadIdx.y == 0) { + *x_sum_out = x_sum; + *x_square_sum_out = x_square_sum; + } +} + +template +__device__ __forceinline__ void ReduceSumPost(const int C, // channels + const int c, // channel index + BnT *sum1, + BnT *sum2, + bool *is_last_block_done, + BnT *cache1, + BnT *cache2, + BnT *block_data_ptr, + int *flag_ptr) { + volatile BnT *staging_sum = block_data_ptr; + volatile BnT *staging_sum2 = &block_data_ptr[C * gridDim.y]; + // write block data to global memory + if (threadIdx.y == 0) { + staging_sum[c + blockIdx.y * C] = *sum1; + staging_sum2[c + blockIdx.y * C] = *sum2; + } + + // make sure write is visible to all blocks + __threadfence(); + __syncthreads(); + + // mark block done + if (threadIdx.x == 0 && threadIdx.y == 0) { + int old = atomicAdd(&flag_ptr[blockIdx.x], 1); + *is_last_block_done = (old == (gridDim.y - 1)); + } + + __syncthreads(); + + if (*is_last_block_done) { + *sum1 = static_cast(0); + *sum2 = static_cast(0); + // thread sum + for (int y = threadIdx.y; y < gridDim.y; y += blockDim.y) { + *sum1 += staging_sum[c + y * C]; + *sum2 += staging_sum2[c + y * C]; + } + + // vertical block sum + funcs::BlockReduceByVetical( + *sum1, *sum2, &cache1[0], &cache2[0], sum1, sum2); + } +} + +template +void SetLaunchConfigInfoForChannelLast(const Context &ctx, + DenseTensor *block_data_tensor, + DenseTensor *flag_tensor, + BnT **block_data_ptr, + int **flag_ptr, + const int N, + const int H, + const int W, + const int D, + const int C, + const int block_size, + dim3 *block, + dim3 *grid) { + const int 
MAX_GRID_SIZE = 128; + const int WARP_SIZE = 32; + + int block_x = std::min(phi::funcs::details::GetLastPow2(C), WARP_SIZE); + int block_y = std::min(phi::funcs::details::GetLastPow2(N * H * W * D / 16), + block_size / block_x); + if (block_x * block_y != block_size) { + block_x = + std::min(phi::funcs::details::GetLastPow2(C), block_size / block_y); + } + int grid_x = (C + block_x - 1) / block_x; + int grid_y = std::min((N * H * W * D + block_y * 16 - 1) / (block_y * 16), + MAX_GRID_SIZE); + + block->x = block_x; + block->y = block_y; + grid->x = grid_x; + grid->y = grid_y; + + if (grid->y > 1) { + *block_data_tensor = phi::Empty(ctx, {2 * C * grid->y}); + *flag_tensor = phi::Empty(ctx, {grid->x}); + + *block_data_ptr = block_data_tensor->data(); + *flag_ptr = flag_tensor->data(); + funcs::SetConstant set_zero; + set_zero(ctx, flag_tensor, static_cast(0)); + } +} + } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu index 01a7aa0162718..58d05d6075816 100644 --- a/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_grad_kernel.cu @@ -245,34 +245,6 @@ static __global__ LAUNCH_BOUNDS(BlockDim) void BNBackward( } } -template -__device__ __forceinline__ void BlockReduceByVetical( - BatchNormParamType x_sum, - BatchNormParamType x_square_sum, - BatchNormParamType *smem_sum, - BatchNormParamType *smem_square_sum, - BatchNormParamType *x_sum_out, - BatchNormParamType *x_square_sum_out) { - int tid = threadIdx.x + threadIdx.y * blockDim.x; -#pragma unroll - for (int offset = blockDim.y / 2; offset > 0; offset >>= 1) { - if (threadIdx.y < offset * 2) { - smem_sum[tid] = x_sum; - smem_square_sum[tid] = x_square_sum; - } - __syncthreads(); - if (threadIdx.y < offset) { - int pair_tid = tid + offset * blockDim.x; - x_sum += smem_sum[pair_tid]; - x_square_sum += smem_square_sum[pair_tid]; - } - } - if (threadIdx.y == 0) { - *x_sum_out = x_sum; - *x_square_sum_out = x_square_sum; - } -} - template static __global__ void BNBackward2DChannelLastStage1( const T *x, @@ -309,53 +281,25 @@ static __global__ void BNBackward2DChannelLastStage1( } // vertical block sum - BlockReduceByVetical(x_sum, - x_square_sum, - &smem_sum[0], - &smem_square_sum[0], - &x_sum, - &x_square_sum); + funcs::BlockReduceByVetical>(x_sum, + x_square_sum, + &smem_sum[0], + &smem_square_sum[0], + &x_sum, + &x_square_sum); if (gridDim.y > 1) { - volatile BatchNormParamType *staging_sum = block_data_ptr; - volatile BatchNormParamType *staging_square_sum = - &block_data_ptr[C * gridDim.y]; - // write block data to global memory - if (threadIdx.y == 0) { - staging_sum[i + blockIdx.y * C] = x_sum; - staging_square_sum[i + blockIdx.y * C] = x_square_sum; - } - - // make sure write is visible to all blocks - __threadfence(); - __syncthreads(); - __shared__ bool is_last_block_done; - // mark block done - if (threadIdx.x == 0 && threadIdx.y == 0) { - int old = atomicAdd(&flag_ptr[blockIdx.x], 1); - is_last_block_done = (old == (gridDim.y - 1)); - } - - __syncthreads(); - + funcs::ReduceSumPost>(C, + i, + &x_sum, + &x_square_sum, + &is_last_block_done, + smem_sum, + smem_square_sum, + block_data_ptr, + flag_ptr); if (is_last_block_done) { - x_sum = static_cast>(0); - x_square_sum = static_cast>(0); - // thread sum - for (int y = threadIdx.y; y < gridDim.y; y += blockDim.y) { - x_sum += staging_sum[i + y * C]; - x_square_sum += staging_square_sum[i + y * C]; - } - - // vertical block sum - 
BlockReduceByVetical(x_sum, - x_square_sum, - &smem_sum[0], - &smem_square_sum[0], - &x_sum, - &x_square_sum); - // final compute if (threadIdx.y == 0) { BatchNormParamType compute_mean_val = x_sum / inner_size; @@ -417,45 +361,21 @@ static __global__ void BNBackward2DChannelLastStage2( } // vertical block sum - BlockReduceByVetical( + funcs::BlockReduceByVetical>( ds_sum, db_sum, &smem_ds_sum[0], &smem_db_sum[0], &ds_sum, &db_sum); if (gridDim.y > 1) { - volatile BatchNormParamType *staging_ds_sum = block_data_ptr; - volatile BatchNormParamType *staging_db_sum = - &block_data_ptr[C * gridDim.y]; - // write block data to global memory - if (threadIdx.y == 0) { - staging_ds_sum[i + blockIdx.y * C] = ds_sum; - staging_db_sum[i + blockIdx.y * C] = db_sum; - } - - // make sure write is visible to all blocks - __threadfence(); - __syncthreads(); - __shared__ bool is_last_block_done; - // mark block done - if (threadIdx.x == 0 && threadIdx.y == 0) { - int old = atomicAdd(&flag_ptr[blockIdx.x], 1); - is_last_block_done = (old == (gridDim.y - 1)); - } - - __syncthreads(); - + funcs::ReduceSumPost>(C, + i, + &ds_sum, + &db_sum, + &is_last_block_done, + smem_ds_sum, + smem_db_sum, + block_data_ptr, + flag_ptr); if (is_last_block_done) { - ds_sum = static_cast>(0); - db_sum = static_cast>(0); - // thread sum - for (int y = threadIdx.y; y < gridDim.y; y += blockDim.y) { - ds_sum += staging_ds_sum[i + y * C]; - db_sum += staging_db_sum[i + y * C]; - } - - // vertical block sum - BlockReduceByVetical( - ds_sum, db_sum, &smem_ds_sum[0], &smem_db_sum[0], &ds_sum, &db_sum); - // final compute if (threadIdx.y == 0) { dscale[i] = ds_sum * inv_var_val; @@ -563,51 +483,6 @@ static __global__ LAUNCH_BOUNDS(BlockDim) void BNBackwardData( } } -template -void SetLaunchConfigInfoForChannelLast(const Context &ctx, - DenseTensor *block_data_tensor, - DenseTensor *flag_tensor, - BatchNormParamType **block_data_ptr, - int **flag_ptr, - const int N, - const int H, - const int W, - const int D, - const int C, - const int block_size, - dim3 *block, - dim3 *grid) { - const int MAX_GRID_SIZE = 128; - const int WARP_SIZE = 32; - - int block_x = std::min(phi::funcs::details::GetLastPow2(C), WARP_SIZE); - int block_y = std::min(phi::funcs::details::GetLastPow2(N * H * W * D / 16), - block_size / block_x); - if (block_x * block_y != block_size) { - block_x = - std::min(phi::funcs::details::GetLastPow2(C), block_size / block_y); - } - int grid_x = (C + block_x - 1) / block_x; - int grid_y = std::min((N * H * W * D + block_y * 16 - 1) / (block_y * 16), - MAX_GRID_SIZE); - - block->x = block_x; - block->y = block_y; - grid->x = grid_x; - grid->y = grid_y; - - if (grid->y > 1) { - *block_data_tensor = - phi::Empty, Context>(ctx, {2 * C * grid->y}); - *flag_tensor = phi::Empty(ctx, {grid->x}); - - *block_data_ptr = block_data_tensor->data>(); - *flag_ptr = flag_tensor->data(); - funcs::SetConstant set_zero; - set_zero(ctx, flag_tensor, static_cast(0)); - } -} - template void BatchNormGradRawKernel(const Context &ctx, const DenseTensor &x, @@ -931,19 +806,20 @@ void BatchNormGradRawKernel(const Context &ctx, BatchNormParamType *block_data_ptr = nullptr; int *flag_ptr = nullptr; - SetLaunchConfigInfoForChannelLast(ctx, - &block_data_tensor, - &flag_tensor, - &block_data_ptr, - &flag_ptr, - N, - H, - W, - D, - C, - block_size, - &block, - &grid); + funcs::SetLaunchConfigInfoForChannelLast>( + ctx, + &block_data_tensor, + &flag_tensor, + &block_data_ptr, + &flag_ptr, + N, + H, + W, + D, + C, + block_size, + &block, + &grid); // 1. 
reduce_sum(x) => mean, inv_var auto *mean_ptr = @@ -1294,19 +1170,20 @@ void BatchNormGradRawKernel(const Context &ctx, BatchNormParamType *block_data_ptr = nullptr; int *flag_ptr = nullptr; - SetLaunchConfigInfoForChannelLast(ctx, - &block_data_tensor, - &flag_tensor, - &block_data_ptr, - &flag_ptr, - N, - H, - W, - D, - C, - block_size, - &block, - &grid); + funcs::SetLaunchConfigInfoForChannelLast>( + ctx, + &block_data_tensor, + &flag_tensor, + &block_data_ptr, + &flag_ptr, + N, + H, + W, + D, + C, + block_size, + &block, + &grid); BNBackward2DChannelLastStage2 <<>>( transformed_d_y.template data(), diff --git a/paddle/phi/kernels/gpu/batch_norm_kernel.cu b/paddle/phi/kernels/gpu/batch_norm_kernel.cu index 60d0d1a01bb30..fc460574b74b7 100644 --- a/paddle/phi/kernels/gpu/batch_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/batch_norm_kernel.cu @@ -30,6 +30,7 @@ namespace cub = hipcub; #include "paddle/phi/kernels/batch_norm_kernel.h" #include "paddle/phi/kernels/funcs/batch_norm_utils.h" #include "paddle/phi/kernels/funcs/eigen/common.h" +#include "paddle/phi/kernels/funcs/norm_utils.cu.h" #include "paddle/phi/kernels/funcs/norm_utils.h" #include "paddle/phi/kernels/funcs/reduce_function.h" @@ -171,34 +172,6 @@ static __global__ LAUNCH_BOUNDS(BlockDim) void BNForwardTraining( } } -template -__device__ __forceinline__ void merge_block_vertical( - BatchNormParamType x_sum, - BatchNormParamType x_square_sum, - BatchNormParamType *smem_sum, - BatchNormParamType *smem_square_sum, - BatchNormParamType *x_sum_out, - BatchNormParamType *x_square_sum_out) { - int tid = threadIdx.x + threadIdx.y * blockDim.x; -#pragma unroll - for (int offset = blockDim.y / 2; offset > 0; offset >>= 1) { - if (threadIdx.y < offset * 2) { - smem_sum[tid] = x_sum; - smem_square_sum[tid] = x_square_sum; - } - __syncthreads(); - if (threadIdx.y < offset) { - int pair_tid = tid + offset * blockDim.x; - x_sum += smem_sum[pair_tid]; - x_square_sum += smem_square_sum[pair_tid]; - } - } - if (threadIdx.y == 0) { - *x_sum_out = x_sum; - *x_square_sum_out = x_square_sum; - } -} - template __device__ __forceinline__ void merge_block_horizonal( BatchNormParamType x_sum, @@ -269,53 +242,26 @@ static __global__ void BNForwardTraining2DChannelLastCompStat( } // vertical block sum - merge_block_vertical(x_sum, - x_square_sum, - &smem_sum[0], - &smem_square_sum[0], - &x_sum, - &x_square_sum); + funcs::BlockReduceByVetical>(x_sum, + x_square_sum, + &smem_sum[0], + &smem_square_sum[0], + &x_sum, + &x_square_sum); if (gridDim.y > 1) { - volatile BatchNormParamType *staging_sum = block_data_ptr; - volatile BatchNormParamType *staging_square_sum = - &block_data_ptr[C * gridDim.y]; - // write block data to global memory - if (threadIdx.y == 0) { - staging_sum[i + blockIdx.y * C] = x_sum; - staging_square_sum[i + blockIdx.y * C] = x_square_sum; - } - - // make sure write is visible to all blocks - __threadfence(); - __syncthreads(); - __shared__ bool is_last_block_done; - // mark block done - if (threadIdx.x == 0 && threadIdx.y == 0) { - int old = atomicAdd(&flag_ptr[blockIdx.x], 1); - is_last_block_done = (old == (gridDim.y - 1)); - } - - __syncthreads(); + funcs::ReduceSumPost>(C, + i, + &x_sum, + &x_square_sum, + &is_last_block_done, + smem_sum, + smem_square_sum, + block_data_ptr, + flag_ptr); if (is_last_block_done) { - x_sum = static_cast>(0); - x_square_sum = static_cast>(0); - // thread sum - for (int y = threadIdx.y; y < gridDim.y; y += blockDim.y) { - x_sum += staging_sum[i + y * C]; - x_square_sum += staging_square_sum[i + y * 
C]; - } - - // vertical block sum - merge_block_vertical(x_sum, - x_square_sum, - &smem_sum[0], - &smem_square_sum[0], - &x_sum, - &x_square_sum); - // final compute if (threadIdx.y == 0) { BatchNormParamType compute_mean_val = x_sum / inner_size; diff --git a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h index 81717cd445bc0..71d0ccfa0eb4b 100644 --- a/paddle/phi/kernels/gpu/sync_batch_norm_utils.h +++ b/paddle/phi/kernels/gpu/sync_batch_norm_utils.h @@ -34,6 +34,7 @@ namespace cub = hipcub; #include "paddle/fluid/platform/device/gpu/nccl_helper.h" #include "paddle/phi/backends/gpu/gpu_dnn.h" #include "paddle/phi/common/layout.h" +#include "paddle/phi/kernels/funcs/norm_utils.cu.h" #include "paddle/phi/kernels/funcs/norm_utils.h" namespace phi { @@ -168,6 +169,61 @@ __global__ void KeBackwardLocalStats(const T *dy, } } +template +__global__ void KeBackwardLocalStats2D(const T *dy, + const T *x, + const BatchNormParamType *means, + int N, + int M, + int C, + BatchNormParamType *block_data_ptr, + int *flag_ptr, + BatchNormParamType *sum_dy_prod) { + __shared__ BatchNormParamType smem_sum[BlockDim]; + __shared__ BatchNormParamType smem_square_sum[BlockDim]; + for (int k = blockIdx.x * blockDim.x + threadIdx.x; k < C; + k += gridDim.x * blockDim.x) { + BatchNormParamType sum1 = 0.; + BatchNormParamType sum2 = 0.; + auto mean = means[k]; + for (int i = blockIdx.y * blockDim.y + threadIdx.y; i < N * M; + i += gridDim.y * blockDim.y) { + int id = layout == DataLayout::kNCHW ? (i / M) * C * M + k * M + i % M + : i * C + k; + auto g = static_cast>(dy[id]); + sum1 += g; + auto x_i = static_cast>(x[id]); + sum2 += g * (x_i - mean); + } + funcs::BlockReduceByVetical>( + sum1, sum2, &smem_sum[0], &smem_square_sum[0], &sum1, &sum2); + + if (gridDim.y > 1) { + __shared__ bool is_last_block_done; + funcs::ReduceSumPost>(C, + k, + &sum1, + &sum2, + &is_last_block_done, + smem_sum, + smem_square_sum, + block_data_ptr, + flag_ptr); + if (is_last_block_done) { + // final compute + if (threadIdx.y == 0) { + sum_dy_prod[k] = sum1; + sum_dy_prod[k + C] = sum2; + } + } + } + } + if (blockIdx.y == 0 && blockIdx.x == 0 && threadIdx.y == 0 && + threadIdx.x == 0) { + sum_dy_prod[2 * C] = 1.0; + } +} + template static __global__ void KeBNBackwardScaleBias( const T *dy, @@ -213,6 +269,68 @@ static __global__ void KeBNBackwardScaleBias( } } +template +static __global__ void KeBNBackwardScaleBias2D( + const T *dy, + const T *x, + const BatchNormParamType *mean, + const BatchNormParamType *inv_variance, + const double epsilon, + const int N, + const int C, + const int HxW, + BatchNormParamType *block_data_ptr, + int *flag_ptr, + BatchNormParamType *dscale, + BatchNormParamType *dbias) { + const int outer_size = C; + const int inner_size = N * HxW; + __shared__ BatchNormParamType smem_sum[BlockDim]; + __shared__ BatchNormParamType smem_square_sum[BlockDim]; + + for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < outer_size; + i += gridDim.x * blockDim.x) { + BatchNormParamType ds_sum = 0.; + BatchNormParamType db_sum = 0.; + + auto inv_var_i = inv_variance[i]; + auto mean_i = mean[i]; + for (int j = blockIdx.y * blockDim.y + threadIdx.y; j < inner_size; + j += gridDim.y * blockDim.y) { + const int id = layout == DataLayout::kNCHW + ? 
((j / HxW) * C + i) * HxW + (j % HxW) + : j * outer_size + i; + auto x_i = static_cast>(x[id]); + auto dy_i = static_cast>(dy[id]); + ds_sum += dy_i * (x_i - mean_i); + db_sum += dy_i; + } + + funcs::BlockReduceByVetical>( + ds_sum, db_sum, &smem_sum[0], &smem_square_sum[0], &ds_sum, &db_sum); + + if (gridDim.y > 1) { + __shared__ bool is_last_block_done; + funcs::ReduceSumPost>(C, + i, + &ds_sum, + &db_sum, + &is_last_block_done, + smem_sum, + smem_square_sum, + block_data_ptr, + flag_ptr); + if (is_last_block_done) { + // final compute + if (threadIdx.y == 0) { + dscale[i] = ds_sum * inv_var_i; + dbias[i] = db_sum; + } + } + } + } +} + template static __global__ void KeBNRestoreData(T *x, const BatchNormParamType *scale, @@ -410,9 +528,46 @@ void SyncBatchNormGradFunctor( <<>>( dy_d, x_d, saved_mean_ptr, N, fsize, C, stats); } else { - KeBackwardLocalStats - <<>>( - dy_d, x_d, saved_mean_ptr, N, fsize, C, stats); + if (x_dims.size() == 2 && N >= 65535) { + dim3 block; + dim3 grid; + const int block_size = 512; + + // init intermediate storage + DenseTensor block_data_tensor; + DenseTensor flag_tensor; + BatchNormParamType *block_data_ptr = nullptr; + int *flag_ptr = nullptr; + + funcs::SetLaunchConfigInfoForChannelLast>( + ctx, + &block_data_tensor, + &flag_tensor, + &block_data_ptr, + &flag_ptr, + N, + H, + W, + D, + C, + block_size, + &block, + &grid); + KeBackwardLocalStats2D + <<>>(dy_d, + x_d, + saved_mean_ptr, + N, + fsize, + C, + block_data_ptr, + flag_ptr, + stats); + } else { + KeBackwardLocalStats + <<>>( + dy_d, x_d, saved_mean_ptr, N, fsize, C, stats); + } } #if defined(PADDLE_WITH_NCCL) || defined(PADDLE_WITH_RCCL) @@ -476,8 +631,33 @@ void SyncBatchNormGradFunctor( } } else { if (d_scale && d_bias) { - KeBNBackwardScaleBias - <<>>(dy_d, + if (x_dims.size() == 2 && N >= 65535) { + dim3 block; + dim3 grid; + const int block_size = 512; + + // init intermediate storage + DenseTensor block_data_tensor; + DenseTensor flag_tensor; + BatchNormParamType *block_data_ptr = nullptr; + int *flag_ptr = nullptr; + + funcs::SetLaunchConfigInfoForChannelLast>( + ctx, + &block_data_tensor, + &flag_tensor, + &block_data_ptr, + &flag_ptr, + N, + H, + W, + D, + C, + block_size, + &block, + &grid); + KeBNBackwardScaleBias2D + <<>>(dy_d, x_d, saved_mean_ptr, saved_inv_var, @@ -485,8 +665,24 @@ void SyncBatchNormGradFunctor( N, C, fsize, + block_data_ptr, + flag_ptr, d_scale->data>(), d_bias->data>()); + } else { + KeBNBackwardScaleBias + <<>>( + dy_d, + x_d, + saved_mean_ptr, + saved_inv_var, + epsilon, + N, + C, + fsize, + d_scale->data>(), + d_bias->data>()); + } } if (d_x) { KeBNBackwardData<<>>( From 1048b166d9db56dafd93d714ad0bcf9fd92cfaeb Mon Sep 17 00:00:00 2001 From: Roc <30228238+sljlp@users.noreply.github.com> Date: Tue, 31 Jan 2023 15:01:47 +0800 Subject: [PATCH 33/89] fix send start msg (#50085) --- paddle/fluid/distributed/fleet_executor/carrier.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/paddle/fluid/distributed/fleet_executor/carrier.cc b/paddle/fluid/distributed/fleet_executor/carrier.cc index 2b75c3ba066ec..4a759646067df 100644 --- a/paddle/fluid/distributed/fleet_executor/carrier.cc +++ b/paddle/fluid/distributed/fleet_executor/carrier.cc @@ -179,6 +179,7 @@ void Carrier::Start() { "Using carrier before initialized.")); InterceptorMessage start_msg; start_msg.set_dst_id(SOURCE_ID); + start_msg.set_src_id(SOURCE_ID); start_msg.set_message_type(START); Send(start_msg); // TODO(wangxi): async step From 78ec942b7b4918ff710a13b28c79a91e7f2e5a03 Mon Sep 17 00:00:00 2001 
From: RedContritio Date: Tue, 31 Jan 2023 15:08:12 +0800 Subject: [PATCH 34/89] =?UTF-8?q?Fix=20=E7=A9=BA=E6=8C=87=E9=92=88=20(Null?= =?UTF-8?q?=20pointer)=20of=20case15:=20paddle.broadcast=5Ftensors=20(#499?= =?UTF-8?q?80)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix incorrect output shape of broadcast * add unittest --- paddle/phi/infermeta/multiary.cc | 2 +- .../tests/unittests/test_broadcast_tensors_op.py | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index ef94266b4ebe1..545b3c6f52354 100644 --- a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -791,7 +791,7 @@ void BroadcastTensorsInferMeta(const std::vector& x, // We performed bcast semantics check at python level // So input tensors should all have legal shape - target_dim_size = std::max(target_dim_size, dim_size); + target_dim_size = dim_size == 1 ? target_dim_size : dim_size; } target_dims[target_rank - index - 1] = target_dim_size; } diff --git a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py index 6eec711c49e0a..9879aac254fb7 100644 --- a/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py +++ b/python/paddle/fluid/tests/unittests/test_broadcast_tensors_op.py @@ -33,14 +33,12 @@ def find_output_shape(input_list): rank = len(x.shape) output_rank = max(output_rank, rank) - output_shape = [0 for i in range(output_rank)] + output_shape = [1 for i in range(output_rank)] for i in range(output_rank): for x in input_list: shape = list(reversed(x.shape)) - size = 1 - if i < len(shape): - size = shape[i] - output_shape[i] = max(output_shape[i], size) + if i < len(shape) and shape[i] != 1: + output_shape[i] = shape[i] return list(reversed(output_shape)) @@ -80,6 +78,11 @@ def gen_mixed_tensors_test(dtype): return make_inputs_outputs(input_shapes, dtype) +def gen_empty_tensors_test(dtype): + input_shapes = [(0), (0), (0)] + return make_inputs_outputs(input_shapes, dtype) + + class TestCPUBroadcastTensorsOp(OpTest): def set_place(self): self.place = core.CPUPlace() @@ -95,6 +98,7 @@ def setUp(self): gen_rank_diff_test, gen_no_broadcast_test, gen_mixed_tensors_test, + gen_empty_tensors_test, ] self.set_place() self.set_dtypes() From fce05d7d91300179320c5242d1b92ea9de418bf2 Mon Sep 17 00:00:00 2001 From: HongyuJia Date: Tue, 31 Jan 2023 15:17:52 +0800 Subject: [PATCH 35/89] update pybind11, 2.4.3->2.6.0 (#50068) --- cmake/external/pybind11.cmake | 2 +- python/paddle/fluid/dygraph/varbase_patch_methods.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cmake/external/pybind11.cmake b/cmake/external/pybind11.cmake index e236767cec156..49e3111bae9c5 100644 --- a/cmake/external/pybind11.cmake +++ b/cmake/external/pybind11.cmake @@ -16,7 +16,7 @@ include(ExternalProject) set(PYBIND_PREFIX_DIR ${THIRD_PARTY_PATH}/pybind) set(PYBIND_REPOSITORY ${GIT_URL}/pybind/pybind11.git) -set(PYBIND_TAG v2.4.3) +set(PYBIND_TAG v2.6.0) set(PYBIND_INCLUDE_DIR ${THIRD_PARTY_PATH}/pybind/src/extern_pybind/include) include_directories(${PYBIND_INCLUDE_DIR}) diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index 9f0d8297f349b..d6d45f23146f8 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -1079,7 +1079,7 @@ def 
__hash__(self): # NOTE(zhiqiu): pybind11 will set a default __str__ method of enum class. # So, we need to overwrite it to a more readable one. # See details in https://github.com/pybind/pybind11/issues/2537. - origin = getattr(core.VarDesc.VarType, "__repr__") + origin = getattr(core.VarDesc.VarType, "__str__") def dtype_str(dtype): if dtype in _PADDLE_DTYPE_2_NUMPY_DTYPE: @@ -1092,7 +1092,7 @@ def dtype_str(dtype): # for example, paddle.fluid.core.VarDesc.VarType.LOD_TENSOR return origin(dtype) - setattr(core.VarDesc.VarType, "__repr__", dtype_str) + setattr(core.VarDesc.VarType, "__str__", dtype_str) _already_patch_repr = True # patch math methods for varbase From 5d110365c2a05dcaf7fbe0b808e6d4c533d581e8 Mon Sep 17 00:00:00 2001 From: jameszhang Date: Tue, 31 Jan 2023 15:19:13 +0800 Subject: [PATCH 36/89] [KUNLUN] rename test_pool_max_op.py (#49945) * [KUNLUN] rename test_pool_max_op.py * update xpu toolchain --- cmake/external/xpu.cmake | 2 +- .../xpu/{test_pool_max_op.py => test_pool_max_op_xpu.py} | 0 2 files changed, 1 insertion(+), 1 deletion(-) rename python/paddle/fluid/tests/unittests/xpu/{test_pool_max_op.py => test_pool_max_op_xpu.py} (100%) diff --git a/cmake/external/xpu.cmake b/cmake/external/xpu.cmake index 4711ce83da7d5..7eee112036adc 100644 --- a/cmake/external/xpu.cmake +++ b/cmake/external/xpu.cmake @@ -7,7 +7,7 @@ set(XPU_PROJECT "extern_xpu") set(XPU_API_LIB_NAME "libxpuapi.so") set(XPU_RT_LIB_NAME "libxpurt.so") -set(XPU_BASE_DATE "20230114") +set(XPU_BASE_DATE "20230119") set(XPU_XCCL_BASE_VERSION "1.0.7") if(NOT DEFINED XPU_BASE_URL) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op.py b/python/paddle/fluid/tests/unittests/xpu/test_pool_max_op_xpu.py similarity index 100% rename from python/paddle/fluid/tests/unittests/xpu/test_pool_max_op.py rename to python/paddle/fluid/tests/unittests/xpu/test_pool_max_op_xpu.py From 7aaaa1c61eaf2d699d5ef1e946de6566f1d92ce8 Mon Sep 17 00:00:00 2001 From: ronnywang Date: Tue, 31 Jan 2023 15:38:04 +0800 Subject: [PATCH 37/89] Add unified device management api (#48651) * [CustomDevice] add custom device api * update * update * test=document_fix * update * update * add examples --- .../platform/device_event_custom_device.cc | 2 +- paddle/fluid/pybind/CMakeLists.txt | 1 + paddle/fluid/pybind/cuda_streams_py.cc | 4 + paddle/fluid/pybind/custom_device_py.cc | 572 ++++++++++++++++++ paddle/fluid/pybind/custom_device_py.h | 28 + paddle/fluid/pybind/pybind.cc | 2 + paddle/phi/backends/CMakeLists.txt | 22 +- paddle/phi/backends/callback_manager.h | 10 - paddle/phi/backends/custom/custom_context.cc | 14 + paddle/phi/backends/custom/custom_context.h | 8 +- paddle/phi/backends/custom/custom_device.cc | 7 - paddle/phi/backends/device_base.h | 3 - paddle/phi/backends/device_guard.h | 4 - paddle/phi/backends/device_manager.cc | 5 +- paddle/phi/backends/device_manager.h | 5 +- paddle/phi/backends/event.cc | 2 +- paddle/phi/backends/event.h | 2 +- python/paddle/device/__init__.py | 480 +++++++++++++++ python/paddle/device/cuda/__init__.py | 19 + python/paddle/device/xpu/__init__.py | 7 + python/paddle/fluid/core.py | 7 + .../custom_runtime/test_custom_cpu_plugin.py | 29 + 22 files changed, 1189 insertions(+), 44 deletions(-) create mode 100644 paddle/fluid/pybind/custom_device_py.cc create mode 100644 paddle/fluid/pybind/custom_device_py.h diff --git a/paddle/fluid/platform/device_event_custom_device.cc b/paddle/fluid/platform/device_event_custom_device.cc index a45cb43baf2ec..6d284d657818a 100644 --- 
a/paddle/fluid/platform/device_event_custom_device.cc +++ b/paddle/fluid/platform/device_event_custom_device.cc @@ -76,7 +76,7 @@ bool DeviceEventQueryCustomDevice(const DeviceEvent* event) { void DeviceEventFinishCustomDevice(const DeviceEvent* event) { auto* wrapper = static_cast(event->GetEvent().get()); - wrapper->inner_event_->Synchonrize(); + wrapper->inner_event_->Synchronize(); } void DeviceEventCustomDeviceWaitCustomDevice(const DeviceEvent* event, diff --git a/paddle/fluid/pybind/CMakeLists.txt b/paddle/fluid/pybind/CMakeLists.txt index dba7f0d032b39..283b305d71806 100755 --- a/paddle/fluid/pybind/CMakeLists.txt +++ b/paddle/fluid/pybind/CMakeLists.txt @@ -138,6 +138,7 @@ set(PYBIND_SRCS generator_py.cc communication.cc cuda_streams_py.cc + custom_device_py.cc xpu_streams_py.cc jit.cc auto_parallel_py.cc) diff --git a/paddle/fluid/pybind/cuda_streams_py.cc b/paddle/fluid/pybind/cuda_streams_py.cc index 8898088596e71..41202daa9c521 100644 --- a/paddle/fluid/pybind/cuda_streams_py.cc +++ b/paddle/fluid/pybind/cuda_streams_py.cc @@ -243,6 +243,10 @@ void BindCudaStream(py::module *m_ptr) { print(ptr) )DOC") + .def_property_readonly("place", + [](phi::CUDAStream &self) { + return platform::CUDAPlace(self.place()); + }) #endif .def( "__init__", diff --git a/paddle/fluid/pybind/custom_device_py.cc b/paddle/fluid/pybind/custom_device_py.cc new file mode 100644 index 0000000000000..d3b4183f2f4f0 --- /dev/null +++ b/paddle/fluid/pybind/custom_device_py.cc @@ -0,0 +1,572 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/pybind/custom_device_py.h" + +#include +#include + +#include "paddle/fluid/platform/device_context.h" +#include "paddle/phi/backends/device_manager.h" +#include "paddle/phi/backends/event.h" +#include "paddle/phi/backends/stream.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { +void BindCustomDevicePy(py::module *m_ptr) { + auto &m = *m_ptr; + // Bind Methods + m.def( + "_get_current_custom_device_stream", + [](const std::string &device_type, int device_id) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + auto place = paddle::platform::CustomPlace( + device_type, + device_id == -1 ? phi::DeviceManager::GetDevice(device_type) + : device_id); + + return static_cast( + paddle::platform::DeviceContextPool::Instance().Get(place)) + ->GetStream(); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit _get_current_custom_device_stream.")); +#endif + }, + py::return_value_policy::reference, + py::arg("device_type"), + py::arg("device_id") = -1); + m.def( + "_set_current_custom_device_stream", + [](const std::string &device_type, + int device_id, + std::shared_ptr stream) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + auto place = paddle::platform::CustomPlace( + device_type, + device_id == -1 ? 
phi::DeviceManager::GetDevice(device_type) + : device_id); + static_cast( + paddle::platform::DeviceContextPool::Instance().Get(place)) + ->SetStream(stream); + return stream; +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit _set_current_custom_device_stream.")); +#endif + }, + py::arg("device_type"), + py::arg("device_id") = -1, + py::arg("stream") = nullptr); + m.def("_synchronize_custom_device", + [](const std::string &device_type, int device_id) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + auto place = paddle::platform::CustomPlace( + device_type, + device_id == -1 ? phi::DeviceManager::GetDevice(device_type) + : device_id); + phi::DeviceManager::SynchronizeDevice(place); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit _synchronize_custom_device.")); +#endif + }); + + py::class_>( + m, "CustomDeviceStream", R"DOC( + The handle of the custom device stream. + + Parameters: + device(paddle.CustomPlace()|str): The device which wanted to allocate the stream. + + device_id(int, optional): The id of the device which wanted to allocate the stream. + If device is None or negative integer, device will be the current device. + If device is positive integer, it must less than the device count. Default: None. + + priority(int|None, optional): The priority of stream. The priority can be 1(high) or 2(normal). + If priority is None, the priority is 2(normal). Default: None. + + blocking(int|None, optional): Whether the stream is executed synchronously. Default: False. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + s3 = paddle.device.custom.Stream('custom_cpu') + s2 = paddle.device.custom.Stream('custom_cpu', 0) + s1 = paddle.device.custom.Stream(paddle.CustomPlace('custom_cpu')) + s1 = paddle.device.custom.Stream(paddle.CustomPlace('custom_cpu'), 1) + s1 = paddle.device.custom.Stream(paddle.CustomPlace('custom_cpu'), 1, True) + + )DOC") + .def( + "__init__", + [](phi::stream::Stream &self, + const platform::CustomPlace &place, + int priority, + bool blocking) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + new (&self) phi::stream::Stream(); + self.Init( + place, + static_cast(priority), + static_cast( + blocking ? phi::stream::Stream::Flag::kDefaultFlag + : phi::stream::Stream::Flag::kStreamNonBlocking)); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + py::arg("device"), + py::arg("priority") = 2, + py::arg("blocking") = false) + .def( + "__init__", + [](phi::stream::Stream &self, + const std::string &device_type, + int device_id, + int priority, + bool blocking) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + new (&self) phi::stream::Stream(); + self.Init( + phi::CustomPlace( + device_type, + device_id == -1 ? phi::DeviceManager::GetDevice(device_type) + : device_id), + static_cast(priority), + static_cast( + blocking ? phi::stream::Stream::Flag::kDefaultFlag + : phi::stream::Stream::Flag::kStreamNonBlocking)); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. 
" + "Cannot visit CustomDeviceStream.")); +#endif + }, + py::arg("device"), + py::arg("device_id") = -1, + py::arg("priority") = 2, + py::arg("blocking") = false) + .def( + "wait_event", + [](const phi::stream::Stream &self, phi::event::Event *event) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + self.WaitEvent(event); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + R"DOC( + Makes all future work submitted to stream wait for all work captured in event. + + Parameters: + event(CustomDeviceEvent): The event to wait on. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + s = paddle.device.custom.Stream(place) + event = paddle.device.custom.Event(place) + s.wait_event(event) + + )DOC") + .def( + "wait_stream", + [](const phi::stream::Stream &self, phi::stream::Stream *other) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + phi::event::Event event; + event.Init(self.GetPlace()); + event.Record(other); + self.WaitEvent(&event); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + R"DOC( + Synchronizes with the given stream. + + Parameters: + stream(CUDAStream): The stream to synchronize with. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + s1 = paddle.device.custom.Stream(place) + s2 = paddle.device.custom.Stream(place) + s1.wait_stream(s2) + + )DOC") + .def( + "query", + [](const phi::stream::Stream &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + return self.Query(); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + R"DOC( + Return the status whether if all operations in stream have completed. + + Returns: A boolean value. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + s = paddle.device.custom.Stream(place) + is_done = s.query() + + )DOC") + .def( + "synchronize", + [](const phi::stream::Stream &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + self.Synchronize(); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + R"DOC( + Waits for stream tasks to complete. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + s = paddle.device.custom.Stream(place) + s.synchronize() + + )DOC") + .def( + "record_event", + [](const phi::stream::Stream &self, phi::event::Event *event) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + if (event == nullptr) { + event = new phi::event::Event; + event->Init(self.GetPlace()); + } + event->Record(&self); + return event; +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + R"DOC( + Record an event in the stream. + + Parameters: + event(CustomDeviceEvent, optional): The event to be record. If event is None, a new event is created. + Default: None. + + Returns: + The recored event. + + Examples: + .. 
code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + s = paddle.device.custom.Stream(place) + event = s.record_event() + + )DOC", + py::arg("event") = nullptr) + .def_property_readonly( + "raw_stream", + [](const phi::stream::Stream &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + VLOG(10) << self.raw_stream(); + return reinterpret_cast(self.raw_stream()); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }, + R"DOC( + return the raw stream of type CustomDeviceStream as type int. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + import ctypes + stream = paddle.device.custom.current_stream().raw_stream + print(stream) + + ptr = ctypes.c_void_p(stream) # convert back to void* + print(ptr) + + )DOC") + .def_property_readonly("place", [](const phi::stream::Stream &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + return reinterpret_cast(self.GetPlace()); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceStream.")); +#endif + }); + + py::class_>( + m, "CustomDeviceEvent", R"DOC( + The handle of the custom device event. + + Parameters: + device(paddle.CustomPlace()|str): The device which wanted to allocate the stream. + + device_id(int, optional): The id of the device which wanted to allocate the stream. + If device is None or negative integer, device will be the current device. + If device is positive integer, it must less than the device count. Default: None. + + enable_timing(bool, optional): Whether the event will measure time. Default: False. + + blocking(bool, optional): Whether the wait() func will be blocking. Default: False; + + interprocess(bool, optional): Whether the event can be shared between processes. Default: False. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + event = paddle.device.custom.Event(place) + + )DOC") + .def( + "__init__", + [](phi::event::Event &self, + const platform::CustomPlace &place, + bool enable_timing, + bool blocking, + bool interprocess) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + auto flag = static_cast( + static_cast( + enable_timing ? 0 + : phi::event::Event::Flag::DisableTiming) | + static_cast( + !blocking ? 0 : phi::event::Event::Flag::BlockingSync) | + static_cast( + !interprocess ? 0 : phi::event::Event::Flag::Interprocess) + + ); + new (&self) phi::event::Event(); + self.Init(place, flag); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceEvent.")); +#endif + }, + py::arg("device"), + py::arg("enable_timing") = false, + py::arg("blocking") = false, + py::arg("interprocess") = false) + .def( + "__init__", + [](phi::event::Event &self, + const std::string &device_type, + int device_id, + bool enable_timing, + bool blocking, + bool interprocess) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + auto flag = static_cast( + static_cast( + enable_timing ? 0 + : phi::event::Event::Flag::DisableTiming) | + static_cast( + !blocking ? 0 : phi::event::Event::Flag::BlockingSync) | + static_cast( + !interprocess ? 0 : phi::event::Event::Flag::Interprocess) + + ); + new (&self) phi::event::Event(); + self.Init( + phi::CustomPlace( + device_type, + device_id == -1 ? 
phi::DeviceManager::GetDevice(device_type) + : device_id), + flag); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceEvent.")); +#endif + }, + py::arg("device"), + py::arg("device_id") = -1, + py::arg("enable_timing") = false, + py::arg("blocking") = false, + py::arg("interprocess") = false) + .def( + "record", + [](phi::event::Event &self, phi::stream::Stream *stream) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + if (stream == nullptr) { + stream = static_cast( + paddle::platform::DeviceContextPool::Instance().Get( + self.GetPlace())) + ->GetStream() + .get(); + } + self.Record(stream); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceEvent.")); +#endif + }, + R"DOC( + Records the event in the given stream. + + Parameters: + stream(CustomDeviceStream, optional): The handle of custom device stream. If None, the stream is the current stream. Default: None. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + event = paddle.device.custom.Event(place) + event.record() + + )DOC") + .def( + "query", + [](const phi::event::Event &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + return self.Query(); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceEvent.")); +#endif + }, + R"DOC( + Queries the event's status. + + Returns: A boolean which indicates all work currently captured by the event has been completed. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + event = paddle.device.cuda.Event(place) + is_done = event.query() + + )DOC") + .def( + "synchronize", + [](const phi::event::Event &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + self.Synchronize(); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceEvent.")); +#endif + }, + R"DOC( + Waits for an event to complete. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + place = paddle.CustomPlace('custom_cpu', 0) + event = paddle.device.custom.Event(place) + event.synchronize() + + )DOC") + .def_property_readonly( + "raw_event", + [](const phi::event::Event &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + VLOG(10) << self.raw_event(); + return reinterpret_cast(self.raw_event()); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. " + "Cannot visit CustomDeviceEvent.")); +#endif + }, + R"DOC( + return the raw event of type CustomDeviceEvent as type int. + + Examples: + .. code-block:: python + + # required: custom_device + import paddle + import ctypes + place = paddle.CustomPlace('custom_cpu', 0) + event = paddle.device.custom.Event(place) + raw_event = event.raw_event + print(raw_event) + + ptr = ctypes.c_void_p(raw_event) # convert back to void* + print(ptr) + + )DOC") + .def_property_readonly("place", [](const phi::event::Event &self) { +#ifdef PADDLE_WITH_CUSTOM_DEVICE + return reinterpret_cast(self.GetPlace()); +#else + PADDLE_THROW(platform::errors::Unavailable( + "Paddle is not compiled with CustomDevice. 
" + "Cannot visit CustomDeviceEvent.")); +#endif + }); +} +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/custom_device_py.h b/paddle/fluid/pybind/custom_device_py.h new file mode 100644 index 0000000000000..26aed199bc729 --- /dev/null +++ b/paddle/fluid/pybind/custom_device_py.h @@ -0,0 +1,28 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "pybind11/pybind11.h" +#include "pybind11/stl.h" + +namespace py = pybind11; + +namespace paddle { +namespace pybind { + +void BindCustomDevicePy(py::module* m); + +} // namespace pybind +} // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index d2f622537216b..36e2436406812 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -88,6 +88,7 @@ limitations under the License. */ #include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/platform/profiler/profiler.h" #include "paddle/fluid/pybind/cuda_streams_py.h" +#include "paddle/fluid/pybind/custom_device_py.h" #include "paddle/fluid/pybind/distributed_py.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/pybind/imperative.h" @@ -629,6 +630,7 @@ PYBIND11_MODULE(libpaddle, m) { BindCudaStream(&m); BindXpuStream(&m); BindJit(&m); + BindCustomDevicePy(&m); // Not used, just make sure cpu_info.cc is linked. phi::backends::cpu::CpuTotalPhysicalMemory(); diff --git a/paddle/phi/backends/CMakeLists.txt b/paddle/phi/backends/CMakeLists.txt index cfb55565a2eb3..3b2314b0963cf 100644 --- a/paddle/phi/backends/CMakeLists.txt +++ b/paddle/phi/backends/CMakeLists.txt @@ -36,18 +36,18 @@ if(WITH_MKLDNN) list(APPEND BACKENDS_DEPS mkldnn) endif() +list( + APPEND + BACKENDS_SRCS + callback_manager.cc + device_guard.cc + stream.cc + event.cc + device_base.cc + device_manager.cc) + if(WITH_CUSTOM_DEVICE) - list( - APPEND - BACKENDS_SRCS - callback_manager.cc - device_guard.cc - stream.cc - event.cc - device_base.cc - device_manager.cc - custom/custom_context.cc - custom/custom_device.cc) + list(APPEND BACKENDS_SRCS custom/custom_context.cc custom/custom_device.cc) endif() add_library(phi_backends "${BACKENDS_SRCS}") diff --git a/paddle/phi/backends/callback_manager.h b/paddle/phi/backends/callback_manager.h index 2bb26745288df..1a5c201620a49 100644 --- a/paddle/phi/backends/callback_manager.h +++ b/paddle/phi/backends/callback_manager.h @@ -13,16 +13,6 @@ // limitations under the License. 
#pragma once - -#ifdef PADDLE_WITH_CUDA -#include -#include -#endif - -#ifdef PADDLE_WITH_HIP -#include -#endif - #include #include // NOLINT #include diff --git a/paddle/phi/backends/custom/custom_context.cc b/paddle/phi/backends/custom/custom_context.cc index e34e0f94b7067..14c2afe3950ba 100644 --- a/paddle/phi/backends/custom/custom_context.cc +++ b/paddle/phi/backends/custom/custom_context.cc @@ -36,6 +36,12 @@ struct CustomContext::Impl { return reinterpret_cast(stream_->raw_stream()); } + std::shared_ptr GetStream() const { return stream_; } + + void SetStream(std::shared_ptr stream) { + stream_ = stream; + } + void Wait() const { stream_->Wait(); } Place place_; @@ -49,6 +55,14 @@ const Place& CustomContext::GetPlace() const { return impl_->GetPlace(); } void* CustomContext::stream() const { return impl_->stream(); } +std::shared_ptr CustomContext::GetStream() const { + return impl_->GetStream(); +} + +void CustomContext::SetStream(std::shared_ptr stream) { + impl_->SetStream(stream); +} + void CustomContext::Wait() const { return impl_->Wait(); } CustomContext::CustomContext(const CustomPlace& place) diff --git a/paddle/phi/backends/custom/custom_context.h b/paddle/phi/backends/custom/custom_context.h index d007cb62cd4f9..18d0dfedb2188 100644 --- a/paddle/phi/backends/custom/custom_context.h +++ b/paddle/phi/backends/custom/custom_context.h @@ -16,6 +16,7 @@ limitations under the License. */ #include +#include "paddle/phi/backends/stream.h" #include "paddle/phi/common/place.h" #include "paddle/phi/core/device_context.h" @@ -30,9 +31,14 @@ class CustomContext : public DeviceContext, const Place& GetPlace() const override; - /*! \brief Return stream in the device context. */ + /*! \brief Return raw stream in the device context. */ void* stream() const; + /*! \brief Return stream in the device context. */ + std::shared_ptr GetStream() const; + + void SetStream(std::shared_ptr stream); + // Wait for all operations completion in the stream. void Wait() const override; diff --git a/paddle/phi/backends/custom/custom_device.cc b/paddle/phi/backends/custom/custom_device.cc index 2c986df278173..c0e28a90e9ff3 100644 --- a/paddle/phi/backends/custom/custom_device.cc +++ b/paddle/phi/backends/custom/custom_device.cc @@ -146,13 +146,6 @@ class CustomDevice : public DeviceInterface { stream::Stream::Priority::kNormal, const stream::Stream::Flag& flag = stream::Stream::Flag::kDefaultFlag) override { - if (priority != stream::Stream::Priority::kNormal || - flag != stream::Stream::Flag::kDefaultFlag) { - PADDLE_THROW(phi::errors::Unavailable( - "priority != stream::Stream::Priority::kNormal || flag != " - "stream::Stream::Flag::kDefaultFlag is not allowed on " - "CustomDevice.")); - } const auto device = &devices_pool[dev_id]; C_Stream c_stream; PADDLE_ENFORCE_CUSTOM_DEVICE_SUCCESS( diff --git a/paddle/phi/backends/device_base.h b/paddle/phi/backends/device_base.h index 7030777474d5a..893aa39d8a51b 100644 --- a/paddle/phi/backends/device_base.h +++ b/paddle/phi/backends/device_base.h @@ -13,7 +13,6 @@ // limitations under the License. #pragma once -#ifdef PADDLE_WITH_CUSTOM_DEVICE #include #include "paddle/phi/backends/c_comm_lib.h" @@ -275,5 +274,3 @@ class DeviceInterface { // Driver / Runtime }; } // namespace phi - -#endif diff --git a/paddle/phi/backends/device_guard.h b/paddle/phi/backends/device_guard.h index 668951f8a1c98..eb14236d251b3 100644 --- a/paddle/phi/backends/device_guard.h +++ b/paddle/phi/backends/device_guard.h @@ -13,8 +13,6 @@ // limitations under the License. 
#pragma once -#ifdef PADDLE_WITH_CUSTOM_DEVICE - #include "paddle/phi/backends/device_manager.h" namespace phi { @@ -46,5 +44,3 @@ class DeviceGuard { }; } // namespace phi - -#endif diff --git a/paddle/phi/backends/device_manager.cc b/paddle/phi/backends/device_manager.cc index 2bb57ab8fe6ea..69c2d9d088cfe 100644 --- a/paddle/phi/backends/device_manager.cc +++ b/paddle/phi/backends/device_manager.cc @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -#ifdef PADDLE_WITH_CUSTOM_DEVICE #include "paddle/phi/backends/device_manager.h" #include "paddle/phi/common/complex.h" @@ -663,6 +662,8 @@ std::vector ListAllLibraries(const std::string& library_dir) { std::vector libraries; std::regex express(".*\\.so"); std::match_results results; + +#if !defined(_WIN32) DIR* dir = nullptr; dirent* ptr = nullptr; @@ -680,9 +681,9 @@ std::vector ListAllLibraries(const std::string& library_dir) { } closedir(dir); } +#endif return libraries; } } // namespace phi -#endif diff --git a/paddle/phi/backends/device_manager.h b/paddle/phi/backends/device_manager.h index 130f8fab449ac..990157da0f462 100644 --- a/paddle/phi/backends/device_manager.h +++ b/paddle/phi/backends/device_manager.h @@ -13,7 +13,6 @@ // limitations under the License. #pragma once -#ifdef PADDLE_WITH_CUSTOM_DEVICE #include @@ -285,12 +284,14 @@ class DeviceManager { std::vector ListAllLibraries(const std::string& library_dir); +#ifdef PADDLE_WITH_CUSTOM_DEVICE void LoadCustomRuntimeLib(const std::string& dso_lib_path, void* dso_handle); void LoadCustomRuntimeLib(const CustomRuntimeParams& runtime_params, std::unique_ptr device_interface, const std::string& dso_lib_path, void* dso_handle); +#endif class Registrar { public: @@ -303,5 +304,3 @@ class Registrar { }; } // namespace phi - -#endif diff --git a/paddle/phi/backends/event.cc b/paddle/phi/backends/event.cc index b594d919abc18..7d87318cfec55 100644 --- a/paddle/phi/backends/event.cc +++ b/paddle/phi/backends/event.cc @@ -59,7 +59,7 @@ void Event::Record(const stream::Stream* stream) { stream->RecordEvent(this); } bool Event::Query() const { return device_->QueryEvent(this); } -void Event::Synchonrize() const { device_->SynchronizeEvent(this); } +void Event::Synchronize() const { device_->SynchronizeEvent(this); } const Place& Event::GetPlace() const { return place_; } diff --git a/paddle/phi/backends/event.h b/paddle/phi/backends/event.h index 8de223528f8fd..a58083ff2898f 100644 --- a/paddle/phi/backends/event.h +++ b/paddle/phi/backends/event.h @@ -46,7 +46,7 @@ class Event { void Destroy(); void Record(const stream::Stream* stream); bool Query() const; - void Synchonrize() const; + void Synchronize() const; const Place& GetPlace() const; private: diff --git a/python/paddle/device/__init__.py b/python/paddle/device/__init__.py index 2751009dd3090..defb6321847c2 100644 --- a/python/paddle/device/__init__.py +++ b/python/paddle/device/__init__.py @@ -15,6 +15,8 @@ # TODO: define the functions to manipulate devices import re import os +import ctypes +import paddle from paddle.fluid import core from paddle.fluid import framework from paddle.fluid.dygraph.parallel import ParallelEnv @@ -43,6 +45,12 @@ 'get_all_custom_device_type', 'get_available_device', 'get_available_custom_device', + 'Stream', + 'Event', + 'current_stream', + 'set_stream', + 'stream_guard', + 'synchronize', ] _cudnn_version = None @@ -514,3 +522,475 @@ def get_available_custom_device(): # Output: ['CustomCPU', 'CustomGPU:0', 'CustomGPU:1'] """ return 
core.get_available_custom_device() + + +class Event(object): + ''' + A device event wrapper around StreamBase. + Parameters: + device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. + It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + enable_timing (bool, optional): indicates if the event should measure time, default is False + blocking (bool, optional): if True, ``wait`` will be blocking, default is False + interprocess (bool): if True, the event can be shared between processes, default is False + Returns: + Event: The event. + Examples: + .. code-block:: python + # required: custom_device + import paddle + e1 = paddle.device.Event() + e2 = paddle.device.Event('custom_cpu') + e3 = paddle.device.Event('custom_cpu:0') + e4 = paddle.device.Event(paddle.CustomPlace('custom_cpu', 0)) + ''' + + def __init__( + self, + device=None, + enable_timing=False, + blocking=False, + interprocess=False, + ): + if device is None: + self.device = paddle.framework._current_expected_place() + elif isinstance(device, str): + self.device = paddle.device._convert_to_place(device) + else: + self.device = device + + if paddle.is_compiled_with_cuda() and isinstance( + self.device, paddle.CUDAPlace + ): + self.event_base = core.CUDAEvent( + enable_timing, blocking, interprocess + ) + elif isinstance(self.device, paddle.CustomPlace): + self.event_base = core.CustomDeviceEvent( + self.device.get_device_type(), + self.device.get_device_id(), + enable_timing, + blocking, + interprocess, + ) + else: + raise TypeError( + "device should be gpu, xpu, {}".format( + ",".join(paddle.device.get_all_custom_device_type()) + ) + ) + + def record(self, stream=None): + ''' + Records the event in a given stream. + Parameters: + stream(Stream, optional): The given stream. By default, stream is None, + event will be recorded in current_stream. + Returns: + None. + Examples: + .. code-block:: python + # required: custom_device + import paddle + e = paddle.device.Event() + e.record() + + s = paddle.device.Stream() + e.record(s) + ''' + if stream is None: + stream = current_stream(self.device) + + self.event_base.record(stream.stream_base) + + def query(self): + ''' + Checks if all work currently captured by event has completed. + Returns: + bool: Whether all work currently captured by event has completed. + Examples: + .. code-block:: python + # required: custom_device + import paddle + e = paddle.device.Event() + e.query() + ''' + return self.event_base.query() + + def elapsed_time(self, end_event): + ''' + Returns the time elapsed in milliseconds after the event was + recorded and before the end_event was recorded. + Returns: + int: The time. + Examples: + .. code-block:: python + # required: custom_device + import paddle + e1 = paddle.device.Event() + e2 = paddle.device.Event() + e1.elapsed_time(e2) + ''' + return 0 + + def synchronize(self): + ''' + Waits for the event to complete. + Waits until the completion of all work currently captured in this event. + This prevents the CPU thread from proceeding until the event completes. + Returns: + None. + Examples: + .. 
code-block:: python + # required: custom_device + import paddle + e = paddle.device.Event() + e.synchronize() + ''' + self.event_base.synchronize() + + def __repr__(self): + return self.event_base + + +class Stream(object): + ''' + A device stream wrapper around StreamBase. + Parameters: + device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): Which device the stream runn on. If device is None, the device is the current device. Default: None. + It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + priority(int, optional): priority of the CUDA stream. Can be either + 1 (high priority) or 2 (low priority). By default, streams have + priority 2. + Returns: + Stream: The stream. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s1 = paddle.device.Stream() + s2 = paddle.device.Stream('custom_cpu') + s3 = paddle.device.Stream('custom_cpu:0') + s4 = paddle.device.Stream(paddle.CustomPlace('custom_cpu', 0)) + ''' + + def __init__(self, device=None, priority=2, stream_base=None): + if stream_base is not None: + if isinstance( + stream_base, (core.CUDAStream, core.CustomDeviceStream) + ): + self.stream_base = stream_base + self.device = stream_base.place + else: + raise TypeError( + "stream_base should be CUDAStream, CustomDeviceStream" + ) + return + + if device is None: + self.device = paddle.framework._current_expected_place() + elif isinstance(device, str): + self.device = paddle.device._convert_to_place(device) + else: + self.device = device + + if paddle.is_compiled_with_cuda() and isinstance( + self.device, paddle.CUDAPlace + ): + self.stream_base = core.CUDAStream( + self.device.get_device_id(), priority + ) + elif isinstance(self.device, paddle.CustomPlace): + self.stream_base = core.CustomDeviceStream( + self.device.get_device_type(), + self.device.get_device_id(), + priority, + blocking=False, + ) + else: + raise TypeError( + "device should be gpu, xpu, {}".format( + ",".join(paddle.device.get_all_custom_device_type()) + ) + ) + + def wait_event(self, event): + ''' + Makes all future work submitted to the stream wait for an event. + Parameters: + event (Event): an event to wait for. + Returns: + None. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s = paddle.device.Stream() + e = paddle.device.Event() + s.wait_event(e) + ''' + self.stream_base.wait_event(event.event_base) + + def wait_stream(self, stream): + ''' + Synchronizes with another stream. + All future work submitted to this stream will wait until all kernels + submitted to a given stream at the time of call complete. + Parameters: + stream (Stream): a stream to synchronize. + Returns: + None. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s1 = paddle.device.Stream() + s2 = paddle.device.Stream() + s1.wait_stream(s2) + ''' + self.stream_base.wait_stream(stream.stream_base) + + def record_event(self, event=None): + ''' + Records an event. + Parameters: + event (Event, optional): event to record. If not given, a new one + will be allocated. + Returns: + Event: Recorded event. + Examples: + .. 
code-block:: python + # required: custom_device + import paddle + s = paddle.device.Stream() + e1 = s.record_event() + + e2 = paddle.device.Event() + s.record_event(e2) + ''' + if event is None: + event = Event(self.device) + event.record(self) + return event + + def query(self): + ''' + Checks if all the work submitted has been completed. + Returns: + bool: Whether all kernels in this stream are completed. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s = paddle.device.Stream() + s.query() + ''' + return self.stream_base.query() + + def synchronize(self): + ''' + Wait for all the kernels in this stream to complete. + Returns: + None. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s = paddle.device.Stream() + s.synchronize() + ''' + self.stream_base.synchronize() + + @property + def _as_parameter_(self): + if isinstance(self.stream_base, core.CUDAStream): + return ctypes.c_void_p(self.stream_base.cuda_stream) + else: + return ctypes.c_void_p(self.stream_base.raw_stream) + + def __eq__(self, o): + if isinstance(o, Stream): + return super(Stream, self).__eq__(o) + return False + + def __hash__(self): + return hash((self.stream_base, self.device)) + + def __repr__(self): + return ''.format( + self.device, self._as_parameter_.value + ) + + +def current_stream(device=None): + ''' + Return the current stream by the device. + Parameters: + device(str|paddle.CUDAPlace(n)|paddle.CustomPlace(n)): The device which want to get stream from. If device is None, the device is the current device. Default: None. + It can be ``gpu``, ``gpu:x``,``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + where ``x`` is the index of the GPUs, CustomDevicecs. And it can be paddle.CUDAPlace(n) or paddle.CustomPlace(n). + Returns: + Stream: The stream to the device. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s1 = paddle.device.current_stream() + s2 = paddle.device.current_stream("gpu:0") + place = paddle.CustomPlace('custom_cpu', 0) + s3 = paddle.device.current_stream(place) + ''' + if device is None: + place = paddle.framework._current_expected_place() + elif isinstance(device, str): + place = paddle.device._convert_to_place(device) + else: + place = device + + if paddle.is_compiled_with_cuda() and isinstance(place, paddle.CUDAPlace): + return Stream( + stream_base=core._get_current_stream(place.get_device_id()) + ) + elif isinstance(place, paddle.CustomPlace): + return Stream( + stream_base=core._get_current_custom_device_stream( + place.get_device_type(), place.get_device_id() + ) + ) + else: + raise TypeError( + "device should be gpu, xpu, {}".format( + ",".join(paddle.device.get_all_custom_device_type()) + ) + ) + + +def set_stream(stream): + ''' + Set the current stream. + Parameters: + stream(Stream): The selected stream. + Returns: + Stream: The previous stream. + Examples: + .. 
code-block:: python + # required: custom_device + import paddle + s = paddle.device.Stream() + paddle.device.set_stream(s) + ''' + + prev_stream = current_stream(stream.stream_base.place) + + if paddle.is_compiled_with_cuda() and isinstance( + stream.stream_base.place, paddle.CUDAPlace + ): + core._set_current_stream(stream.stream_base) + elif isinstance(stream.stream_base.place, paddle.CustomPlace): + core._set_current_custom_device_stream( + stream.stream_base.place.get_device_type(), + stream.stream_base.place.get_device_id(), + stream.stream_base, + ) + else: + raise TypeError( + "device should be gpu, xpu, {}".format( + ",".join(paddle.device.get_all_custom_device_type()) + ) + ) + + return prev_stream + + +class stream_guard(object): + ''' + Notes: + This API only supports dynamic graph mode currently. + A context manager that specifies the current stream context by the given stream. + Parameters: + stream(Stream, optional): the selected stream. If stream is None, just yield. + Returns: + None. + Examples: + .. code-block:: python + # required: custom_device + import paddle + s = paddle.device.Stream() + data1 = paddle.ones(shape=[20]) + data2 = paddle.ones(shape=[20]) + data3 = data1 + data2 + with paddle.device.stream_guard(s): + s.wait_stream(paddle.device.default_stream()) + data4 = data1 + data3 + ''' + + def __init__(self, stream=None): + self.stream = stream + + def __enter__(self): + cur_stream = self.stream + if cur_stream is None: + return + + self.src_prev_stream = current_stream(cur_stream.device) + if self.src_prev_stream.device != cur_stream.device: + self.tmp_place = paddle.fluid.framework._current_expected_place() + paddle.fluid.framework._set_expected_place(cur_stream.device) + self.dst_prev_stream = current_stream(cur_stream.device) + set_stream(cur_stream) + else: + set_stream(cur_stream) + + def __exit__(self, *args): + cur_stream = self.stream + if cur_stream is None: + return + + if self.src_prev_stream.device != cur_stream.device: + set_stream(self.dst_prev_stream) + paddle.fluid.framework._set_expected_place(self.tmp_place) + set_stream(self.src_prev_stream) + else: + set_stream(self.src_prev_stream) + + +def synchronize(device=None): + ''' + Wait for the compute on the given device to finish. + Parameters: + device(str|paddle.CUDAPlace(n)|paddle.XPUPlace(n)|paddle.CustomPlace(n)): The device which want to wait for. If device is None, the device is the current device. Default: None. + It can be ``gpu``, ``gpu:x``, ``xpu``, ``xpu:x``, ``custom_device``, ``custom_device:x``, where ``custom_device`` is the name of CustomDevicec, + where ``x`` is the index of the GPUs, XPUs, NPUs or MLUs. And it can be paddle.CUDAPlace(n) or paddle.XPUPlace(n) or paddle.CustomPlace(n). + Examples: + .. 
code-block:: python + # required: custom_device + import paddle + paddle.device.synchronize() + paddle.device.synchronize("gpu:0") + place = paddle.CustomPlace('custom_cpu', 0) + paddle.device.synchronize(place) + ''' + + if device is None: + place = paddle.framework._current_expected_place() + elif isinstance(device, str): + place = paddle.device._convert_to_place(device) + else: + place = device + + if paddle.is_compiled_with_cuda() and isinstance(place, paddle.CUDAPlace): + core._device_synchronize(place.get_device_id()) + elif paddle.is_compiled_with_xpu() and isinstance(place, paddle.XPUPlace): + core._xpu_device_synchronize(place.get_device_id()) + elif isinstance(place, paddle.CustomPlace): + core._synchronize_custom_device( + place.get_device_type(), place.get_device_id() + ) + else: + raise TypeError( + "device should be gpu, xpu, {}".format( + ",".join(paddle.device.get_all_custom_device_type()) + ) + ) diff --git a/python/paddle/device/cuda/__init__.py b/python/paddle/device/cuda/__init__.py index 188e334c2f1ba..fed666e2e8e08 100644 --- a/python/paddle/device/cuda/__init__.py +++ b/python/paddle/device/cuda/__init__.py @@ -15,6 +15,7 @@ import paddle from paddle.fluid import core from paddle.fluid.wrapped_decorator import signature_safe_contextmanager +from paddle.utils import deprecated from .streams import Stream # noqa: F401 from .streams import Event # noqa: F401 @@ -37,6 +38,12 @@ ] +@deprecated( + since="2.5.0", + update_to="paddle.device.current_stream", + level=1, + reason="current_stream in paddle.device.cuda will be removed in future", +) def current_stream(device=None): ''' Return the current CUDA stream by the device. @@ -75,6 +82,12 @@ def current_stream(device=None): return core._get_current_stream(device_id) +@deprecated( + since="2.5.0", + update_to="paddle.device.synchronize", + level=1, + reason="synchronize in paddle.device.cuda will be removed in future", +) def synchronize(device=None): ''' Wait for the compute on the given CUDA device to finish. @@ -352,6 +365,12 @@ def _set_current_stream(stream): return core._set_current_stream(stream) +@deprecated( + since="2.5.0", + update_to="paddle.device.stream_guard", + level=1, + reason="stream_guard in paddle.device.cuda will be removed in future", +) @signature_safe_contextmanager def stream_guard(stream): ''' diff --git a/python/paddle/device/xpu/__init__.py b/python/paddle/device/xpu/__init__.py index a928a0f7c0405..832c1baa63153 100644 --- a/python/paddle/device/xpu/__init__.py +++ b/python/paddle/device/xpu/__init__.py @@ -14,12 +14,19 @@ import paddle from paddle.fluid import core +from paddle.utils import deprecated __all__ = [ 'synchronize', ] +@deprecated( + since="2.5.0", + update_to="paddle.device.synchronize", + level=1, + reason="synchronize in paddle.device.xpu will be removed in future", +) def synchronize(device=None): ''' Wait for the compute on the given XPU device to finish. 
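A minimal usage sketch of the unified device API introduced by this patch (not part of the diff itself); it assumes Paddle is built with a CustomDevice plugin registered as 'custom_cpu', the same assumption made by test_custom_cpu_plugin.py below, and only uses names added here (paddle.device.Stream, Event, current_stream, set_stream, stream_guard, synchronize):

    import paddle

    # Assumption: a CustomDevice plugin registered as 'custom_cpu' is available.
    paddle.set_device('custom_cpu')
    paddle.device.synchronize('custom_cpu')      # wait for all outstanding work on the device

    s = paddle.device.Stream()                   # stream on the current (custom_cpu) device
    e = paddle.device.Event()                    # event on the current device

    e.record(s)                                  # capture the stream's pending work in the event
    s.wait_event(e)                              # future work on s waits for e to complete
    print(s.query(), e.query())                  # poll completion without blocking

    prev = paddle.device.set_stream(s)           # make s current; keep the previous stream
    with paddle.device.stream_guard(prev):       # temporarily switch back inside the guard
        paddle.device.current_stream().synchronize()
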
diff --git a/python/paddle/fluid/core.py b/python/paddle/fluid/core.py index b17c29a97868a..9aaf0f684f1e7 100644 --- a/python/paddle/fluid/core.py +++ b/python/paddle/fluid/core.py @@ -314,6 +314,13 @@ def to_list(s): from .libpaddle import _is_fwd_prim_enabled from .libpaddle import __set_all_prim_enabled + # custom devivce + from .libpaddle import _get_current_custom_device_stream + from .libpaddle import _set_current_custom_device_stream + from .libpaddle import _synchronize_custom_device + from .libpaddle import CustomDeviceStream + from .libpaddle import CustomDeviceEvent + if sys.platform != 'win32': from .libpaddle import _set_process_pids from .libpaddle import _erase_process_pids diff --git a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py index 3139f13127d6a..80b91ca8f0133 100755 --- a/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py +++ b/python/paddle/fluid/tests/custom_runtime/test_custom_cpu_plugin.py @@ -56,6 +56,7 @@ def test_custom_device(self): self._test_eager_copy_to() self._test_fallback_kernel() self._test_scalar() + self._test_custom_device_py_api() def _test_custom_device_dataloader(self): import paddle @@ -257,6 +258,34 @@ def forward(self, inputs, label=None): avg_loss.backward() sgd.step() + def _test_custom_device_py_api(self): + import paddle + + p = paddle.set_device('custom_cpu') + paddle.device.synchronize('custom_cpu') + + s1 = paddle.device.Stream() + s2 = paddle.device.Stream(p) + + s1 = paddle.device.current_stream() + s2 = paddle.device.current_stream(p) + + e1 = paddle.device.Event() + e2 = paddle.device.Event(p) + + s = paddle.device.Stream() + e = paddle.device.Event() + s.query() + s.synchronize() + s.wait_event(e) + s.record_event(e) + s.wait_stream(s) + paddle.device.set_stream(s) + + e.query() + e.synchronize() + e.record(s) + if __name__ == '__main__': if os.name == 'nt' or sys.platform.startswith('darwin'): From 96a0ce60d215b8525f63a87594ea1080bc27f174 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Tue, 31 Jan 2023 15:42:18 +0800 Subject: [PATCH 38/89] fix div 0 error of NoamDecay (#49953) * fix div 0 error of NoamDecay * add unittest * Update lr.py --- .../tests/unittests/test_noamdecay_op.py | 34 +++++++++++++++++++ python/paddle/optimizer/lr.py | 3 ++ 2 files changed, 37 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_noamdecay_op.py diff --git a/python/paddle/fluid/tests/unittests/test_noamdecay_op.py b/python/paddle/fluid/tests/unittests/test_noamdecay_op.py new file mode 100644 index 0000000000000..62312c7a8b9f0 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_noamdecay_op.py @@ -0,0 +1,34 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import paddle + + +class TestSparseEmbeddingAPIError(unittest.TestCase): + def test_errors(self): + with paddle.fluid.dygraph.guard(): + # The size of input in sparse_embedding should not be 0. + def test_0_d_model(): + schedular = paddle.optimizer.lr.NoamDecay( + d_model=0, warmup_steps=0 + ) + + self.assertRaises(ValueError, test_0_d_model) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() diff --git a/python/paddle/optimizer/lr.py b/python/paddle/optimizer/lr.py index 07420be8915d3..bc5f9020b7f30 100644 --- a/python/paddle/optimizer/lr.py +++ b/python/paddle/optimizer/lr.py @@ -296,6 +296,9 @@ def __init__( last_epoch=-1, verbose=False, ): + if d_model <= 0: + raise ValueError("d_model should be grater than 0") + self.d_model = d_model self.warmup_steps = warmup_steps super().__init__(learning_rate, last_epoch, verbose) From 86a238186d39286a9ee5af6d2defb0ab80f324c1 Mon Sep 17 00:00:00 2001 From: PuQing Date: Tue, 31 Jan 2023 15:46:18 +0800 Subject: [PATCH 39/89] [Numpy] Add FP16 dtype for CastNumpy2Scalar (#50002) * add FP16 dtype for CastNumpy2Scalar * fix throw message * add test * fix SyntaxWarning * test skip for float16 * fix dtype mistakes --- paddle/fluid/pybind/eager_utils.cc | 5 ++++- paddle/fluid/pybind/op_function_common.cc | 6 ++++++ paddle/fluid/pybind/op_function_common.h | 3 +++ .../tests/unittests/test_elementwise_add_op.py | 18 ++++++++++++++++++ python/paddle/fluid/transpiler/collective.py | 4 ++-- 5 files changed, 33 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 371ba65a46d15..b3ed1df95709f 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -1343,6 +1343,9 @@ paddle::experimental::Scalar CastNumpy2Scalar(PyObject* obj, } else if (type_name == "numpy.float32") { float value = CastPyArg2Float(obj, op_type, arg_pos); return paddle::experimental::Scalar(value); + } else if (type_name == "numpy.float16") { + float16 value = CastPyArg2Float16(obj, op_type, arg_pos); + return paddle::experimental::Scalar(value); } else if (type_name == "numpy.int64") { int64_t value = CastPyArg2Long(obj, op_type, arg_pos); return paddle::experimental::Scalar(value); @@ -1352,7 +1355,7 @@ paddle::experimental::Scalar CastNumpy2Scalar(PyObject* obj, } else { PADDLE_THROW(platform::errors::InvalidArgument( "%s(): argument (position %d) must be " - "numpy.float32/float64, numpy.int32/int64, but got %s", + "numpy.float16/float32/float64, numpy.int32/int64, but got %s", op_type, arg_pos + 1, type_name)); // NOLINT diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc index 5cdd9a0fa0668..edab97c8b5e69 100644 --- a/paddle/fluid/pybind/op_function_common.cc +++ b/paddle/fluid/pybind/op_function_common.cc @@ -184,6 +184,12 @@ void CastPyArg2AttrLong(PyObject* obj, attrs[key] = CastPyArg2Long(obj, op_type, arg_pos); } +float16 CastPyArg2Float16(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos) { + return static_cast(CastPyArg2Double(obj, op_type, arg_pos)); +} + float CastPyArg2Float(PyObject* obj, const std::string& op_type, ssize_t arg_pos) { diff --git a/paddle/fluid/pybind/op_function_common.h b/paddle/fluid/pybind/op_function_common.h index 686694631cc66..57423bb3b74a3 100644 --- a/paddle/fluid/pybind/op_function_common.h +++ b/paddle/fluid/pybind/op_function_common.h @@ -55,6 +55,9 @@ int CastPyArg2Int(PyObject* obj, const std::string& op_type, ssize_t arg_pos); int64_t 
CastPyArg2Long(PyObject* obj, const std::string& op_type, ssize_t arg_pos); +float16 CastPyArg2Float16(PyObject* obj, + const std::string& op_type, + ssize_t arg_pos); float CastPyArg2Float(PyObject* obj, const std::string& op_type, ssize_t arg_pos); diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 1cb57e6d72fd5..3bf2b7cdcd703 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -737,6 +737,24 @@ def test_dygraph_add(self): paddle.enable_static() +class TestTensorAddNumpyScalar(unittest.TestCase): + def test_float32_add(self): + paddle.disable_static() + a = paddle.full([4, 5, 6], 1.5, dtype='float32') + b = np.array([1.5], dtype='float32')[0] + c = a + b + self.assertTrue(c.dtype == core.VarDesc.VarType.FP32) + + def test_float16_add(self): + if not core.is_compiled_with_cuda(): + return + paddle.disable_static() + a = paddle.full([4, 5, 6], 1.5, dtype='float16') + b = np.array([1.5], dtype='float16')[0] + c = a + b + self.assertTrue(c.dtype == core.VarDesc.VarType.FP16) + + if __name__ == '__main__': paddle.enable_static() unittest.main() diff --git a/python/paddle/fluid/transpiler/collective.py b/python/paddle/fluid/transpiler/collective.py index 870efa0968d72..04bd68d257163 100644 --- a/python/paddle/fluid/transpiler/collective.py +++ b/python/paddle/fluid/transpiler/collective.py @@ -516,12 +516,12 @@ def _transpile_startup_program(self): def _transpile_main_program(self): # not need loss scale and no dense param param_cnt = self._get_update_param_count() - if self.loss_scale is 0 and param_cnt is 0: + if self.loss_scale == 0 and param_cnt == 0: return # scale loss self._insert_scale_loss_grad_ops() # no param - if param_cnt is 0: + if param_cnt == 0: return # fuse allreduce if self.fuse_allreduce > 0: From 5822e15ce4ffd9c4561a6403ca352a65dcb5cd76 Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Tue, 31 Jan 2023 16:19:27 +0800 Subject: [PATCH 40/89] [inference][trt] add elementwise input data type check (#49675) --- paddle/fluid/inference/tensorrt/op_teller.cc | 45 ++++- .../test_trt_convert_compare_and_logical.py | 137 +++++++++++++++ .../inference/test_trt_convert_elementwise.py | 166 +++++++++++++++++- .../ir/inference/test_trt_convert_equal.py | 2 +- 4 files changed, 335 insertions(+), 15 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index fbbd77a4c9825..0075c64759333 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1365,16 +1365,26 @@ struct SimpleOpTypeSetTeller : public Teller { VLOG(3) << "Ops(" << op_type << ") do not support static shape yet."; return false; } + auto* block = desc.Block(); + auto* x_var_desc = block->FindVar(desc.Input("X")[0]); + auto* y_var_desc = block->FindVar(desc.Input("Y")[0]); + auto x_dtype = x_var_desc->GetDataType(); + auto y_dtype = y_var_desc->GetDataType(); if (op_type == "logical_or" || op_type == "logical_xor" || op_type == "logical_and") { - auto* block = desc.Block(); - auto* x_var_desc = block->FindVar(desc.Input("X")[0]); - auto* y_var_desc = block->FindVar(desc.Input("Y")[0]); - auto x_dtype = x_var_desc->GetDataType(); - auto y_dtype = y_var_desc->GetDataType(); if (x_dtype != framework::proto::VarType::BOOL || y_dtype != framework::proto::VarType::BOOL) { - VLOG(3) << "the op only support input of BOOL."; + VLOG(3) 
<< "the op (" << op_type << ") only support input of BOOL."; + return false; + } + } + if (op_type == "less_than" || op_type == "greater_than" || + op_type == "less_equal") { + if (x_dtype == framework::proto::VarType::BOOL || + y_dtype == framework::proto::VarType::BOOL) { + VLOG(3) + << "ElementWiseOperation::kLESS/ElementWiseOperation::kGREATER " + "do not support boolean datatype."; return false; } } @@ -1417,6 +1427,29 @@ struct SimpleOpTypeSetTeller : public Teller { const auto x_shape = x_var_desc->GetShape(); const auto y_shape = y_var_desc->GetShape(); + // These operations do not support boolean datatype. + if (op_type == "elementwise_add" || op_type == "elementwise_mul" || + op_type == "elementwise_sub" || op_type == "elementwise_div" || + op_type == "elementwise_pow" || op_type == "elementwise_min" || + op_type == "elementwise_max" || op_type == "elementwise_floordiv") { + if (x_var_desc->GetDataType() == + paddle::framework::proto::VarType_Type::VarType_Type_BOOL) { + VLOG(3) << "These operations " + "(elementwise_add/mul/sub/div/pow/min/max/floordiv) do " + "not support boolean datatype."; + return false; + } + } + // These operations input do not support int32 datatype. + if (op_type == "elementwise_pow") { + if (x_var_desc->GetDataType() == + paddle::framework::proto::VarType_Type::VarType_Type_INT32) { + VLOG(3) << "These operations (elementwise_pow) do not support int32 " + "datatype."; + return false; + } + } + // The case when x_shape.size() == 1 is dealt with in common case if (!with_dynamic_shape && (!y_var_desc->Persistable()) && y_shape.size() == 1) { diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py index e59b9a0cd416c..50159c222cc8a 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_compare_and_logical.py @@ -481,5 +481,142 @@ def test(self): self.run_test() +class TrtConvertCompareSkipTest(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + return True + + def sample_program_configs(self): + def generate_input(shape): + return np.random.random(shape).astype(np.int32) + + for shape in [[2, 16], [2, 16, 32], [1, 32, 16, 32]]: + for op_type in ["less_than", "greater_than"]: + for axis in [-1]: + self.dims = len(shape) + dics = [ + {"axis": axis}, + {"in_dtype": 2, "out_dtype": 0}, + {"in_dtype": 0, "out_dtype": 2}, + ] + ops_config = [ + { + "op_type": "cast", + "op_inputs": {"X": ["input_data1"]}, + "op_outputs": {"Out": ["cast_output_data1"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data1": np.bool_}, + }, + { + "op_type": "cast", + "op_inputs": {"X": ["input_data2"]}, + "op_outputs": {"Out": ["cast_output_data2"]}, + "op_attrs": dics[1], + "outputs_dtype": {"cast_output_data2": np.bool_}, + }, + { + "op_type": op_type, + "op_inputs": { + "X": ["cast_output_data1"], + "Y": ["cast_output_data2"], + }, + "op_outputs": {"Out": ["cast_output_data0"]}, + "op_attrs": dics[0], + "outputs_dtype": {"cast_output_data0": np.bool_}, + }, + { + "op_type": "cast", + "op_inputs": {"X": ["cast_output_data0"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[2], + "outputs_dtype": {"output_data": np.int32}, + }, + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data1": 
TensorConfig( + data_gen=partial(generate_input, shape) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, shape) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 2: + shape_data = [2, 16] + if self.dims == 3: + shape_data = [2, 16, 32] + if self.dims == 4: + shape_data = [1, 32, 16, 32] + + shape_info = { + "input_data1": shape_data, + "input_data2": shape_data, + "cast_output_data0": shape_data, + "cast_output_data1": shape_data, + "cast_output_data2": shape_data, + } + self.dynamic_shape.min_input_shape = shape_info + self.dynamic_shape.max_input_shape = shape_info + self.dynamic_shape.opt_input_shape = shape_info + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + ver = paddle_infer.get_trt_compile_version() + if ver[0] * 1000 + ver[1] * 100 + ver[2] * 10 < 8400: + return 0, 7 + if not dynamic_shape: + return 0, 7 + return 3, 4 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), 1e-5 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, True + ), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py index 89debb2a27dcd..4d4df30acb031 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_elementwise.py @@ -25,7 +25,7 @@ # This is the special test case with weight including batch dimension # I don't want to mess up the code written by others, so I wrote a class specifically -class TrtConvertElementwiseTest_one_input_special_case0(TrtLayerAutoScanTest): +class TrtConvertElementwiseTestOneInputSpecialCase0(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -158,7 +158,7 @@ def test(self): # This is the special test case -class TrtConvertElementwiseTest_one_input_special_case1(TrtLayerAutoScanTest): +class TrtConvertElementwiseTestOneInputSpecialCase1(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -279,7 +279,7 @@ def test(self): self.run_test() -class TrtConvertElementwiseTest_one_input(TrtLayerAutoScanTest): +class TrtConvertElementwiseTestOneInput(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -431,9 +431,7 @@ def 
test(self): self.run_test() -class TrtConvertElementwiseTest_two_input_without_broadcast( - TrtLayerAutoScanTest -): +class TrtConvertElementwiseTestTwoInputWithoutBroadcast(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -592,7 +590,7 @@ def test(self): self.run_test() -class TrtConvertElementwiseTest_two_input_with_broadcast(TrtLayerAutoScanTest): +class TrtConvertElementwiseTestTwoInputWithBroadcast(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: inputs = program_config.inputs if len(inputs['input_data1'].shape) != len(inputs['input_data2'].shape): @@ -754,7 +752,7 @@ def test(self): self.run_test() -class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): +class TrtConvertElementwiseTestOneInputCornerCase(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: return True @@ -896,5 +894,157 @@ def test(self): self.run_test() +class TrtConvertElementwiseTestTwoInputSkipCase(TrtLayerAutoScanTest): + def is_program_valid(self, program_config: ProgramConfig) -> bool: + # if program_config.ops[0].type in "round": + return True + + def sample_program_configs(self): + def generate_input(shape, op_type): + if op_type == "elementwise_pow": + return np.random.randint( + low=1, high=10000, size=shape, dtype=np.int32 + ) + # Paddle mul support bool and TensorRT not + if op_type == "elementwise_mul": + return np.random.random(shape).astype(np.bool) + + for shape in [[4], [4, 32], [2, 32, 16], [1, 8, 16, 32]]: + for op_type in [ + "elementwise_pow", + "elementwise_mul", + ]: + for axis in [0, -1]: + self.dims = len(shape) + dics = [{"axis": axis}] + ops_config = [ + { + "op_type": op_type, + "op_inputs": { + "X": ["input_data1"], + "Y": ["input_data2"], + }, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[0], + "outputs_dtype": { + "output_data": np.int32 + if op_type == "elementwise_pow" + else np.bool_ + }, + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data1": TensorConfig( + data_gen=partial(generate_input, shape, op_type) + ), + "input_data2": TensorConfig( + data_gen=partial(generate_input, shape, op_type) + ), + }, + outputs=["output_data"], + ) + + yield program_config + + def sample_predictor_configs( + self, program_config + ) -> (paddle_infer.Config, List[int], float): + def generate_dynamic_shape(attrs): + if self.dims == 1: + self.dynamic_shape.min_input_shape = { + "input_data1": [1], + "input_data2": [1], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [128], + "input_data2": [128], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [32], + "input_data2": [32], + } + elif self.dims == 2: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 4], + "input_data2": [1, 4], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [128, 256], + "input_data2": [128, 256], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [32, 64], + "input_data2": [32, 64], + } + elif self.dims == 3: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 4, 4], + "input_data2": [1, 4, 4], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [128, 128, 256], + "input_data2": [128, 128, 256], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 32, 16], + "input_data2": [2, 32, 16], + } + elif self.dims == 4: + self.dynamic_shape.min_input_shape = { + "input_data1": [1, 4, 4, 4], + 
"input_data2": [1, 4, 4, 4], + } + self.dynamic_shape.max_input_shape = { + "input_data1": [8, 128, 64, 128], + "input_data2": [8, 128, 64, 128], + } + self.dynamic_shape.opt_input_shape = { + "input_data1": [2, 64, 32, 32], + "input_data2": [2, 64, 32, 32], + } + + def clear_dynamic_shape(): + self.dynamic_shape.max_input_shape = {} + self.dynamic_shape.min_input_shape = {} + self.dynamic_shape.opt_input_shape = {} + + def generate_trt_nodes_num(attrs, dynamic_shape): + return 0, 4 + + attrs = [ + program_config.ops[i].attrs for i in range(len(program_config.ops)) + ] + + # for static_shape + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-5, 1e-5) + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), (1e-3, 1e-3) + + # for dynamic_shape + generate_dynamic_shape(attrs) + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), (0, 4), (1e-5, 1e-5) + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), (0, 4), (1e-3, 1e-3) + + def add_skip_trt_case(self): + pass + + def test(self): + self.add_skip_trt_case() + self.run_test() + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py index 7be685d2894b1..4993e830f190b 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_equal.py @@ -23,7 +23,7 @@ import paddle.inference as paddle_infer -class TrtConvertElementwiseTest_one_input_corner_case(TrtLayerAutoScanTest): +class TrtConvertEqualOneInputCornerCase(TrtLayerAutoScanTest): def is_program_valid(self, program_config: ProgramConfig) -> bool: attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) From 0e51f3988bf92c3a13f3a1e54c0ade4d98c7edeb Mon Sep 17 00:00:00 2001 From: Charles-hit <56987902+Charles-hit@users.noreply.github.com> Date: Tue, 31 Jan 2023 16:20:00 +0800 Subject: [PATCH 41/89] Integrate static code gen info (#49858) * polish static grad op maker gen * fix some bugs * fix static code gen * solve conflict * modify composite grad maker name * integrate phi and fluid info in static code gen * rename some composite maker * modify static code gen format --- paddle/fluid/framework/details/op_registry.h | 4 +- paddle/fluid/framework/op_info.h | 4 +- paddle/fluid/framework/type_defs.h | 2 +- .../elementwise/elementwise_add_op.cc | 8 +- .../elementwise/elementwise_div_op.cc | 8 +- .../elementwise/elementwise_mul_op.cc | 8 +- .../elementwise/elementwise_sub_op.cc | 8 +- paddle/fluid/operators/expand_v2_op.cc | 6 +- paddle/fluid/operators/generator/filters.py | 23 +- .../fluid/operators/generator/generate_op.py | 193 ++++++++--------- .../operators/generator/generate_sparse_op.py | 16 +- .../operators/generator/generate_static_op.py | 12 +- .../fluid/operators/generator/parse_utils.py | 15 +- .../operators/generator/templates/op.c.j2 | 8 +- .../generator/templates/operator_utils.c.j2 | 201 +++++++++--------- .../operators/reduce_ops/reduce_sum_op.cc | 4 +- paddle/fluid/prim/tests/test_static_prim.cc | 26 +-- .../utils/static/composite_grad_desc_maker.h | 6 +- paddle/fluid/pybind/pybind.cc | 8 +- 19 files 
changed, 286 insertions(+), 274 deletions(-) diff --git a/paddle/fluid/framework/details/op_registry.h b/paddle/fluid/framework/details/op_registry.h index 1edc84aba07d9..4b109ba0dcff2 100644 --- a/paddle/fluid/framework/details/op_registry.h +++ b/paddle/fluid/framework/details/op_registry.h @@ -63,7 +63,7 @@ using OpRegistryClasses = std::tuple< // NOLINT TypePair, // NOLINT TypePair, // NOLINT TypePair, // NOLINT - TypePair, // NOLINT + TypePair, // NOLINT TypePair, // NOLINT TypePair, // NOLINT TypePair, // NOLINT @@ -262,7 +262,7 @@ struct OpInfoFiller { info->grad_comp_op_maker_, nullptr, platform::errors::AlreadyExists( - "GradCompositeOpMakerBase of %s has been registered", op_type)); + "CompositeGradOpMakerBase of %s has been registered", op_type)); info->grad_comp_op_maker_ = [](const OpDesc& fwd_op, diff --git a/paddle/fluid/framework/op_info.h b/paddle/fluid/framework/op_info.h index 61a2373eb3479..bd4405f722844 100644 --- a/paddle/fluid/framework/op_info.h +++ b/paddle/fluid/framework/op_info.h @@ -43,7 +43,7 @@ class OpInfo { public: OpCreator creator_; GradOpMakerFN grad_op_maker_; - GradCompositeOpMakerFN grad_comp_op_maker_; + CompositeGradOpMakerFN grad_comp_op_maker_; proto::OpProto* proto_{nullptr}; OpAttrChecker* checker_{nullptr}; InferVarTypeFN infer_var_type_; @@ -84,7 +84,7 @@ class OpInfo { const GradOpMakerFN& GradOpMaker() const { return grad_op_maker_; } - const GradCompositeOpMakerFN& GradCompOpMaker() const { + const CompositeGradOpMakerFN& CompGradOpMaker() const { return grad_comp_op_maker_; } diff --git a/paddle/fluid/framework/type_defs.h b/paddle/fluid/framework/type_defs.h index 7c90925da4a88..13bd782ce4033 100644 --- a/paddle/fluid/framework/type_defs.h +++ b/paddle/fluid/framework/type_defs.h @@ -96,7 +96,7 @@ using GradOpMakerFN = std::function>( std::unordered_map* /*grad_to_var*/, const std::vector& grad_block)>; -using GradCompositeOpMakerFN = +using CompositeGradOpMakerFN = std::function>( const OpDesc&, const std::unordered_set& /*no_grad_set*/, diff --git a/paddle/fluid/operators/elementwise/elementwise_add_op.cc b/paddle/fluid/operators/elementwise/elementwise_add_op.cc index 11e0fa7dd1f97..48a5d2e433a10 100644 --- a/paddle/fluid/operators/elementwise/elementwise_add_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_add_op.cc @@ -51,9 +51,9 @@ class ElementwiseAddOpMaker : public ElementwiseOpMaker { } }; -class ElementwiseAddGradCompositeOpMaker - : public prim::GradCompositeOpMakerBase { - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; +class ElementwiseAddCompositeGradOpMaker + : public prim::CompositeGradOpMakerBase { + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; public: void Apply() override { @@ -122,7 +122,7 @@ REGISTER_OPERATOR(elementwise_add, ::paddle::operators::ElementwiseOpInferVarType, elementwise_addGradMaker<::paddle::framework::OpDesc>, elementwise_addGradMaker<::paddle::imperative::OpBase>, - ::paddle::operators::ElementwiseAddGradCompositeOpMaker, + ::paddle::operators::ElementwiseAddCompositeGradOpMaker, ::paddle::operators::ElementwiseOpInplaceInferer); namespace ops = paddle::operators; diff --git a/paddle/fluid/operators/elementwise/elementwise_div_op.cc b/paddle/fluid/operators/elementwise/elementwise_div_op.cc index 3d62792d8513e..41549ede1ebc6 100644 --- a/paddle/fluid/operators/elementwise/elementwise_div_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_div_op.cc @@ -67,9 +67,9 @@ class ElementwiseDivGradOpMaker : public framework::SingleGradOpMaker { } }; 
-class ElementwiseDivGradCompositeOpMaker - : public prim::GradCompositeOpMakerBase { - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; +class ElementwiseDivCompositeGradOpMaker + : public prim::CompositeGradOpMakerBase { + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; public: void Apply() override { @@ -123,7 +123,7 @@ REGISTER_OPERATOR(elementwise_div, ops::ElementwiseOp, ops::ElementwiseDivOpMaker, ops::ElementwiseOpInferVarType, - ops::ElementwiseDivGradCompositeOpMaker, + ops::ElementwiseDivCompositeGradOpMaker, ops::ElementwiseDivGradOpMaker, ops::ElementwiseDivGradOpMaker); diff --git a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc index 4052f3e09e0cc..740c9381d92e2 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mul_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mul_op.cc @@ -66,9 +66,9 @@ class ElementwiseMulOpGradMaker : public framework::SingleGradOpMaker { } }; -class ElementwiseMulGradCompositeOpMaker - : public prim::GradCompositeOpMakerBase { - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; +class ElementwiseMulCompositeGradOpMaker + : public prim::CompositeGradOpMakerBase { + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; public: void Apply() override { @@ -155,7 +155,7 @@ REGISTER_OPERATOR(elementwise_mul, ops::ElementwiseOpInferVarType, ops::ElementwiseMulOpGradMaker, ops::ElementwiseMulOpGradMaker, - ops::ElementwiseMulGradCompositeOpMaker); + ops::ElementwiseMulCompositeGradOpMaker); REGISTER_OPERATOR( elementwise_mul_grad, ops::ElementwiseOpGrad, diff --git a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc index be839f123a1e9..2a9e14867acf1 100644 --- a/paddle/fluid/operators/elementwise/elementwise_sub_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_sub_op.cc @@ -54,9 +54,9 @@ class ElementwiseSubOpMaker : public ElementwiseOpMaker { } }; -class ElementwiseSubGradCompositeOpMaker - : public prim::GradCompositeOpMakerBase { - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; +class ElementwiseSubCompositeGradOpMaker + : public prim::CompositeGradOpMakerBase { + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; public: void Apply() override { @@ -109,7 +109,7 @@ REGISTER_OPERATOR(elementwise_sub, ::paddle::operators::ElementwiseOpInferVarType, elementwise_subGradMaker<::paddle::framework::OpDesc>, elementwise_subGradMaker<::paddle::imperative::OpBase>, - ::paddle::operators::ElementwiseSubGradCompositeOpMaker, + ::paddle::operators::ElementwiseSubCompositeGradOpMaker, ::paddle::operators::ElementwiseOpInplaceInferer); REGISTER_OPERATOR( diff --git a/paddle/fluid/operators/expand_v2_op.cc b/paddle/fluid/operators/expand_v2_op.cc index 9a867c040fcb8..3c05ab9295c67 100644 --- a/paddle/fluid/operators/expand_v2_op.cc +++ b/paddle/fluid/operators/expand_v2_op.cc @@ -193,8 +193,8 @@ class ExpandV2GradOpMaker : public framework::SingleGradOpMaker { } }; -class ExpandV2GradCompositeOpMaker : public prim::GradCompositeOpMakerBase { - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; +class ExpandV2CompositeGradOpMaker : public prim::CompositeGradOpMakerBase { + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; public: void Apply() override { @@ -244,7 +244,7 @@ namespace ops = paddle::operators; REGISTER_OPERATOR(expand_v2, ops::ExpandV2Op, ops::ExpandV2OpMaker, - 
ops::ExpandV2GradCompositeOpMaker, + ops::ExpandV2CompositeGradOpMaker, ops::ExpandV2GradOpMaker, ops::ExpandV2GradOpMaker, ExpandInferShapeFunctor); diff --git a/paddle/fluid/operators/generator/filters.py b/paddle/fluid/operators/generator/filters.py index 8efbac1f7e92c..50bc1f7bca884 100644 --- a/paddle/fluid/operators/generator/filters.py +++ b/paddle/fluid/operators/generator/filters.py @@ -14,6 +14,7 @@ import itertools import re +from typing import Dict, List from type_mapping import ( attr_types_map, @@ -137,17 +138,23 @@ def to_composite_grad_opmaker_name(backward_op_name): for i in range(len(words)): words[i] = words[i].strip() words[i] = words[i].capitalize() - composite_grad_opmaker_name = words[0] + "Composite" - composite_grad_opmaker_name += "".join(word for word in words[1:]) - composite_grad_opmaker_name += "OpMaker" + composite_grad_opmaker_name = "".join(word for word in words[:-1]) + composite_grad_opmaker_name += "CompositeGradOpMaker" return composite_grad_opmaker_name +def to_variable_names(dict_list: List[Dict], key: str) -> List[str]: + names = [] + for var in dict_list: + names.append(var[key]) + return names + + def cartesian_prod_attrs(attrs): items = [] for attr in attrs: type_name = attr["typename"] - name = attr["name"] + name = attr["fluid_name"] if type_name == "Scalar": items.append((name, to_scalar_tensor_name(attr))) elif type_name == "IntArray": @@ -176,11 +183,15 @@ def cartesian_prod_attrs(attrs): def cartesian_prod_mapping(op): kernels = op["kernel"]["func"] inputs = [ - x["name"] for x in op["inputs"] if x["name"] in op["kernel"]["param"] + x["fluid_name"] + for x in op["inputs"] + if x["fluid_name"] in op["kernel"]["param"] ] inputs = [to_opmaker_name_cstr(input) for input in inputs] attrs = cartesian_prod_attrs(op["attrs"]) - outputs = [to_opmaker_name_cstr(output["name"]) for output in op["outputs"]] + outputs = [ + to_opmaker_name_cstr(output["fluid_name"]) for output in op["outputs"] + ] def vec(items): return "{" + ', '.join(items) + "}" diff --git a/paddle/fluid/operators/generator/generate_op.py b/paddle/fluid/operators/generator/generate_op.py index e4bb7041016d2..2da40b1edd114 100644 --- a/paddle/fluid/operators/generator/generate_op.py +++ b/paddle/fluid/operators/generator/generate_op.py @@ -28,6 +28,7 @@ to_opmaker_name_cstr, to_pascal_case, to_scalar_tensor_name, + to_variable_names, ) from jinja2 import Environment, FileSystemLoader, StrictUndefined from parse_utils import to_named_dict @@ -60,6 +61,7 @@ env.filters["to_opmaker_name_cstr"] = to_opmaker_name_cstr env.filters["cartesian_prod_mapping"] = cartesian_prod_mapping env.filters["to_composite_grad_opmaker_name"] = to_composite_grad_opmaker_name +env.filters["to_variable_names"] = to_variable_names env.tests["base_op"] = is_base_op env.tests["composite_op"] = is_composite_op env.tests["vec"] = is_vec @@ -157,29 +159,26 @@ def process_int_array(op_item, int_array_configs): ] -def parse_composite_info(ops, backward_ops, backward_op_dict): - for op in ops: - if "backward" in op: - op["phi_backward"] = op["backward"] - for backward_op in backward_ops: - if "backward" in backward_op: - backward_op["phi_backward"] = backward_op["backward"] - for backward_op_name, op_dict in backward_op_dict.items(): - if "composite" not in op_dict: - continue - op_dict["composite"]["phi_inputs"] = [] - op_dict["composite"]["phi_attrs"] = [] - op_dict["composite"]["phi_outputs"] = [] - for input in op_dict["inputs"]: - op_dict["composite"]["phi_inputs"].append(input['name']) - for attr in 
op_dict["attrs"]: - op_dict["composite"]["phi_attrs"].append(attr['name']) - for output in op_dict["outputs"]: - op_dict["composite"]["phi_outputs"].append(output['name']) - - -# replace name of op and params for OpMaker -def replace_compat_name(op_fluid_map_list, forward_op_dict, backward_op_dict): +def add_composite_info(ops, backward_ops, backward_op_dict): + # add backward composite name in forward + for op in ops + backward_ops: + if ( + op["backward"] in backward_op_dict + and "composite" in backward_op_dict[op["backward"]] + ): + op["backward_composite"] = op["backward"] + else: + op["backward_composite"] = None + + +# add fluid name in ops and backward ops info +def add_fluid_name(dict_list): + for item in dict_list: + item["fluid_name"] = item["name"] + + +# add fluid name of op and params for OpMaker +def add_compat_name(op_fluid_map_list, forward_op_dict, backward_op_dict): def get_phi_and_fluid_op_name(op_item): names = op_item.split('(') if len(names) == 1: @@ -187,12 +186,14 @@ def get_phi_and_fluid_op_name(op_item): else: return names[0].strip(), names[1].split(')')[0].strip() - def update_op_param_name(op_args, args_alias_map): + def add_op_param_name(op_args, args_alias_map): for item in op_args: if item['name'] in args_alias_map: - item['name'] = args_alias_map[item['name']] + item['fluid_name'] = args_alias_map[item['name']] + else: + item['fluid_name'] = item['name'] - def update_grad_args_name(op_args, args_alias_map): + def add_grad_args_name(op_args, args_alias_map): for item in op_args: if ( item['name'].endswith('_grad') @@ -201,38 +202,12 @@ def update_grad_args_name(op_args, args_alias_map): args_alias_map[item['name']] = ( args_alias_map[item['name'][:-5]] + '_grad' ) - item['name'] = args_alias_map[item['name'][:-5]] + '_grad' - - def add_fluid_info_in_composite(composite_map, args_alias_map): - fluid_input_list = [] - fluid_attr_list = [] - fluid_output_list = [] - # add fluid op inputs - for input in composite_map["phi_inputs"]: - if input in args_alias_map: - fluid_input_list.append(args_alias_map[input]) - else: - fluid_input_list.append(input) - # add fluid op attrs - for attr in composite_map["phi_attrs"]: - if attr in args_alias_map: - fluid_attr_list.append(args_alias_map[attr]) - else: - fluid_attr_list.append(attr) - # add fluid op outputs - for output in composite_map["phi_outputs"]: - if output in args_alias_map: - fluid_output_list.append(args_alias_map[output]) - else: - fluid_output_list.append(output) - - composite_map.update( - { - "fluid_inputs": fluid_input_list, - "fluid_attrs": fluid_attr_list, - "fluid_outputs": fluid_output_list, - } - ) + item['fluid_name'] = args_alias_map[item['name'][:-5]] + '_grad' + elif ( + item['name'].endswith('_grad') + and item['name'][:-5] not in args_alias_map + ): + item['fluid_name'] = item['name'] def get_param_list_alias(param_list, args_map): return [ @@ -297,15 +272,15 @@ def update_common_params_name( op_item['kernel']['layout']['candidates'], args_name_map ) - def update_grad_op_compat_name(grad_op_item, args_name_map): - update_op_param_name(grad_op_item['inputs'], args_name_map) - update_op_param_name(grad_op_item['outputs'], args_name_map) - update_op_param_name(grad_op_item['attrs'], args_name_map) - update_op_param_name(grad_op_item['forward']['inputs'], args_name_map) - update_op_param_name(grad_op_item['forward']['outputs'], args_name_map) - update_op_param_name(grad_op_item['forward']['attrs'], args_name_map) - update_grad_args_name(grad_op_item['inputs'], args_map) - 
update_grad_args_name(grad_op_item['outputs'], args_map) + def add_grad_op_compat_name(grad_op_item, args_name_map): + add_op_param_name(grad_op_item['inputs'], args_name_map) + add_op_param_name(grad_op_item['outputs'], args_name_map) + add_op_param_name(grad_op_item['attrs'], args_name_map) + add_op_param_name(grad_op_item['forward']['inputs'], args_name_map) + add_op_param_name(grad_op_item['forward']['outputs'], args_name_map) + add_op_param_name(grad_op_item['forward']['attrs'], args_name_map) + add_grad_args_name(grad_op_item['inputs'], args_map) + add_grad_args_name(grad_op_item['outputs'], args_map) for op_args in op_fluid_map_list: new_op_name, op_name = get_phi_and_fluid_op_name(op_args['op']) @@ -350,39 +325,32 @@ def update_grad_op_compat_name(grad_op_item, args_name_map): int_array_configs[ op_args[key][args_item['name']] ] = int_array_configs[args_item['name']] - args_item['name'] = op_args[key][args_item['name']] - if has_backward: - for args_item in backward_op_item['forward'][key]: - if args_item['name'] in op_args[key]: - args_item['name'] = op_args[key][args_item['name']] - forward_op_item["attr_dict"] = to_named_dict(forward_op_item["attrs"]) + args_item['fluid_name'] = op_args[key][ + args_item['name'] + ] update_common_params_name( forward_op_item, args_map, scalar_configs, int_array_configs ) if has_backward: - update_grad_op_compat_name(backward_op_item, args_map) + # update fluid info in backward + add_grad_op_compat_name(backward_op_item, args_map) update_common_params_name( backward_op_item, args_map, scalar_configs, int_array_configs ) - backward_op_item["attr_dict"] = to_named_dict( - backward_op_item["attrs"] - ) if 'backward' not in op_args: continue backward_op_list = op_args['backward'].split(',') - # add fluid args name in composite map - for backward_op in backward_op_list: - if ( - "composite" - in backward_op_dict[backward_op.split('(')[0].strip()] - ): - add_fluid_info_in_composite( - backward_op_dict[backward_op]["composite"], args_map - ) - _, bw_op_name = get_phi_and_fluid_op_name(backward_op_list[0]) + phi_bw_op_name, bw_op_name = get_phi_and_fluid_op_name( + backward_op_list[0] + ) + if ( + forward_op_item["backward_composite"] is not None + and phi_bw_op_name != bw_op_name + ): + forward_op_item["backward_composite"] = bw_op_name forward_op_item['backward'] = bw_op_name backward_op_item['op_name'] = bw_op_name @@ -393,18 +361,20 @@ def update_grad_op_compat_name(grad_op_item, args_name_map): double_grad_op_name, ) = get_phi_and_fluid_op_name(backward_op_list[1]) double_grad_item = backward_op_dict[phi_double_grad_op_name] + if ( + backward_op_item["backward_composite"] is not None + and phi_double_grad_op_name != double_grad_op_name + ): + backward_op_item["backward_composite"] = double_grad_op_name backward_op_item['backward'] = double_grad_op_name double_grad_item['op_name'] = double_grad_op_name - update_grad_op_compat_name(double_grad_item, args_map) + add_grad_op_compat_name(double_grad_item, args_map) update_common_params_name( double_grad_item, args_map, scalar_configs, int_array_configs, ) - double_grad_item["attr_dict"] = to_named_dict( - double_grad_item["attrs"] - ) # for triple grad if len(backward_op_list) > 2: @@ -413,18 +383,22 @@ def update_grad_op_compat_name(grad_op_item, args_name_map): triple_grad_op_name, ) = get_phi_and_fluid_op_name(backward_op_list[2]) triple_grad_item = backward_op_dict[phi_triple_grad_op_name] + if ( + double_grad_item["backward_composite"] is not None + and phi_triple_grad_op_name != triple_grad_op_name 
+ ): + double_grad_item[ + "backward_composite" + ] = triple_grad_op_name double_grad_item['backward'] = triple_grad_op_name triple_grad_item['op_name'] = triple_grad_op_name - update_grad_op_compat_name(triple_grad_item, args_map) + add_grad_op_compat_name(triple_grad_item, args_map) update_common_params_name( triple_grad_item, args_map, scalar_configs, int_array_configs, ) - triple_grad_item["attr_dict"] = to_named_dict( - triple_grad_item["attrs"] - ) def process_invoke_op(forward_op_dict, backward_op_dict): @@ -442,20 +416,28 @@ def process_invoke_op(forward_op_dict, backward_op_dict): for input_item in reuse_op['inputs']: bw_op['invoke']['inputs'].append( { + 'fluid_name': input_item['fluid_name'], 'name': input_item['name'], 'value': args_list[args_index], } ) args_index = args_index + 1 + bw_fluid_attrs_set = [ + item['fluid_name'] for item in bw_op['attrs'] + ] for attr in reuse_op['attrs']: if args_index < len(args_list): attr_value = ( f"this->GetAttr(\"{args_list[args_index]}\")" - if args_list[args_index] in bw_op['attr_dict'] + if args_list[args_index] in bw_fluid_attrs_set else args_list[args_index] ) bw_op['invoke']['attrs'].append( - {'name': attr['name'], 'value': attr_value} + { + 'name': attr['name'], + 'fluid_name': attr['fluid_name'], + 'value': attr_value, + } ) args_index = args_index + 1 else: @@ -464,7 +446,8 @@ def process_invoke_op(forward_op_dict, backward_op_dict): bw_op['invoke']['outputs'].append( { 'name': output_item['name'], - 'value': bw_op['outputs'][idx]['name'], + 'fluid_name': output_item['fluid_name'], + 'value': bw_op['outputs'][idx]['fluid_name'], } ) @@ -517,17 +500,26 @@ def main( for op in ops: op['op_name'] = op['name'] + add_fluid_name(op['inputs']) + add_fluid_name(op['attrs']) + add_fluid_name(op['outputs']) for bw_op in backward_ops: bw_op['op_name'] = bw_op['name'] + add_fluid_name(bw_op['inputs']) + add_fluid_name(bw_op['attrs']) + add_fluid_name(bw_op['outputs']) + add_fluid_name(bw_op['forward']['inputs']) + add_fluid_name(bw_op['forward']['attrs']) + add_fluid_name(bw_op['forward']['outputs']) for bw_output in bw_op['outputs']: bw_output['drop_empty_grad'] = True # deal the drop_empty_grad of bw_op by op_compat.yaml parse_drop_empty_grad(op_fluid_map_list, backward_op_dict) - parse_composite_info(ops, backward_ops, backward_op_dict) + add_composite_info(ops, backward_ops, backward_op_dict) - replace_compat_name(op_fluid_map_list, forward_op_dict, backward_op_dict) + add_compat_name(op_fluid_map_list, forward_op_dict, backward_op_dict) # prepare for invoke case process_invoke_op(forward_op_dict, backward_op_dict) @@ -555,7 +547,6 @@ def main( ops=ops, backward_ops=backward_ops, op_dict=op_dict, - composite_gen_flag=True, ) f.write(msg) ks_template = env.get_template('ks.c.j2') diff --git a/paddle/fluid/operators/generator/generate_sparse_op.py b/paddle/fluid/operators/generator/generate_sparse_op.py index 1da91e3f60005..3eea32091dc80 100644 --- a/paddle/fluid/operators/generator/generate_sparse_op.py +++ b/paddle/fluid/operators/generator/generate_sparse_op.py @@ -28,12 +28,14 @@ to_opmaker_name_cstr, to_pascal_case, to_scalar_tensor_name, + to_variable_names, ) -from generate_op import process_invoke_op +from generate_op import add_fluid_name, process_invoke_op from jinja2 import Environment, FileSystemLoader, StrictUndefined from parse_utils import to_named_dict from tests import ( is_base_op, + is_composite_op, is_initializer_list, is_scalar, is_vec, @@ -60,7 +62,9 @@ env.filters["to_opmaker_name_cstr"] = to_opmaker_name_cstr 
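# [Editor's note -- illustrative aside, not part of the patch] Two of the
# filters registered in this hunk, to_composite_grad_opmaker_name and the
# newly added to_variable_names, are defined in filters.py earlier in this
# patch. A minimal doctest-style sketch of what they return; the sample
# backward-op name and the name/fluid_name dicts are illustrative only:
#
#     >>> to_composite_grad_opmaker_name("elementwise_add_grad")
#     'ElementwiseAddCompositeGradOpMaker'
#     >>> to_variable_names(
#     ...     [{"name": "x", "fluid_name": "X"},
#     ...      {"name": "y", "fluid_name": "Y"}], "fluid_name")
#     ['X', 'Y']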
env.filters["cartesian_prod_mapping"] = cartesian_prod_mapping env.filters["to_composite_grad_opmaker_name"] = to_composite_grad_opmaker_name +env.filters["to_variable_names"] = to_variable_names env.tests["base_op"] = is_base_op +env.tests["composite_op"] = is_composite_op env.tests["vec"] = is_vec env.tests["scalar"] = is_scalar env.tests["initializer_list"] = is_initializer_list @@ -96,9 +100,18 @@ def main(op_yaml_path, backward_yaml_path, output_op_path, output_arg_map_path): op['name'] = op['op_name'] if op["backward"] is not None: op["backward"] = SPARSE_OP_PREFIX + op["backward"] + add_fluid_name(op["inputs"]) + add_fluid_name(op["attrs"]) + add_fluid_name(op["outputs"]) for bw_op in backward_ops: bw_op['op_name'] = SPARSE_OP_PREFIX + bw_op['name'] bw_op['name'] = bw_op['op_name'] + add_fluid_name(bw_op["inputs"]) + add_fluid_name(bw_op["attrs"]) + add_fluid_name(bw_op["outputs"]) + add_fluid_name(bw_op["forward"]["inputs"]) + add_fluid_name(bw_op["forward"]["attrs"]) + add_fluid_name(bw_op["forward"]["outputs"]) if 'invoke' in bw_op: bw_op['invoke']['args'] = [ param.strip() for param in bw_op['invoke']['args'].split(',') @@ -139,7 +152,6 @@ def main(op_yaml_path, backward_yaml_path, output_op_path, output_arg_map_path): ops=ops, backward_ops=backward_ops, op_dict=op_dict, - composite_gen_flag=False, ) f.write(msg) diff --git a/paddle/fluid/operators/generator/generate_static_op.py b/paddle/fluid/operators/generator/generate_static_op.py index 7701f76734a0c..3a825bafb127c 100644 --- a/paddle/fluid/operators/generator/generate_static_op.py +++ b/paddle/fluid/operators/generator/generate_static_op.py @@ -28,12 +28,14 @@ to_opmaker_name_cstr, to_pascal_case, to_scalar_tensor_name, + to_variable_names, ) -from generate_op import replace_compat_name +from generate_op import add_compat_name, add_fluid_name from jinja2 import Environment, FileSystemLoader, StrictUndefined from parse_utils import to_named_dict from tests import ( is_base_op, + is_composite_op, is_initializer_list, is_scalar, is_vec, @@ -60,7 +62,9 @@ env.filters["to_opmaker_name_cstr"] = to_opmaker_name_cstr env.filters["cartesian_prod_mapping"] = cartesian_prod_mapping env.filters["to_composite_grad_opmaker_name"] = to_composite_grad_opmaker_name +env.filters["to_variable_names"] = to_variable_names env.tests["base_op"] = is_base_op +env.tests["composite_op"] = is_composite_op env.tests["vec"] = is_vec env.tests["scalar"] = is_scalar env.tests["initializer_list"] = is_initializer_list @@ -100,8 +104,11 @@ def main( for op in ops: op['op_name'] = op['name'] + add_fluid_name(op["inputs"]) + add_fluid_name(op["attrs"]) + add_fluid_name(op["outputs"]) - replace_compat_name(op_op_map, forward_op_dict, {}) + add_compat_name(op_op_map, forward_op_dict, {}) if len(ops) == 0: if os.path.isfile(output_op_path): @@ -116,7 +123,6 @@ def main( ops=ops, backward_ops=[], op_dict=forward_op_dict, - composite_gen_flag=False, ) f.write(msg) diff --git a/paddle/fluid/operators/generator/parse_utils.py b/paddle/fluid/operators/generator/parse_utils.py index d5a58a2a94a0e..31441aadbf8e3 100644 --- a/paddle/fluid/operators/generator/parse_utils.py +++ b/paddle/fluid/operators/generator/parse_utils.py @@ -294,14 +294,13 @@ def parse_composite( composite_config: str, ) -> Dict[str, Any]: # composite_config: func(args1, args2,.....) 
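# [Editor's note -- illustrative aside, not part of the patch] The change
# below rewrites parse_composite() to use a single named-group regex. The
# <func_name>/<func_args> group names appear to have been stripped by this
# diff's rendering, but they are implied by the result.group("func_name")
# calls that follow. A minimal sketch, assuming
# "tanh_grad(out, out_grad, x_grad)" as a sample composite entry:
#
#     >>> import re
#     >>> pat = r"(?P<func_name>[a-z][a-z0-9_]+)\s*\((?P<func_args>[^\)]+)\)"
#     >>> m = re.search(pat, "tanh_grad(out, out_grad, x_grad)")
#     >>> m.group("func_name"), m.group("func_args")
#     ('tanh_grad', 'out, out_grad, x_grad')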
- fname = r'(.*?)' - wspace = r'\s*' - fargs = r'(.*?)' - pattern = fr'{fname}{wspace}\({wspace}{fargs}{wspace}\)' - - m = re.search(pattern, composite_config) - func_name = m.group(1) - func_args = m.group(2) + result = re.search( + r"(?P[a-z][a-z0-9_]+)\s*\((?P[^\)]+)\)", + composite_config, + ) + + func_name = result.group("func_name") + func_args = result.group("func_args") composite_dict = {} composite_dict["func_name"] = func_name diff --git a/paddle/fluid/operators/generator/templates/op.c.j2 b/paddle/fluid/operators/generator/templates/op.c.j2 index 23641dad90f1b..2339822af280f 100644 --- a/paddle/fluid/operators/generator/templates/op.c.j2 +++ b/paddle/fluid/operators/generator/templates/op.c.j2 @@ -39,11 +39,9 @@ using paddle::framework::GradVarName; {% else %} {{backward_op_reused_maker(op, op_dict[op["forward"]["name"]], op["invoke"])}} {% endif %} - {% if composite_gen_flag == True %} - {% if op is composite_op %} + {% if op is composite_op %} {{composite_grad_op_maker(op_dict[op["name"]])}} - {% endif %} - {% endif %} + {% endif %} {% endfor %} } // namespace operators } // namespace paddle @@ -51,7 +49,7 @@ using paddle::framework::GradVarName; namespace ops = paddle::operators; {% for op in ops + backward_ops %} {% if op is base_op %} -{{register_op_with_components(op, op_dict)}} +{{register_op_with_components(op)}} {{register_op_version(op)}} {% endif %} {% endfor %} diff --git a/paddle/fluid/operators/generator/templates/operator_utils.c.j2 b/paddle/fluid/operators/generator/templates/operator_utils.c.j2 index 000e56453d934..a471efaa562b4 100644 --- a/paddle/fluid/operators/generator/templates/operator_utils.c.j2 +++ b/paddle/fluid/operators/generator/templates/operator_utils.c.j2 @@ -12,7 +12,7 @@ class {{op_name | to_pascal_case}}OpMaker : public framework::OpProtoAndCheckerM {{add_output(loop.index0, output, op_name)}}; {% endfor %} {% for attr in op["attrs"] %} - {% if attr["name"] in op["kernel"]["param"] %} + {% if attr["fluid_name"] in op["kernel"]["param"] %} {{add_attr(loop.index0, attr, op_name)}}; {% endif %} {% endfor %} @@ -27,7 +27,7 @@ TODO: Documentation of {{op_name}} op. 
{# add input, it could be duplicable or dispensable #} {% macro add_input(i, input, op_name) %}{# inline #} - {% set name = input["name"] %} + {% set name = input["fluid_name"] %} {% set typename = input["typename"] %} AddInput({{name| to_opmaker_name}}, "({{typename}}), input {{i}} of {{op_name}} op.") {%- if typename is vec %} @@ -42,7 +42,7 @@ AddInput({{name| to_opmaker_name}}, "({{typename}}), input {{i}} of {{op_name}} {# add output, it could be duplicable or intermediate, however, optional output is not supported #} {% macro add_output(i, output, op_name) %}{# inline #} - {% set name = output["name"] %} + {% set name = output["fluid_name"] %} {% set typename = output["typename"] %} {% set is_intermediate = output["intermediate"] %} AddOutput({{name | to_opmaker_name}}, "({{typename}}), output {{i}} of {{op_name}} op.") @@ -66,7 +66,7 @@ AddOutput({{name | to_opmaker_name}}, "({{typename}}), output {{i}} of {{op_name {# add attribute, and process default value if needed #} {% macro add_attr(i, attr, op_name) %}{# inline #} - {% set name = attr["name"] %} + {% set name = attr["fluid_name"] %} {% set typename = attr["typename"] %} {% if typename is scalar %} AddInput("{{attr | to_scalar_tensor_name}}", "attribute {{i}} for {{op_name}} op from 0D Tensor.") @@ -153,15 +153,15 @@ KernelSignature {{op["op_name"] | to_pascal_case }}OpArgumentMapping(const Argum {% set kernel_in_type_list = kernel_config["dispatch"][kernel_func][0] %} if ( {%- for input in inputs %} - {%- if input["name"] in kernel_config["param"] %} + {%- if input["fluid_name"] in kernel_config["param"] %} {%- if kernel_in_type_list[input_idx.idx] == "dense" %} -ctx.IsDenseTensorInput("{{input["name"]}}"){{" && " if not loop.last}} +ctx.IsDenseTensorInput("{{input["fluid_name"]}}"){{" && " if not loop.last}} {%- elif kernel_in_type_list[input_idx.idx] == "selected_rows" %} -ctx.IsSelectedRowsInput("{{input["name"]}}"){{" && " if not loop.last}} +ctx.IsSelectedRowsInput("{{input["fluid_name"]}}"){{" && " if not loop.last}} {%- elif kernel_in_type_list[input_idx.idx] == "sparse_coo" %} -ctx.IsSparseCooTensorInput("{{input["name"]}}"){{" && " if not loop.last}} +ctx.IsSparseCooTensorInput("{{input["fluid_name"]}}"){{" && " if not loop.last}} {%- elif kernel_in_type_list[input_idx.idx] == "sparse_csr" %} -ctx.IsSparseCsrTensorInput("{{input["name"]}}"){{" && " if not loop.last}} +ctx.IsSparseCsrTensorInput("{{input["fluid_name"]}}"){{" && " if not loop.last}} {%- endif %} {% set input_idx.idx = input_idx.idx + 1 %} {%- endif %} @@ -210,8 +210,8 @@ PD_REGISTER_ARG_MAPPING_FN({{op["op_name"]}}, phi::{{op["op_name"] | to_pascal_c {% macro get_input_list(inputs, kernel_args) %}{# inline #} paddle::small_vector inputs { {%- for input in inputs %} -{%- if input["name"] in kernel_args %} -{{input["name"] | to_opmaker_name_cstr}}{{", " if not loop.last}} +{%- if input["fluid_name"] in kernel_args %} +{{input["fluid_name"] | to_opmaker_name_cstr}}{{", " if not loop.last}} {%- endif %} {%- endfor %} } @@ -219,8 +219,8 @@ paddle::small_vector inputs { {% macro get_an_attr(attr, kernel_args) %}{# inline #} {% set typename = attr["typename"] %} -{%- if attr["name"] in kernel_args %} -{% set name = attr["name"] %} +{%- if attr["fluid_name"] in kernel_args %} +{% set name = attr["fluid_name"] %} {% if typename is scalar %}{# scalar correspond to a dispensable input and an attr in opmaker #} attrs.emplace_back(ctx.HasInput("{{attr | to_scalar_tensor_name}}") ? 
"{{attr | to_scalar_tensor_name}}" : "{{name}}"); {%- elif typename == "IntArray" %} @@ -251,7 +251,7 @@ attrs.emplace_back("{{name}}"); {% macro get_output_list(outputs, kernel_args) %}{# inline #} paddle::small_vector outputs { {%- for output in outputs %} -{{output["name"] | to_opmaker_name_cstr}}{{", " if not loop.last}} +{{output["fluid_name"] | to_opmaker_name_cstr}}{{", " if not loop.last}} {%- endfor %} } {%- endmacro %} @@ -263,7 +263,7 @@ phi::KernelKey GetExpectedKernelType( {%if kernel["data_type"] is not none %}{# data type ---------------------------------#} {% if kernel["data_type"]["candidates"] | length == 1 %} {% set data_type_arg = kernel["data_type"]["candidates"][0] %} - {% set inputs = op["inputs"] | map(attribute="name") | list %} + {% set inputs = op["inputs"] | map(attribute="fluid_name") | list %} {% if data_type_arg in inputs %} auto data_type = framework::OperatorWithKernel::IndicateVarDataType(ctx, {{data_type_arg | to_opmaker_name}}); {% if kernel["data_type"]["to_complex_flag"][0] %} @@ -353,9 +353,8 @@ DECLARE_NO_NEED_BUFFER_VARS_INFERER({{op["op_name"] | to_pascal_case}}NoNeedBuff {% endif %} {% endmacro%} -{% macro register_op_with_components(op, op_dict) %} +{% macro register_op_with_components(op) %} {% set name = op["op_name"] %} -{% set phi_name = op["name"] %} REGISTER_OPERATOR({{name}}, ops::{{name | to_pascal_case}}Op, {% if not "forward" in op %}{# it is a forward op #} ops::{{name | to_pascal_case}}OpMaker, @@ -371,8 +370,8 @@ REGISTER_OPERATOR({{name}}, ops::{{name | to_pascal_case}}Op, {% if op is supports_inplace %}{# inplace#} ops::{{name | to_pascal_case}}InplaceInferer, {% endif %} -{% if "phi_backward" in op and op["phi_backward"] is not none and "composite" in op_dict[op["phi_backward"]] %} - ops::{{op["phi_backward"] | to_composite_grad_opmaker_name}}, +{% if "backward_composite" in op and op["backward_composite"] is not none %} + ops::{{op["backward_composite"] | to_composite_grad_opmaker_name}}, {% endif %} {% if op is supports_no_need_buffer %}{# no_need_buffer #} ops::{{name | to_pascal_case}}NoNeedBufferVarInferer, @@ -425,12 +424,12 @@ REGISTER_OP_VERSION({{name}}) {# --------------------------------------- backward op maker ---------------------------------------------- #} {% macro backward_op_maker(op, forward_op ) %} {% set name = op["op_name"] %} - {% set forward_input_names = op["forward"]["inputs"] | map(attribute="name") | list %} - {% set forward_output_names = op["forward"]["outputs"] | map(attribute="name") | list %} - {% set forward_attr_names = op["forward"]["attrs"] | map(attribute="name") | list %} - {% set forward_input_orig_names = forward_op["inputs"] | map(attribute="name") | list %} - {% set forward_output_orig_names = forward_op["outputs"] | map(attribute="name") | list %} - {% set forward_attr_orig_names = forward_op["attrs"] | map(attribute="name") | list %} + {% set forward_input_names = op["forward"]["inputs"] | map(attribute="fluid_name") | list %} + {% set forward_output_names = op["forward"]["outputs"] | map(attribute="fluid_name") | list %} + {% set forward_attr_names = op["forward"]["attrs"] | map(attribute="fluid_name") | list %} + {% set forward_input_orig_names = forward_op["inputs"] | map(attribute="fluid_name") | list %} + {% set forward_output_orig_names = forward_op["outputs"] | map(attribute="fluid_name") | list %} + {% set forward_attr_orig_names = forward_op["attrs"] | map(attribute="fluid_name") | list %} template class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker { 
public: @@ -441,8 +440,8 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker grad_op->SetType("{{name}}"); {% for input in op["inputs"] %} - grad_op->SetInput({{input["name"] | to_opmaker_name}}, this->{{extract_input_from_forward( - input["name"], + grad_op->SetInput({{input["fluid_name"] | to_opmaker_name}}, this->{{extract_input_from_forward( + input["fluid_name"], forward_input_names, forward_output_names, forward_input_orig_names, @@ -450,8 +449,8 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker {% endfor %} {% for output in op["outputs"] %} - grad_op->SetOutput({{output["name"] | to_opmaker_name}}, this->{{extract_output_from_forward( - output["name"], + grad_op->SetOutput({{output["fluid_name"] | to_opmaker_name}}, this->{{extract_output_from_forward( + output["fluid_name"], forward_input_names, forward_output_names, forward_input_orig_names, @@ -461,7 +460,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker grad_op->SetAttrMap(this->Attrs()); {% for attr in op["attrs"] %} - {% set attr_name = attr["name"] %} + {% set attr_name = attr["fluid_name"] %} {% if attr_name in forward_attr_names %} {% if attr["typename"] == "IntArray" %} {% if 'tensor_name' in attr or 'manual_flag' not in attr %} @@ -489,12 +488,12 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker {% macro backward_op_reused_maker(bw_op, forward_op, invoke_op) %} {% set name = bw_op["op_name"] %} - {% set forward_input_names = bw_op["forward"]["inputs"] | map(attribute="name") | list %} - {% set forward_output_names = bw_op["forward"]["outputs"] | map(attribute="name") | list %} - {% set forward_attr_names = bw_op["forward"]["attrs"] | map(attribute="name") | list %} - {% set forward_input_orig_names = forward_op["inputs"] | map(attribute="name") | list %} - {% set forward_output_orig_names = forward_op["outputs"] | map(attribute="name") | list %} - {% set forward_attr_orig_names = forward_op["attrs"] | map(attribute="name") | list %} + {% set forward_input_names = bw_op["forward"]["inputs"] | map(attribute="fluid_name") | list %} + {% set forward_output_names = bw_op["forward"]["outputs"] | map(attribute="fluid_name") | list %} + {% set forward_attr_names = bw_op["forward"]["attrs"] | map(attribute="fluid_name") | list %} + {% set forward_input_orig_names = forward_op["inputs"] | map(attribute="fluid_name") | list %} + {% set forward_output_orig_names = forward_op["outputs"] | map(attribute="fluid_name") | list %} + {% set forward_attr_orig_names = forward_op["attrs"] | map(attribute="fluid_name") | list %} template class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker { public: @@ -505,7 +504,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker grad_op->SetType("{{invoke_op["func"]}}"); {% for input in invoke_op["inputs"] %} - grad_op->SetInput({{input["name"] | to_opmaker_name}}, this->{{extract_input_from_forward( + grad_op->SetInput({{input["fluid_name"] | to_opmaker_name}}, this->{{extract_input_from_forward( input["value"], forward_input_names, forward_output_names, @@ -514,7 +513,7 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker {% endfor %} {% for output in invoke_op["outputs"] %} - grad_op->SetOutput({{output["name"] | to_opmaker_name}}, this->{{extract_output_from_forward( + grad_op->SetOutput({{output["fluid_name"] | to_opmaker_name}}, this->{{extract_output_from_forward( output["value"], forward_input_names, 
forward_output_names, @@ -524,42 +523,49 @@ class {{name | to_pascal_case}}OpMaker : public framework::SingleGradOpMaker {% endfor %} {% for attr in invoke_op["attrs"] %} - grad_op->SetAttr("{{attr["name"]}}", {{attr["value"]}}); + grad_op->SetAttr("{{attr["fluid_name"]}}", {{attr["value"]}}); {% endfor %} } }; {% endmacro %} -{% macro composite_grad_op_maker(composite_op_dict) %} - {% set op_name = composite_op_dict["name"] %} -class {{op_name | to_composite_grad_opmaker_name}} : public prim::GradCompositeOpMakerBase { +{% macro composite_grad_op_maker(backward_op) %} + {% set op_name = backward_op["op_name"] %} + {% set inputs = backward_op["inputs"] | to_variable_names("name")%} + {% set input_dict = backward_op["input_dict"] %} + {% set fluid_inputs = backward_op["inputs"] | to_variable_names("fluid_name")%} + {% set forward_fluid_inputs = backward_op["forward"]["inputs"] | to_variable_names("fluid_name")%} + {% set forward_fluid_outputs = backward_op["forward"]["outputs"] | to_variable_names("fluid_name")%} + {% set attrs = backward_op["attrs"] | to_variable_names("name") %} + {% set fluid_attrs = backward_op["attrs"] | to_variable_names("fluid_name") %} + {% set attr_dict = backward_op["attr_dict"] %} + {% set outputs = backward_op["outputs"] | to_variable_names("name")%} + {% set output_dict = backward_op["output_dict"] %} + {% set fluid_outputs = backward_op["outputs"] | to_variable_names("fluid_name")%} + {% set composite_func_info = backward_op["composite"] %} +class {{op_name | to_composite_grad_opmaker_name}} : public prim::CompositeGradOpMakerBase { public: - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; void Apply() override { //get inputs -{{construct_composite_input(composite_op_dict)}} +{{construct_composite_input(inputs, fluid_inputs, forward_fluid_inputs, forward_fluid_outputs, input_dict)}} //get attr -{{construct_composite_attr(composite_op_dict)}} +{{construct_composite_attr(attrs, fluid_attrs, attr_dict)}} //get output -{{construct_composite_output(composite_op_dict)}} +{{construct_composite_output(outputs, fluid_outputs, output_dict)}} //get output ptr -{{construct_composite_output_ptr(composite_op_dict)}} +{{construct_composite_output_ptr(outputs, output_dict)}} //get output orginal name -{{get_composite_output_orginal_name(composite_op_dict)}} +{{get_composite_output_orginal_name(outputs, output_dict)}} //call composite backward func -{{call_composite_backward_api(composite_op_dict)}} +{{call_composite_backward_api(composite_func_info)}} //recover output name -{{recover_composite_output_name(composite_op_dict)}} +{{recover_composite_output_name(outputs)}} } }; {%- endmacro %} -{% macro construct_composite_input(composite_op_dict) %} - {% set inputs = composite_op_dict["composite"]["phi_inputs"] %} - {% set input_dict = composite_op_dict["input_dict"] %} - {% set fluid_inputs = composite_op_dict["composite"]["fluid_inputs"] %} - {% set forward_fluid_inputs = composite_op_dict["forward"]["inputs"] | map(attribute="name") | list %} - {% set forward_fluid_outputs = composite_op_dict["forward"]["outputs"] | map(attribute="name") | list %} +{% macro construct_composite_input(inputs, fluid_inputs, forward_fluid_inputs, forward_fluid_outputs, input_dict) %} {% set inputs_length = inputs | length %} {% for i in range(inputs_length) %} {% set input_typename = input_dict[inputs[i]]["typename"] %} @@ -567,83 +573,75 @@ class {{op_name | to_composite_grad_opmaker_name}} : public 
prim::GradCompositeO {% if fluid_inputs[i] in forward_fluid_inputs %} {% if input_typename == "Tensor" %} {% if input_optional_flag == True %} - paddle::optional {{inputs[i]}} = this->GetOptionalSingleForwardInput("{{fluid_inputs[i]}}"); - {% elif input_optional_flag == False %} - paddle::experimental::Tensor {{inputs[i]}} = this->GetSingleForwardInput("{{fluid_inputs[i]}}"); + auto {{inputs[i]}} = this->GetOptionalSingleForwardInput("{{fluid_inputs[i]}}"); + {% else %} + auto {{inputs[i]}} = this->GetSingleForwardInput("{{fluid_inputs[i]}}"); {% endif %} {% elif input_typename == "Tensor[]" %} {% if input_optional_flag == True %} - std::vector> {{inputs[i]}} = this->GetOptionalMultiForwardInput("{{fluid_inputs[i]}}"); - {% elif input_optional_flag == False %} - std::vector {{inputs[i]}} = this->GetMultiForwardInput("{{fluid_inputs[i]}}"); + auto {{inputs[i]}} = this->GetOptionalMultiForwardInput("{{fluid_inputs[i]}}"); + {% else %} + auto {{inputs[i]}} = this->GetMultiForwardInput("{{fluid_inputs[i]}}"); {% endif %} {% endif %} {% elif fluid_inputs[i] in forward_fluid_outputs %} {% if input_typename == "Tensor" %} {% if input_optional_flag == True %} - paddle::optional {{inputs[i]}} = this->GetOptionalSingleForwardOutput("{{fluid_inputs[i]}}"); - {% elif input_optional_flag == False %} - paddle::experimental::Tensor {{inputs[i]}} = this->GetSingleForwardOutput("{{fluid_inputs[i]}}"); + auto {{inputs[i]}} = this->GetOptionalSingleForwardOutput("{{fluid_inputs[i]}}"); + {% else %} + auto {{inputs[i]}} = this->GetSingleForwardOutput("{{fluid_inputs[i]}}"); {% endif %} {% elif input_typename == "Tensor[]" %} {% if input_optional_flag == True %} - std::vector> {{inputs[i]}} = this->GetOptionalMultiForwardOutput("{{fluid_inputs[i]}}"); - {% elif input_optional_flag == False %} - std::vector {{inputs[i]}} = this->GetMultiForwardOutput("{{fluid_inputs[i]}}"); + auto {{inputs[i]}} = this->GetOptionalMultiForwardOutput("{{fluid_inputs[i]}}"); + {% else %} + auto {{inputs[i]}} = this->GetMultiForwardOutput("{{fluid_inputs[i]}}"); {% endif %} {% endif %} {% elif fluid_inputs[i][:-5] in forward_fluid_outputs %} {% if input_typename == "Tensor" %} {% if input_optional_flag == True %} - paddle::optional {{inputs[i]}} = this->GetOptionalSingleOutputGrad("{{fluid_inputs[i][:-5]}}"); - {% elif input_optional_flag == False %} - paddle::experimental::Tensor {{inputs[i]}} = this->GetSingleOutputGrad("{{fluid_inputs[i][:-5]}}"); + auto {{inputs[i]}} = this->GetOptionalSingleOutputGrad("{{fluid_inputs[i][:-5]}}"); + {% else %} + auto {{inputs[i]}} = this->GetSingleOutputGrad("{{fluid_inputs[i][:-5]}}"); {% endif %} {% elif input_typename == "Tensor[]" %} {% if input_optional_flag == True %} - std::vector> {{inputs[i]}} = this->GetOptionalMultiOutputGrad("{{fluid_inputs[i][:-5]}}"); - {% elif input_optional_flag == False %} - std::vector {{inputs[i]}} = this->GetMultiOutputGrad("{{fluid_inputs[i][:-5]}}"); + auto {{inputs[i]}} = this->GetOptionalMultiOutputGrad("{{fluid_inputs[i][:-5]}}"); + {% else %} + auto {{inputs[i]}} = this->GetMultiOutputGrad("{{fluid_inputs[i][:-5]}}"); {%- endif %} {%- endif %} {%- endif %} {%- endfor %} {%- endmacro %} -{% macro construct_composite_attr(composite_op_dict) %} - {% set attrs = composite_op_dict["composite"]["phi_attrs"] %} - {% set fluid_attrs = composite_op_dict["composite"]["fluid_attrs"] %} - {% set fluid_attrs_dict = composite_op_dict["attr_dict"] %} +{% macro construct_composite_attr(attrs, fluid_attrs, attr_dict) %} {% set attrs_length = attrs | length %} {% for i 
in range(attrs_length) %} - {% set attrs_data_type = fluid_attrs_dict[fluid_attrs[i]]["typename"] | to_op_attr_type %} - {{attrs_data_type}} {{attrs[i]}} = this->Attr<{{attrs_data_type}}>("{{fluid_attrs[i]}}"); + {% set attrs_data_type = attr_dict[attrs[i]]["typename"] | to_op_attr_type %} + const {{attrs_data_type}} {{attrs[i]}} = this->Attr<{{attrs_data_type}}>("{{fluid_attrs[i]}}"); {% endfor %} {%- endmacro %} -{% macro construct_composite_output(composite_op_dict) %} - {% set outputs = composite_op_dict["composite"]["phi_outputs"] %} - {% set fluid_outputs = composite_op_dict["composite"]["fluid_outputs"] %} - {% set outputs_dict = composite_op_dict["output_dict"] %} +{% macro construct_composite_output(outputs, fluid_outputs, output_dict) %} {% set outputs_length = outputs | length %} {% for i in range(outputs_length) %} - {% set output_typename = outputs_dict[outputs[i]]["typename"] %} + {% set output_typename = output_dict[outputs[i]]["typename"] %} {% if output_typename == "Tensor" %} - paddle::experimental::Tensor {{outputs[i] + "_t"}} = this->GetSingleInputGrad("{{fluid_outputs[i][:-5]}}"); + auto {{outputs[i] + "_t"}} = this->GetSingleInputGrad("{{fluid_outputs[i][:-5]}}"); {% elif output_typename == "Tensor[]" %} - std::vector {{outputs[i] + "_t"}} = this->GetMultiInputGrad("{{fluid_outputs[i][:-5]}}"); + auto {{outputs[i] + "_t"}} = this->GetMultiInputGrad("{{fluid_outputs[i][:-5]}}"); {%- endif %} {%- endfor %} {%- endmacro %} -{% macro construct_composite_output_ptr(composite_op_dict) %} - {% set outputs = composite_op_dict["composite"]["phi_outputs"] %} - {% set outputs_dict = composite_op_dict["output_dict"] %} +{% macro construct_composite_output_ptr(outputs, output_dict) %} {% set outputs_length = outputs | length %} {% for i in range(outputs_length) %} - {% set output_typename = outputs_dict[outputs[i]]["typename"] %} + {% set output_typename = output_dict[outputs[i]]["typename"] %} {% if output_typename == "Tensor" %} - paddle::experimental::Tensor* {{outputs[i]}} = this->GetOutputPtr(&{{outputs[i]+ "_t"}}); + auto {{outputs[i]}} = this->GetOutputPtr(&{{outputs[i]+ "_t"}}); {% elif output_typename == "Tensor[]" %} std::vector {{outputs[i]}}({{outputs[i] + "_t"}}.size()); for(size_t i = 0; i < {{outputs[i]}}.size(); ++i){ @@ -654,27 +652,24 @@ class {{op_name | to_composite_grad_opmaker_name}} : public prim::GradCompositeO {%- endfor %} {%- endmacro %} -{% macro get_composite_output_orginal_name(composite_op_dict) %} - {% set outputs = composite_op_dict["composite"]["phi_outputs"] %} - {% set outputs_dict = composite_op_dict["output_dict"] %} +{% macro get_composite_output_orginal_name(outputs, output_dict) %} {% set outputs_length = outputs | length %} {% for i in range(outputs_length) %} - {% set output_typename = outputs_dict[outputs[i]]["typename"] %} + {% set output_typename = output_dict[outputs[i]]["typename"] %} {% if output_typename == "Tensor" %} - std::string {{outputs[i] + "_name"}} = this->GetOutputName({{outputs[i] + "_t"}}); + auto {{outputs[i] + "_name"}} = this->GetOutputName({{outputs[i] + "_t"}}); {% elif output_typename == "Tensor[]" %} - std::vector {{outputs[i] + "_name"}} = this->GetOutputName({{outputs[i] + "_t"}}); + auto {{outputs[i] + "_name"}} = this->GetOutputName({{outputs[i] + "_t"}}); {%- endif %} {%- endfor %} {%- endmacro %} -{% macro call_composite_backward_api(composite_op_dict) %} - VLOG(3) << "Runing {{composite_op_dict["composite"]["func_name"]}} composite func"; - 
prim::{{composite_op_dict["composite"]["func_name"]}}({{composite_op_dict["composite"]["func_args"]}}); +{% macro call_composite_backward_api(composite_func_info) %} + VLOG(3) << "Runing {{composite_func_info["func_name"]}} composite func"; + prim::{{composite_func_info["func_name"]}}({{composite_func_info["func_args"]}}); {%- endmacro %} -{% macro recover_composite_output_name(composite_op_dict) %} - {% set outputs = composite_op_dict["composite"]["phi_outputs"] %} +{% macro recover_composite_output_name(outputs) %} {% set outputs_length = outputs | length %} {% for i in range(outputs_length) %} this->RecoverOutputName({{outputs[i] + "_t"}}, {{outputs[i] + "_name"}}); diff --git a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc index 2b337887faa3f..25e6ad9b65cc0 100644 --- a/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc +++ b/paddle/fluid/operators/reduce_ops/reduce_sum_op.cc @@ -64,9 +64,9 @@ class ReduceSumOpGradMaker : public framework::SingleGradOpMaker { } }; -class ReduceSumCompositeGradOpMaker : public prim::GradCompositeOpMakerBase { +class ReduceSumCompositeGradOpMaker : public prim::CompositeGradOpMakerBase { public: - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; void Apply() override { // get inputs paddle::experimental::Tensor x = this->GetSingleForwardInput("X"); diff --git a/paddle/fluid/prim/tests/test_static_prim.cc b/paddle/fluid/prim/tests/test_static_prim.cc index fe7a6ca404044..313a3ccc99b74 100644 --- a/paddle/fluid/prim/tests/test_static_prim.cc +++ b/paddle/fluid/prim/tests/test_static_prim.cc @@ -135,9 +135,9 @@ struct TestBaseProgram { int idx_{0}; }; -class TestGradCompositeGradMaker : public GradCompositeOpMakerBase { +class TestCompositeGradMaker : public CompositeGradOpMakerBase { public: - using prim::GradCompositeOpMakerBase::GradCompositeOpMakerBase; + using prim::CompositeGradOpMakerBase::CompositeGradOpMakerBase; void Apply() override {} }; @@ -177,7 +177,7 @@ TEST(StaticPrim, TanhBackwardComposite) { std::vector> grad_ops = std::move(framework::OpInfoMap::Instance() .Get(forward_opdesc->Type()) - .GradCompOpMaker()(*forward_opdesc, + .CompGradOpMaker()(*forward_opdesc, std::unordered_set(), &grad_to_var, target_block, @@ -250,11 +250,11 @@ TEST(StaticCompositeGradMaker, TestMutiInputMethod) { auto* forward_opdesc = target_block->AllOps()[0]; std::unordered_map grad_to_var; std::vector grad_sub_block; - auto test = TestGradCompositeGradMaker(*forward_opdesc, - std::unordered_set(), - &grad_to_var, - target_block, - grad_sub_block); + auto test = TestCompositeGradMaker(*forward_opdesc, + std::unordered_set(), + &grad_to_var, + target_block, + grad_sub_block); test(); std::vector muti_fw_input = test.GetMultiForwardInput("X"); @@ -312,11 +312,11 @@ TEST(StaticCompositeGradMaker, TestMutiOutputMethod) { auto* forward_opdesc = target_block->AllOps()[0]; std::unordered_map grad_to_var; std::vector grad_sub_block; - auto test = TestGradCompositeGradMaker(*forward_opdesc, - std::unordered_set(), - &grad_to_var, - target_block, - grad_sub_block); + auto test = TestCompositeGradMaker(*forward_opdesc, + std::unordered_set(), + &grad_to_var, + target_block, + grad_sub_block); test(); paddle::experimental::Tensor fw_input = test.GetSingleForwardInput("X"); paddle::optional opt_fw_input = diff --git a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h index 
c2e7ca4ec57e2..e391d8ac5300b 100644 --- a/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h +++ b/paddle/fluid/prim/utils/static/composite_grad_desc_maker.h @@ -41,9 +41,9 @@ namespace prim { argument DropEmptyIG in the derived classes. */ -class GradCompositeOpMakerBase { +class CompositeGradOpMakerBase { public: - explicit GradCompositeOpMakerBase( + explicit CompositeGradOpMakerBase( const framework::OpDesc& fwd_op, const std::unordered_set& no_grad_set, std::unordered_map* grad_to_var, @@ -61,7 +61,7 @@ class GradCompositeOpMakerBase { acting_program_.MutableBlock(0)); } - virtual ~GradCompositeOpMakerBase() = default; + virtual ~CompositeGradOpMakerBase() = default; virtual std::vector> operator()() { this->Apply(); diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 36e2436406812..5569657707389 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -1251,7 +1251,7 @@ All parameter, weight, gradient are variables in Paddle. auto op_info = framework::OpInfoMap::Instance().Get(op_desc.Type()); auto grad_op_maker = op_info.GradOpMaker(); - auto grad_comp_op_maker = op_info.GradCompOpMaker(); + auto grad_comp_op_maker = op_info.CompGradOpMaker(); if ((grad_op_maker == nullptr) && (grad_comp_op_maker == nullptr)) { // Normally, proto_ should not be null, except some special @@ -1259,7 +1259,7 @@ All parameter, weight, gradient are variables in Paddle. std::string type = op_info.proto_ ? op_info.proto_->type() : "unknown"; PADDLE_THROW(platform::errors::NotFound( - "Neither operator %s's GradOpMaker nor GradCompOpMaker has " + "Neither operator %s's GradOpMaker nor CompGradOpMaker has " "been registered.\nPlease check whether (%s) operator has " "gradient operator.\nIf not, please set stop_gradient to be " "True for its input and output variables using " @@ -1268,10 +1268,10 @@ All parameter, weight, gradient are variables in Paddle. type.c_str())); } - // In PrimEnabled mode, the priority of GradCompOpMaker is greater + // In PrimEnabled mode, the priority of CompGradOpMaker is greater // than GradCompMaker as we need split first-order grad operator into // primitive operators for compiler. In PrimDisabled mode, the - // priority of GradCompOpMaker is less than GradCompMaker for better + // priority of CompGradOpMaker is less than GradCompMaker for better // performance. 
std::vector> grad_op_descs; if (paddle::prim::PrimCommonUtils::IsBwdPrimEnabled()) { From c18fddd348391b3008807d68061ed7e92bd4b87a Mon Sep 17 00:00:00 2001 From: niuliling123 <51102941+niuliling123@users.noreply.github.com> Date: Tue, 31 Jan 2023 16:49:03 +0800 Subject: [PATCH 42/89] Save nan log to file when output_dir is setted (#49200) --- .../framework/details/nan_inf_utils_detail.cc | 123 ++---------- .../framework/details/nan_inf_utils_detail.cu | 34 +++- .../framework/details/nan_inf_utils_detail.h | 187 +++++++++++++++++- paddle/fluid/pybind/pybind.cc | 4 + .../fluid/tests/unittests/test_nan_inf_dir.py | 108 ++++++++++ 5 files changed, 344 insertions(+), 112 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_nan_inf_dir.py diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cc b/paddle/fluid/framework/details/nan_inf_utils_detail.cc index 30046b2d1d44e..dd7791af85447 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cc +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cc @@ -30,6 +30,23 @@ DECLARE_int32(check_nan_inf_level); namespace paddle { namespace framework { namespace details { +struct DebugTools { + DebugTools() {} + std::string path = ""; +}; +static DebugTools debug_nan_inf; + +void SetNanInfDebugPath(const std::string& nan_inf_path) { + debug_nan_inf.path = nan_inf_path; + VLOG(4) << "Set the log's path of debug tools : " << nan_inf_path; +} + +std::string GetNanPath() { + if (debug_nan_inf.path.empty()) { + return ""; + } + return debug_nan_inf.path + "/"; +} static std::once_flag white_list_init_flag; @@ -134,112 +151,6 @@ static void InitWhiteListFormEnv() { } } -template < - typename T, - std::enable_if_t>::value && - !std::is_same>::value, - bool> = true> -static void CheckNanInfCpuImpl(const T* value_ptr, - const int64_t numel, - const std::string& cpu_hint_str) { - using MT = typename phi::dtype::template MPTypeTrait::Type; - -#ifdef _OPENMP - // Use maximum 4 threads to collect the nan and inf information. - int num_threads = std::max(omp_get_num_threads(), 1); - num_threads = std::min(num_threads, 4); -#else - int num_threads = 1; -#endif - - std::vector thread_num_nan(num_threads, 0); - std::vector thread_num_inf(num_threads, 0); - std::vector thread_min_value(num_threads, static_cast(value_ptr[0])); - std::vector thread_max_value(num_threads, static_cast(value_ptr[0])); - std::vector thread_mean_value(num_threads, static_cast(0)); - -#ifdef _OPENMP -#pragma omp parallel num_threads(num_threads) -#endif - { -#ifdef _OPENMP - int64_t tid = omp_get_thread_num(); - int64_t chunk_size = (numel + num_threads - 1) / num_threads; - int64_t begin = tid * chunk_size; - int64_t end = chunk_size + begin > numel ? 
numel : chunk_size + begin; -#else - int64_t tid = 0; - int64_t begin = 0; - int64_t end = numel; -#endif - for (int64_t i = begin; i < end; ++i) { - MT value = static_cast(value_ptr[i]); - - thread_min_value[tid] = std::min(thread_min_value[tid], value); - thread_max_value[tid] = std::max(thread_max_value[tid], value); - thread_mean_value[tid] += value / static_cast(numel); - - if (std::isnan(value)) { - thread_num_nan[tid] += 1; - } else if (std::isinf(value)) { - thread_num_inf[tid] += 1; - } - } - } - - int64_t num_nan = 0; - int64_t num_inf = 0; - MT min_value = thread_min_value[0]; - MT max_value = thread_max_value[0]; - MT mean_value = static_cast(0); - for (int i = 0; i < num_threads; ++i) { - num_nan += thread_num_nan[i]; - num_inf += thread_num_inf[i]; - min_value = std::min(thread_min_value[i], min_value); - max_value = std::max(thread_max_value[i], max_value); - mean_value += thread_mean_value[i]; - } - - PrintForDifferentLevel(cpu_hint_str.c_str(), - numel, - num_nan, - num_inf, - max_value, - min_value, - mean_value, - FLAGS_check_nan_inf_level); -} - -template < - typename T, - std::enable_if_t>::value || - std::is_same>::value, - bool> = true> -void CheckNanInfCpuImpl(const T* value_ptr, - const int64_t numel, - const std::string& cpu_hint_str) { - using RealType = typename T::value_type; - - RealType real_sum = 0.0f, imag_sum = 0.0f; - -#ifdef _OPENMP -#pragma omp parallel for reduction(+ : real_sum) reduction(+ : imag_sum) -#endif - for (int64_t i = 0; i < numel; ++i) { - T value = value_ptr[i]; - real_sum += (value.real - value.real); - imag_sum += (value.imag - value.imag); - } - - if (std::isnan(real_sum) || std::isinf(real_sum) || std::isnan(imag_sum) || - std::isinf(imag_sum)) { - // hot fix for compile failed in gcc4.8 - // here also need print detail info of nan or inf later - PADDLE_THROW(platform::errors::PreconditionNotMet( - "There are NAN or INF in %s.", cpu_hint_str)); - } -} - template <> template void TensorCheckerVisitor::apply( diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.cu b/paddle/fluid/framework/details/nan_inf_utils_detail.cu index 8754a33b663db..514e5aa3e5d56 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.cu +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.cu @@ -322,18 +322,26 @@ __global__ void FindGlobalMaxMinAndPrint(const int64_t* block_num_nan_ptr, } template -static char* GetGpuHintStringPtr(const phi::GPUContext& ctx, - const std::string& op_type, +inline std::string GetHintString(const std::string& op_type, const std::string& var_name, - int dev_id) { + const phi::Place& place, + int dev_id = -1) { + std::string op_var = GetCpuHintString(op_type, var_name, place, dev_id); PADDLE_ENFORCE_EQ( (dev_id >= 0 && dev_id < multi_op_var2gpu_str_mutex().size()), true, platform::errors::OutOfRange("GPU dev_id must >=0 and < dev_count=%d", multi_op_var2gpu_str_mutex().size())); + return op_var; +} +template +static char* GetGpuHintStringPtr(const phi::GPUContext& ctx, + const std::string& op_type, + const std::string& var_name, + int dev_id) { std::string op_var = - GetCpuHintString(op_type, var_name, ctx.GetPlace(), dev_id); + GetHintString(op_type, var_name, ctx.GetPlace(), dev_id); char* gpu_str_ptr = nullptr; { @@ -396,6 +404,24 @@ void TensorCheckerVisitor::apply( auto* dev_ctx = reinterpret_cast( platform::DeviceContextPool::Instance().Get(tensor.place())); int dev_id = tensor.place().device; + // Write log to file + auto file_path = GetNanPath(); + if (file_path.size() > 0) { + phi::DenseTensor 
cpu_tensor; + platform::CPUPlace cpu_place; + cpu_tensor.Resize(tensor.dims()); + // 1. copy from gpu to cpu + paddle::framework::TensorCopySync(tensor, cpu_place, &cpu_tensor); + auto* dev_ctx = reinterpret_cast( + platform::DeviceContextPool::Instance().Get(tensor.place())); + const std::string debug_info = + GetHintString(op_type, var_name, place, dev_id); + // 2. write log to file + CheckNanInfCpuImpl(cpu_tensor.data(), tensor.numel(), debug_info, "gpu"); + return; + } + + // Write log to window char* gpu_str_ptr = GetGpuHintStringPtr(*dev_ctx, op_type, var_name, dev_id); diff --git a/paddle/fluid/framework/details/nan_inf_utils_detail.h b/paddle/fluid/framework/details/nan_inf_utils_detail.h index 0adf23fd02921..fee2a52b428b2 100644 --- a/paddle/fluid/framework/details/nan_inf_utils_detail.h +++ b/paddle/fluid/framework/details/nan_inf_utils_detail.h @@ -13,17 +13,33 @@ // limitations under the License. #pragma once - +#include +#include #include - #include "paddle/fluid/framework/tensor.h" #include "paddle/fluid/platform/complex.h" #include "paddle/fluid/platform/place.h" +#include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/kernels/funcs/eigen/extensions.h" + +#ifdef _WIN32 +#include +#include +#define MKDIR(path) _mkdir(path) +#else +#include +#define MKDIR(path) mkdir(path, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH) +#endif +DECLARE_int32(check_nan_inf_level); namespace paddle { namespace framework { namespace details { +void SetNanInfDebugPath(const std::string& nan_inf_path); + +std::string GetNanPath(); + template ::value, bool> = true> @@ -93,6 +109,49 @@ HOSTDEVICE void PrintForDifferentLevel(const char* debug_info, } } +template +void PrintForDifferentLevelFile(const char* debug_info, + int64_t numel, + int64_t num_nan, + int64_t num_inf, + MT max_value, + MT min_value, + MT mean_value, + int check_nan_inf_level, + const std::string& log_name) { + int dev_id = 0; +#ifdef PADDLE_WITH_HIP + hipGetDevice(&dev_id); +#elif PADDLE_WITH_CUDA + cudaGetDevice(&dev_id); +#endif + auto file_path = GetNanPath(); + MKDIR(file_path.c_str()); + std::string file_name = "worker_" + log_name + "." 
+ std::to_string(dev_id); + std::string path = file_path + file_name; + std::ofstream outfile(path, std::ios::app); + if (!outfile.is_open()) { + return; + } + + if (num_nan > 0 || num_inf > 0) { + outfile << "[PRECISION] [ERROR] in " << debug_info + << ", numel=" << static_cast(numel) // NOLINT + << ", num_nan=" << static_cast(num_nan) // NOLINT + << ", num_inf=" << static_cast(num_inf) // NOLINT + << ", max=" << static_cast(max_value) + << ", min=" << static_cast(min_value) + << ", mean=" << static_cast(mean_value) << std::endl; + } else if (NeedPrint(max_value, min_value, check_nan_inf_level)) { + outfile << "[PRECISION] in " << debug_info + << ", numel=" << static_cast(numel) // NOLINT + << ", max=" << static_cast(max_value) + << ", min=" << static_cast(min_value) + << ", mean=" << static_cast(mean_value) << std::endl; + } + outfile.close(); +} + template inline std::string GetCpuHintString(const std::string& op_type, const std::string& var_name, @@ -120,6 +179,130 @@ inline std::string GetCpuHintString(const std::string& op_type, return ss.str(); } +template < + typename T, + std::enable_if_t>::value && + !std::is_same>::value, + bool> = true> +static void CheckNanInfCpuImpl(const T* value_ptr, + const int64_t numel, + const std::string& cpu_hint_str, + const std::string log_name = "cpu") { + using MT = typename phi::dtype::template MPTypeTrait::Type; + +#ifdef _OPENMP + // Use maximum 4 threads to collect the nan and inf information. + int num_threads = std::max(omp_get_num_threads(), 1); + num_threads = std::min(num_threads, 4); +#else + int num_threads = 1; +#endif + + std::vector thread_num_nan(num_threads, 0); + std::vector thread_num_inf(num_threads, 0); + std::vector thread_min_value(num_threads, static_cast(value_ptr[0])); + std::vector thread_max_value(num_threads, static_cast(value_ptr[0])); + std::vector thread_mean_value(num_threads, static_cast(0)); + +#ifdef _OPENMP +#pragma omp parallel num_threads(num_threads) +#endif + { +#ifdef _OPENMP + int64_t tid = omp_get_thread_num(); + int64_t chunk_size = (numel + num_threads - 1) / num_threads; + int64_t begin = tid * chunk_size; + int64_t end = chunk_size + begin > numel ? numel : chunk_size + begin; +#else + int64_t tid = 0; + int64_t begin = 0; + int64_t end = numel; +#endif + for (int64_t i = begin; i < end; ++i) { + MT value = static_cast(value_ptr[i]); + + thread_min_value[tid] = std::min(thread_min_value[tid], value); + thread_max_value[tid] = std::max(thread_max_value[tid], value); + thread_mean_value[tid] += value / static_cast(numel); + + if (std::isnan(value)) { + thread_num_nan[tid] += 1; + } else if (std::isinf(value)) { + thread_num_inf[tid] += 1; + } + } + } + + int64_t num_nan = 0; + int64_t num_inf = 0; + MT min_value = thread_min_value[0]; + MT max_value = thread_max_value[0]; + MT mean_value = static_cast(0); + for (int i = 0; i < num_threads; ++i) { + num_nan += thread_num_nan[i]; + num_inf += thread_num_inf[i]; + min_value = std::min(thread_min_value[i], min_value); + max_value = std::max(thread_max_value[i], max_value); + mean_value += thread_mean_value[i]; + } + auto file_path = GetNanPath(); + // Write log to file + if (file_path.size() > 0) { + VLOG(4) << "[FLAGS_check_nan_inf_level=" << FLAGS_check_nan_inf_level + << "]. 
Write log to " << file_path; + PrintForDifferentLevelFile(cpu_hint_str.c_str(), + numel, + num_nan, + num_inf, + max_value, + min_value, + mean_value, + FLAGS_check_nan_inf_level, + log_name); + return; + } + + PrintForDifferentLevel(cpu_hint_str.c_str(), + numel, + num_nan, + num_inf, + max_value, + min_value, + mean_value, + FLAGS_check_nan_inf_level); +} + +template < + typename T, + std::enable_if_t>::value || + std::is_same>::value, + bool> = true> +void CheckNanInfCpuImpl(const T* value_ptr, + const int64_t numel, + const std::string& cpu_hint_str, + const std::string log_name = "cpu") { + using RealType = typename T::value_type; + + RealType real_sum = 0.0f, imag_sum = 0.0f; + +#ifdef _OPENMP +#pragma omp parallel for reduction(+ : real_sum) reduction(+ : imag_sum) +#endif + for (int64_t i = 0; i < numel; ++i) { + T value = value_ptr[i]; + real_sum += (value.real - value.real); + imag_sum += (value.imag - value.imag); + } + + if (std::isnan(real_sum) || std::isinf(real_sum) || std::isnan(imag_sum) || + std::isinf(imag_sum)) { + // hot fix for compile failed in gcc4.8 + // here also need print detail info of nan or inf later + PADDLE_THROW(platform::errors::PreconditionNotMet( + "There are NAN or INF in %s.", cpu_hint_str)); + } +} + template struct TensorCheckerVisitor { TensorCheckerVisitor(const std::string& o, diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index 5569657707389..020a926b4739e 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -34,6 +34,7 @@ limitations under the License. */ #include "paddle/fluid/framework/custom_operator.h" #include "paddle/fluid/framework/data_layout.h" #include "paddle/fluid/framework/data_type_transform.h" +#include "paddle/fluid/framework/details/nan_inf_utils_detail.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor_cache.h" #include "paddle/fluid/framework/executor_gc_helper.h" @@ -2671,6 +2672,9 @@ All parameter, weight, gradient are variables in Paddle. m.def("use_layout_autotune", [] { return egr::Controller::Instance().UseLayoutAutoTune(); }); + // Add the api for nan op debug + m.def("set_nan_inf_debug_path", + &paddle::framework::details::SetNanInfDebugPath); BindFleetWrapper(&m); BindIO(&m); diff --git a/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py b/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py new file mode 100644 index 0000000000000..49882d192f9f6 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_nan_inf_dir.py @@ -0,0 +1,108 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
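Putting the new set_nan_inf_debug_path binding together with the existing check_nan_inf flags gives the following minimal usage sketch (distilled from the test that follows; PrintForDifferentLevelFile above names the log files worker_<log_name>.<dev_id>, e.g. worker_cpu.0):

import numpy as np
import paddle

# Route nan/inf reports into files under this directory instead of the console.
paddle.fluid.core.set_nan_inf_debug_path("nan_inf_log_dir")
paddle.set_flags({"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3})

paddle.device.set_device("cpu")
x = paddle.to_tensor(np.array([2.0, 0.0, -1.0], dtype="float32"))
out = paddle.log(x)  # log(0) -> -inf, log(-1) -> nan; reported under nan_inf_log_dir/worker_cpu.*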
+ +import os +import sys +import unittest + +import numpy as np + +import paddle + + +class TestNanInfDirCheckResult(unittest.TestCase): + def generate_inputs(self, shape, dtype="float32"): + data = np.random.random(size=shape).astype(dtype) + # [-10, 10) + x = (data * 20 - 10) * np.random.randint( + low=0, high=2, size=shape + ).astype(dtype) + y = np.random.randint(low=0, high=2, size=shape).astype(dtype) + return x, y + + def get_reference_num_nan_inf(self, x): + out = np.log(x) + num_nan = np.sum(np.isnan(out)) + num_inf = np.sum(np.isinf(out)) + print("[reference] num_nan={}, num_inf={}".format(num_nan, num_inf)) + return num_nan, num_inf + + def get_num_nan_inf( + self, x_np, use_cuda=True, add_assert=False, pt="nan_inf_log_dir" + ): + num_nan = 0 + num_inf = 0 + if add_assert: + if use_cuda: + paddle.device.set_device("gpu:0") + else: + paddle.device.set_device("cpu") + x = paddle.to_tensor(x_np) + out = paddle.log(x) + sys.stdout.flush() + if not use_cuda: + os.path.exists(pt) + num_nan = 0 + num_inf = 0 + for root, dirs, files in os.walk(pt): + for file_name in files: + if file_name.startswith('worker_cpu'): + file_path = os.path.join(root, file_name) + with open(file_path, "rb") as fp: + for e in fp: + err_str_list = ( + str(e) + .replace("(", " ") + .replace(")", " ") + .replace(",", " ") + .split(" ") + ) + for err_str in err_str_list: + if "num_nan" in err_str: + num_nan = int(err_str.split("=")[1]) + elif "num_inf" in err_str: + num_inf = int(err_str.split("=")[1]) + print( + "[paddle] num_nan={}, num_inf={}".format(num_nan, num_inf) + ) + return num_nan, num_inf + + def test_num_nan_inf(self): + path = "nan_inf_log_dir" + paddle.fluid.core.set_nan_inf_debug_path(path) + + def _check_num_nan_inf(use_cuda): + shape = [32, 32] + x_np, _ = self.generate_inputs(shape) + num_nan_np, num_inf_np = self.get_reference_num_nan_inf(x_np) + add_assert = (num_nan_np + num_inf_np) > 0 + num_nan, num_inf = self.get_num_nan_inf( + x_np, use_cuda, add_assert, path + ) + if not use_cuda: + assert num_nan == num_nan_np and num_inf == num_inf_np + + paddle.set_flags( + {"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 3} + ) + _check_num_nan_inf(use_cuda=False) + if paddle.fluid.core.is_compiled_with_cuda(): + _check_num_nan_inf(use_cuda=True) + x = paddle.to_tensor([2, 3, 4], 'float32') + y = paddle.to_tensor([1, 5, 2], 'float32') + z = paddle.add(x, y) + + +if __name__ == '__main__': + unittest.main() From c3cd8502bcd50a40834622b21ec3d1fb9549798e Mon Sep 17 00:00:00 2001 From: Zhang Jun Date: Tue, 31 Jan 2023 17:03:16 +0800 Subject: [PATCH 43/89] [pass] Upgrade Constant Folding Pass (#49908) --- paddle/fluid/framework/ir/constant_folding_pass.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/paddle/fluid/framework/ir/constant_folding_pass.cc b/paddle/fluid/framework/ir/constant_folding_pass.cc index cd069e474e648..0bcd7a733dde7 100644 --- a/paddle/fluid/framework/ir/constant_folding_pass.cc +++ b/paddle/fluid/framework/ir/constant_folding_pass.cc @@ -143,6 +143,10 @@ void ConstantFoldingPass::ApplyImpl(ir::Graph *graph) const { } out_desc->SetShape(out_shape); out_desc->SetPersistable(true); + auto *var_desc_out = op_node->Op()->Block()->Var(out_name); + var_desc_out->SetShape(out_shape); + var_desc_out->SetPersistable(true); + var_desc_out->Flush(); auto *global_out_tensor = scope->Var(out_name)->GetMutable(); *global_out_tensor = *local_out_tensor; From 0d9185b99151d851b9d597dd860e47b28d59cfc1 Mon Sep 17 00:00:00 2001 From: wenbin Date: Tue, 31 Jan 2023 17:21:08 +0800 Subject: 
[PATCH 44/89] Unary (#49914) * disable integer * disable integer * add cast layer --- .../inference/tensorrt/convert/unary_op.cc | 26 ++++++++++-- .../ir/inference/test_trt_convert_unary.py | 41 +++++++++++++++++++ 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/unary_op.cc b/paddle/fluid/inference/tensorrt/convert/unary_op.cc index 9279e25a1836c..3186bf5fd33d0 100644 --- a/paddle/fluid/inference/tensorrt/convert/unary_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/unary_op.cc @@ -52,13 +52,33 @@ class UnaryOpConverter : public OpConverter { nvinfer1::ITensor* input_tensor = engine_->GetITensor(op_desc.Input("X")[0]); auto op_pair = ops.find(op_type_); - - nvinfer1::IUnaryLayer* layer = nullptr; + nvinfer1::ILayer* layer; +#if !IS_TRT_VERSION_GE(8500) + nvinfer1::DataType org_type = input_tensor->getType(); + bool cast = org_type == nvinfer1::DataType::kINT8 || + org_type == nvinfer1::DataType::kINT32; + if (cast) { + layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input_tensor); + if (engine_->precision() == AnalysisConfig::Precision::kFloat32) { + layer->setOutputType(0, nvinfer1::DataType::kFLOAT); + } else { + layer->setOutputType(0, nvinfer1::DataType::kHALF); + } + input_tensor = layer->getOutput(0); + } +#endif for (auto trt_op : op_pair->second) { layer = TRT_ENGINE_ADD_LAYER(engine_, Unary, *input_tensor, trt_op); input_tensor = layer->getOutput(0); } - +#if !IS_TRT_VERSION_GE(8500) + // type restore + if (cast) { + layer = TRT_ENGINE_ADD_LAYER(engine_, Identity, *input_tensor); + layer->setOutputType(0, org_type); + input_tensor = layer->getOutput(0); + } +#endif auto output_name = op_desc.Output("Out")[0]; RreplenishLayerAndOutput(layer, op_type_, {output_name}, test_mode); } diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py index 673ab597659fe..97e83e79714a5 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_unary.py @@ -42,6 +42,14 @@ def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): else: return np.random.random([batch, 3, 32, 32]).astype(np.float32) + def generate_int_input(dims, batch, attrs: List[Dict[str, Any]]): + if dims == 2: + return np.random.random([3, 32]).astype(np.int32) + elif dims == 3: + return np.random.random([3, 32, 32]).astype(np.int32) + else: + return np.random.random([batch, 3, 32, 32]).astype(np.int32) + for dims in [2, 3, 4]: for batch in [1, 4]: for op_type in [ @@ -96,6 +104,39 @@ def generate_input1(dims, batch, attrs: List[Dict[str, Any]]): yield program_config + for op_type in [ + "exp", + "abs", + ]: + self.dims = dims + self.op_type = op_type + dics = [{}] + + ops_config = [ + { + "op_type": op_type, + "op_inputs": {"X": ["input_data"]}, + "op_outputs": {"Out": ["output_data"]}, + "op_attrs": dics[0], + } + ] + ops = self.generate_op_config(ops_config) + + program_config = ProgramConfig( + ops=ops, + weights={}, + inputs={ + "input_data": TensorConfig( + data_gen=partial( + generate_int_input, dims, batch, dics + ) + ) + }, + outputs=["output_data"], + ) + + yield program_config + def sample_predictor_configs( self, program_config ) -> (paddle_infer.Config, List[int], float): From a5f2e1f7dbfab674b28fcb3fe26242677514c48b Mon Sep 17 00:00:00 2001 From: wangshengxiang <121413869+shengxiangwang@users.noreply.github.com> Date: Tue, 31 Jan 2023 18:31:37 
+0800 Subject: [PATCH 45/89] bind pixel_shuffle & pixel_shuffle_grad op for xpu (#50090) --- paddle/phi/backends/xpu/xpu2_op_list.cc | 2 + .../kernels/xpu/pixel_shuffle_grad_kernel.cc | 54 +++++++ .../phi/kernels/xpu/pixel_shuffle_kernel.cc | 54 +++++++ .../xpu/test_pixel_shuffle_op_xpu.py | 143 ++++++++++++++++++ 4 files changed, 253 insertions(+) create mode 100644 paddle/phi/kernels/xpu/pixel_shuffle_grad_kernel.cc create mode 100644 paddle/phi/kernels/xpu/pixel_shuffle_kernel.cc create mode 100644 python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 99cb79035b4b2..67ac2b17a7094 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -415,6 +415,8 @@ XPUOpMap& get_kl2_ops() { {"p_norm_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"pad3d_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"pad3d", XPUKernelSet({phi::DataType::FLOAT32})}, + {"pixel_shuffle", XPUKernelSet({phi::DataType::FLOAT32})}, + {"pixel_shuffle_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"pool2d_grad", XPUKernelSet({phi::DataType::FLOAT32, phi::DataType::FLOAT16})}, {"pool2d", diff --git a/paddle/phi/kernels/xpu/pixel_shuffle_grad_kernel.cc b/paddle/phi/kernels/xpu/pixel_shuffle_grad_kernel.cc new file mode 100644 index 0000000000000..0b1879faff2bf --- /dev/null +++ b/paddle/phi/kernels/xpu/pixel_shuffle_grad_kernel.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/pixel_shuffle_grad_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { +template +void PixelShuffleGradKernel(const Context& ctx, + const DenseTensor& out_grad, + int upscale_factor, + const std::string& data_format, + DenseTensor* x_grad) { + using XPUType = typename XPUTypeTrait::Type; + + const T* x_ptr = out_grad.data(); + T* y_ptr = ctx.template Alloc(x_grad); + + bool is_nchw = data_format == "NCHW"; + + int64_t n = out_grad.dims()[0]; + int64_t xc = out_grad.dims()[is_nchw ? 1 : 3]; + int64_t xh = out_grad.dims()[is_nchw ? 2 : 1]; + int64_t xw = out_grad.dims()[is_nchw ? 3 : 2]; + + int r = pixel_unshuffle(ctx.x_context(), + reinterpret_cast(x_ptr), + reinterpret_cast(y_ptr), + n, + xc, + xh, + xw, + upscale_factor, + is_nchw); + + PADDLE_ENFORCE_XDNN_SUCCESS(r, "pixel_unshuffle"); +} +} // namespace phi + +PD_REGISTER_KERNEL( + pixel_shuffle_grad, XPU, ALL_LAYOUT, phi::PixelShuffleGradKernel, float) {} diff --git a/paddle/phi/kernels/xpu/pixel_shuffle_kernel.cc b/paddle/phi/kernels/xpu/pixel_shuffle_kernel.cc new file mode 100644 index 0000000000000..d35775a5c15c9 --- /dev/null +++ b/paddle/phi/kernels/xpu/pixel_shuffle_kernel.cc @@ -0,0 +1,54 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/pixel_shuffle_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { +template +void PixelShuffleKernel(const Context& ctx, + const DenseTensor& x, + int upscale_factor, + const std::string& data_format, + DenseTensor* out) { + using XPUType = typename XPUTypeTrait::Type; + + const T* x_ptr = x.data(); + T* y_ptr = ctx.template Alloc(out); + + bool is_nchw = data_format == "NCHW"; + + int64_t n = x.dims()[0]; + int64_t xc = x.dims()[is_nchw ? 1 : 3]; + int64_t xh = x.dims()[is_nchw ? 2 : 1]; + int64_t xw = x.dims()[is_nchw ? 3 : 2]; + + int r = pixel_shuffle(ctx.x_context(), + reinterpret_cast(x_ptr), + reinterpret_cast(y_ptr), + n, + xc, + xh, + xw, + upscale_factor, + is_nchw); + + PADDLE_ENFORCE_XDNN_SUCCESS(r, "pixel_shuffle"); +} +} // namespace phi + +PD_REGISTER_KERNEL( + pixel_shuffle, XPU, ALL_LAYOUT, phi::PixelShuffleKernel, float) {} diff --git a/python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py new file mode 100644 index 0000000000000..299fe38018c9b --- /dev/null +++ b/python/paddle/fluid/tests/unittests/xpu/test_pixel_shuffle_op_xpu.py @@ -0,0 +1,143 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
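The XPU kernels above implement the standard pixel_shuffle contract; a quick shape check with the public API (a sketch run on the default device — an XPU build plus paddle.device.set_device("xpu") would be needed to exercise the new kernels):

import numpy as np
import paddle
import paddle.nn.functional as F

# pixel_shuffle rearranges [N, C*r*r, H, W] into [N, C, H*r, W*r] for NCHW input.
x = paddle.to_tensor(np.arange(2 * 8 * 3 * 3, dtype="float32").reshape([2, 8, 3, 3]))
out = F.pixel_shuffle(x, upscale_factor=2)
print(out.shape)  # [2, 2, 6, 6]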
+ +import sys +import unittest + +import numpy as np + +sys.path.append("..") + +from op_test_xpu import XPUOpTest +from xpu.get_test_cover_info import ( + XPUOpTestWrapper, + create_test_class, + get_xpu_op_support_types, +) + +import paddle + +paddle.enable_static() + + +def pixel_shuffle_np(x, up_factor, data_format="NCHW"): + if data_format == "NCHW": + n, c, h, w = x.shape + new_shape = ( + n, + c // (up_factor * up_factor), + up_factor, + up_factor, + h, + w, + ) + # reshape to (num,output_channel,upscale_factor,upscale_factor,h,w) + npresult = np.reshape(x, new_shape) + # transpose to (num,output_channel,h,upscale_factor,w,upscale_factor) + npresult = npresult.transpose(0, 1, 4, 2, 5, 3) + oshape = [n, c // (up_factor * up_factor), h * up_factor, w * up_factor] + npresult = np.reshape(npresult, oshape) + return npresult + else: + n, h, w, c = x.shape + new_shape = ( + n, + h, + w, + c // (up_factor * up_factor), + up_factor, + up_factor, + ) + # reshape to (num,h,w,output_channel,upscale_factor,upscale_factor) + npresult = np.reshape(x, new_shape) + # transpose to (num,h,upscale_factor,w,upscale_factor,output_channel) + npresult = npresult.transpose(0, 1, 4, 2, 5, 3) + oshape = [n, h * up_factor, w * up_factor, c // (up_factor * up_factor)] + npresult = np.reshape(npresult, oshape) + return npresult + + +class XPUTestPixelShuffleOp(XPUOpTestWrapper): + def __init__(self): + self.op_name = "pixel_shuffle" + self.use_dynamic_create_class = False + + class TestPixelShuffleOp(XPUOpTest): + def setUp(self): + self.set_xpu() + self.op_type = "pixel_shuffle" + self.init_dtype() + self.eager_mode = True + + # override + self.init_input_shape() + self.init_attr() + + self.x = np.random.random(self.x_shape).astype(self.dtype) + self.y = pixel_shuffle_np( + self.x, self.attrs["upscale_factor"], self.attrs["data_format"] + ) + + self.inputs = {'X': self.x} + self.outputs = {'Out': self.y} + + def init_input_shape(self): + self.x_shape = [2, 64, 26, 26] + + def init_attr(self): + self.attrs = {'upscale_factor': 2, 'data_format': "NCHW"} + + def set_xpu(self): + self.__class__.no_need_check_grad = False + self.place = paddle.XPUPlace(0) + + def init_dtype(self): + self.dtype = self.in_type + + def test_check_output(self): + self.check_output_with_place(self.place) + + def test_check_grad(self): + self.check_grad_with_place( + self.place, ['X'], 'Out', check_eager=self.eager_mode + ) + + class TestNHWC(TestPixelShuffleOp): + def init_input_shape(self): + self.x_shape = [2, 64, 26, 24] + + def init_attr(self): + self.attrs = {'upscale_factor': 2, 'data_format': "NHWC"} + + class TestUpFactor3(TestPixelShuffleOp): + def init_input_shape(self): + self.x_shape = [2, 27, 5, 5] + + def init_attr(self): + self.attrs = {'upscale_factor': 3, 'data_format': "NCHW"} + + class TestUpFactor3NHWC(TestPixelShuffleOp): + def init_input_shape(self): + self.x_shape = [2, 27, 5, 9] + + def init_attr(self): + self.attrs = {'upscale_factor': 3, 'data_format': "NHWC"} + + +support_types = get_xpu_op_support_types("pixel_shuffle") +for stype in support_types: + create_test_class(globals(), XPUTestPixelShuffleOp, stype) + +if __name__ == "__main__": + unittest.main() From 86a22ad491e760129c9181d1135ca04f5673c907 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Tue, 31 Jan 2023 18:55:54 +0800 Subject: [PATCH 46/89] imigrating from old dynamic graph to new dynamic graph for argmin/argmax/adalta test (#50093) * more ops * revert some ops * reset some 
ops --- .../fluid/tests/unittests/test_adagrad_op.py | 10 +++++++++- .../fluid/tests/unittests/test_addmm_op.py | 14 ++++++++------ .../tests/unittests/test_arg_min_max_op.py | 16 +++++++++++++++- .../tests/unittests/test_arg_min_max_v2_op.py | 18 +++++++++++++++++- 4 files changed, 49 insertions(+), 9 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_adagrad_op.py index 94a754ca3cecf..8eed1871a8759 100644 --- a/python/paddle/fluid/tests/unittests/test_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_adagrad_op.py @@ -16,18 +16,24 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid.core as core from paddle.fluid.op import Operator +def adamgrad_wrapper(param, grad, moment, learning_rate, epsilon): + paddle._C_ops.adagrad_(param, grad, moment, learning_rate, epsilon) + + class TestAdagradOp1(OpTest): '''Test Adagrad operator with explicit attributes''' def setUp(self): self.op_type = "adagrad" + self.python_api = adamgrad_wrapper + self.python_out_sig = ['out'] param = np.random.random((123, 321)).astype("float32") grad = np.random.random((123, 321)).astype("float32") moment = np.zeros((123, 321)).astype("float32") @@ -57,6 +63,8 @@ class TestAdagradOp2(OpTest): def setUp(self): self.op_type = "adagrad" + self.python_api = adamgrad_wrapper + self.python_out_sig = ['out'] param = np.random.random((123, 321)).astype("float32") grad = np.random.random((123, 321)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_addmm_op.py b/python/paddle/fluid/tests/unittests/test_addmm_op.py index 2e4a9515b6aef..7691cf0c7010b 100644 --- a/python/paddle/fluid/tests/unittests/test_addmm_op.py +++ b/python/paddle/fluid/tests/unittests/test_addmm_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -43,19 +43,19 @@ def init_dtype_type(self): pass def test_check_output(self): - self.check_output(check_eager=False) + self.check_output() def test_check_grad_normal(self): - self.check_grad(['Input', 'X', 'Y'], 'Out', check_eager=False) + self.check_grad(['Input', 'X', 'Y'], 'Out') def test_check_grad_x(self): - self.check_grad(['X'], 'Out', no_grad_set=None, check_eager=False) + self.check_grad(['X'], 'Out', no_grad_set=None) def test_check_grad_y(self): - self.check_grad(['Y'], 'Out', no_grad_set=None, check_eager=False) + self.check_grad(['Y'], 'Out', no_grad_set=None) def test_check_grad_input(self): - self.check_grad(['Input'], 'Out', no_grad_set=None, check_eager=False) + self.check_grad(['Input'], 'Out', no_grad_set=None) class TestAddMMOpError(unittest.TestCase): @@ -186,6 +186,7 @@ class TestAddMMOp3(OpTest): # test broadcast def setUp(self): self.op_type = "addmm" + self.python_api = paddle.addmm self.dtype = np.float64 self.init_dtype_type() self.inputs = { @@ -225,6 +226,7 @@ class TestAddMMOp4(OpTest): # test broadcast def setUp(self): self.op_type = "addmm" + self.python_api = paddle.addmm self.dtype = np.float64 self.init_dtype_type() self.inputs = { diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py index 603ea0d6b7a03..56ac56181f07c 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_op.py @@ -16,7 +16,7 @@ import unittest import numpy as np 
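The migration pattern in these test diffs is uniform: point self.python_api at the public Python call (or at a thin wrapper such as adamgrad_wrapper above) so that eager_op_test can replay the operator under the new dynamic graph. A condensed sketch of the arg_max case, assuming it is placed inside the unittests directory where eager_op_test lives:

import numpy as np
import paddle
from eager_op_test import OpTest  # test-suite helper, available next to these files


class TestArgMaxPythonAPISketch(OpTest):
    def setUp(self):
        self.op_type = "arg_max"
        # The public API stands in for the C++ op when checking in dygraph mode.
        self.python_api = paddle.tensor.argmax
        x = np.random.random((3, 4, 5)).astype("float32")
        self.inputs = {"X": x}
        self.attrs = {"axis": 0}
        self.outputs = {"Out": np.argmax(x, axis=0)}

    def test_check_output(self):
        self.check_output()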
-from op_test import OpTest +from eager_op_test import OpTest from test_attribute_var import UnittestBase import paddle @@ -27,6 +27,7 @@ class BaseTestCase(OpTest): def initTestCase(self): self.op_type = 'arg_min' + self.python_api = paddle.tensor.argmin self.dims = (3, 4, 5) self.dtype = 'float32' self.axis = 0 @@ -48,6 +49,7 @@ def test_check_output(self): class TestCase0(BaseTestCase): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (3, 4, 5) self.dtype = 'float32' self.axis = 0 @@ -56,6 +58,7 @@ def initTestCase(self): class TestCase1(BaseTestCase): def initTestCase(self): self.op_type = 'arg_min' + self.python_api = paddle.tensor.argmin self.dims = (3, 4) self.dtype = 'float64' self.axis = 1 @@ -64,6 +67,7 @@ def initTestCase(self): class TestCase2(BaseTestCase): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (3, 4) self.dtype = 'int64' self.axis = 0 @@ -75,6 +79,7 @@ def initTestCase(self): class TestCase0FP16(BaseTestCase): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (3, 4, 5) self.dtype = np.float16 self.axis = 0 @@ -86,6 +91,7 @@ def initTestCase(self): class TestCase1FP16(BaseTestCase): def initTestCase(self): self.op_type = 'arg_min' + self.python_api = paddle.tensor.argmin self.dims = (3, 4) self.dtype = np.float16 self.axis = 1 @@ -94,6 +100,7 @@ def initTestCase(self): class TestCase2_1(BaseTestCase): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (3, 4) self.dtype = 'int64' self.axis = -1 @@ -102,6 +109,7 @@ def initTestCase(self): class TestCase3(BaseTestCase): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (3,) self.dtype = 'int64' self.axis = 0 @@ -110,6 +118,7 @@ def initTestCase(self): class TestCase4(BaseTestCase): def initTestCase(self): self.op_type = 'arg_min' + self.python_api = paddle.tensor.argmin self.dims = (1,) self.dtype = 'int32' self.axis = 0 @@ -118,6 +127,7 @@ def initTestCase(self): class TestCase3_(BaseTestCase): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (3,) self.axis = 0 @@ -125,6 +135,7 @@ def initTestCase(self): class BaseTestComplex1_1(OpTest): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (4, 5, 6) self.dtype = 'int32' self.axis = 2 @@ -148,6 +159,7 @@ def setUp(self): class BaseTestComplex1_2(OpTest): def initTestCase(self): self.op_type = 'arg_min' + self.python_api = paddle.tensor.argmin self.dims = (4, 5, 6) self.dtype = 'int32' self.axis = 2 @@ -171,6 +183,7 @@ def setUp(self): class BaseTestComplex2_1(OpTest): def initTestCase(self): self.op_type = 'arg_max' + self.python_api = paddle.tensor.argmax self.dims = (4, 5, 6) self.dtype = 'int32' self.axis = 2 @@ -199,6 +212,7 @@ def setUp(self): class BaseTestComplex2_2(OpTest): def initTestCase(self): self.op_type = 'arg_min' + self.python_api = paddle.tensor.argmin self.dims = (4, 5, 6) self.dtype = 'int32' self.axis = 2 diff --git a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py index 99dcff5db7b0b..f5cb975019c98 100644 --- a/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_arg_min_max_v2_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import 
OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -33,6 +33,10 @@ def initTestCase(self): def setUp(self): np.random.seed(123) self.initTestCase() + if op_type == 'arg_min': + self.python_api = paddle.tensor.argmin + else: + self.python_api = paddle.tensor.argmax self.dims = (4, 5, 6) self.dtype = "float64" self.x = 1000 * np.random.random(self.dims).astype(self.dtype) @@ -72,6 +76,10 @@ def initTestCase(self): class ArgMinMaxKernelCase4(ArgMinMaxKernelBaseCase): def setUp(self): self.initTestCase() + if op_type == 'arg_min': + self.python_api = paddle.tensor.argmin + else: + self.python_api = paddle.tensor.argmax self.dims = (4, 5, 6) self.dtype = "float64" self.x = 1000 * np.random.random(self.dims).astype(self.dtype) @@ -85,6 +93,10 @@ def setUp(self): class ArgMinMaxKernelCase5(ArgMinMaxKernelBaseCase): def setUp(self): self.initTestCase() + if op_type == 'arg_min': + self.python_api = paddle.tensor.argmin + else: + self.python_api = paddle.tensor.argmax self.dims = 4 self.dtype = "float64" self.x = 1000 * np.random.random(self.dims).astype(self.dtype) @@ -98,6 +110,10 @@ def setUp(self): class ArgMinMaxKernelCase6(ArgMinMaxKernelBaseCase): def setUp(self): self.initTestCase() + if op_type == 'arg_min': + self.python_api = paddle.tensor.argmin + else: + self.python_api = paddle.tensor.argmax self.dims = 4 self.dtype = "float64" self.x = 1000 * np.random.random(self.dims).astype(self.dtype) From 47ddd36ec43b51205ca39ad0d73b7932312f9b81 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Tue, 31 Jan 2023 19:48:11 +0800 Subject: [PATCH 47/89] update ops for new dynamic graph tests (#50061) * update elementwise ops tests * add more ops * modify sum&split * lint * rm check_dygraph * revert pow * modify add for cpu test * revert reshape * modify min --- .../unittests/test_elementwise_add_op.py | 56 +++++++++++-------- .../unittests/test_elementwise_div_op.py | 15 ++++- .../unittests/test_elementwise_min_op.py | 27 ++++----- .../unittests/test_elementwise_mul_op.py | 34 +++++++++-- .../unittests/test_elementwise_sub_op.py | 32 ++++++++++- .../tests/unittests/test_pixel_shuffle.py | 9 ++- .../fluid/tests/unittests/test_poisson_op.py | 3 +- .../tests/unittests/test_put_along_axis_op.py | 3 +- .../fluid/tests/unittests/test_size_op.py | 7 ++- .../fluid/tests/unittests/test_softmax_op.py | 12 +++- .../tests/unittests/test_spectral_norm_op.py | 8 ++- .../fluid/tests/unittests/test_split_op.py | 12 +++- .../fluid/tests/unittests/test_sum_op.py | 26 ++++++--- .../unittests/test_take_along_axis_op.py | 3 +- 14 files changed, 185 insertions(+), 62 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py index 3bf2b7cdcd703..502ca504c1b8e 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_add_op.py @@ -15,15 +15,18 @@ import unittest import numpy as np +from eager_op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.tests.unittests.op_test import ( - OpTest, - convert_float_to_uint16, - skip_check_grad_ci, -) + + +def broadcast_wrapper(shape=[1, 10, 12, 1]): + def add_wrapper(x, y, axis=-1): + return x + y.reshape(shape) + + return add_wrapper class TestElementwiseAddOp(OpTest): @@ -45,14 +48,13 @@ def 
setUp(self): self.attrs = {'axis': self.axis, 'use_mkldnn': self.use_mkldnn} self.outputs = {'Out': self.out} - def check_eager(self): + def check_dygraph(self): return not self.use_mkldnn and self.axis == -1 def test_check_output(self): # TODO(wangzhongpu): support mkldnn op in dygraph mode self.check_output( - check_dygraph=(not self.use_mkldnn), - check_eager=self.check_eager(), + check_dygraph=self.check_dygraph(), ) def test_check_grad_normal(self): @@ -62,8 +64,7 @@ def test_check_grad_normal(self): self.check_grad( ['X', 'Y'], 'Out', - check_dygraph=(not self.use_mkldnn), - check_eager=self.check_eager(), + check_dygraph=self.check_dygraph(), ) def test_check_grad_ingore_x(self): @@ -74,8 +75,7 @@ def test_check_grad_ingore_x(self): ['Y'], 'Out', no_grad_set=set("X"), - check_dygraph=(not self.use_mkldnn), - check_eager=self.check_eager(), + check_dygraph=self.check_dygraph(), ) def test_check_grad_ingore_y(self): @@ -86,8 +86,7 @@ def test_check_grad_ingore_y(self): ['X'], 'Out', no_grad_set=set('Y'), - check_dygraph=(not self.use_mkldnn), - check_eager=self.check_eager(), + check_dygraph=self.check_dygraph(), ) def init_input_output(self): @@ -136,7 +135,8 @@ def test_check_output(self): place = core.CUDAPlace(0) if core.is_float16_supported(place): self.check_output_with_place( - place, atol=1e-3, check_dygraph=(not self.use_mkldnn) + place, + atol=1e-3, ) @@ -149,6 +149,7 @@ def test_check_output(self): class TestBF16ElementwiseAddOp(OpTest): def setUp(self): self.op_type = "elementwise_add" + self.python_api = paddle.add self.dtype = np.uint16 self.x = np.random.uniform(0.1, 1, [13, 17]).astype(np.float32) @@ -170,23 +171,19 @@ def setUp(self): def test_check_output(self): place = core.CUDAPlace(0) - self.check_output_with_place(place, check_eager=False) + self.check_output_with_place(place) def test_check_grad_normal(self): place = core.CUDAPlace(0) - self.check_grad_with_place(place, ['X', 'Y'], 'Out', check_eager=False) + self.check_grad_with_place(place, ['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['Y'], 'Out', no_grad_set=set("X"), check_eager=False - ) + self.check_grad_with_place(place, ['Y'], 'Out', no_grad_set=set("X")) def test_check_grad_ingore_y(self): place = core.CUDAPlace(0) - self.check_grad_with_place( - place, ['X'], 'Out', no_grad_set=set('Y'), check_eager=False - ) + self.check_grad_with_place(place, ['X'], 'Out', no_grad_set=set('Y')) @skip_check_grad_ci( @@ -248,6 +245,7 @@ def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + self.y.reshape(100, 1, 1) + self.python_api = broadcast_wrapper(shape=[100, 1, 1]) def init_axis(self): self.axis = 0 @@ -258,6 +256,7 @@ def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + self.y.reshape(100, 1, 1) + self.python_api = broadcast_wrapper(shape=[100, 1, 1]) def init_axis(self): self.axis = 0 @@ -268,6 +267,7 @@ def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + self.y.reshape(1, 100, 1) + self.python_api = broadcast_wrapper(shape=[1, 100, 1]) def init_axis(self): self.axis = 1 @@ -278,6 +278,7 @@ def init_input_output(self): self.x = np.random.rand(2, 100, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + 
self.y.reshape(1, 100, 1) + self.python_api = broadcast_wrapper(shape=[1, 100, 1]) def init_axis(self): self.axis = 1 @@ -288,6 +289,7 @@ def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + self.y.reshape(1, 1, 100) + self.python_api = broadcast_wrapper(shape=[1, 1, 100]) class TestFP16ElementwiseAddOp_broadcast_2(TestFP16ElementwiseAddOp): @@ -295,6 +297,7 @@ def init_input_output(self): self.x = np.random.rand(2, 3, 100).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) self.out = self.x + self.y.reshape(1, 1, 100) + self.python_api = broadcast_wrapper(shape=[1, 1, 100]) class TestElementwiseAddOp_broadcast_3(TestElementwiseAddOp): @@ -302,6 +305,7 @@ def init_input_output(self): self.x = np.random.rand(2, 10, 12, 1).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) self.out = self.x + self.y.reshape(1, 10, 12, 1) + self.python_api = broadcast_wrapper(shape=[1, 10, 12, 1]) def init_axis(self): self.axis = 1 @@ -312,6 +316,7 @@ def init_input_output(self): self.x = np.random.rand(2, 10, 12, 3).astype(self.dtype) self.y = np.random.rand(10, 12).astype(self.dtype) self.out = self.x + self.y.reshape(1, 10, 12, 1) + self.python_api = broadcast_wrapper(shape=[1, 10, 12, 1]) def init_axis(self): self.axis = 1 @@ -322,6 +327,7 @@ def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) self.out = self.x + self.y.reshape(100, 1, 1, 1) + self.python_api = broadcast_wrapper(shape=[100, 1, 1, 1]) def init_axis(self): self.axis = 0 @@ -332,6 +338,7 @@ def init_input_output(self): self.x = np.random.rand(100, 2, 1, 2).astype(self.dtype) self.y = np.random.rand(100, 1).astype(self.dtype) self.out = self.x + self.y.reshape(100, 1, 1, 1) + self.python_api = broadcast_wrapper(shape=[100, 1, 1, 1]) def init_axis(self): self.axis = 0 @@ -597,6 +604,7 @@ def init_data(self): class TestComplexElementwiseAddOp(OpTest): def setUp(self): self.op_type = "elementwise_add" + self.python_api = paddle.add self.dtype = np.float64 self.shape = (2, 3, 4, 5) self.init_input_output() @@ -629,7 +637,7 @@ def init_grad_input_output(self): self.grad_y = self.grad_out def test_check_output(self): - self.check_output(check_eager=False) + self.check_output() def test_check_grad_normal(self): self.check_grad( diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py index 943486827237d..c17a41b0bfad5 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_div_op.py @@ -15,13 +15,20 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci +from eager_op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci import paddle from paddle import fluid from paddle.fluid import core +def broadcast_wrapper(shape=[1, 10, 12, 1]): + def div_wrapper(x, y, axis=-1): + return paddle.divide(x, y.reshape(shape)) + + return div_wrapper + + class ElementwiseDivOp(OpTest): def setUp(self): self.op_type = "elementwise_div" @@ -193,6 +200,7 @@ def init_shape(self): self.x_shape = [100, 3, 4] self.y_shape = [100] self.attrs = {'axis': 0} + self.python_api = broadcast_wrapper(shape=[100, 1, 1]) def compute_output(self, x, y): return x / y.reshape(100, 1, 1) @@ -209,6 +217,7 @@ def init_shape(self): self.x_shape = [2, 
100, 4] self.y_shape = [100] self.attrs = {'axis': 1} + self.python_api = broadcast_wrapper(shape=[1, 100, 1]) def compute_output(self, x, y): return x / y.reshape(1, 100, 1) @@ -224,6 +233,7 @@ class TestElementwiseDivOpBroadcast2(ElementwiseDivOp): def init_shape(self): self.x_shape = [2, 3, 100] self.y_shape = [100] + self.python_api = broadcast_wrapper(shape=[1, 1, 100]) def compute_output(self, x, y): return x / y.reshape(1, 1, 100) @@ -240,6 +250,7 @@ def init_shape(self): self.x_shape = [2, 10, 12, 5] self.y_shape = [10, 12] self.attrs = {'axis': 1} + self.python_api = broadcast_wrapper(shape=[1, 10, 12, 1]) def compute_output(self, x, y): return x / y.reshape(1, 10, 12, 1) @@ -393,7 +404,7 @@ def init_grad_input_output(self): self.grad_y = -self.grad_out * np.conj(self.x / self.y / self.y) def test_check_output(self): - self.check_output(check_eager=False) + self.check_output() def test_check_grad_normal(self): self.check_grad( diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py index c9835b5cb1566..02f1d1dd6d275 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_min_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, skip_check_grad_ci +from eager_op_test import OpTest, skip_check_grad_ci import paddle import paddle.fluid as fluid @@ -25,6 +25,13 @@ paddle.enable_static() +def broadcast_wrapper(shape=[1, 10, 12, 1]): + def min_wrapper(x, y, axis=-1): + return paddle.minimum(x, y.reshape(shape)) + + return min_wrapper + + class TestElementwiseOp(OpTest): def setUp(self): self.op_type = "elementwise_min" @@ -39,16 +46,10 @@ def setUp(self): self.outputs = {'Out': np.minimum(self.inputs['X'], self.inputs['Y'])} def test_check_output(self): - if hasattr(self, 'attrs'): - self.check_output(check_eager=False) - else: - self.check_output(check_eager=True) + self.check_output() def test_check_grad_normal(self): - if hasattr(self, 'attrs'): - self.check_grad(['X', 'Y'], 'Out', check_eager=False) - else: - self.check_grad(['X', 'Y'], 'Out', check_eager=True) + self.check_grad(['X', 'Y'], 'Out') def test_check_grad_ingore_x(self): self.check_grad( @@ -118,7 +119,7 @@ def setUp(self): class TestElementwiseMinOp_broadcast_0(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_min" - self.python_api = paddle.minimum + self.python_api = broadcast_wrapper(shape=[100, 1, 1]) x = np.random.uniform(0.5, 1, (100, 3, 2)).astype(np.float64) sgn = np.random.choice([-1, 1], (100,)).astype(np.float64) y = x[:, 0, 0] + sgn * np.random.uniform(1, 2, (100,)).astype( @@ -137,7 +138,7 @@ def setUp(self): class TestElementwiseMinOp_broadcast_1(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_min" - self.python_api = paddle.minimum + self.python_api = broadcast_wrapper(shape=[1, 100, 1]) x = np.random.uniform(0.5, 1, (2, 100, 3)).astype(np.float64) sgn = np.random.choice([-1, 1], (100,)).astype(np.float64) y = x[0, :, 0] + sgn * np.random.uniform(1, 2, (100,)).astype( @@ -156,7 +157,7 @@ def setUp(self): class TestElementwiseMinOp_broadcast_2(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_min" - self.python_api = paddle.minimum + self.python_api = broadcast_wrapper(shape=[1, 1, 100]) x = np.random.uniform(0.5, 1, (2, 3, 100)).astype(np.float64) sgn = np.random.choice([-1, 1], (100,)).astype(np.float64) y = x[0, 0, :] + sgn * np.random.uniform(1, 2, 
(100,)).astype( @@ -174,7 +175,7 @@ def setUp(self): class TestElementwiseMinOp_broadcast_3(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_min" - self.python_api = paddle.minimum + self.python_api = broadcast_wrapper(shape=[1, 25, 4, 1]) x = np.random.uniform(0.5, 1, (2, 25, 4, 1)).astype(np.float64) sgn = np.random.choice([-1, 1], (25, 4)).astype(np.float64) y = x[0, :, :, 0] + sgn * np.random.uniform(1, 2, (25, 4)).astype( diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py index e34d9d0dfd32b..4fe6a15ef8efc 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_mul_op.py @@ -15,14 +15,24 @@ import unittest import numpy as np +from eager_op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci import paddle import paddle.fluid.core as core -from paddle.fluid.tests.unittests.op_test import ( - OpTest, - convert_float_to_uint16, - skip_check_grad_ci, -) + + +def mul(x, y, axis=-1, use_mkldnn=False): + return x * y + + +setattr(paddle, "mul", mul) + + +def broadcast_wrapper(shape=[1, 10, 12, 1]): + def mul_wrapper(x, y, axis=-1): + return x * y.reshape(shape) + + return mul_wrapper class ElementwiseMulOp(OpTest): @@ -31,6 +41,7 @@ def init_kernel_type(self): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.dtype = np.float64 self.axis = -1 self.init_dtype() @@ -107,6 +118,7 @@ def init_input_output(self): class TestBF16ElementwiseMulOp(OpTest): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.dtype = np.uint16 self.x = np.random.uniform(0.1, 1, [13, 17]).astype(np.float32) @@ -145,6 +157,7 @@ def test_check_grad_ingore_y(self): class TestElementwiseMulOp_scalar(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.rand(10, 3, 4).astype(np.float64), 'Y': np.random.rand(1).astype(np.float64), @@ -156,6 +169,7 @@ def setUp(self): class TestElementwiseMulOp_Vector(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.random((100,)).astype("float64"), 'Y': np.random.random((100,)).astype("float64"), @@ -168,6 +182,7 @@ class TestElementwiseMulOp_broadcast_0(ElementwiseMulOp): def init_input_output(self): self.x = np.random.rand(100, 2, 3).astype(self.dtype) self.y = np.random.rand(100).astype(self.dtype) + self.python_api = broadcast_wrapper(shape=[100, 1, 1]) self.out = self.x * self.y.reshape(100, 1, 1) def init_axis(self): @@ -177,6 +192,7 @@ def init_axis(self): class TestElementwiseMulOp_broadcast_1(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = broadcast_wrapper(shape=[1, 100, 1]) self.inputs = { 'X': np.random.rand(2, 100, 3).astype(np.float64), 'Y': np.random.rand(100).astype(np.float64), @@ -192,6 +208,7 @@ def setUp(self): class TestElementwiseMulOp_broadcast_2(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = broadcast_wrapper(shape=[1, 1, 100]) self.inputs = { 'X': np.random.rand(2, 3, 100).astype(np.float64), 'Y': np.random.rand(100).astype(np.float64), @@ -206,6 +223,7 @@ def setUp(self): class TestElementwiseMulOp_broadcast_3(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = broadcast_wrapper(shape=[1, 10, 12, 1]) self.inputs = { 'X': 
np.random.rand(2, 10, 12, 3).astype(np.float64), 'Y': np.random.rand(10, 12).astype(np.float64), @@ -221,6 +239,7 @@ def setUp(self): class TestElementwiseMulOp_broadcast_4(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.rand(10, 2, 11).astype(np.float64), 'Y': np.random.rand(10, 1, 11).astype(np.float64), @@ -232,6 +251,7 @@ def setUp(self): class TestElementwiseMulOp_broadcast_5(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.rand(10, 4, 2, 3).astype(np.float64), 'Y': np.random.rand(10, 4, 1, 3).astype(np.float64), @@ -251,6 +271,7 @@ def init_dtype(self): class TestElementwiseMulOp_commonuse_1(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.rand(2, 3, 100).astype(np.float64), 'Y': np.random.rand(1, 1, 100).astype(np.float64), @@ -262,6 +283,7 @@ def setUp(self): class TestElementwiseMulOp_commonuse_2(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.rand(30, 3, 1, 5).astype(np.float64), 'Y': np.random.rand(30, 1, 4, 1).astype(np.float64), @@ -273,6 +295,7 @@ def setUp(self): class TestElementwiseMulOp_xsize_lessthan_ysize(ElementwiseMulOp): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.inputs = { 'X': np.random.rand(10, 10).astype(np.float64), 'Y': np.random.rand(2, 2, 10, 10).astype(np.float64), @@ -289,6 +312,7 @@ def setUp(self): class TestComplexElementwiseMulOp(OpTest): def setUp(self): self.op_type = "elementwise_mul" + self.python_api = paddle.mul self.init_base_dtype() self.init_input_output() self.init_grad_input_output() diff --git a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py index 398ef711e2fea..1391dd2e9da5e 100644 --- a/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py +++ b/python/paddle/fluid/tests/unittests/test_elementwise_sub_op.py @@ -15,15 +15,26 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci +from eager_op_test import OpTest, convert_float_to_uint16, skip_check_grad_ci import paddle import paddle.fluid as fluid +def sub_wrapper(shape=None): + def inner_wrapper(x, y, axis=-1): + if shape is None: + return x - y + else: + return x - y.reshape(shape) + + return inner_wrapper + + class TestElementwiseOp(OpTest): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float64"), 'Y': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float64"), @@ -50,6 +61,7 @@ def test_check_grad_ingore_y(self): class TestElementwiseSubOp_ZeroDim1(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.uniform(0.1, 1, []).astype("float64"), 'Y': np.random.uniform(0.1, 1, []).astype("float64"), @@ -60,6 +72,7 @@ def setUp(self): class TestElementwiseSubOp_ZeroDim2(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float64"), 'Y': np.random.uniform(0.1, 1, []).astype("float64"), @@ -70,6 +83,7 @@ def setUp(self): class TestElementwiseSubOp_ZeroDim3(TestElementwiseOp): def 
setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.uniform(0.1, 1, []).astype("float64"), 'Y': np.random.uniform(0.1, 1, [2, 3, 4, 5]).astype("float64"), @@ -80,6 +94,7 @@ def setUp(self): class TestBF16ElementwiseOp(OpTest): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.dtype = np.uint16 x = np.random.uniform(0.1, 1, [13, 17]).astype(np.float32) y = np.random.uniform(0.1, 1, [13, 17]).astype(np.float32) @@ -110,6 +125,7 @@ def test_check_grad_ingore_y(self): class TestElementwiseSubOp_scalar(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.rand(10, 3, 4).astype(np.float64), 'Y': np.random.rand(1).astype(np.float64), @@ -120,6 +136,7 @@ def setUp(self): class TestElementwiseSubOp_Vector(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.random((100,)).astype("float64"), 'Y': np.random.random((100,)).astype("float64"), @@ -130,6 +147,7 @@ def setUp(self): class TestElementwiseSubOp_broadcast_0(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper(shape=[100, 1, 1]) self.inputs = { 'X': np.random.rand(100, 3, 2).astype(np.float64), 'Y': np.random.rand(100).astype(np.float64), @@ -144,6 +162,7 @@ def setUp(self): class TestElementwiseSubOp_broadcast_1(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper(shape=[1, 100, 1]) self.inputs = { 'X': np.random.rand(2, 100, 3).astype(np.float64), 'Y': np.random.rand(100).astype(np.float64), @@ -158,6 +177,7 @@ def setUp(self): class TestElementwiseSubOp_broadcast_2(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper(shape=[1, 1, 100]) self.inputs = { 'X': np.random.rand(2, 3, 100).astype(np.float64), 'Y': np.random.rand(100).astype(np.float64), @@ -171,6 +191,7 @@ def setUp(self): class TestElementwiseSubOp_broadcast_3(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper(shape=[1, 10, 12, 1]) self.inputs = { 'X': np.random.rand(2, 10, 12, 3).astype(np.float64), 'Y': np.random.rand(10, 12).astype(np.float64), @@ -185,6 +206,7 @@ def setUp(self): class TestElementwiseSubOp_broadcast_4(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.rand(2, 5, 3, 12).astype(np.float64), 'Y': np.random.rand(2, 5, 1, 12).astype(np.float64), @@ -195,6 +217,7 @@ def setUp(self): class TestElementwiseSubOp_commonuse_1(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.rand(2, 3, 100).astype(np.float64), 'Y': np.random.rand(1, 1, 100).astype(np.float64), @@ -205,6 +228,7 @@ def setUp(self): class TestElementwiseSubOp_commonuse_2(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.inputs = { 'X': np.random.rand(10, 3, 1, 4).astype(np.float64), 'Y': np.random.rand(10, 1, 12, 1).astype(np.float64), @@ -215,6 +239,11 @@ def setUp(self): class TestElementwiseSubOp_xsize_lessthan_ysize(TestElementwiseOp): def setUp(self): self.op_type = "elementwise_sub" + + def sub_func(x, y, axis=2): + return x.reshape([1, 1, 10, 12]) - y + + self.python_api = sub_func self.inputs = { 'X': np.random.rand(10, 
12).astype(np.float64), 'Y': np.random.rand(2, 3, 10, 12).astype(np.float64), @@ -230,6 +259,7 @@ def setUp(self): class TestComplexElementwiseSubOp(OpTest): def setUp(self): self.op_type = "elementwise_sub" + self.python_api = sub_wrapper() self.dtype = np.float64 self.shape = (2, 3, 4, 5) self.init_input_output() diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py index 9600f5a872c56..0ef6b3e77824b 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -85,10 +85,13 @@ def init_data_format(self): self.format = "NCHW" def test_check_output(self): - self.check_output(check_eager=True) + self.check_output() def test_check_grad(self): - self.check_grad(['X'], 'Out', check_eager=True) + self.check_grad( + ['X'], + 'Out', + ) class TestChannelLast(TestPixelShuffleOp): diff --git a/python/paddle/fluid/tests/unittests/test_poisson_op.py b/python/paddle/fluid/tests/unittests/test_poisson_op.py index e2720edb01313..ee66d578014c7 100644 --- a/python/paddle/fluid/tests/unittests/test_poisson_op.py +++ b/python/paddle/fluid/tests/unittests/test_poisson_op.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle @@ -41,6 +41,7 @@ def output_hist(out, lam, a, b): class TestPoissonOp1(OpTest): def setUp(self): self.op_type = "poisson" + self.python_api = paddle.tensor.poisson self.config() self.attrs = {} diff --git a/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py b/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py index 3b2cf82fbfd39..7470dae1846ab 100644 --- a/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py +++ b/python/paddle/fluid/tests/unittests/test_put_along_axis_op.py @@ -16,7 +16,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle from paddle.framework import core @@ -30,6 +30,7 @@ def setUp(self): self.reduce_op = "assign" self.dtype = 'float64' self.op_type = "put_along_axis" + self.python_api = paddle.tensor.put_along_axis self.xnp = np.random.random(self.x_shape).astype(self.x_type) # numpy put_along_axis is an inplace opearion. 
self.xnp_result = copy.deepcopy(self.xnp) diff --git a/python/paddle/fluid/tests/unittests/test_size_op.py b/python/paddle/fluid/tests/unittests/test_size_op.py index b3ae19b8ef20e..edea44abf0890 100644 --- a/python/paddle/fluid/tests/unittests/test_size_op.py +++ b/python/paddle/fluid/tests/unittests/test_size_op.py @@ -15,15 +15,20 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid +def size_wrapper(input): + return paddle.numel(paddle.to_tensor(input)) + + class TestSizeOp(OpTest): def setUp(self): self.op_type = "size" + self.python_api = size_wrapper self.shape = [] self.config() input = np.zeros(self.shape, dtype='bool') diff --git a/python/paddle/fluid/tests/unittests/test_softmax_op.py b/python/paddle/fluid/tests/unittests/test_softmax_op.py index 290d72b2485b2..8696cc532820f 100644 --- a/python/paddle/fluid/tests/unittests/test_softmax_op.py +++ b/python/paddle/fluid/tests/unittests/test_softmax_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16 +from eager_op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid @@ -43,6 +43,12 @@ def ref_softmax(x, axis=None, dtype=None): return np.apply_along_axis(stable_softmax, axis, x_t) +def softmax_wrapper( + x, axis=-1, dtype=None, name=None, use_cudnn=False, use_mkldnn=False +): + return paddle.nn.functional.softmax(x, axis=axis, dtype=dtype) + + class TestSoftmaxOp(OpTest): def get_x_shape(self): return [10, 10] @@ -52,6 +58,7 @@ def get_axis(self): def setUp(self): self.op_type = "softmax" + self.python_api = softmax_wrapper self.use_cudnn = False self.use_mkldnn = False # explicilty use float32 for ROCm, as MIOpen does not yet support float64 @@ -109,6 +116,7 @@ def test_check_grad(self): class TestSoftmaxOp_ZeroDim1(TestSoftmaxOp): def setUp(self): self.op_type = "softmax" + self.python_api = softmax_wrapper self.use_cudnn = False self.use_mkldnn = False # explicilty use float32 for ROCm, as MIOpen does not yet support float64 @@ -133,6 +141,7 @@ def setUp(self): class TestSoftmaxOp_ZeroDim2(TestSoftmaxOp): def setUp(self): self.op_type = "softmax" + self.python_api = softmax_wrapper self.use_cudnn = True self.use_mkldnn = False # explicilty use float32 for ROCm, as MIOpen does not yet support float64 @@ -366,6 +375,7 @@ def get_x_shape(self): class TestSoftmaxBF16Op(OpTest): def setUp(self): self.op_type = "softmax" + self.python_api = softmax_wrapper self.use_cudnn = self.init_cudnn() self.use_mkldnn = False self.dtype = np.uint16 diff --git a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py index 033ee7908866d..c60780f90c49b 100644 --- a/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_spectral_norm_op.py @@ -15,9 +15,10 @@ import unittest import numpy as np -from op_test import OpTest, skip_check_grad_ci +from eager_op_test import OpTest, skip_check_grad_ci import paddle +from paddle import _C_ops from paddle.fluid.framework import Program, program_guard paddle.enable_static() @@ -47,6 +48,10 @@ def spectral_norm(weight, u, v, dim, power_iters, eps): return weight / sigma +def spectral_norm_wrapper(weight, u, v, dim, power_iters, eps): + return _C_ops.spectral_norm(weight, u, v, dim, power_iters, eps) + + @skip_check_grad_ci( reason="Spectral norm do not check grad when power_iters > 0 " "because grad is not 
calculated in power iterations, " @@ -56,6 +61,7 @@ class TestSpectralNormOpNoGrad(OpTest): def setUp(self): self.initTestCase() self.op_type = 'spectral_norm' + self.python_api = spectral_norm_wrapper weight = np.random.random(self.weight_shape).astype('float64') u = np.random.normal(0.0, 1.0, self.u_shape).astype('float64') v = np.random.normal(0.0, 1.0, self.v_shape).astype('float64') diff --git a/python/paddle/fluid/tests/unittests/test_split_op.py b/python/paddle/fluid/tests/unittests/test_split_op.py index 40e7bff55e0bc..d250302165bcb 100644 --- a/python/paddle/fluid/tests/unittests/test_split_op.py +++ b/python/paddle/fluid/tests/unittests/test_split_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16 +from eager_op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid @@ -24,6 +24,8 @@ class TestSplitOp(OpTest): def setUp(self): + self.python_api = paddle.split + self.python_out_sig = ['out0', 'out1', 'out2'] self._set_op_type() self.dtype = self.get_dtype() axis = 1 @@ -62,6 +64,8 @@ def test_check_grad(self): # test with attr(num) class TestSplitOp_2(OpTest): def setUp(self): + self.python_api = paddle.split + self.python_out_sig = ['out0', 'out1', 'out2'] self._set_op_type() self.dtype = self.get_dtype() self.init_data() @@ -98,6 +102,8 @@ def test_check_grad(self): # attr(axis) is Tensor class TestSplitOp_AxisTensor(OpTest): def setUp(self): + self.python_api = paddle.split + self.python_out_sig = ['out0', 'out1', 'out2'] self._set_op_type() self.dtype = self.get_dtype() self.init_data() @@ -133,6 +139,8 @@ def test_check_grad(self): # attr(sections) is list containing Tensor class TestSplitOp_SectionsTensor(OpTest): def setUp(self): + self.python_api = paddle.split + self.python_out_sig = ['out0', 'out1', 'out2'] self._set_op_type() self.dtype = self.get_dtype() self.init_data() @@ -178,6 +186,8 @@ def test_check_grad(self): class TestSplitOp_unk_section(OpTest): def setUp(self): + self.python_api = paddle.split + self.python_out_sig = ['out0', 'out1', 'out2'] self._set_op_type() self.dtype = self.get_dtype() self.init_data() diff --git a/python/paddle/fluid/tests/unittests/test_sum_op.py b/python/paddle/fluid/tests/unittests/test_sum_op.py index 6e9ff86cb8b7f..b712b0bb161f6 100644 --- a/python/paddle/fluid/tests/unittests/test_sum_op.py +++ b/python/paddle/fluid/tests/unittests/test_sum_op.py @@ -19,6 +19,11 @@ import gradient_checker import numpy as np from decorator_helper import prog_scope +from eager_op_test import ( + OpTest, + convert_float_to_uint16, + convert_uint16_to_float, +) import paddle import paddle.fluid as fluid @@ -26,16 +31,19 @@ import paddle.inference as paddle_infer from paddle import enable_static from paddle.fluid.op import Operator -from paddle.fluid.tests.unittests.op_test import ( - OpTest, - convert_float_to_uint16, - convert_uint16_to_float, -) + + +def sum_wrapper(X, use_mkldnn=False): + res = 0 + for x in X: + res += x + return res class TestSumOp(OpTest): def setUp(self): self.op_type = "sum" + self.python_api = sum_wrapper self.init_kernel_type() self.use_mkldnn = False self.init_kernel_type() @@ -341,10 +349,14 @@ def init_kernel_type(self): self.dtype = np.uint16 def test_check_output(self): - self.check_output() + # new dynamic graph mode does not support unit16 type + self.check_output(check_dygraph=False) def test_check_grad(self): - self.check_grad(['x0'], 'Out', numeric_grad_delta=0.5) + # new dynamic graph mode does not support unit16 type + 
self.check_grad( + ['x0'], 'Out', numeric_grad_delta=0.5, check_dygraph=False + ) class API_Test_Add_n(unittest.TestCase): diff --git a/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py b/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py index da3fa64417fe6..7abd86d19f676 100644 --- a/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py +++ b/python/paddle/fluid/tests/unittests/test_take_along_axis_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle from paddle.framework import core @@ -27,6 +27,7 @@ class TestTakeAlongAxisOp(OpTest): def setUp(self): self.init_data() self.op_type = "take_along_axis" + self.python_api = paddle.tensor.take_along_axis self.xnp = np.random.random(self.x_shape).astype(self.x_type) self.target = np.take_along_axis(self.xnp, self.index, self.axis) broadcast_shape_list = list(self.x_shape) From b0ee022b02aea2c6580a93127cf84da0598c0080 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A7=9C=E6=B0=B8=E4=B9=85?= <34344716+yjjiang11@users.noreply.github.com> Date: Tue, 31 Jan 2023 19:48:35 +0800 Subject: [PATCH 48/89] migrating dot/sign/fill/norm from old dynamic graph to new dynamic graph (#49895) * check dygraph on for op tests * reset eigh and modify prelu&sign * update eager_op_test * lint * add more ops * fix reduce * modify reduce test * reset reduce_op * modify matmul test * revert prelu --- .../fluid/tests/unittests/test_dot_op.py | 26 ++++++++-------- .../tests/unittests/test_fill_any_like_op.py | 10 ++++++- .../tests/unittests/test_fill_constant_op.py | 21 +++++++++++-- .../fluid/tests/unittests/test_log_loss_op.py | 5 +++- .../fluid/tests/unittests/test_logspace.py | 7 ++++- .../unittests/test_lookup_table_v2_op.py | 7 +++-- .../tests/unittests/test_matmul_v2_op.py | 30 +++++++------------ .../tests/unittests/test_matrix_power_op.py | 3 +- .../fluid/tests/unittests/test_norm_op.py | 16 +++++++--- .../fluid/tests/unittests/test_numel_op.py | 3 +- .../tests/unittests/test_one_hot_v2_op.py | 11 ++++++- .../tests/unittests/test_shard_index_op.py | 5 +++- .../fluid/tests/unittests/test_sign_op.py | 1 + 13 files changed, 98 insertions(+), 47 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_dot_op.py b/python/paddle/fluid/tests/unittests/test_dot_op.py index d32057bfb0d2d..aa61c1e177869 100644 --- a/python/paddle/fluid/tests/unittests/test_dot_op.py +++ b/python/paddle/fluid/tests/unittests/test_dot_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -38,7 +38,7 @@ def setUp(self): self.attrs = {} def test_check_output(self): - self.check_output(check_eager=True) + self.check_output() def test_check_grad_normal(self): if core.is_compiled_with_rocm(): @@ -46,10 +46,12 @@ def test_check_grad_normal(self): ['X', 'Y'], 'Out', user_defined_grads=[self.inputs['Y'], self.inputs['X']], - check_eager=True, ) else: - self.check_grad(['X', 'Y'], 'Out', check_eager=True) + self.check_grad( + ['X', 'Y'], + 'Out', + ) def test_check_grad_ingore_x(self): if core.is_compiled_with_rocm(): @@ -58,11 +60,12 @@ def test_check_grad_ingore_x(self): 'Out', no_grad_set=set("X"), user_defined_grads=[self.inputs['X']], - check_eager=True, ) else: self.check_grad( - ['Y'], 'Out', no_grad_set=set("X"), check_eager=True + ['Y'], + 'Out', + no_grad_set=set("X"), ) def test_check_grad_ingore_y(self): @@ -72,11 +75,12 @@ def 
test_check_grad_ingore_y(self): 'Out', no_grad_set=set('Y'), user_defined_grads=[self.inputs['Y']], - check_eager=True, ) else: self.check_grad( - ['X'], 'Out', no_grad_set=set('Y'), check_eager=True + ['X'], + 'Out', + no_grad_set=set('Y'), ) def init_input_output(self): @@ -187,7 +191,7 @@ def init_grad_input_output(self): self.grad_y = self.grad_out * np.conj(self.x) def test_check_output(self): - self.check_output(check_eager=True) + self.check_output() def test_check_grad_normal(self): self.check_grad( @@ -195,7 +199,6 @@ def test_check_grad_normal(self): 'Out', user_defined_grads=[self.grad_x, self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=True, ) def test_check_grad_ingore_x(self): @@ -205,7 +208,6 @@ def test_check_grad_ingore_x(self): no_grad_set=set("X"), user_defined_grads=[self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=True, ) def test_check_grad_ingore_y(self): @@ -215,13 +217,13 @@ def test_check_grad_ingore_y(self): no_grad_set=set('Y'), user_defined_grads=[self.grad_x], user_defined_grad_outputs=[self.grad_out], - check_eager=True, ) class TestComplexDotOp2D(OpTest): def setUp(self): self.op_type = "dot" + self.python_api = paddle.dot self.init_base_dtype() self.init_input_output() self.init_grad_input_output() diff --git a/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py b/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py index 9fa333d623bf9..d87b47270d4fa 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_any_like_op.py @@ -15,15 +15,21 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16 +from eager_op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid.core as core +def fill_any_like_wrapper(x, value): + x.fill_(value) + return x + + class TestFillAnyLikeOp(OpTest): def setUp(self): self.op_type = "fill_any_like" + self.python_api = fill_any_like_wrapper self.dtype = np.int32 self.value = 0.0 self.init() @@ -50,6 +56,7 @@ def init(self): class TestFillAnyLikeOpBfloat16(OpTest): def setUp(self): self.op_type = "fill_any_like" + self.python_api = fill_any_like_wrapper self.dtype = np.uint16 self.value = 0.0 self.inputs = {'X': np.random.random((219, 232)).astype(np.float32)} @@ -83,6 +90,7 @@ def init(self): class TestFillAnyLikeOpType(TestFillAnyLikeOp): def setUp(self): self.op_type = "fill_any_like" + self.python_api = fill_any_like_wrapper self.dtype = np.int32 self.value = 0.0 self.init() diff --git a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py index 38ef0379747db..3151744aa4cc1 100644 --- a/python/paddle/fluid/tests/unittests/test_fill_constant_op.py +++ b/python/paddle/fluid/tests/unittests/test_fill_constant_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16 +from eager_op_test import OpTest, convert_float_to_uint16 import paddle import paddle.fluid as fluid @@ -24,11 +24,17 @@ from paddle.fluid.op import Operator +def fill_wrapper(shape, value=0.0): + out = paddle.full(shape=shape, fill_value=value) + return out + + # Situation 1: Attr(shape) is a list(without tensor) class TestFillConstantOp1(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.inputs = {} self.attrs = {'shape': [123, 92], 'value': 3.8} @@ -42,6 +48,7 @@ class 
TestFillConstantOp2(OpTest): def setUp(self): '''Test fill_constant op with default value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.inputs = {} self.attrs = {'shape': [123, 92]} @@ -55,6 +62,7 @@ class TestFillConstantOp3(OpTest): def setUp(self): '''Test fill_constant op with specified int64 value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.inputs = {} self.attrs = {'shape': [123, 92], 'value': 10000000000} @@ -68,6 +76,7 @@ class TestFillConstantOp4(OpTest): def setUp(self): '''Test fill_constant op with specified int value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.inputs = {} self.attrs = {'shape': [123, 92], 'value': 3} @@ -84,6 +93,7 @@ class TestFillConstantBF16Op(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.dtype = np.uint16 self.inputs = {} self.attrs = { @@ -130,6 +140,7 @@ class TestFillConstantOp1_ShapeTensorList(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.init_data() shape_tensor_list = [] for index, ele in enumerate(self.shape): @@ -154,6 +165,7 @@ class TestFillConstantOp2_ShapeTensorList(OpTest): def setUp(self): '''Test fill_constant op with default value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.init_data() shape_tensor_list = [] for index, ele in enumerate(self.shape): @@ -192,6 +204,7 @@ class TestFillConstantOp1_ShapeTensor(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.init_data() self.inputs = {"ShapeTensor": np.array(self.shape).astype("int32")} @@ -211,6 +224,7 @@ class TestFillConstantOp1_ValueTensor(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.init_data() self.inputs = { @@ -234,6 +248,7 @@ class TestFillConstantOp2_ValueTensor(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.init_data() self.inputs = { @@ -452,6 +467,7 @@ class TestFillConstantOp_ValueTensorBf16(OpTest): def setUp(self): '''Test fill_constant op with specified value''' self.op_type = "fill_constant" + self.python_api = fill_wrapper self.init_data() self.inputs = { @@ -470,7 +486,8 @@ def init_data(self): self.mkldnn_data_type = "bfloat16" def test_check_output(self): - self.check_output_with_place(core.CPUPlace()) + # no dynamic graph test for mkldnn + self.check_output_with_place(core.CPUPlace(), check_dygraph=False) if __name__ == "__main__": diff --git a/python/paddle/fluid/tests/unittests/test_log_loss_op.py b/python/paddle/fluid/tests/unittests/test_log_loss_op.py index 25bede0af214b..cb1b50b49a853 100644 --- a/python/paddle/fluid/tests/unittests/test_log_loss_op.py +++ b/python/paddle/fluid/tests/unittests/test_log_loss_op.py @@ -15,7 +15,9 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest + +from paddle.nn import functional as F def sigmoid_array(x): @@ -25,6 +27,7 @@ def sigmoid_array(x): class TestLogLossOp(OpTest): def setUp(self): self.op_type = 'log_loss' + self.python_api = F.log_loss samples_num = 100 x = np.random.random((samples_num, 1)).astype("float32") diff --git a/python/paddle/fluid/tests/unittests/test_logspace.py 
b/python/paddle/fluid/tests/unittests/test_logspace.py index 2a0d466a600d8..dee098dd5f34d 100644 --- a/python/paddle/fluid/tests/unittests/test_logspace.py +++ b/python/paddle/fluid/tests/unittests/test_logspace.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle @@ -23,6 +23,7 @@ class TestLogspaceOpCommonCase(OpTest): def setUp(self): self.op_type = "logspace" + self.python_api = paddle.logspace dtype = 'float32' self.inputs = { 'Start': np.array([0]).astype(dtype), @@ -41,6 +42,7 @@ def test_check_output(self): class TestLogspaceOpReverseCase(OpTest): def setUp(self): self.op_type = "logspace" + self.python_api = paddle.logspace dtype = 'float32' self.inputs = { 'Start': np.array([10]).astype(dtype), @@ -59,6 +61,7 @@ def test_check_output(self): class TestLogspaceOpNumOneCase(OpTest): def setUp(self): self.op_type = "logspace" + self.python_api = paddle.logspace dtype = 'float32' self.inputs = { 'Start': np.array([10]).astype(dtype), @@ -77,6 +80,7 @@ def test_check_output(self): class TestLogspaceOpMinusBaseCase(OpTest): def setUp(self): self.op_type = "logspace" + self.python_api = paddle.logspace dtype = 'float32' self.inputs = { 'Start': np.array([0]).astype(dtype), @@ -95,6 +99,7 @@ def test_check_output(self): class TestLogspaceOpZeroBaseCase(OpTest): def setUp(self): self.op_type = "logspace" + self.python_api = paddle.logspace dtype = 'float32' self.inputs = { 'Start': np.array([0]).astype(dtype), diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index eb9c4c60893e0..74b6eec7198c6 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, skip_check_grad_ci +from eager_op_test import OpTest, skip_check_grad_ci import paddle import paddle.fluid as fluid @@ -57,10 +57,10 @@ def id_dtype(self): return "int64" def test_check_output(self): - self.check_output(check_eager=True) + self.check_output() def test_check_grad(self): - self.check_grad(['W'], 'Out', no_grad_set=set('Ids'), check_eager=True) + self.check_grad(['W'], 'Out', no_grad_set=set('Ids')) class TestLookupTableOpInt16(OpTest): @@ -81,6 +81,7 @@ def id_dtype(self): class TestLookupTableOpWithTensorIds(OpTest): def setUp(self): self.op_type = "lookup_table_v2" + self.python_api = paddle.nn.functional.embedding table = np.random.random((17, 31)).astype("float64") ids = np.random.randint(low=0, high=17, size=(2, 4, 5)).astype("int32") self.inputs = {'W': table, 'Ids': ids} diff --git a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py index 8136425595a1c..e78ea74260d1e 100644 --- a/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_matmul_v2_op.py @@ -15,12 +15,12 @@ import unittest import numpy as np -from op_test import OpTest, convert_float_to_uint16, get_numeric_gradient +from eager_op_test import OpTest, convert_float_to_uint16, get_numeric_gradient +from testsuite import create_op import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.tests.unittests.testsuite import create_op def reference_matmul(X, Y, transpose_X=False, transpose_Y=False): @@ -72,6 +72,7 @@ def setUp(self): self.init_kernel_type() self.config() self.op_type = 
"matmul_v2" + self.python_api = paddle.tensor.matmul if self.is_bfloat16_op(): x = np.random.random(self.x_shape).astype(np.float32) y = np.random.random(self.y_shape).astype(np.float32) @@ -102,15 +103,13 @@ def setUp(self): self.outputs = {'Out': result} def test_check_output(self): - self.check_output(check_eager=False) + self.check_output() def test_check_grad(self): if core.is_compiled_with_rocm(): - self.check_grad( - ['X', 'Y'], 'Out', max_relative_error=1e-2, check_eager=False - ) + self.check_grad(['X', 'Y'], 'Out', max_relative_error=1e-2) else: - self.check_grad(['X', 'Y'], 'Out', check_eager=False) + self.check_grad(['X', 'Y'], 'Out') class TestMatMulOp2(TestMatMulV2Op): @@ -344,9 +343,7 @@ def test_check_output(self): if core.is_compiled_with_cuda(): place = core.CUDAPlace(0) if core.is_float16_supported(place): - self.check_output_with_place( - place, atol=atol, check_eager=False - ) + self.check_output_with_place(place, atol=atol) def test_check_grad(self): place = core.CUDAPlace(0) @@ -356,7 +353,6 @@ def test_check_grad(self): ['X', 'Y'], 'Out', max_relative_error=max_relative_error, - check_eager=False, ) cls_name = "{0}_{1}".format(parent.__name__, "Fp16") @@ -562,6 +558,7 @@ def test_compute_type_fp16_nan(self): class TestComplexMatMulOp(OpTest): def setUp(self): self.op_type = "matmul_v2" + self.python_api = paddle.tensor.matmul self.init_base_dtype() self.init_input_output() self.init_grad_input_output() @@ -593,7 +590,7 @@ def init_grad_input_output(self): self.grad_y = np.matmul(np.conj(self.x).T, self.grad_out) def test_check_output(self): - self.check_output(check_eager=False) + self.check_output() def test_check_grad_normal(self): self.check_grad( @@ -601,7 +598,6 @@ def test_check_grad_normal(self): 'Out', user_defined_grads=[self.grad_x, self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=False, ) def test_check_grad_ingore_x(self): @@ -611,7 +607,6 @@ def test_check_grad_ingore_x(self): no_grad_set=set("X"), user_defined_grads=[self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=False, ) def test_check_grad_ingore_y(self): @@ -621,13 +616,13 @@ def test_check_grad_ingore_y(self): no_grad_set=set('Y'), user_defined_grads=[self.grad_x], user_defined_grad_outputs=[self.grad_out], - check_eager=False, ) class TestComplexMatMulOpBroadcast(OpTest): def setUp(self): self.op_type = "matmul_v2" + self.python_api = paddle.tensor.matmul self.init_base_dtype() self.init_input_output() self.init_grad_input_output() @@ -661,7 +656,7 @@ def init_grad_input_output(self): ) def test_check_output(self): - self.check_output(check_eager=False) + self.check_output() def test_check_grad_normal(self): self.check_grad( @@ -669,7 +664,6 @@ def test_check_grad_normal(self): 'Out', user_defined_grads=[self.grad_x, self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=False, ) def test_check_grad_ingore_x(self): @@ -679,7 +673,6 @@ def test_check_grad_ingore_x(self): no_grad_set=set("X"), user_defined_grads=[self.grad_y], user_defined_grad_outputs=[self.grad_out], - check_eager=False, ) def test_check_grad_ingore_y(self): @@ -689,7 +682,6 @@ def test_check_grad_ingore_y(self): no_grad_set=set('Y'), user_defined_grads=[self.grad_x], user_defined_grad_outputs=[self.grad_out], - check_eager=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py index 7f26a7170191f..6381aeeca9868 100644 --- 
a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -32,6 +32,7 @@ def config(self): def setUp(self): self.op_type = "matrix_power" + self.python_api = paddle.tensor.matrix_power self.config() np.random.seed(123) diff --git a/python/paddle/fluid/tests/unittests/test_norm_op.py b/python/paddle/fluid/tests/unittests/test_norm_op.py index 73938f2d1b1c0..7b4b8dc60a02a 100644 --- a/python/paddle/fluid/tests/unittests/test_norm_op.py +++ b/python/paddle/fluid/tests/unittests/test_norm_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest, skip_check_grad_ci +from eager_op_test import OpTest, skip_check_grad_ci import paddle import paddle.fluid as fluid @@ -29,10 +29,14 @@ def l2_norm(x, axis, epsilon): return y, r +def norm_wrapper(x, axis=1, epsilon=1e-12, is_test=False): + return paddle.nn.functional.normalize(x, axis=axis, epsilon=epsilon) + + class TestNormOp(OpTest): def setUp(self): self.op_type = "norm" - self.python_api = paddle.nn.functional.normalize + self.python_api = norm_wrapper self.init_test_case() self.init_dtype() x = np.random.random(self.shape).astype(self.dtype) @@ -40,6 +44,7 @@ def setUp(self): self.inputs = {'X': x} self.attrs = {'epsilon': self.epsilon, 'axis': self.axis} self.outputs = {'Out': y, 'Norm': norm} + self.python_out_sig = ['Out'] def test_check_output(self): self.check_output() @@ -126,19 +131,22 @@ def test_check_grad(self): class TestNormTestOp(OpTest): def setUp(self): self.op_type = "norm" + self.python_api = norm_wrapper self.init_test_case() x = np.random.random(self.shape).astype("float64") y, norm = l2_norm(x, self.axis, self.epsilon) self.inputs = {'X': x} self.attrs = { 'epsilon': self.epsilon, - 'axis': self.axis, + 'axis': int(self.axis), 'is_test': True, } self.outputs = {'Out': y} + self.python_out_sig = ["out"] def test_check_output(self): - self.check_output() + # dynamic graph just supports float tensor + self.check_output(check_dygraph=True) def test_check_grad(self): pass diff --git a/python/paddle/fluid/tests/unittests/test_numel_op.py b/python/paddle/fluid/tests/unittests/test_numel_op.py index 1878c8409f5a3..a2414ed369b9b 100644 --- a/python/paddle/fluid/tests/unittests/test_numel_op.py +++ b/python/paddle/fluid/tests/unittests/test_numel_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -24,6 +24,7 @@ class TestNumelOp(OpTest): def setUp(self): self.op_type = "size" + self.python_api = paddle.numel self.init() x = np.random.random((self.shape)).astype("float64") self.inputs = { diff --git a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py index 30bb75e0fa783..5d78b371b5fe9 100644 --- a/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_one_hot_v2_op.py @@ -15,7 +15,7 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest import paddle import paddle.fluid as fluid @@ -23,9 +23,15 @@ from paddle.fluid.framework import Program, program_guard +def one_hot_wrapper(x, depth_tensor, **keargs): + return paddle.nn.functional.one_hot(x, depth_tensor) + + class TestOneHotOp(OpTest): def setUp(self): 
self.op_type = 'one_hot_v2' + self.python_api = one_hot_wrapper + self.python_out_sig = ['Out'] depth = 10 depth_np = np.array(10).astype('int32') dimension = 12 @@ -49,6 +55,7 @@ def test_check_output(self): class TestOneHotOp_attr(OpTest): def setUp(self): self.op_type = 'one_hot_v2' + self.python_api = one_hot_wrapper depth = 10 dimension = 12 x_lod = [[4, 1, 3, 3]] @@ -73,6 +80,7 @@ def test_check_output(self): class TestOneHotOp_default_dtype(OpTest): def setUp(self): self.op_type = 'one_hot_v2' + self.python_api = one_hot_wrapper depth = 10 depth_np = np.array(10).astype('int32') dimension = 12 @@ -96,6 +104,7 @@ def test_check_output(self): class TestOneHotOp_default_dtype_attr(OpTest): def setUp(self): self.op_type = 'one_hot_v2' + self.python_api = one_hot_wrapper depth = 10 dimension = 12 x_lod = [[4, 1, 3, 3]] diff --git a/python/paddle/fluid/tests/unittests/test_shard_index_op.py b/python/paddle/fluid/tests/unittests/test_shard_index_op.py index dfbb98a791372..77cbecd641c14 100644 --- a/python/paddle/fluid/tests/unittests/test_shard_index_op.py +++ b/python/paddle/fluid/tests/unittests/test_shard_index_op.py @@ -15,11 +15,14 @@ import unittest import numpy as np -from op_test import OpTest +from eager_op_test import OpTest + +import paddle def common_setup(self, index_num, nshards, shard_id, ignore_value): self.op_type = 'shard_index' + self.python_api = paddle.tensor.shard_index x_lod = [[i for i in range(10)]] N = sum(x_lod[0]) x = [np.random.randint(0, index_num - 1) for i in range(N)] diff --git a/python/paddle/fluid/tests/unittests/test_sign_op.py b/python/paddle/fluid/tests/unittests/test_sign_op.py index 7834736260d9d..1f7d3b8228a77 100644 --- a/python/paddle/fluid/tests/unittests/test_sign_op.py +++ b/python/paddle/fluid/tests/unittests/test_sign_op.py @@ -28,6 +28,7 @@ class TestSignOp(OpTest): def setUp(self): self.op_type = "sign" + self.python_api = paddle.sign self.inputs = { 'X': np.random.uniform(-10, 10, (10, 10)).astype("float64") } From 111075a349054acb67d272450da4dc5f81ad61c8 Mon Sep 17 00:00:00 2001 From: wenbin Date: Tue, 31 Jan 2023 20:07:54 +0800 Subject: [PATCH 49/89] gn_silu (#49928) * gn_silu * add ut * set TIMEOUT * correct comments * comments * disable windows ut * rename parameter --- paddle/fluid/framework/ir/CMakeLists.txt | 1 + .../fluid/framework/ir/groupnorm_act_pass.cc | 167 ++++++++++++++++++ .../fluid/framework/ir/groupnorm_act_pass.h | 81 +++++++++ .../inference/api/paddle_pass_builder.cc | 1 + .../tensorrt/convert/group_norm_op.cc | 6 + .../plugin/common/groupNormPluginCommon.h | 4 +- .../tensorrt/plugin/group_norm_op_plugin.cu | 8 +- .../tensorrt/plugin/group_norm_op_plugin.h | 8 +- .../plugin/preln_groupnorm_act_op_plugin.cu | 6 +- .../plugin/skip_groupnorm_act_op_plugin.cu | 6 +- .../unittests/ir/inference/CMakeLists.txt | 3 + .../test_groupnorm_act_pass_fuse_pass.py | 150 ++++++++++++++++ 12 files changed, 428 insertions(+), 13 deletions(-) create mode 100644 paddle/fluid/framework/ir/groupnorm_act_pass.cc create mode 100644 paddle/fluid/framework/ir/groupnorm_act_pass.h create mode 100644 python/paddle/fluid/tests/unittests/ir/inference/test_groupnorm_act_pass_fuse_pass.py diff --git a/paddle/fluid/framework/ir/CMakeLists.txt b/paddle/fluid/framework/ir/CMakeLists.txt index b387dc1d6cc26..23d5b0de24722 100755 --- a/paddle/fluid/framework/ir/CMakeLists.txt +++ b/paddle/fluid/framework/ir/CMakeLists.txt @@ -144,6 +144,7 @@ if(WITH_TENSORRT) pass_library(trt_support_nhwc_pass inference) pass_library(elementwise_groupnorm_act_pass 
inference) pass_library(preln_elementwise_groupnorm_act_pass inference) + pass_library(groupnorm_act_pass inference) pass_library(trt_embedding_eltwise_layernorm_fuse_pass inference) pass_library(preln_embedding_eltwise_layernorm_fuse_pass inference) endif() diff --git a/paddle/fluid/framework/ir/groupnorm_act_pass.cc b/paddle/fluid/framework/ir/groupnorm_act_pass.cc new file mode 100644 index 0000000000000..397a7437757cc --- /dev/null +++ b/paddle/fluid/framework/ir/groupnorm_act_pass.cc @@ -0,0 +1,167 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/fluid/framework/ir/groupnorm_act_pass.h" + +#include + +#include "paddle/fluid/framework/ir/graph_pattern_detector.h" +#include "paddle/fluid/framework/op_version_registry.h" + +namespace paddle { +namespace framework { +namespace ir { +class Node; +} // namespace ir +} // namespace framework +} // namespace paddle + +namespace paddle { +namespace framework { +namespace ir { +namespace patterns { + +struct GroupNormAct : public PatternBase { + GroupNormAct(PDPattern *pattern, const std::string &name_scope) + : PatternBase(pattern, name_scope, "groupnorm_act") {} + + void operator()(PDNode *x); + // declare operator node's name + PATTERN_DECL_NODE(group_norm); + // declare variable node's name + PATTERN_DECL_NODE(elementwise_out); + + PATTERN_DECL_NODE(group_norm_bias); + PATTERN_DECL_NODE(group_norm_scale); + PATTERN_DECL_NODE(group_norm_out); + PATTERN_DECL_NODE(act); + PATTERN_DECL_NODE(act_out); +}; + +void GroupNormAct::operator()(PDNode *x) { + // Create nodes for group_norm op. + auto *group_norm = + pattern->NewNode(group_norm_repr())->assert_is_op("group_norm"); + auto *group_norm_bias_var = pattern->NewNode(group_norm_bias_repr()) + ->AsInput() + ->assert_is_persistable_var() + ->assert_is_op_input("group_norm", "Bias"); + + auto *group_norm_scale_var = pattern->NewNode(group_norm_scale_repr()) + ->AsInput() + ->assert_is_persistable_var() + ->assert_is_op_input("group_norm", "Scale"); + + auto *group_norm_out_var = pattern->NewNode(group_norm_out_repr()) + ->AsOutput() + ->assert_is_op_output("group_norm", "Y") + ->assert_is_op_input("silu", "X"); + + // Add links for group_norm op. 
+ group_norm->LinksFrom({x, group_norm_bias_var, group_norm_scale_var}) + .LinksTo({group_norm_out_var}); + + auto *act = pattern->NewNode(act_repr())->assert_is_op("silu"); + auto *act_out = pattern->NewNode(act_out_repr()) + ->AsOutput() + ->assert_is_op_output("silu", "Out"); + + act->LinksFrom({group_norm_out_var}).LinksTo({act_out}); +} + +} // namespace patterns + +int GroupNormActFusePass::ApplyGNSiluPattern(ir::Graph *graph) const { + PADDLE_ENFORCE_NOT_NULL( + graph, platform::errors::PreconditionNotMet("graph should not be null.")); + FusePassBase::Init("groupnorm_silu_fuse", graph); + + int found_subgraph_count = 0; + + GraphPatternDetector gpd; + PDNode *x = nullptr; + + x = gpd.mutable_pattern() + ->NewNode("groupnorm_act_fuse/x") + ->AsInput() + ->assert_var_not_persistable() + ->assert_is_op_input("group_norm", "X"); + + patterns::GroupNormAct fused_pattern(gpd.mutable_pattern(), + "groupnorm_act_fuse"); + fused_pattern(x); + + auto handler = [&](const GraphPatternDetector::subgraph_t &subgraph, + Graph *graph) { + if (subgraph.count(x) <= 0) { + LOG(WARNING) << "The subgraph is empty."; + return; + } + + VLOG(4) << "handle groupnorm act fuse"; + + GET_IR_NODE_FROM_SUBGRAPH(group_norm, group_norm, fused_pattern); + GET_IR_NODE_FROM_SUBGRAPH(group_norm_bias, group_norm_bias, fused_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + group_norm_scale, group_norm_scale, fused_pattern); + GET_IR_NODE_FROM_SUBGRAPH(group_norm_out, group_norm_out, fused_pattern); + GET_IR_NODE_FROM_SUBGRAPH(act, act, fused_pattern); + GET_IR_NODE_FROM_SUBGRAPH(act_out, act_out, fused_pattern); + + if (!IsCompat(subgraph, graph)) { + LOG(WARNING) << "groupnorm act pass in op compat failed."; + return; + } + + std::unordered_set del_node_set; + // Create an skip_groupnorm_act op node + OpDesc new_desc(*group_norm->Op()); + new_desc.SetAttr("with_silu", true); + new_desc.SetOutput("Y", {act_out->Name()}); + new_desc.Flush(); + + auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied. + + del_node_set.insert(group_norm); + del_node_set.insert(group_norm_out); + del_node_set.insert(act); + GraphSafeRemoveNodes(graph, del_node_set); + + IR_NODE_LINK_TO(subgraph.at(x), fused_node); + IR_NODE_LINK_TO(group_norm_scale, fused_node); + IR_NODE_LINK_TO(group_norm_bias, fused_node); + IR_NODE_LINK_TO(fused_node, act_out); + found_subgraph_count++; + }; + + gpd(graph, handler); + return found_subgraph_count; +} + +void GroupNormActFusePass::ApplyImpl(ir::Graph *graph) const { + FusePassBase::Init("groupnorm_act_fuse_pass", graph); + int found_subgraph_count = ApplyGNSiluPattern(graph); + AddStatis(found_subgraph_count); +} + +} // namespace ir +} // namespace framework +} // namespace paddle + +REGISTER_PASS(groupnorm_act_pass, paddle::framework::ir::GroupNormActFusePass); +REGISTER_PASS_CAPABILITY(groupnorm_act_pass) + .AddCombination( + paddle::framework::compatible::OpVersionComparatorCombination() + .EQ("silu", 0) + .EQ("group_norm", 0)); diff --git a/paddle/fluid/framework/ir/groupnorm_act_pass.h b/paddle/fluid/framework/ir/groupnorm_act_pass.h new file mode 100644 index 0000000000000..16e4d332d29f0 --- /dev/null +++ b/paddle/fluid/framework/ir/groupnorm_act_pass.h @@ -0,0 +1,81 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/fluid/framework/ir/fuse_pass_base.h" + +namespace paddle { +namespace framework { +namespace ir { +// +// | | +// group_norm group_norm +// | -> | +// silu +// | + +class Graph; + +class GroupNormActFusePass : public FusePassBase { + public: + GroupNormActFusePass() { + AddOpCompat(OpCompat("group_norm")) + .AddInput("X") + .IsTensor() + .End() + .AddInput("Scale") + .IsTensor() + .End() + .AddInput("Bias") + .IsTensor() + .End() + .AddOutput("Y") + .IsTensor() + .End() + .AddOutput("Mean") + .IsTensor() + .End() + .AddOutput("Variance") + .IsTensor() + .End() + .AddAttr("epsilon") + .IsNumGE(0.0f) + .IsNumLE(1.0f) + .End() + .AddAttr("groups") + .IsNumGE(1) + .End() + .AddAttr("data_layout") + .IsStringIn({"NCHW"}) + .End(); + AddOpCompat(OpCompat("silu")) + .AddInput("X") + .IsTensor() + .End() + .AddOutput("Out") + .IsTensor() + .End(); + } + + virtual ~GroupNormActFusePass() {} + + protected: + void ApplyImpl(ir::Graph* graph) const override; + int ApplyGNSiluPattern(ir::Graph* graph) const; +}; + +} // namespace ir +} // namespace framework +} // namespace paddle diff --git a/paddle/fluid/inference/api/paddle_pass_builder.cc b/paddle/fluid/inference/api/paddle_pass_builder.cc index 9f28343525c12..b5582518eacd2 100644 --- a/paddle/fluid/inference/api/paddle_pass_builder.cc +++ b/paddle/fluid/inference/api/paddle_pass_builder.cc @@ -136,6 +136,7 @@ const std::vector kTRTSubgraphPasses({ #else "elementwise_groupnorm_act_pass", // "preln_elementwise_groupnorm_act_pass", // + "groupnorm_act_pass", // #endif "tensorrt_subgraph_pass", // "conv_bn_fuse_pass", // diff --git a/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc b/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc index 2afc86dfc815d..4384f7d2b3cb9 100644 --- a/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/group_norm_op.cc @@ -46,6 +46,11 @@ class GroupNormOpConverter : public OpConverter { std::string scale_name = op_desc.Input("Scale").front(); std::string bias_name = op_desc.Input("Bias").front(); + bool with_silu = false; + if (op_desc.HasAttr("with_silu")) { + with_silu = PADDLE_GET_CONST(bool, op_desc.GetAttr("with_silu")); + } + // get the presistable var's data auto GetWeight = [&](const std::string& var_name, framework::DDim* dims) -> TensorRTEngine::Weight { @@ -77,6 +82,7 @@ class GroupNormOpConverter : public OpConverter { groups, mean_shape, variance_shape, + with_silu, with_fp16); nvinfer1::ILayer* groupnorm_layer = engine_->AddDynamicPlugin(&input_itensor, 1, plugin); diff --git a/paddle/fluid/inference/tensorrt/plugin/common/groupNormPluginCommon.h b/paddle/fluid/inference/tensorrt/plugin/common/groupNormPluginCommon.h index 81d507e866a1c..915ee1b5e23ac 100644 --- a/paddle/fluid/inference/tensorrt/plugin/common/groupNormPluginCommon.h +++ b/paddle/fluid/inference/tensorrt/plugin/common/groupNormPluginCommon.h @@ -49,8 +49,8 @@ struct GroupNormNHWCParams { int32_t c; // The number of groups. int32_t groups; - // Do we apply the Swish activation function? 
- bool withSwish; + // Do we apply the Silu activation function? + bool withSilu; // Precomputed values and parameters to control the execution of the kernels. diff --git a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu index 77c00d47d4cea..fc139a9734b30 100644 --- a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.cu @@ -247,8 +247,8 @@ __global__ void groupNormNHWCScaleKernel(const GroupNormNHWCParams params) { f2.x = gammaF2.x * f2.x + betaF2.x; f2.y = gammaF2.y * f2.y + betaF2.y; - // Apply Swish if needed. - if (params.withSwish) { + // Apply Silu if needed. + if (params.withSilu) { f2.x = f2.x * sigmoid(f2.x); f2.y = f2.y * sigmoid(f2.y); } @@ -457,7 +457,7 @@ bool GroupNormPluginDynamic::supportsFormatCombination( if (pos == 0) { if (with_fp16_) { return ((in.type == nvinfer1::DataType::kHALF) && - (in.format == nvinfer1::PluginFormat::kLINEAR || + ((!with_silu_ && in.format == nvinfer1::PluginFormat::kLINEAR) || in.format == nvinfer1::PluginFormat::kHWC8)); } else { return (in.type == nvinfer1::DataType::kFLOAT) && @@ -624,7 +624,7 @@ int GroupNormPluginDynamic::enqueue( cPerBlock = 8; } - params_.withSwish = false; + params_.withSilu = with_silu_; params_.dst = static_cast(outputs[0]); params_.srcX = static_cast(inputs[0]); params_.gamma = scale_gpu_; diff --git a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h index 1fa505c077ea8..3feb35e0708bc 100644 --- a/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h +++ b/paddle/fluid/inference/tensorrt/plugin/group_norm_op_plugin.h @@ -164,11 +164,13 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { int groups, std::vector mean_shape, std::vector variance_shape, + bool with_silu, bool with_fp16) : groups_(groups), eps_(eps), mean_shape_(mean_shape), variance_shape_(variance_shape), + with_silu_(with_silu), with_fp16_(with_fp16) { scale_.resize(scale_num); bias_.resize(bias_num); @@ -183,6 +185,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { DeserializeValue(&serialData, &serialLength, &groups_); DeserializeValue(&serialData, &serialLength, &mean_shape_); DeserializeValue(&serialData, &serialLength, &variance_shape_); + DeserializeValue(&serialData, &serialLength, &with_silu_); DeserializeValue(&serialData, &serialLength, &with_fp16_); } nvinfer1::IPluginV2DynamicExt* clone() const TRT_NOEXCEPT override { @@ -194,6 +197,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { groups_, mean_shape_, variance_shape_, + with_silu_, with_fp16_); ptr->scale_gpu_ = scale_gpu_; ptr->bias_gpu_ = bias_gpu_; @@ -210,7 +214,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { return SerializedSize(scale_) + SerializedSize(bias_) + SerializedSize(eps_) + SerializedSize(groups_) + SerializedSize(mean_shape_) + SerializedSize(variance_shape_) + - SerializedSize(with_fp16_); + SerializedSize(with_silu_) + SerializedSize(with_fp16_); } void serialize(void* buffer) const TRT_NOEXCEPT override { SerializeValue(&buffer, scale_); @@ -219,6 +223,7 @@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { SerializeValue(&buffer, groups_); SerializeValue(&buffer, mean_shape_); SerializeValue(&buffer, variance_shape_); + SerializeValue(&buffer, with_silu_); SerializeValue(&buffer, with_fp16_); } nvinfer1::DimsExprs getOutputDimensions( @@ -277,6 +282,7 
@@ class GroupNormPluginDynamic : public DynamicPluginTensorRT { std::vector mean_shape_; std::vector variance_shape_; GroupNormNHWCParams params_; + bool with_silu_; bool with_fp16_; }; class GroupNormPluginDynamicCreator : public TensorRTPluginCreator { diff --git a/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu index a756a826bfb15..d3ca36770a4d2 100644 --- a/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/preln_groupnorm_act_op_plugin.cu @@ -330,8 +330,8 @@ __global__ void prelnGroupNormNHWCScaleKernel(GroupNormNHWCParams params) { f2.x = gammaF2.x * f2.x + betaF2.x; f2.y = gammaF2.y * f2.y + betaF2.y; - // Apply Swish if needed. - if (params.withSwish) { + // Apply Silu if needed. + if (params.withSilu) { f2.x = f2.x * sigmoid(f2.x); f2.y = f2.y * sigmoid(f2.y); } @@ -431,7 +431,7 @@ int PrelnGroupnormActPluginDynamic::enqueue( if (cPerBlock > input_desc[0].dims.d[1]) { cPerBlock = 8; } - params_.withSwish = with_silu_; + params_.withSilu = with_silu_; params_.dst = static_cast(outputs[1]); params_.eleOut = static_cast(outputs[0]); params_.srcX = static_cast(inputs[0]); diff --git a/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu b/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu index adba9324472a2..997205e918936 100644 --- a/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu +++ b/paddle/fluid/inference/tensorrt/plugin/skip_groupnorm_act_op_plugin.cu @@ -340,8 +340,8 @@ __global__ void skipGroupNormNHWCScaleKernel(GroupNormNHWCParams params) { f2.x = gammaF2.x * f2.x + betaF2.x; f2.y = gammaF2.y * f2.y + betaF2.y; - // Apply Swish if needed. - if (params.withSwish) { + // Apply Silu if needed. 
+ if (params.withSilu) { f2.x = f2.x * sigmoid(f2.x); f2.y = f2.y * sigmoid(f2.y); } @@ -439,7 +439,7 @@ int SkipGroupnormActPluginDynamic::enqueue( if (cPerBlock > input_desc[0].dims.d[1]) { cPerBlock = 8; } - params_.withSwish = true; + params_.withSilu = true; params_.dst = static_cast(outputs[0]); params_.srcX = static_cast(inputs[0]); params_.srcY = static_cast(inputs[1]); diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index d456a86aa9d28..bdcf6ab951022 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -39,6 +39,7 @@ if(WIN32) "test_preln_groupnorm_act_fuse_pass") list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_element_groupnorm_act_fuse_pass") + list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_groupnorm_act_pass_fuse_pass") list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_fused_token_prune") list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_fused_token_prune") endif() @@ -225,6 +226,8 @@ if(WITH_GPU AND TENSORRT_FOUND) PROPERTIES TIMEOUT 120) set_tests_properties(test_preln_groupnorm_act_fuse_pass PROPERTIES TIMEOUT 120) + set_tests_properties(test_groupnorm_act_pass_fuse_pass PROPERTIES TIMEOUT + 120) endif() endif() diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_groupnorm_act_pass_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_groupnorm_act_pass_fuse_pass.py new file mode 100644 index 0000000000000..c9f821b21d4e9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_groupnorm_act_pass_fuse_pass.py @@ -0,0 +1,150 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest +from functools import partial + +import hypothesis.strategies as st +import numpy as np +from auto_scan_test import PassAutoScanTest +from program_config import OpConfig, ProgramConfig, TensorConfig + +import paddle.inference as paddle_infer + + +class TestElementGNActPass(PassAutoScanTest): + # + # | fuse | + # groupnorm -> groupnorm(with_silu) + # | | + # silu + # | + # + # + + def sample_predictor_configs(self, program_config): + # trt dynamic_shape + config = self.create_trt_inference_config() + config.enable_tensorrt_engine( + max_batch_size=1, + workspace_size=102400, + min_subgraph_size=0, + precision_mode=paddle_infer.PrecisionType.Half, + use_static=False, + use_calib_mode=False, + ) + config.set_trt_dynamic_shape_info( + { + "input_data": [1, 160, 1, 1], + }, + { + "input_data": [4, 1280, 64, 64], + }, + { + "input_data": [1, 320, 32, 32], + }, + ) + yield config, ['group_norm'], (3e-3, 1e-3) + + def sample_program_config(self, draw): + axis = draw(st.sampled_from([0, -1])) + epsilon = draw(st.floats(min_value=0.0000001, max_value=0.001)) + batch_size = draw(st.integers(min_value=1, max_value=4)) + + groups = draw(st.sampled_from([4, 8, 16, 32])) + hw = draw(st.sampled_from([1, 8, 16, 32])) + channel = draw(st.sampled_from([320, 1280])) + + def generate_input(attrs): + return np.random.random( + [attrs[1]["batch_size"], *attrs[1]["input_dim"]] + ).astype(np.float32) + + def generate_weight(attrs): + return np.random.random(attrs[1]['input_dim'][0]).astype(np.float32) + + attrs = [ + { + 'epsilon': epsilon, + 'groups': groups, + }, + { + 'batch_size': batch_size, + 'input_dim': [channel, hw, hw], + }, + ] + + group_norm_op = OpConfig( + type="group_norm", + inputs={ + "X": ["input_data"], + "Bias": ["group_norm_bias"], + "Scale": ["group_norm_scale"], + }, + outputs={ + "Y": ["group_norm_output1"], + "Mean": ["group_norm_output2"], + "Variance": ["group_norm_output3"], + }, + attrs={ + "data_layout": "NCHW", + "groups": attrs[0]["groups"], + "epsilon": attrs[0]["epsilon"], + }, + ) + silu_op = OpConfig( + type="silu", + inputs={ + "X": ["group_norm_output1"], + }, + outputs={ + "Out": ["silu_output"], + }, + ) + + program_config = ProgramConfig( + ops=[ + group_norm_op, + silu_op, + ], + weights={ + "group_norm_bias": TensorConfig( + data_gen=partial(generate_weight, attrs) + ), + "group_norm_scale": TensorConfig( + data_gen=partial(generate_weight, attrs) + ), + }, + inputs={ + "input_data": TensorConfig( + data_gen=partial(generate_input, attrs) + ), + }, + outputs=["silu_output"], + ) + + return program_config + + def test(self): + self.run_and_statis( + quant=False, + max_examples=50, + passes=["groupnorm_act_pass"], + max_duration=250, + min_success_num=50, + ) + + +if __name__ == "__main__": + unittest.main() From dc1b6511dd6673f82adc77d13b9526ac60523d3b Mon Sep 17 00:00:00 2001 From: RedContritio Date: Tue, 31 Jan 2023 20:18:33 +0800 Subject: [PATCH 50/89] support empty input for unique_consecutive (#49978) --- .../kernels/cpu/unique_consecutive_functor.h | 6 ++- .../unittests/test_unique_consecutive_op.py | 38 ++++++++++++++++++- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/paddle/phi/kernels/cpu/unique_consecutive_functor.h b/paddle/phi/kernels/cpu/unique_consecutive_functor.h index 314c371bf7a64..73d196bbb98d9 100644 --- a/paddle/phi/kernels/cpu/unique_consecutive_functor.h +++ b/paddle/phi/kernels/cpu/unique_consecutive_functor.h @@ -51,9 +51,11 @@ static void UniqueConsecutiveFlattenedTensor(const Context& context, } } - int64_t 
output_size = p - out_vec.data() + 1; + bool is_empty = in.numel() == 0; + int64_t output_size = is_empty ? 0 : (p - out_vec.data() + 1); + if (return_counts) { - *q = in.numel() - last; + if (!is_empty) *q = in.numel() - last; counts_vec.resize(output_size); } out_vec.resize(output_size); diff --git a/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py b/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py index 86872aff9c7da..2c60fce518d37 100644 --- a/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py +++ b/python/paddle/fluid/tests/unittests/test_unique_consecutive_op.py @@ -32,12 +32,14 @@ def reference_unique_consecutive(X, return_inverse=False, return_counts=False): return_counts(bool, optional): If True, also return the counts for each unique consecutive element. """ X = list(X) + is_empty = len(X) == 0 counts_vec = [1] * len(X) i = 0 counts = 1 last = 0 inverse_vec = [0] * len(X) - inverse_vec[last] = i + if not is_empty: + inverse_vec[last] = i cnt = 0 while i < len(X) - 1: if X[i] == X[i + 1]: @@ -271,6 +273,40 @@ def test_dygraph(self): ) +class TestUniqueConsecutiveEmptyInput(OpTest): + """empty input""" + + def config(self): + self.return_inverse = True + self.return_counts = True + self.python_api = paddle.unique_consecutive + + def init_kernel_type(self): + self.dtype = "float32" if core.is_compiled_with_rocm() else "float64" + + def setUp(self): + self.init_kernel_type() + self.config() + self.op_type = "unique_consecutive" + x = np.array([]).astype(self.dtype) + result = reference_unique_consecutive( + x, self.return_inverse, self.return_counts + ) + out = reference_unique_consecutive(x) + out = np.array(out).astype(self.dtype) + self.inputs = { + 'X': x, + } + self.python_out_sig = ["Out"] + self.attrs = {'dtype': int(core.VarDesc.VarType.INT32)} + self.outputs = { + 'Out': out, + } + + def test_check_output(self): + self.check_output(check_eager=True) + + if __name__ == "__main__": paddle.enable_static() unittest.main() From 057ba778fa19c1b9670150d5ea5e83d6c8d64d04 Mon Sep 17 00:00:00 2001 From: limingshu <61349199+JamesLim-sy@users.noreply.github.com> Date: Wed, 1 Feb 2023 00:38:49 +0800 Subject: [PATCH 51/89] H2D data transfer optimization for split kernel (#49086) * profile reduce kernel for fp16 and reduceHigherdim * use reinterpret_cast * fix for CI on ROCm * add Macro for ROCm * ROCm CI config * ROCm CI config * unit test repair * pull * add common_funcs.h * reduceType * Update reduce_function.h * not higher * rename * implement of matmul using cublasLt instead of cublas * cublasLt bugfix * Update matmul_kernel_impl.h * Update matmul_kernel_impl_via_blasLt.h * for-loop-algo * PR comments changes * add macro * ci unused variable isCublasLt * ci unused variable isCublasLt macro * split matmul to autotune * rewrite the split kernel with segmented_array * rewrite the split kernel with segmented_array * rewrite the split kernel with segmented_array * add some method for cuda_graph * fix bugs for rocm * change for ci-error * i dont know why ci-model-benchmark gives a shit error, so i recover codes with original one to see if original codes work. 
* add some changes for passing mode_benchmark and coverage ci * fix ci error * fix ci-rocm error * add some changes for header --------- Co-authored-by: zhangbopd <1299246947@qq.com> Co-authored-by: Bo Zhang <105368690+zhangbopd@users.noreply.github.com> --- .../kernels/funcs/concat_and_split_functor.cu | 494 +++++++++--------- paddle/phi/kernels/funcs/segmented_array.h | 70 ++- paddle/phi/kernels/funcs/stack_and_unstack.h | 2 +- 3 files changed, 305 insertions(+), 261 deletions(-) diff --git a/paddle/phi/kernels/funcs/concat_and_split_functor.cu b/paddle/phi/kernels/funcs/concat_and_split_functor.cu index fa663528eb015..dc9150e4f2c56 100644 --- a/paddle/phi/kernels/funcs/concat_and_split_functor.cu +++ b/paddle/phi/kernels/funcs/concat_and_split_functor.cu @@ -14,8 +14,8 @@ limitations under the License. */ #include "paddle/phi/kernels/funcs/concat_and_split_functor.h" #include "paddle/fluid/memory/malloc.h" -#include "paddle/phi/backends/gpu/cuda/cuda_graph_with_memory_pool.h" #include "paddle/phi/backends/gpu/gpu_launch_config.h" +#include "paddle/phi/kernels/funcs/segmented_array.h" namespace phi { namespace funcs { @@ -45,6 +45,12 @@ static inline void GetBlockDims(const phi::GPUContext& context, *grid_dims = dim3(grid_cols, grid_rows, 1); } +#if !defined(_WIN32) +#define PADDLE_ALIGN(x) __attribute__((aligned(x))) +#else +#define PADDLE_ALIGN(x) +#endif + template struct PointerWrapper { public: @@ -55,12 +61,29 @@ struct PointerWrapper { PointerWrapper(const phi::GPUContext& ctx, const std::vector& ins, const T** pre_alloced_host_ptr) { + SetInputAddr(ins); + } + + protected: + void SetInputAddr(const std::vector& ins) { for (auto i = 0; i < ins.size(); ++i) { ins_addr[i] = ins[i].data(); } } }; +template +struct PADDLE_ALIGN(256) AlignedPointerWrapper + : public PointerWrapper { + public: + AlignedPointerWrapper() {} + AlignedPointerWrapper(const phi::GPUContext& ctx, + const std::vector& ins, + const T** pre_alloced_host_ptr) { + this->SetInputAddr(ins); + } +}; + template struct PointerToPointer { public: @@ -93,7 +116,7 @@ struct PointerToPointer { }; template -struct PointerAndColWrapper { +struct PADDLE_ALIGN(256) PointerAndColWrapper { public: IndexT col_length[Size]; PointerAndColWrapper(const phi::GPUContext& ctx, @@ -151,6 +174,8 @@ struct PointerToPointerAndCol { PointerToPointer ins_ptr_wrapper; }; +#undef PADDLE_ALIGN + template struct alignas(MovSize) Packed { __device__ Packed() { @@ -358,10 +383,10 @@ void DispatchConcatWithSameShapeKernelLimitNum( dim3 grid_dims; GetBlockDims(ctx, out_row, out_col, &block_dims, &grid_dims); -#define IMPL_CONCAT_CUDA_KERNEL_CASE(size_, ...) \ - case size_: { \ - PointerWrapper ptr_array(ctx, ins, inputs_data); \ - __VA_ARGS__; \ +#define IMPL_CONCAT_CUDA_KERNEL_CASE(size_, ...) 
\ + case size_: { \ + AlignedPointerWrapper ptr_array(ctx, ins, inputs_data); \ + __VA_ARGS__; \ } break; switch (phi::backends::gpu::RoundToNextHighPowOfTwo(limit_num, 4)) { @@ -519,108 +544,6 @@ void DispatchConcatKernel(const phi::GPUContext& ctx, } } -template -__global__ void SplitKernel_(const T* input_data, - const int64_t in_row, - const int64_t in_col, - const int64_t* out_cols, - int out_cols_size, - T** outputs_data) { - int64_t curr_segment = 0; - int64_t curr_offset = out_cols[0]; - CUDA_KERNEL_LOOP_TYPE(tid_x, in_col, int64_t) { - int64_t curr_col_offset = out_cols[curr_segment + 1]; - while (curr_col_offset <= tid_x) { - curr_offset = curr_col_offset; - ++curr_segment; - curr_col_offset = out_cols[curr_segment + 1]; - } - - int64_t local_col = tid_x - curr_offset; - int64_t segment_width = curr_col_offset - curr_offset; - T* output_ptr = outputs_data[curr_segment]; - if (output_ptr != nullptr) { - int64_t tid_y = blockIdx.y * blockDim.y + threadIdx.y; - for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) - output_ptr[tid_y * segment_width + local_col] = - input_data[tid_y * in_col + tid_x]; - } - } -} - -template -__device__ void SplitKernelDetail(const T* input_data, - const int64_t in_row, - const int64_t in_col, - const int64_t fixed_out_col, - T** outputs_data) { - CUDA_KERNEL_LOOP_TYPE(tid_x, in_col, int64_t) { - int64_t split = tid_x / fixed_out_col; - int64_t in_offset = tid_x - split * fixed_out_col; - T* output_ptr = outputs_data[split]; - if (output_ptr != nullptr) { - int64_t tid_y = blockIdx.y * blockDim.y + threadIdx.y; - for (; tid_y < in_row; tid_y += blockDim.y * gridDim.y) - output_ptr[tid_y * fixed_out_col + in_offset] = - input_data[tid_y * in_col + tid_x]; - } - } -} - -template -__global__ void SplitKernel_(const T* input_data, - const int64_t in_row, - const int64_t in_col, - const int64_t fixed_out_col, - T** outputs_data) { - SplitKernelDetail(input_data, in_row, in_col, fixed_out_col, outputs_data); -} - -template -__global__ void SplitKernel_(const T* input_data, - const int64_t in_row, - const int64_t in_col, - const int64_t fixed_out_col, - T* outputs_addr0, - T* outputs_addr1) { - T* outputs_data[2]; - outputs_data[0] = outputs_addr0; - outputs_data[1] = outputs_addr1; - SplitKernelDetail(input_data, in_row, in_col, fixed_out_col, outputs_data); -} - -template -__global__ void SplitKernel_(const T* input_data, - const int64_t in_row, - const int64_t in_col, - const int64_t fixed_out_col, - T* outputs_addr0, - T* outputs_addr1, - T* outputs_addr2) { - T* outputs_data[3]; - outputs_data[0] = outputs_addr0; - outputs_data[1] = outputs_addr1; - outputs_data[2] = outputs_addr2; - SplitKernelDetail(input_data, in_row, in_col, fixed_out_col, outputs_data); -} - -template -__global__ void SplitKernel_(const T* input_data, - const int64_t in_row, - const int64_t in_col, - const int64_t fixed_out_col, - T* outputs_addr0, - T* outputs_addr1, - T* outputs_addr2, - T* outputs_addr3) { - T* outputs_data[4]; - outputs_data[0] = outputs_addr0; - outputs_data[1] = outputs_addr1; - outputs_data[2] = outputs_addr2; - outputs_data[3] = outputs_addr3; - SplitKernelDetail(input_data, in_row, in_col, fixed_out_col, outputs_data); -} - /* * All tensors' dimension should be the same and the values of * each dimension must be the same, except the axis dimension. 
@@ -708,37 +631,152 @@ struct ConcatFunctor { } }; -template -class SplitFunctor { +template +struct PointerAndColArray + : public funcs::PointerArraySetter { public: - void operator()(const phi::GPUContext& context, - const phi::DenseTensor& input, - const std::vector& ref_inputs, - int axis, - std::vector* outputs) { - // NOTE(zhiqiu): split a tensor of shape [0,3,4] at axis=1, result in 3 - // tensors of shape [0,1,4] - if (input.numel() == 0) { - return; + funcs::ValueArray val_array; + + PointerAndColArray() {} + PointerAndColArray(const phi::GPUContext& ctx, + const int out_col_num, + IndexT* out_cols, + std::vector* t, + T** pre_alloc_host_buf = nullptr) + : funcs::PointerArraySetter( + ctx, + t, + /*need_alloc=*/false, + /*use_cuda_graph=*/true, + pre_alloc_host_buf) { + IndexT* dev_ptr = nullptr; + if (Size == SegmentedArraySize::kVariableLength) { + size_t num_bytes = out_col_num * sizeof(IndexT); + dev_ptr = reinterpret_cast(this->AllocAndCopy( + ctx, reinterpret_cast(out_cols), num_bytes, true)); + val_array.Set(dev_ptr, out_col_num); + } else { + val_array.Set(out_cols, out_col_num); + } + } +}; + +template +__global__ void SplitTensorWithSameShape(const T* input_data, + const IndexT out_row, + const IndexT cumulative_col, + const IndexT fixed_out_col, + DataArrayT data_array) { + CUDA_KERNEL_LOOP_TYPE(tid_x, cumulative_col, IndexT) { + IndexT split = tid_x / fixed_out_col; + IndexT in_offset = tid_x - split * fixed_out_col; + T* output_ptr = data_array.data[split]; + if (output_ptr != nullptr) { + IndexT tid_y = blockIdx.y * blockDim.y + threadIdx.y; + for (; tid_y < out_row; tid_y += blockDim.y * gridDim.y) + output_ptr[tid_y * fixed_out_col + in_offset] = + input_data[tid_y * cumulative_col + tid_x]; + } + } +} + +template +__global__ void SplitTensorWithDifferentShape(const T* input_data, + const IndexT out_row, + const IndexT cumulative_col, + DataArrayT data_array, + ValArrayT col_array) { + IndexT curr_segment = 0; + IndexT curr_offset = col_array.data[0]; + CUDA_KERNEL_LOOP_TYPE(tid_x, cumulative_col, IndexT) { + IndexT curr_col_offset = col_array.data[curr_segment + 1]; + while (curr_col_offset <= tid_x) { + curr_offset = curr_col_offset; + ++curr_segment; + curr_col_offset = col_array.data[curr_segment + 1]; } - // TODO(zcd): Add input data validity checking - int o_num = outputs->size(); - int64_t out_row = 1; - auto dim_0 = ref_inputs[0]->dims(); - for (int i = 0; i < axis; ++i) { - out_row *= dim_0[i]; + IndexT local_col = tid_x - curr_offset; + IndexT segment_width = curr_col_offset - curr_offset; + T* output_ptr = data_array.data[curr_segment]; + if (output_ptr != nullptr) { + IndexT tid_y = blockIdx.y * blockDim.y + threadIdx.y; + for (; tid_y < out_row; tid_y += blockDim.y * gridDim.y) + output_ptr[tid_y * segment_width + local_col] = + input_data[tid_y * cumulative_col + tid_x]; } + } +} + +template +void SplitFunctionDispatchWithSameShape(const phi::GPUContext& ctx, + const IndexT out_col, + const IndexT out_row, + const IndexT cumulative_col, + const T* input_data, + std::vector* outs, + T** pre_alloc_host_buf) { + dim3 grid_dims; + dim3 block_dims; + GetBlockDims(ctx, out_row, cumulative_col, &block_dims, &grid_dims); + + funcs::PointerArraySetter setter( + ctx, + outs, + /*need_alloc=*/false, + /*use_cuda_graph=*/true, + pre_alloc_host_buf); + SplitTensorWithSameShape + <<>>( + input_data, out_row, cumulative_col, out_col, setter.array); +} + +template +void SplitFunctionDispatchWithDifferentShape( + const phi::GPUContext& ctx, + const int out_col_num, + 
const IndexT out_row, + const IndexT cumulative_col, + const T* input_data, + std::vector* outs, + IndexT* output_cols, + T** pre_alloc_host_buf) { + dim3 grid_dims; + dim3 block_dims; + GetBlockDims(ctx, out_row, cumulative_col, &block_dims, &grid_dims); + PointerAndColArray setter( + ctx, out_col_num, output_cols, outs, pre_alloc_host_buf); + + SplitTensorWithDifferentShape + <<>>( + input_data, out_row, cumulative_col, setter.array, setter.val_array); +} - int64_t out0_col = ref_inputs[0]->numel() / out_row; - int64_t in_col = 0, in_row = out_row; - bool has_same_shape = true; +template +void SplitFunctorDispatchWithIndexType( + const phi::GPUContext& ctx, + int axis, + const phi::DenseTensor& input, + const std::vector& ref_ins, + std::vector* outs) { + // TODO(zcd): Add input data validity checking + int out_num = outs->size(); + IndexT out_row = 1; + auto ref_dim = ref_ins[0]->dims(); + for (int i = 0; i < axis; ++i) { + out_row *= ref_dim[i]; + } + IndexT out_col = ref_ins[0]->numel() / out_row; + IndexT cumulative_col = 0; + bool has_same_shape = true; - int outputs_cols_num = o_num + 1; - std::vector outputs_data_vec(o_num); - std::vector outputs_cols_vec(outputs_cols_num); - T** outputs_data = outputs_data_vec.data(); - int64_t* outputs_cols = outputs_cols_vec.data(); + int out_cols_num = out_num + 1; + std::vector outputs_cols_vec(out_cols_num, 0); + IndexT* outs_cols = outputs_cols_vec.data(); + T** outs_data = nullptr; // There are some differences between hip runtime and NV runtime. // In NV, when the pageable memory data less than 64K is transferred from @@ -748,128 +786,90 @@ class SplitFunctor { // 3.2.6.1. Concurrent Execution between Host and Device // Memory copies from host to device of a memory block of 64 KB or less #ifdef PADDLE_WITH_HIP - paddle::memory::AllocationPtr data_alloc, cols_alloc; - // TODO(chentianyu03): try to find a method to remove the Alloc function - data_alloc = paddle::memory::Alloc(paddle::platform::CUDAPinnedPlace(), - o_num * sizeof(T*)); - outputs_data = reinterpret_cast(data_alloc->ptr()); - // TODO(chentianyu03): try to find a method to remove the Alloc function - cols_alloc = paddle::memory::Alloc(paddle::platform::CUDAPinnedPlace(), - (outputs_cols_num) * sizeof(int64_t)); - outputs_cols = reinterpret_cast(cols_alloc->ptr()); + paddle::memory::AllocationPtr data_alloc, cols_alloc; + // TODO(chentianyu03): try to find a method to remove the Alloc function + data_alloc = paddle::memory::Alloc(paddle::platform::CUDAPinnedPlace(), + out_num * sizeof(T*)); + outs_data = reinterpret_cast(data_alloc->ptr()); + // TODO(chentianyu03): try to find a method to remove the Alloc function + cols_alloc = paddle::memory::Alloc(paddle::platform::CUDAPinnedPlace(), + (out_cols_num) * sizeof(IndexT)); + outs_cols = reinterpret_cast(cols_alloc->ptr()); #endif - outputs_cols[0] = 0; - for (int i = 0; i < o_num; ++i) { - int64_t t_col = ref_inputs.at(i)->numel() / out_row; - if (has_same_shape) { - if (t_col != out0_col) has_same_shape = false; - } - in_col += t_col; - outputs_cols[i + 1] = in_col; - if (outputs->at(i) != nullptr) { - outputs_data[i] = outputs->at(i)->data(); - } else { - outputs_data[i] = nullptr; - } + outs_cols[0] = 0; + for (int i = 0; i < out_num; ++i) { + IndexT t_col = ref_ins.at(i)->numel() / out_row; + if (has_same_shape) { + has_same_shape &= (t_col == cumulative_col); } - - dim3 block_dims; - dim3 grid_dims; - GetBlockDims(context, out_row, in_col, &block_dims, &grid_dims); - - paddle::memory::allocation::AllocationPtr 
tmp_dev_outs_data; - T** dev_out_gpu_data = nullptr; - if (!has_same_shape || o_num < 2 || o_num > 4) { - // TODO(chentianyu03): try to find a method to remove the Alloc function - tmp_dev_outs_data = paddle::memory::Alloc( - context.GetPlace(), - o_num * sizeof(T*), - phi::Stream(reinterpret_cast(context.stream()))); - auto* restored = phi::backends::gpu::RestoreHostMemIfCapturingCUDAGraph( - outputs_data, o_num); - paddle::memory::Copy(context.GetPlace(), - tmp_dev_outs_data->ptr(), - phi::CPUPlace(), - restored, - o_num * sizeof(T*), - context.stream()); - dev_out_gpu_data = reinterpret_cast(tmp_dev_outs_data->ptr()); + cumulative_col += t_col; + outs_cols[i + 1] = cumulative_col; + } + int limit_num = has_same_shape ? out_num : out_cols_num; + if (has_same_shape) { + switch (funcs::CalcArraySize(limit_num)) { + SEGMENTED_ARRAY_KERNEL_HELPER( + SplitFunctionDispatchWithSameShape( + ctx, + out_col, + out_row, + cumulative_col, + input.data(), + outs, + outs_data)); } - - if (has_same_shape) { - if (o_num == 2) { - SplitKernel_<<>>( - input.data(), - in_row, - in_col, - out0_col, - outputs_data[0], - outputs_data[1]); - } else if (o_num == 3) { - SplitKernel_<<>>( - input.data(), - in_row, - in_col, - out0_col, - outputs_data[0], - outputs_data[1], - outputs_data[2]); - } else if (o_num == 4) { - SplitKernel_<<>>( - input.data(), - in_row, - in_col, - out0_col, - outputs_data[0], - outputs_data[1], - outputs_data[2], - outputs_data[3]); - } else { - SplitKernel_<<>>( - input.data(), in_row, in_col, out0_col, dev_out_gpu_data); - } - } else { - auto tmp_dev_ins_col_data = - // TODO(chentianyu03): try to find a method to remove the Alloc - // function - paddle::memory::Alloc( - context.GetPlace(), - outputs_cols_num * sizeof(int64_t), - phi::Stream(reinterpret_cast(context.stream()))); - auto* restored = phi::backends::gpu::RestoreHostMemIfCapturingCUDAGraph( - outputs_cols, outputs_cols_num); - paddle::memory::Copy(context.GetPlace(), - tmp_dev_ins_col_data->ptr(), - phi::CPUPlace(), - restored, - outputs_cols_num * sizeof(int64_t), - context.stream()); - int64_t* dev_outs_col_data = - reinterpret_cast(tmp_dev_ins_col_data->ptr()); - - SplitKernel_<<>>( - input.data(), - in_row, - in_col, - dev_outs_col_data, - static_cast(outputs_cols_num), - dev_out_gpu_data); + } else { + switch (funcs::CalcArraySize(limit_num)) { + SEGMENTED_ARRAY_KERNEL_HELPER( + SplitFunctionDispatchWithDifferentShape( + ctx, + out_cols_num, + out_row, + cumulative_col, + input.data(), + outs, + outs_cols, + outs_data)); } + } #ifdef PADDLE_WITH_HIP - // Prevent the pinned memory value from being covered and release the memory - // after the launch kernel of the stream is executed (reapply pinned memory - // next time) - auto* data_alloc_released = data_alloc.release(); - auto* cols_alloc_released = cols_alloc.release(); - context.AddStreamCallback([data_alloc_released, cols_alloc_released] { - paddle::memory::allocation::Allocator::AllocationDeleter( - data_alloc_released); - paddle::memory::allocation::Allocator::AllocationDeleter( - cols_alloc_released); - }); + // Prevent pinned memory from being covered and release the memory after + // kernel launch of the stream is executed (reapply pinned memory next time) + auto* data_alloc_released = data_alloc.release(); + auto* cols_alloc_released = cols_alloc.release(); + ctx.AddStreamCallback([data_alloc_released, cols_alloc_released] { + paddle::memory::allocation::Allocator::AllocationDeleter( + data_alloc_released); + 
paddle::memory::allocation::Allocator::AllocationDeleter( + cols_alloc_released); + }); #endif +} + +template +class SplitFunctor { + public: + void operator()(const phi::GPUContext& context, + const phi::DenseTensor& input, + const std::vector& ref_inputs, + int axis, + std::vector* outputs) { + int64_t numel = input.numel(); + // NOTE(zhiqiu): split a tensor of shape [0,3,4] at axis=1, result in + // 3 tensors of shape [0,1,4] + if (input.numel() == 0) { + return; + } + + if (numel < std::numeric_limits::max()) { + SplitFunctorDispatchWithIndexType( + context, axis, input, ref_inputs, outputs); + } else { + SplitFunctorDispatchWithIndexType( + context, axis, input, ref_inputs, outputs); + } } }; diff --git a/paddle/phi/kernels/funcs/segmented_array.h b/paddle/phi/kernels/funcs/segmented_array.h index aa03eb4e9fcd2..cacaa8f81fe86 100644 --- a/paddle/phi/kernels/funcs/segmented_array.h +++ b/paddle/phi/kernels/funcs/segmented_array.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/phi/backends/gpu/cuda/cuda_graph_with_memory_pool.h" #include "paddle/phi/core/dense_tensor.h" namespace phi { @@ -34,6 +35,26 @@ enum class SegmentedArraySize { kFixed64 = 64, }; +template (Size)> +struct PADDLE_ALIGN(256) ValueArray { + public: + T data[Num]; + + void Set(T* ptr, const int num) { + for (auto i = 0; i < num; ++i) { + data[i] = ptr[i]; + } + } +}; + +template +struct PADDLE_ALIGN(256) ValueArray { + public: + T* data{nullptr}; + + void Set(T* ptr, const int num) { data = ptr; } +}; + template struct PADDLE_ALIGN(256) ConstPointerArray { public: @@ -62,8 +83,8 @@ struct PADDLE_ALIGN(256) PointerArray { public: T* data[static_cast(Size)]; - void Set(const std::vector& ptrs, T** dev_ptr = nullptr) { - for (auto i = 0; i < ptrs.size(); ++i) { + void Set(T** ptrs, const int num, T** dev_ptr = nullptr) { + for (auto i = 0; i < num; ++i) { data[i] = ptrs[i]; } } @@ -74,9 +95,7 @@ struct PADDLE_ALIGN(256) PointerArray { public: T** data{nullptr}; - void Set(const std::vector& ptrs, T** dev_ptr = nullptr) { - data = dev_ptr; - } + void Set(T** ptrs, const int num, T** dev_ptr = nullptr) { data = dev_ptr; } }; #undef PADDLE_ALIGN @@ -84,13 +103,24 @@ struct PADDLE_ALIGN(256) PointerArray { template struct ArraySetterBase { protected: - void* AllocAndCopy(const Context& ctx, void* src, size_t num_bytes) { + void* AllocAndCopy(const Context& ctx, + void* src, + size_t num_bytes, + bool use_cuda_graph = false) { allocation = paddle::memory::Alloc( ctx.GetPlace(), num_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); + + int8_t* restored = reinterpret_cast(src); +#ifdef PADDLE_WITH_CUDA + if (use_cuda_graph) { + restored = phi::backends::gpu::RestoreHostMemIfCapturingCUDAGraph( + restored, num_bytes); + } +#endif phi::backends::gpu::GpuMemcpyAsync(allocation->ptr(), - src, + restored, num_bytes, phi::gpuMemcpyHostToDevice, ctx.stream()); @@ -131,13 +161,28 @@ struct PointerArraySetter : public ArraySetterBase { public: PointerArray array; - PointerArraySetter(const Context& ctx, std::vector* t) { + // need_alloc : tensor data needs extra buffer or not. + // use_cuda_graph: tensor data shall be captured by cuda_graph or not. + // pre_alloc_host_buf: tensor data is temporaily stored by pinned memory or + // not. 
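+  // Summary of the constructor below: output pointers are first gathered on
+  // the host into `ptrs` (or into `pre_alloc_host_buf`, the pinned-memory
+  // staging buffer used under PADDLE_WITH_HIP), and for kVariableLength
+  // arrays they are copied to device via AllocAndCopy, optionally restoring
+  // host memory when a CUDA graph is being captured.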
+ PointerArraySetter(const Context& ctx, + std::vector* t, + bool need_alloc = false, + bool use_cuda_graph = false, + T** pre_alloc_host_buf = nullptr) { ptrs.resize(t->size()); + T** data_ptr = ptrs.data(); +#ifdef PADDLE_WITH_HIP + if (pre_alloc_host_buf) { + data_ptr = pre_alloc_host_buf; + } +#endif for (int i = 0; i < t->size(); ++i) { if (t->at(i) && (t->at(i)->numel() > 0)) { - ptrs[i] = ctx.template Alloc(t->at(i)); + data_ptr[i] = + need_alloc ? ctx.template Alloc(t->at(i)) : t->at(i)->data(); } else { - ptrs[i] = nullptr; + data_ptr[i] = nullptr; } } @@ -145,10 +190,9 @@ struct PointerArraySetter : public ArraySetterBase { if (Size == SegmentedArraySize::kVariableLength) { size_t num_bytes = t->size() * sizeof(T*); dev_ptr = reinterpret_cast(this->AllocAndCopy( - ctx, reinterpret_cast(ptrs.data()), num_bytes)); + ctx, reinterpret_cast(data_ptr), num_bytes, use_cuda_graph)); } - - array.Set(ptrs, dev_ptr); + array.Set(data_ptr, t->size(), dev_ptr); } private: diff --git a/paddle/phi/kernels/funcs/stack_and_unstack.h b/paddle/phi/kernels/funcs/stack_and_unstack.h index c516d4892bf62..0b2b5443383a9 100644 --- a/paddle/phi/kernels/funcs/stack_and_unstack.h +++ b/paddle/phi/kernels/funcs/stack_and_unstack.h @@ -192,7 +192,7 @@ void LaunchUnStackKernel(const Context& ctx, << ", out_col=" << out_col << ", num_splits=" << num_splits; auto x_ptr = x.data(); - PointerArraySetter setter(ctx, outs); + PointerArraySetter setter(ctx, outs, /*need_alloc=*/true); if (out_col == 1) { // For the case axis == (x.dims().size() - 1) From 9f23114793dfb44445fb39df63f1dc92bdff9c53 Mon Sep 17 00:00:00 2001 From: Aurelius84 Date: Wed, 1 Feb 2023 09:22:54 +0800 Subject: [PATCH 52/89] [PrimCinn]Fix some vars are wrongly gc in CINN+InterpreterCore (#50116) * [PrimCinn]Fix some vars are wrongly gc in CINN+InterpreterCore * fix baseline unittest config * fix code style --- paddle/fluid/operators/cinn/cinn_launch_context.cc | 8 ++++++-- .../unittests/prim/prim/vjp/static/test_comp_add_grad.py | 2 +- .../prim/prim/vjp/static/test_comp_add_tanh_grad.py | 2 +- .../unittests/prim/prim/vjp/static/test_comp_div_grad.py | 2 +- .../unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py | 2 +- .../unittests/prim/prim/vjp/static/test_comp_sub_grad.py | 2 +- .../unittests/prim/prim/vjp/static/test_comp_tanh_grad.py | 2 +- 7 files changed, 12 insertions(+), 8 deletions(-) diff --git a/paddle/fluid/operators/cinn/cinn_launch_context.cc b/paddle/fluid/operators/cinn/cinn_launch_context.cc index af429e0f01e33..0b999ccab016f 100644 --- a/paddle/fluid/operators/cinn/cinn_launch_context.cc +++ b/paddle/fluid/operators/cinn/cinn_launch_context.cc @@ -119,12 +119,16 @@ CinnLaunchContext::CinnLaunchContext(const framework::ir::Graph& graph, // collect variables name list to be skipped in GC skip_eager_vars_.reserve(input_var_names.size() + output_var_names.size()); auto add_skip_var_fn = [&outer_varinfo, this](const std::string& var_name) { + // Always consider Input/Output of Graph as skip_gc_vars, because + // InterpreterCore has no eager_deletion_op to deal with it. 
+ + VLOG(4) << "Append a skip_gc_var for InterpreterCore:" << var_name; + skip_gc_vars_.insert(var_name); // if a var exists at the outer_varinfo map, that means it will be // erased by the following eager_deletion_op of current cinn_launch op if (!outer_varinfo.count(var_name)) { skip_eager_vars_.emplace_back(var_name); - skip_gc_vars_.insert(var_name); - VLOG(4) << "Append a skip_gc_var:" << var_name; + VLOG(4) << "Append a skip_gc_var for PE:" << var_name; } }; std::for_each( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py index 1673ff083e7cf..50ef9f6f13036 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_grad.py @@ -91,7 +91,7 @@ def train(self, use_prim, use_cinn): def test_cinn(self): paddle.disable_static() dy_res = self.train(use_prim=False, use_cinn=False) - comp_st_cinn_res = self.train(use_prim=True, use_cinn=False) + comp_st_cinn_res = self.train(use_prim=True, use_cinn=True) for i in range(len(dy_res)): np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py index 5dd7417130bc1..b037cc73bfd54 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_add_tanh_grad.py @@ -92,7 +92,7 @@ def train(self, use_prim, use_cinn): def test_cinn(self): paddle.disable_static() dy_res = self.train(use_prim=False, use_cinn=False) - comp_st_cinn_res = self.train(use_prim=True, use_cinn=False) + comp_st_cinn_res = self.train(use_prim=True, use_cinn=True) for i in range(len(dy_res)): np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py index 95d3c3027fd9d..606b55b5a95c0 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_div_grad.py @@ -91,7 +91,7 @@ def train(self, use_prim, use_cinn): def test_cinn(self): paddle.disable_static() dy_res = self.train(use_prim=False, use_cinn=False) - comp_st_cinn_res = self.train(use_prim=True, use_cinn=False) + comp_st_cinn_res = self.train(use_prim=True, use_cinn=True) for i in range(len(dy_res)): np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py index 8df50c768c2b7..8e623100dd09c 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sqrt_grad.py @@ -70,7 +70,7 @@ def train(self, use_prim, use_cinn): def test_cinn(self): paddle.disable_static() dy_res = self.train(use_prim=False, use_cinn=False) - comp_st_cinn_res = self.train(use_prim=True, use_cinn=False) + comp_st_cinn_res = self.train(use_prim=True, use_cinn=True) for i in range(len(dy_res)): np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py 
b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py index 693bf8b942bab..3245d118760b2 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_sub_grad.py @@ -92,7 +92,7 @@ def train(self, use_prim, use_cinn): def test_cinn(self): paddle.disable_static() dy_res = self.train(use_prim=False, use_cinn=False) - comp_st_cinn_res = self.train(use_prim=True, use_cinn=False) + comp_st_cinn_res = self.train(use_prim=True, use_cinn=True) for i in range(len(dy_res)): np.testing.assert_allclose( diff --git a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py index e643cf620a811..d28f84a685b0d 100644 --- a/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py +++ b/python/paddle/fluid/tests/unittests/prim/prim/vjp/static/test_comp_tanh_grad.py @@ -70,7 +70,7 @@ def train(self, use_prim, use_cinn): def test_cinn(self): paddle.disable_static() dy_res = self.train(use_prim=False, use_cinn=False) - comp_st_cinn_res = self.train(use_prim=True, use_cinn=False) + comp_st_cinn_res = self.train(use_prim=True, use_cinn=True) for i in range(len(dy_res)): np.testing.assert_allclose( From 3e9d854842aba82b900b7c578c2d125c3a3d18cf Mon Sep 17 00:00:00 2001 From: LiYuRio <63526175+LiYuRio@users.noreply.github.com> Date: Wed, 1 Feb 2023 10:32:13 +0800 Subject: [PATCH 53/89] fix gc and infinite buffer size (#50122) --- .../fleet_executor/compute_interceptor.cc | 42 +++++++++++-------- .../fleet_executor/compute_interceptor.h | 2 + .../fleet_executor/fleet_executor.cc | 27 ++++++------ python/paddle/fluid/executor.py | 7 ++-- .../test_fleet_executor_cond_interceptor.py | 17 +++++--- 5 files changed, 56 insertions(+), 39 deletions(-) diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc index 9aedaa131400f..a03ac900e9f66 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.cc @@ -50,14 +50,17 @@ void ComputeInterceptor::IncreaseReady(int64_t up_id) { auto max_ready_size = it->second.first; auto ready_size = it->second.second; ready_size += 1; - PADDLE_ENFORCE_LE(ready_size, - max_ready_size, - platform::errors::OutOfRange( - "upstream=%lld ready_size must <= max_ready_size, but " - "now ready_size=%lld, max_ready_size=%lld", - up_id, - ready_size, - max_ready_size)); + if (max_ready_size != INFINITE_BUFFER_SIZE) { + PADDLE_ENFORCE_LE( + ready_size, + max_ready_size, + platform::errors::OutOfRange( + "upstream=%lld ready_size must <= max_ready_size, but " + "now ready_size=%lld, max_ready_size=%lld", + up_id, + ready_size, + max_ready_size)); + } it->second.second = ready_size; } @@ -96,6 +99,9 @@ bool ComputeInterceptor::CanWriteOutput() { for (auto& outs : out_buffs_) { auto max_buffer_size = outs.second.first; auto used_size = outs.second.second; + if (max_buffer_size == INFINITE_BUFFER_SIZE) { + continue; + } // full, return false if (used_size == max_buffer_size) { VLOG(3) << "Interceptor " << GetInterceptorId() @@ -112,15 +118,17 @@ void ComputeInterceptor::SendDataReadyToDownStream() { auto max_buff_size = outs.second.first; auto used_size = outs.second.second; used_size += 1; - PADDLE_ENFORCE_LE( - used_size, - max_buff_size, - platform::errors::OutOfRange("downstream=%lld 
used buff size must <= " - "max_buff_size, but now used_size=%lld, " - "max_buff_size=%lld", - down_id, - used_size, - max_buff_size)); + if (max_buff_size != INFINITE_BUFFER_SIZE) { + PADDLE_ENFORCE_LE( + used_size, + max_buff_size, + platform::errors::OutOfRange("downstream=%lld used buff size must <= " + "max_buff_size, but now used_size=%lld, " + "max_buff_size=%lld", + down_id, + used_size, + max_buff_size)); + } outs.second.second = used_size; InterceptorMessage ready_msg; diff --git a/paddle/fluid/distributed/fleet_executor/compute_interceptor.h b/paddle/fluid/distributed/fleet_executor/compute_interceptor.h index 9709cd4437f10..eade47fd8787e 100644 --- a/paddle/fluid/distributed/fleet_executor/compute_interceptor.h +++ b/paddle/fluid/distributed/fleet_executor/compute_interceptor.h @@ -22,6 +22,8 @@ namespace paddle { namespace distributed { +const int64_t INFINITE_BUFFER_SIZE = -1; + class ComputeInterceptor : public Interceptor { public: ComputeInterceptor(int64_t interceptor_id, TaskNode* node); diff --git a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc index 88363696ede25..ae3776d2c5bea 100644 --- a/paddle/fluid/distributed/fleet_executor/fleet_executor.cc +++ b/paddle/fluid/distributed/fleet_executor/fleet_executor.cc @@ -111,21 +111,22 @@ void FleetExecutor::Init( task_node->SetUnusedVars(unused_vars); if (task_node->type() == "Cond") { std::vector while_block_vars; - std::vector vars_in_parent; - std::vector vars_in_sub; - for (auto& var : program_desc.Block(0).AllVars()) { - vars_in_parent.emplace_back(var->Name()); - } + VLOG(3) << "Vars in while sub block:"; for (auto& var : program_desc.Block(1).AllVars()) { - vars_in_sub.emplace_back(var->Name()); + VLOG(3) << var->Name(); + while_block_vars.emplace_back(var->Name()); + } + for (const auto& pair : unused_vars) { + if (pair.first->Type() == "while") { + for (const auto& var_name : pair.second) { + while_block_vars.emplace_back(var_name); + } + } + } + VLOG(3) << "Vars below will be removed after while:"; + for (const auto& name : while_block_vars) { + VLOG(3) << name; } - std::sort(vars_in_parent.begin(), vars_in_parent.end()); - std::sort(vars_in_sub.begin(), vars_in_sub.end()); - std::set_difference(vars_in_sub.begin(), - vars_in_sub.end(), - vars_in_parent.begin(), - vars_in_parent.end(), - std::back_inserter(while_block_vars)); task_node->SetWhileBlockVars(while_block_vars); } int64_t interceptor_id = task_node->task_id(); diff --git a/python/paddle/fluid/executor.py b/python/paddle/fluid/executor.py index da9d12802434f..6e094588e686a 100755 --- a/python/paddle/fluid/executor.py +++ b/python/paddle/fluid/executor.py @@ -2534,8 +2534,9 @@ def _prepare_fleet_executor_carrier( place = core.Place() place.set_place(self.place) - # NOTE: the last argument is used to force create some vars in root scope, - # won't be used during train. 
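+        # Forward the fetch targets (when fleet_opt provides a "fetch_var"
+        # entry) as the carrier's root-scope vars; otherwise fall back to an
+        # empty list as before. Illustrative example (names are made up):
+        #   fleet_opt = {"fetch_var": ["loss_0.tmp_0"], ...}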
+ inference_root_scope_vars = ( + fleet_opt["fetch_var"] if "fetch_var" in fleet_opt else [] + ) self._fleet_executor.init( carrier_id, program.desc, @@ -2544,7 +2545,7 @@ def _prepare_fleet_executor_carrier( num_micro_batches, tasks, task_id_to_rank, - [], + inference_root_scope_vars, micro_scope_list, ) diff --git a/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py b/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py index 1ca8c869a96bd..f6418cdee2cce 100644 --- a/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py +++ b/python/paddle/fluid/tests/unittests/test_fleet_executor_cond_interceptor.py @@ -165,19 +165,24 @@ def test_cond_interceptor(self): lazy_initialize=True, ) + infinite_buff_size = -1 task_a.add_downstream_task(task_b.task_id(), 2) task_b.add_upstream_task(task_a.task_id(), 2) - task_b.add_downstream_task(task_c.task_id(), 100) - task_c.add_upstream_task(task_b.task_id(), 100) + task_b.add_downstream_task(task_c.task_id(), infinite_buff_size) + task_c.add_upstream_task(task_b.task_id(), infinite_buff_size) task_c.add_downstream_task(task_d.task_id(), 2) task_d.add_upstream_task(task_c.task_id(), 2) - task_d.add_downstream_task(task_b.task_id(), 100, core.DependType.LOOP) - task_b.add_upstream_task(task_d.task_id(), 100, core.DependType.LOOP) + task_d.add_downstream_task( + task_b.task_id(), infinite_buff_size, core.DependType.LOOP + ) + task_b.add_upstream_task( + task_d.task_id(), infinite_buff_size, core.DependType.LOOP + ) task_b.add_downstream_task( - task_e.task_id(), 100, core.DependType.STOP_LOOP + task_e.task_id(), infinite_buff_size, core.DependType.STOP_LOOP ) task_e.add_upstream_task( - task_b.task_id(), 100, core.DependType.STOP_LOOP + task_b.task_id(), infinite_buff_size, core.DependType.STOP_LOOP ) main_program._pipeline_opt = { From 7f1a1570c68985b8649edbe484b812ab82df26bb Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 10:37:18 +0800 Subject: [PATCH 54/89] Fix Python IndexError of case1: paddle.linalg.lstsq (#49985) --- .../tests/unittests/test_linalg_lstsq_op.py | 33 +++++++++++++++++++ python/paddle/tensor/linalg.py | 19 +++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py index 82576ab1bd1bf..94dc901a56d0c 100644 --- a/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py +++ b/python/paddle/fluid/tests/unittests/test_linalg_lstsq_op.py @@ -278,5 +278,38 @@ def init_config(self): self._input_shape_2 = (50, 300) +class TestLinalgLstsqAPIError(unittest.TestCase): + def setUp(self): + pass + + def test_api_errors(self): + def test_x_bad_shape(): + x = paddle.to_tensor(np.random.random(size=(5)), dtype=np.float32) + y = paddle.to_tensor( + np.random.random(size=(5, 15)), dtype=np.float32 + ) + out = paddle.linalg.lstsq(x, y, driver='gelsy') + + def test_y_bad_shape(): + x = paddle.to_tensor( + np.random.random(size=(5, 10)), dtype=np.float32 + ) + y = paddle.to_tensor(np.random.random(size=(5)), dtype=np.float32) + out = paddle.linalg.lstsq(x, y, driver='gelsy') + + def test_shape_dismatch(): + x = paddle.to_tensor( + np.random.random(size=(5, 10)), dtype=np.float32 + ) + y = paddle.to_tensor( + np.random.random(size=(4, 15)), dtype=np.float32 + ) + out = paddle.linalg.lstsq(x, y, driver='gelsy') + + self.assertRaises(ValueError, test_x_bad_shape) + self.assertRaises(ValueError, test_y_bad_shape) + 
self.assertRaises(ValueError, test_shape_dismatch) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 4cce1b01968a1..46f11130c0354 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -3171,13 +3171,26 @@ def lstsq(x, y, rcond=None, driver=None, name=None): else: raise RuntimeError("Only support lstsq api for CPU or CUDA device.") - if x.dtype == y.dtype and x.dtype in (paddle.float32, paddle.float64): - pass - else: + if not (x.dtype == y.dtype and x.dtype in (paddle.float32, paddle.float64)): raise ValueError( "Only support x and y have the same dtype such as 'float32' and 'float64'." ) + if x.ndim < 2: + raise ValueError( + f"The shape of x should be (*, M, N), but received ndim is [{x.ndim} < 2]" + ) + + if y.ndim < 2: + raise ValueError( + f"The shape of y should be (*, M, K), but received ndim is [{y.ndim} < 2]" + ) + + if x.shape[-2] != y.shape[-2]: + raise ValueError( + f"x with shape (*, M = {x.shape[-2]}, N) and y with shape (*, M = {y.shape[-2]}, K) should have same M." + ) + if rcond is None: if x.dtype == paddle.float32: rcond = 1e-7 * max(x.shape[-2], x.shape[-1]) From 9ce8cfcf04fd53f1aa57d8e08d82b39eed3aaf3f Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 10:44:04 +0800 Subject: [PATCH 55/89] =?UTF-8?q?Fix=20UFA=E9=9D=9E=E6=B3=95=E5=9C=B0?= =?UTF-8?q?=E5=9D=80=E8=AE=BF=E9=97=AE(UFA=20illegal=20address=20access)?= =?UTF-8?q?=20of=20case4:=20paddle.unbind=20(#49995)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add axis check for unbind * add axis range check for unbind * update unittest and axis validation for unbind * add unittest invalid axis for unbind * restore axis extract for unbind --- paddle/phi/infermeta/unary.cc | 13 +++++++++++++ .../fluid/tests/unittests/test_unbind_op.py | 7 +++++++ python/paddle/tensor/manipulation.py | 15 ++++++++++----- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 3b3202c291725..eb05437ada8a5 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -4253,7 +4253,20 @@ void UnbindInferMeta(const MetaTensor& x, std::vector outs) { auto in_dims = x.dims(); std::vector out_dim; + + PADDLE_ENFORCE_GE( + axis, + -in_dims.size(), + phi::errors::InvalidArgument( + "axis must be in range(%d, %d).", -in_dims.size(), in_dims.size())); + PADDLE_ENFORCE_LT( + axis, + in_dims.size(), + phi::errors::InvalidArgument( + "axis must be in range(%d, %d).", -in_dims.size(), in_dims.size())); + axis = axis < 0 ? 
in_dims.size() + axis : axis; + for (int i = 0; i < in_dims.size(); ++i) { if (i != axis) out_dim.push_back(in_dims[i]); } diff --git a/python/paddle/fluid/tests/unittests/test_unbind_op.py b/python/paddle/fluid/tests/unittests/test_unbind_op.py index 6ec82a96bc165..8cafc1b5a8e1b 100644 --- a/python/paddle/fluid/tests/unittests/test_unbind_op.py +++ b/python/paddle/fluid/tests/unittests/test_unbind_op.py @@ -25,6 +25,7 @@ class TestUnbind(unittest.TestCase): def test_unbind(self): + paddle.enable_static() x_1 = fluid.data(shape=[2, 3], dtype='float32', name='x_1') [out_0, out_1] = tensor.unbind(input=x_1, axis=0) @@ -59,6 +60,7 @@ def test_unbind_dygraph(self): class TestLayersUnbind(unittest.TestCase): def test_layers_unbind(self): + paddle.enable_static() x_1 = fluid.data(shape=[2, 3], dtype='float32', name='x_1') [out_0, out_1] = paddle.unbind(input=x_1, axis=0) @@ -214,6 +216,11 @@ def test_table_Variable(): self.assertRaises(TypeError, test_table_Variable) + def test_invalid_axis(): + tensor.unbind(input=x, axis=2) + + self.assertRaises(ValueError, test_invalid_axis) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 923e6923d6d63..b5308e6cee63d 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -2755,14 +2755,19 @@ def unbind(input, axis=0): # x2.shape [3, 5] # x3.shape [3, 5] """ + if not isinstance(axis, (int)): + raise TypeError( + "The type of 'axis' must be int, but received %s." % (type(axis)) + ) + + if axis not in range(-input.ndim, input.ndim): + raise ValueError( + f'The axis must in range({-input.ndim}, {input.ndim}).' + ) + if in_dygraph_mode(): return _C_ops.unbind(input, axis) else: - if not isinstance(axis, (int)): - raise TypeError( - "The type of 'axis' must be int, but received %s." - % (type(axis)) - ) if isinstance(axis, np.generic): axis = np.asscalar(axis) input_shape = input.shape From 0855d9828c62af0b60e4d625e4ac973d1309147c Mon Sep 17 00:00:00 2001 From: zxcd <228587199@qq.com> Date: Wed, 1 Feb 2023 10:47:05 +0800 Subject: [PATCH 56/89] add clip_grad_norm_ API (#49935) * add clip_grad_norm_ api. * fix docs and some details according to the comments. * fix code style. * fix no_grad problem, and fix doc. * fix code style. * fix doc and remove type information --- .../tests/unittests/test_clip_grad_norm_.py | 121 ++++++++++++++++++ python/paddle/nn/utils/__init__.py | 4 +- python/paddle/nn/utils/clip_grad_norm_.py | 107 ++++++++++++++++ 3 files changed, 231 insertions(+), 1 deletion(-) create mode 100644 python/paddle/fluid/tests/unittests/test_clip_grad_norm_.py create mode 100644 python/paddle/nn/utils/clip_grad_norm_.py diff --git a/python/paddle/fluid/tests/unittests/test_clip_grad_norm_.py b/python/paddle/fluid/tests/unittests/test_clip_grad_norm_.py new file mode 100644 index 0000000000000..308c59d094ec5 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_clip_grad_norm_.py @@ -0,0 +1,121 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.nn.utils.clip_grad_norm_ import clip_grad_norm_ + + +class TestClipGradNorm(unittest.TestCase): + def test_basic(self): + run_test_equal( + self, + shape=[16, 16], + dtype=np.float32, + max_norm=5, + norm_type=2, + ) + run_test_equal( + self, + shape=(100,), + dtype=np.float32, + max_norm=1e20, + norm_type=2, + ) + run_test_equal( + self, + shape=[4, 8, 16], + dtype=np.float32, + max_norm=1.0, + norm_type=float("inf"), + ) + + def test_errors(self): + def TestValueError(): + input_pd = paddle.to_tensor( + np.random.random([1, 2]).astype(np.float32) + ) + input_pd.grad = paddle.to_tensor( + np.random.random([1, 2]).astype(np.float32) + ) + clip_grad_norm_(input_pd, max_norm=2, norm_type=float("-inf")) + + self.assertRaises(ValueError, TestValueError) + + def TestRuntimeError(): + input_pd = paddle.to_tensor( + np.random.random([1, 2]).astype(np.float32) + ) + input_pd.grad = paddle.full([1, 2], float("inf")) + clip_grad_norm_( + input_pd, max_norm=2, norm_type=2, error_if_nonfinite=True + ) + + self.assertRaises(RuntimeError, TestRuntimeError) + + def TestRuntimeErrorStaticMode(): + paddle.enable_static() + input_pd = paddle.to_tensor( + np.random.random([1, 2]).astype(np.float32) + ) + input_pd.grad = paddle.to_tensor( + np.random.random([1, 2]).astype(np.float32) + ) + clip_grad_norm_(input_pd, max_norm=2, norm_type=float("inf")) + paddle.disable_static() + + self.assertRaises(RuntimeError, TestRuntimeErrorStaticMode) + + +def run_test_equal( + self, + shape, + dtype, + max_norm, + norm_type: float = 2.0, + error_if_nonfinite: bool = False, +): + input = np.random.random(shape).astype(dtype) + grad = np.random.random(shape).astype(dtype) + input_pd = paddle.to_tensor(input) + input_pd.grad = paddle.to_tensor(grad) + + if norm_type == 2: + grad = grad.reshape(1, grad.size) + output = np.linalg.norm(grad, 'fro') + elif norm_type == np.inf: + output = np.amax(np.abs(grad)) + else: + output = np.linalg.norm(grad, norm_type) + clip_grad_norm_result = clip_grad_norm_( + input_pd, + max_norm=max_norm, + norm_type=norm_type, + error_if_nonfinite=error_if_nonfinite, + ) + + np.testing.assert_allclose( + clip_grad_norm_result.numpy(), + output, + rtol=1e-05, + atol=1e-05, + equal_nan=False, + ) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/nn/utils/__init__.py b/python/paddle/nn/utils/__init__.py index 23e1e233cc0dc..82b17c8c05d24 100644 --- a/python/paddle/nn/utils/__init__.py +++ b/python/paddle/nn/utils/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ vector_to_parameters, _stride_column, ) # noqa: F401 +from .clip_grad_norm_ import clip_grad_norm_ # noqa: F401 __all__ = [ # noqa 'weight_norm', @@ -26,4 +27,5 @@ 'spectral_norm', 'parameters_to_vector', 'vector_to_parameters', + 'clip_grad_norm_', ] diff --git a/python/paddle/nn/utils/clip_grad_norm_.py b/python/paddle/nn/utils/clip_grad_norm_.py new file mode 100644 index 0000000000000..3a3ecb38b4428 --- /dev/null +++ b/python/paddle/nn/utils/clip_grad_norm_.py @@ -0,0 +1,107 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import paddle + +__all__ = ['clip_grad_norm_'] + + +def clip_grad_norm_( + parameters, + max_norm, + norm_type=2.0, + error_if_nonfinite=False, +): + r"""Clips gradient norm of the iteratable parameters. + + Norms are calculated together on all gradients, just as they are + connected into one vector. The gradient will be modified in place. + + This API can only run in dynamic graph mode, not static graph mode. + + Args: + parameters (Iterable[paddle.Tensor] or paddle.Tensor): Tensors or a single Tensor + that will be normalized gradients + max_norm (float or int): max norm of the gradients + norm_type (float or int): type of the used p-norm. Can be `inf` for + infinity norm. + error_if_nonfinite (bool): if True, throw an error if the total + norm of the gradients from :attr:`parameters` is `nan`, + `inf`, or `-inf`. + + Returns: + Total norm of the parameter gradients (treated as a single vector). + Example: + .. code-block:: python + import paddle + + x = paddle.uniform([10, 10], min=-1.0, max=1.0, dtype='float32') + max_norm = float(5.0) + linear = paddle.nn.Linear(in_features=10, out_features=10) + out = linear(x) + loss = paddle.mean(out) + loss.backward() + + paddle.nn.utils.clip_grad_norm_(linear.parameters(), max_norm) + + sdg = paddle.optimizer.SGD(learning_rate=0.1, parameters=linear.parameters()) + sdg.step() + """ + if not paddle.in_dynamic_mode(): + raise RuntimeError('this API can only run in dynamic mode.') + + if isinstance(parameters, paddle.Tensor): + parameters = [parameters] + + support_norm_type = [float("inf"), 0, 1, 2] + if norm_type not in support_norm_type: + raise ValueError(f'norm_type only support {support_norm_type}') + + grads = [p.grad for p in parameters if p.grad is not None] + max_norm = float(max_norm) + norm_type = float(norm_type) + if len(grads) == 0: + return paddle.to_tensor(0.0) + if norm_type == float("inf"): + norms = [g.detach().abs().max() for g in grads] + total_norm = ( + norms[0] if len(norms) == 1 else paddle.max(paddle.stack(norms)) + ) + else: + total_norm = paddle.linalg.norm( + paddle.stack( + [paddle.linalg.norm(g.detach(), norm_type) for g in grads] + ), + norm_type, + ) + + if error_if_nonfinite and paddle.logical_or( + total_norm.isnan(), total_norm.isinf() + ): + raise RuntimeError( + f'The total norm of {norm_type} order of the gradients from ' + '`parameters` is non-finite, so it cannot be clipped. In any case, ' + 'disable this error and scale the gradient by non-finite norm, ' + 'set `error_if_nonfinite=False`' + ) + clip_coef = max_norm / (total_norm + 1e-6) + # Note: when the coef is clamped to 1, it is redundant to multiply the clamped coef, but this + # avoids the `if clip_coef < 1:` condition. 
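+    # In effect each gradient is scaled by min(1, max_norm / (total_norm + 1e-6)),
+    # so gradients whose total norm is already within max_norm are left
+    # (almost) unchanged.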
+ clip_coef_clamped = paddle.clip(clip_coef, max=1.0) + with paddle.no_grad(): + for _, p in enumerate(parameters): + g = p.grad + if g is not None: + p.grad = paddle.multiply(x=g, y=clip_coef_clamped) + return total_norm From 2b636166d24dcf695117ba576efa532ff24fc73b Mon Sep 17 00:00:00 2001 From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com> Date: Wed, 1 Feb 2023 10:49:05 +0800 Subject: [PATCH 57/89] nccl 2.7.8 to 2.10.3 (#50121) --- .../dockerfile/build_scripts/install_nccl2.sh | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/dockerfile/build_scripts/install_nccl2.sh b/tools/dockerfile/build_scripts/install_nccl2.sh index 6d44dbb90542f..d39e74f3cf537 100644 --- a/tools/dockerfile/build_scripts/install_nccl2.sh +++ b/tools/dockerfile/build_scripts/install_nccl2.sh @@ -17,18 +17,18 @@ VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //") if [ "$VERSION" == "10.0" ]; then DEB="nccl-repo-ubuntu1604-2.4.7-ga-cuda10.0_1-1_amd64.deb" -elif [ "$VERSION" == "10.2" ] || [ "$VERSION" == "10.1" ] || [ "$VERSION" == "11.0" ] || [ "$VERSION" == "11.2" ] || [ "$VERSION" == "11.3" ] || [ "$VERSION" == "11.4" ] || [ "$VERSION" == "11.5" ] || [ "$VERSION" == "11.6" ]; then +elif [ "$VERSION" == "10.2" ] || [ "$VERSION" == "10.1" ] || [ "$VERSION" == "11.0" ] || [ "$VERSION" == "11.2" ] || [ "$VERSION" == "11.3" ] || [ "$VERSION" == "11.4" ] || [ "$VERSION" == "11.5" ] || [ "$VERSION" == "11.6" ] || [ "$VERSION" == "11.8" ]; then if [ -f "/etc/redhat-release" ];then - rm -f /usr/local/lib/libnccl.so - wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-2.7.8-1+cuda10.2.x86_64.rpm - wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-devel-2.7.8-1+cuda10.2.x86_64.rpm - wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-static-2.7.8-1+cuda10.2.x86_64.rpm - rpm -ivh libnccl-2.7.8-1+cuda10.2.x86_64.rpm - rpm -ivh libnccl-devel-2.7.8-1+cuda10.2.x86_64.rpm - rpm -ivh libnccl-static-2.7.8-1+cuda10.2.x86_64.rpm && rm -f libnccl-* + rm -f /usr/local/lib/libnccl.so + wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-2.10.3-1+cuda11.4.x86_64.rpm + wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-devel-2.10.3-1+cuda11.4.x86_64.rpm + wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-static-2.10.3-1+cuda11.4.x86_64.rpm + rpm -Fivh libnccl-2.10.3-1+cuda11.4.x86_64.rpm + rpm -Fivh libnccl-devel-2.10.3-1+cuda11.4.x86_64.rpm + rpm -Fivh libnccl-static-2.10.3-1+cuda11.4.x86_64.rpm && rm -f libnccl-* exit 0 fi - DEB="nccl-repo-ubuntu1604-2.7.8-ga-cuda10.2_1-1_amd64.deb" + DEB="nccl-repo-ubuntu1804-2.10.3-cuda11.4_1.0-1_amd64.deb" elif [ "$VERSION" == "9.0" ]; then DEB="nccl-repo-ubuntu1604-2.3.7-ga-cuda9.0_1-1_amd64.deb" else From 73f3e67673ecf63e8899b595b6d28d9b2ba8fe0a Mon Sep 17 00:00:00 2001 From: wangxiaoning <71813629+wangxn12138@users.noreply.github.com> Date: Wed, 1 Feb 2023 10:56:22 +0800 Subject: [PATCH 58/89] clean ps_trainer_pass (#50117) --- python/paddle/distributed/passes/ps_trainer_pass.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/paddle/distributed/passes/ps_trainer_pass.py b/python/paddle/distributed/passes/ps_trainer_pass.py index 2a60b0df5f5eb..f25ede7f05ee5 100755 --- a/python/paddle/distributed/passes/ps_trainer_pass.py +++ b/python/paddle/distributed/passes/ps_trainer_pass.py @@ -464,7 +464,7 @@ def dag_check_up_and_reorder(program, 
inputs, outputs): "is_sparse": True, }, ) - PSGPU = paddle.fluid.core.PSGPU() + PSGPU = core.PSGPU() try: gpu_slot = [int(var.name) for var in gpups_inputs] except (ValueError): @@ -1052,7 +1052,7 @@ def _apply_single_impl(self, main_program, startup_program, pass_ctx): block_vars_detail = find_block_joints( program, program_block_ops, heter_ops ) - heter_program = framework.Program() + heter_program = paddle.framework.Program() self._create_heter_program( program, attrs, @@ -1628,13 +1628,13 @@ def _apply_single_impl(self, main_program, startup_program, pass_ctx): debug_program(_main_file, prog_b) if not self.is_part_b: - self.partA_program = framework.Program() + self.partA_program = paddle.framework.Program() self._get_partA_program(prog_a.global_block()) pass_ctx._attrs['part_a_main_program'] = self.partA_program self._clear_op_device_flag(self.partA_program) check_program(self.partA_program) else: - self.partB_program = framework.Program() + self.partB_program = paddle.framework.Program() self._get_partB_program(prog_b.global_block()) pass_ctx._attrs['part_b_main_program'] = self.partB_program self._clear_op_device_flag(self.partB_program) From af6730909071ff7b4de73ba6744ad4641a016d04 Mon Sep 17 00:00:00 2001 From: zhoutianzi666 <39978853+zhoutianzi666@users.noreply.github.com> Date: Wed, 1 Feb 2023 14:03:24 +0800 Subject: [PATCH 59/89] add dynamic shape support for running paddle-trt in calib_mode (#50033) --- .../operators/tensorrt/tensorrt_engine_op.h | 23 ++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h index 579549a4c3ec4..2f5da3c44b97f 100644 --- a/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h +++ b/paddle/fluid/operators/tensorrt/tensorrt_engine_op.h @@ -438,11 +438,32 @@ class TensorRTEngineOp : public framework::OperatorBase { calib_res->calib_.reset(new TRTInt8Calibrator( calib_buffers, runtime_batch, calibration_engine_key_, dev_place)); calib_res->thr_.reset(new std::thread([&]() { + std::map> min_input_shape; + std::map> max_input_shape; + std::map> opt_input_shape; + std::map> min_shape_tensor; + std::map> max_shape_tensor; + std::map> opt_shape_tensor; + if (shape_range_info_path_.size()) + inference::DeserializeShapeRangeInfo(shape_range_info_path_, + &min_input_shape, + &max_input_shape, + &opt_input_shape, + &min_shape_tensor, + &max_shape_tensor, + &opt_shape_tensor); + calib_res->engine_.reset(new TensorRTEngine(max_batch_size_, workspace_size_, precision_mode_, calib_res->calib_.get(), - dev_place.device)); + dev_place.device, + min_input_shape, + max_input_shape, + opt_input_shape, + min_shape_tensor, + max_shape_tensor, + opt_shape_tensor)); VLOG(3) << "start the calib trt engine thread"; PrepareTRTEngine(scope, calib_res->engine_.get()); })); From bdae548148b32b92a5ca922ea28abd7ed0c28517 Mon Sep 17 00:00:00 2001 From: limingshu <61349199+JamesLim-sy@users.noreply.github.com> Date: Wed, 1 Feb 2023 14:09:23 +0800 Subject: [PATCH 60/89] Combination of multiple paddle::memory::allocate operation into one for ops (#49126) * A leap of try for cudaLaunchCooperativeKernel * fix bugs * Totally replace the lar cuda kernel * Fix bugs * fix code according to comments * fix codes according to review comments * adding some function overload * relocate the power operation. * add bf16 support for index select relevant ops * revert bf16 type change. 
* add changes for more op * fix code writting bugs --- .../phi/kernels/funcs/elementwise_grad_base.h | 63 ++++++++----------- paddle/phi/kernels/funcs/matrix_inverse.cu.cc | 26 +++----- .../kernels/funcs/values_vectors_functor.h | 10 +-- 3 files changed, 39 insertions(+), 60 deletions(-) diff --git a/paddle/phi/kernels/funcs/elementwise_grad_base.h b/paddle/phi/kernels/funcs/elementwise_grad_base.h index b9ffb4e3f1237..f577f1781ff09 100644 --- a/paddle/phi/kernels/funcs/elementwise_grad_base.h +++ b/paddle/phi/kernels/funcs/elementwise_grad_base.h @@ -1530,37 +1530,31 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, ComputeBroadcastKernelSize( y_dims_array, out_dims_array, &y_blocks, &y_threads, max_dim); - auto x_strides_array_tmp = paddle::memory::Alloc( + // One part buffer for x_strides_array, rest for y_strides_array and + // out_dims_array. + size_t tmp_total_bytes = bytes * 3; + auto tmp_buffer = paddle::memory::Alloc( ctx.GetPlace(), - bytes, + tmp_total_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); - int *x_strides_array_gpu = - reinterpret_cast(x_strides_array_tmp->ptr()); + int *x_strides_array_gpu = reinterpret_cast(tmp_buffer->ptr()); + int *y_strides_array_gpu = + reinterpret_cast(x_strides_array_gpu + max_dim); + int *out_dims_array_gpu = + reinterpret_cast(y_strides_array_gpu + max_dim); + paddle::memory::Copy(gplace, x_strides_array_gpu, cplace, x_strides_array.data(), bytes, ctx.stream()); - - auto y_strides_array_tmp = paddle::memory::Alloc( - ctx.GetPlace(), - bytes, - phi::Stream(reinterpret_cast(ctx.stream()))); - int *y_strides_array_gpu = - reinterpret_cast(y_strides_array_tmp->ptr()); paddle::memory::Copy(gplace, y_strides_array_gpu, cplace, y_strides_array.data(), bytes, ctx.stream()); - - auto out_dims_array_tmp = paddle::memory::Alloc( - ctx.GetPlace(), - bytes, - phi::Stream(reinterpret_cast(ctx.stream()))); - int *out_dims_array_gpu = reinterpret_cast(out_dims_array_tmp->ptr()); paddle::memory::Copy( gplace, out_dims_array_gpu, cplace, out_dims_array, bytes, ctx.stream()); @@ -1569,24 +1563,21 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, int x_block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, x_threads); int y_block_size = std::min(ELEMWISE_MAX_BLOCK_DIM, y_threads); if (dx) { - auto x_strides_order_tmp = paddle::memory::Alloc( + size_t dx_total_bytes = bytes * 2; + auto dx_tmp_buffer = paddle::memory::Alloc( ctx.GetPlace(), - bytes, + dx_total_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); - int *x_strides_order_gpu = - reinterpret_cast(x_strides_order_tmp->ptr()); + int *x_strides_order_gpu = reinterpret_cast(dx_tmp_buffer->ptr()); + int *x_dims_order_gpu = + reinterpret_cast(x_strides_order_gpu + max_dim); + paddle::memory::Copy(gplace, x_strides_order_gpu, cplace, x_strides_order.data(), bytes, ctx.stream()); - - auto x_dims_order_tmp = paddle::memory::Alloc( - ctx.GetPlace(), - bytes, - phi::Stream(reinterpret_cast(ctx.stream()))); - int *x_dims_order_gpu = reinterpret_cast(x_dims_order_tmp->ptr()); paddle::memory::Copy(gplace, x_dims_order_gpu, cplace, @@ -1610,24 +1601,22 @@ void CommonGradBroadcastCUDA(const DenseTensor &x, dx_op); } if (dy) { - auto y_strides_order_tmp = paddle::memory::Alloc( + // One part buffer for y_strides_order_gpu, the other for y_dims_order_gpu + size_t dy_total_bytes = bytes * 2; + auto dy_tmp_buffer = paddle::memory::Alloc( ctx.GetPlace(), - bytes, + dy_total_bytes, phi::Stream(reinterpret_cast(ctx.stream()))); - int *y_strides_order_gpu = - reinterpret_cast(y_strides_order_tmp->ptr()); + int 
*y_strides_order_gpu = reinterpret_cast(dy_tmp_buffer->ptr()); + int *y_dims_order_gpu = + reinterpret_cast(y_strides_order_gpu + max_dim); + paddle::memory::Copy(gplace, y_strides_order_gpu, cplace, y_strides_order.data(), bytes, ctx.stream()); - - auto y_dims_order_tmp = paddle::memory::Alloc( - ctx.GetPlace(), - bytes, - phi::Stream(reinterpret_cast(ctx.stream()))); - int *y_dims_order_gpu = reinterpret_cast(y_dims_order_tmp->ptr()); paddle::memory::Copy(gplace, y_dims_order_gpu, cplace, diff --git a/paddle/phi/kernels/funcs/matrix_inverse.cu.cc b/paddle/phi/kernels/funcs/matrix_inverse.cu.cc index c43c3c04755f3..3961f82c8fd0f 100644 --- a/paddle/phi/kernels/funcs/matrix_inverse.cu.cc +++ b/paddle/phi/kernels/funcs/matrix_inverse.cu.cc @@ -55,11 +55,14 @@ void MatrixInverseFunctor::operator()(const Context& dev_ctx, cpu_ptrs[i + batch_size] = a_inv->data() + i * n * n; } - // Copy the addresses of A and A_inv from host to device. + // Copy the addresses of A and A_inv from host to device, + // and allocate device memory for info and pivots. + int num_ints = n < 32 ? batch_size : batch_size * (n + 1); + size_t total_bytes = cpu_ptrs.size() * sizeof(T*) + num_ints * sizeof(int); paddle::memory::allocation::AllocationPtr tmp_gpu_ptrs_data = paddle::memory::Alloc( dev_ctx.GetPlace(), - cpu_ptrs.size() * sizeof(T*), + total_bytes, phi::Stream(reinterpret_cast(dev_ctx.stream()))); paddle::memory::Copy(dev_ctx.GetPlace(), tmp_gpu_ptrs_data->ptr(), @@ -67,20 +70,12 @@ void MatrixInverseFunctor::operator()(const Context& dev_ctx, static_cast(cpu_ptrs.data()), cpu_ptrs.size() * sizeof(T*), dev_ctx.stream()); - T** gpu_inv_ptrs = - reinterpret_cast(tmp_gpu_ptrs_data->ptr()) + batch_size; - - // Allocate device memory for info and pivots. - int num_ints = n < 32 ? batch_size : batch_size * (n + 1); - paddle::memory::allocation::AllocationPtr tmp_gpu_info_data = - paddle::memory::Alloc( - dev_ctx.GetPlace(), - num_ints * sizeof(int), - phi::Stream(reinterpret_cast(dev_ctx.stream()))); - int* gpu_info_ptr = reinterpret_cast(tmp_gpu_info_data->ptr()); + T** gpu_inv_pivot_info = reinterpret_cast(tmp_gpu_ptrs_data->ptr()); + T** gpu_inv_ptrs = gpu_inv_pivot_info + batch_size; + int* gpu_info_ptr = + reinterpret_cast(gpu_inv_pivot_info + cpu_ptrs.size()); auto blas = phi::funcs::GetBlas(dev_ctx); - std::vector info; // only for singular checking info.resize(batch_size); // This functions in cuBLAS is intended to be used for matrices of small @@ -100,8 +95,7 @@ void MatrixInverseFunctor::operator()(const Context& dev_ctx, // This function performs the LU factorization of each matrix A by the // equation P * A = L * U. L and U are written back to original matrix A, // and diagonal elements of L are discarded. - int* gpu_pivot_ptr = - reinterpret_cast(tmp_gpu_info_data->ptr()) + batch_size; + int* gpu_pivot_ptr = gpu_info_ptr + batch_size; blas.BatchedGETRF(n, reinterpret_cast(tmp_gpu_ptrs_data->ptr()), gpu_pivot_ptr, diff --git a/paddle/phi/kernels/funcs/values_vectors_functor.h b/paddle/phi/kernels/funcs/values_vectors_functor.h index 63202ca4a484d..d4314307873f4 100644 --- a/paddle/phi/kernels/funcs/values_vectors_functor.h +++ b/paddle/phi/kernels/funcs/values_vectors_functor.h @@ -354,12 +354,6 @@ struct MatrixEighFunctor { has_vectors ? 
CUSOLVER_EIG_MODE_VECTOR : CUSOLVER_EIG_MODE_NOVECTOR; ValueType *out_value = dev_ctx.template Alloc(eigen_values); - auto info = paddle::memory::Alloc( - dev_ctx.GetPlace(), - sizeof(int) * batch_size, - phi::Stream(reinterpret_cast(dev_ctx.stream()))); - auto *info_ptr = reinterpret_cast(info->ptr()); - DenseTensor input_trans = phi::TransposeLast2Dim(dev_ctx, input); T *input_vector = input_trans.data(); @@ -410,11 +404,13 @@ struct MatrixEighFunctor { out_value, &workspace_size); } + size_t total_bytes = sizeof(T) * workspace_size + sizeof(int) * batch_size; auto work = paddle::memory::Alloc( dev_ctx.GetPlace(), - sizeof(T) * workspace_size, + total_bytes, phi::Stream(reinterpret_cast(dev_ctx.stream()))); auto *work_ptr = reinterpret_cast(work->ptr()); + auto *info_ptr = reinterpret_cast(work_ptr + workspace_size); for (auto i = 0; i < batch_size; ++i) { auto *input_data = input_vector + i * vector_stride; From 5dfddaea8834b610641007139878fc3fbdde869d Mon Sep 17 00:00:00 2001 From: gouzil <66515297+gouzil@users.noreply.github.com> Date: Wed, 1 Feb 2023 14:40:43 +0800 Subject: [PATCH 61/89] [Divide by 0 Error] add norm check (#49966) * [Divide by 0 Error] add norm check * [Divide by 0 Error] fix x AttributeError * [Divide by 0 Error] norm check migrate to c++ --- paddle/phi/kernels/cpu/p_norm_kernel.cc | 7 +++++++ paddle/phi/kernels/gpu/p_norm_kernel.cu | 7 +++++++ paddle/phi/kernels/xpu/p_norm_kernel.cc | 8 ++++++++ python/paddle/fluid/tests/unittests/test_norm_all.py | 9 +++++++++ 4 files changed, 31 insertions(+) diff --git a/paddle/phi/kernels/cpu/p_norm_kernel.cc b/paddle/phi/kernels/cpu/p_norm_kernel.cc index 597939953b277..bb33b8a397e02 100644 --- a/paddle/phi/kernels/cpu/p_norm_kernel.cc +++ b/paddle/phi/kernels/cpu/p_norm_kernel.cc @@ -61,6 +61,13 @@ void PNormKernel(const Context& dev_ctx, int pre, n, post; GetDims(xdim, axis, &pre, &n, &post, asvector); + for (int i = 0; i < xdim.size(); i++) { + PADDLE_ENFORCE_LT(0, + xdim[i], + errors::InvalidArgument( + "The dims of Input(X) should be greater than 0.")); + } + auto* place = dev_ctx.eigen_device(); Eigen::DSizes shape(pre, n, post); diff --git a/paddle/phi/kernels/gpu/p_norm_kernel.cu b/paddle/phi/kernels/gpu/p_norm_kernel.cu index c7a6261ce381e..fb869a00d9c50 100644 --- a/paddle/phi/kernels/gpu/p_norm_kernel.cu +++ b/paddle/phi/kernels/gpu/p_norm_kernel.cu @@ -105,6 +105,13 @@ void PNormKernel(const Context& dev_ctx, std::vector reduce_axis = funcs::details::GetReduceDim(axis_dims, xdim.size(), asvector); + for (int i = 0; i < xdim.size(); i++) { + PADDLE_ENFORCE_LT(0, + xdim[i], + errors::InvalidArgument( + "The dims of Input(X) should be greater than 0.")); + } + using MT = typename dtype::MPTypeTrait::Type; if (porder == 0) { phi::funcs::ReduceKernel>( diff --git a/paddle/phi/kernels/xpu/p_norm_kernel.cc b/paddle/phi/kernels/xpu/p_norm_kernel.cc index 7ef72c61ad3aa..60abc59517b78 100644 --- a/paddle/phi/kernels/xpu/p_norm_kernel.cc +++ b/paddle/phi/kernels/xpu/p_norm_kernel.cc @@ -55,6 +55,14 @@ void PNormKernel(const Context& dev_ctx, int n = 1; int t = 1; GetDims(xdim, axis, &m, &t, &n, asvector); + + for (int i = 0; i < xdim.size(); i++) { + PADDLE_ENFORCE_LT(0, + xdim[i], + errors::InvalidArgument( + "The dims of Input(X) should be greater than 0.")); + } + x_dim.push_back(m); x_dim.push_back(t); x_dim.push_back(n); diff --git a/python/paddle/fluid/tests/unittests/test_norm_all.py b/python/paddle/fluid/tests/unittests/test_norm_all.py index d70d0dd9f065d..beff458bd1b70 100644 --- 
a/python/paddle/fluid/tests/unittests/test_norm_all.py +++ b/python/paddle/fluid/tests/unittests/test_norm_all.py @@ -655,6 +655,15 @@ def err_dtype(p, shape_x, xdtype, out=None): ValueError, paddle.norm, data, p='unspport', axis=[-3, -2, -1] ) + with fluid.dygraph.guard(): + # The size of input in Norm should not be 0. + def test_0_size(): + array = np.array([], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [0, 0]), dtype='float32') + paddle.linalg.norm(x, axis=0) + + self.assertRaises(ValueError, test_0_size) + if __name__ == '__main__': paddle.enable_static() From 226a6567f5f66ec6946b1cd4e62af3664e4b0caf Mon Sep 17 00:00:00 2001 From: gouzil <66515297+gouzil@users.noreply.github.com> Date: Wed, 1 Feb 2023 14:46:34 +0800 Subject: [PATCH 62/89] [Divide by 0 Error] add eig check (#49971) * [Divide by 0 Error] add eig check * [Divide by 0 Error] eig check migrate to c++ * [Divide by 0 Error] Fix class name error --- paddle/phi/kernels/cpu/eig_kernel.cc | 5 +++ .../tests/unittests/test_linalg_eig_op.py | 34 +++++++++++++++++++ python/paddle/tensor/linalg.py | 1 + 3 files changed, 40 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_linalg_eig_op.py diff --git a/paddle/phi/kernels/cpu/eig_kernel.cc b/paddle/phi/kernels/cpu/eig_kernel.cc index 42a843391872f..c9bdf8af11682 100644 --- a/paddle/phi/kernels/cpu/eig_kernel.cc +++ b/paddle/phi/kernels/cpu/eig_kernel.cc @@ -31,6 +31,11 @@ void EigKernel(const Context& dev_ctx, int batch_count = BatchCount(x); int order = x.dims()[x.dims().size() - 1]; + PADDLE_ENFORCE_LT(0, + order, + errors::InvalidArgument( + "The order of Input(X) should be greater than 0.")); + DenseTensor real_w; DenseTensor real_v; diff --git a/python/paddle/fluid/tests/unittests/test_linalg_eig_op.py b/python/paddle/fluid/tests/unittests/test_linalg_eig_op.py new file mode 100644 index 0000000000000..18d95a4f383d9 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_linalg_eig_op.py @@ -0,0 +1,34 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle + + +class TestEigAPIError(unittest.TestCase): + def test_errors(self): + # The size of input in Eig should not be 0. 
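Taken together with the p_norm and lu guards in the neighbouring patches, the user-visible effect of these zero-size checks can be sketched as follows (an illustrative dygraph snippet, not part of the diff; it only restates what the unit tests in these patches assert):

import numpy as np
import paddle

empty = np.array([], dtype=np.float32)
x2d = paddle.to_tensor(np.reshape(empty, [0, 0]), dtype='float32')
x3d = paddle.to_tensor(np.reshape(empty, [0, 0, 0]), dtype='float32')

for zero_size_call in (
    lambda: paddle.linalg.norm(x2d, axis=0),        # p_norm kernel guard
    lambda: paddle.linalg.eig(x2d),                 # eig kernel guard
    lambda: paddle.linalg.lu(x3d, get_infos=True),  # lu kernel guard
):
    try:
        zero_size_call()
    except ValueError as err:  # InvalidArgument surfaces as ValueError in Python
        print('zero-size input rejected:', err)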
+ def test_0_size(): + array = np.array([], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [0, 0]), dtype='float32') + paddle.linalg.eig(x) + + self.assertRaises(ValueError, test_0_size) + + +if __name__ == '__main__': + unittest.main() diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 46f11130c0354..10c8c24a78724 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -2323,6 +2323,7 @@ def eig(x, name=None): # [ (16.50471283351188+0j) , (-5.5034820550763515+0j) , # (-0.21026087843552282+0j)]) """ + if in_dygraph_mode(): return _C_ops.eig(x) else: From f0811bb7dbcbda67793b5a6c1b520ca0adb8c2ac Mon Sep 17 00:00:00 2001 From: Ruibiao Chen Date: Wed, 1 Feb 2023 14:48:02 +0800 Subject: [PATCH 63/89] Fix errors for test_standalone_custom_stream (#50103) --- .../standalone_executor/test_standalone_custom_stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_custom_stream.py b/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_custom_stream.py index 116aa60d05202..4126f84ed1e8c 100644 --- a/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_custom_stream.py +++ b/python/paddle/fluid/tests/unittests/standalone_executor/test_standalone_custom_stream.py @@ -50,10 +50,10 @@ def set_custom_stream(self, prog): ops = prog.global_block().ops for op_index in op_index_for_stream1: ops[op_index].dist_attr.execution_stream = "s1" - ops[op_index].dist_attr.stream_priority = -1 + ops[op_index].dist_attr.stream_priority = 0 for op_index in op_index_for_stream2: ops[op_index].dist_attr.execution_stream = "s2" - ops[op_index].dist_attr.stream_priority = -2 + ops[op_index].dist_attr.stream_priority = -1 def run_program(self, apply_custom_stream=False): paddle.seed(2022) From f71796b6a58f5c71505821cae5b83fdd7851ca1a Mon Sep 17 00:00:00 2001 From: gouzil <66515297+gouzil@users.noreply.github.com> Date: Wed, 1 Feb 2023 14:48:24 +0800 Subject: [PATCH 64/89] [Divide by 0 Error] add lu check (#49974) * [Divide by 0 Error] add lu check * [Divide by 0 Error] lu check migrate to c++ --- paddle/phi/kernels/impl/lu_kernel_impl.h | 8 ++++++++ python/paddle/fluid/tests/unittests/test_lu_op.py | 14 ++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/paddle/phi/kernels/impl/lu_kernel_impl.h b/paddle/phi/kernels/impl/lu_kernel_impl.h index 31a83ea540176..5315e36b47172 100644 --- a/paddle/phi/kernels/impl/lu_kernel_impl.h +++ b/paddle/phi/kernels/impl/lu_kernel_impl.h @@ -520,6 +520,14 @@ DenseTensor Transpose2DTo6D(const Context& dev_ctx, const DenseTensor& x) { auto x_dim = x.dims(); auto x_vec = phi::vectorize(x_dim); int rank = x_vec.size(); + + for (int i = 0; i < x_dim.size(); i++) { + PADDLE_ENFORCE_LT(0, + x_dim[i], + errors::InvalidArgument( + "The dims of Input(X) should be greater than 0.")); + } + std::swap(x_vec[rank - 1], x_vec[rank - 2]); std::vector out_shape = x_vec; std::vector axis(rank); diff --git a/python/paddle/fluid/tests/unittests/test_lu_op.py b/python/paddle/fluid/tests/unittests/test_lu_op.py index 790ebb36f6d7c..3e083c76b71df 100644 --- a/python/paddle/fluid/tests/unittests/test_lu_op.py +++ b/python/paddle/fluid/tests/unittests/test_lu_op.py @@ -303,6 +303,20 @@ def run_lu_static(shape, dtype): run_lu_static(tensor_shape, dtype) +class TestLUAPIError(unittest.TestCase): + def test_errors(self): + with paddle.fluid.dygraph.guard(): + # The size of input in lu should not be 0. 
+ def test_0_size(): + array = np.array([], dtype=np.float32) + x = paddle.to_tensor( + np.reshape(array, [0, 0, 0]), dtype='float32' + ) + paddle.linalg.lu(x, get_infos=True) + + self.assertRaises(ValueError, test_0_size) + + if __name__ == "__main__": paddle.enable_static() unittest.main() From 520f48d6367b519aa5c3ec0d4073fdc380c458b1 Mon Sep 17 00:00:00 2001 From: zhangyikun02 <48021248+zhangyk0314@users.noreply.github.com> Date: Wed, 1 Feb 2023 14:49:29 +0800 Subject: [PATCH 65/89] support grid_sampler_grad op for XPU (#49857) --- paddle/phi/backends/xpu/xpu2_op_list.cc | 1 + .../kernels/xpu/grid_sample_grad_kernel.cc | 87 +++++++++++++++++++ .../fluid/tests/unittests/op_test_xpu.py | 7 +- .../unittests/xpu/test_grid_sampler_op_xpu.py | 1 + 4 files changed, 95 insertions(+), 1 deletion(-) create mode 100644 paddle/phi/kernels/xpu/grid_sample_grad_kernel.cc diff --git a/paddle/phi/backends/xpu/xpu2_op_list.cc b/paddle/phi/backends/xpu/xpu2_op_list.cc index 67ac2b17a7094..b3635652ffd10 100644 --- a/paddle/phi/backends/xpu/xpu2_op_list.cc +++ b/paddle/phi/backends/xpu/xpu2_op_list.cc @@ -313,6 +313,7 @@ XPUOpMap& get_kl2_ops() { XPUKernelSet({phi::DataType::INT64, phi::DataType::INT32, phi::DataType::FLOAT32})}, + {"grid_sampler_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"grid_sampler", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_sigmoid_grad", XPUKernelSet({phi::DataType::FLOAT32})}, {"hard_sigmoid", XPUKernelSet({phi::DataType::FLOAT32})}, diff --git a/paddle/phi/kernels/xpu/grid_sample_grad_kernel.cc b/paddle/phi/kernels/xpu/grid_sample_grad_kernel.cc new file mode 100644 index 0000000000000..86e78b4b15cf9 --- /dev/null +++ b/paddle/phi/kernels/xpu/grid_sample_grad_kernel.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
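The kernel added below registers the float32 gradient of grid_sample for XPU. As a rough end-to-end sketch of the path it enables (illustrative only, not part of the patch; it assumes an XPU build where the 'xpu' device string is available):

import paddle
import paddle.nn.functional as F

paddle.set_device('xpu')                   # assumption: XPU build/device present
x = paddle.rand([2, 3, 8, 8])              # NCHW input; the kernel only accepts 4-D
grid = paddle.rand([2, 5, 5, 2]) * 2 - 1   # sampling locations in [-1, 1]
x.stop_gradient = False
grid.stop_gradient = False

out = F.grid_sample(
    x, grid, mode='bilinear', padding_mode='zeros', align_corners=True
)
out.sum().backward()                       # now dispatches to the XPU grad kernel
print(x.grad.shape, grid.grad.shape)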
+ +#include "paddle/phi/kernels/grid_sample_grad_kernel.h" + +#include "paddle/phi/backends/xpu/enforce_xpu.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +void GridSampleGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& grid, + const DenseTensor& out_grid, + const std::string& mode, + const std::string& padding_mode, + bool align_corners, + DenseTensor* x_grad, + DenseTensor* grid_grad) { + PADDLE_ENFORCE_EQ( + x.dims().size(), + 4, + phi::errors::InvalidArgument( + ("XPU is only support input_dims == 4 in grid_sample_grad op."))); + + const int64_t n = grid.dims()[0]; + const int64_t out_h = grid.dims()[1]; + const int64_t out_w = grid.dims()[2]; + const int64_t c = x.dims()[1]; + const int64_t in_h = x.dims()[2]; + const int64_t in_w = x.dims()[3]; + + x_grad->Resize({n, c, in_h, in_w}); + T* x_grad_ptr = dev_ctx.template Alloc(x_grad); + + T* grid_grad_ptr = nullptr; + if (grid_grad != nullptr) { + grid_grad->Resize({n, out_h, out_w, 2}); + grid_grad_ptr = dev_ctx.template Alloc(grid_grad); + } + + bool is_nearest = false; + if (mode == "nearest") { + is_nearest = true; + } + int64_t padding_mode_type = 0; + if (padding_mode == "border") { + padding_mode_type = 1; + } else if (padding_mode == "reflection") { + padding_mode_type = 2; + } + + int r = xpu::grid_sample_grad(dev_ctx.x_context(), + x.data(), + grid.data(), + out_grid.data(), + x_grad_ptr, + grid_grad_ptr, + n, + c, + in_h, + in_w, + out_h, + out_w, + is_nearest, + align_corners, + padding_mode_type, + true); + PADDLE_ENFORCE_XDNN_SUCCESS(r, "grid_sample_grad"); +} + +} // namespace phi + +PD_REGISTER_KERNEL( + grid_sample_grad, XPU, ALL_LAYOUT, phi::GridSampleGradKernel, float) {} diff --git a/python/paddle/fluid/tests/unittests/op_test_xpu.py b/python/paddle/fluid/tests/unittests/op_test_xpu.py index a7cb066db7a1d..c6a76c55635dd 100644 --- a/python/paddle/fluid/tests/unittests/op_test_xpu.py +++ b/python/paddle/fluid/tests/unittests/op_test_xpu.py @@ -35,6 +35,7 @@ def setUpClass(cls): '''Fix random seeds to remove randomness from tests''' cls.use_xpu = True cls.use_mkldnn = False + cls.epsilon_xpu2xpu = 0.00000001 super().setUpClass() @classmethod @@ -212,7 +213,11 @@ def check_grad_with_place( user_defined_grad_outputs=user_defined_grad_outputs, ) self._assert_is_close( - a1, a2, inputs_to_check, 0.00000001, "Gradient Check On two xpu" + a1, + a2, + inputs_to_check, + self.epsilon_xpu2xpu, + "Gradient Check On two xpu", ) self._assert_is_close( a1, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py index 3b2deaf4396bb..c92ddc9531b21 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_grid_sampler_op_xpu.py @@ -170,6 +170,7 @@ def setUp(self): self.place = paddle.XPUPlace(0) self.init_dtype() self.op_type = 'grid_sampler' + self.epsilon_xpu2xpu = 0.000001 self.use_cudnn = False self.align_corners = True From 5349b9b9b3fce86204686dcf9c6bb327d56ba4a5 Mon Sep 17 00:00:00 2001 From: ZZK <359521840@qq.com> Date: Wed, 1 Feb 2023 15:11:31 +0800 Subject: [PATCH 66/89] bump isort version to 5.11.5 (#50126) --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bd09715e0a707..8168824189643 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -61,7 +61,7 @@ repos: - id: black files: 
(.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$ - repo: https://github.com/pycqa/isort - rev: 5.10.1 + rev: 5.11.5 hooks: - id: isort - repo: https://github.com/PyCQA/flake8 From c62657b3eccf2fa41e12c94b5a34d82a6f54890f Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 15:15:12 +0800 Subject: [PATCH 67/89] Fix Python IndexError of case9: paddle.static.nn.deform_conv2d (#49990) * add dimension check for deformable_conv * add unittest --- .../tests/unittests/test_deform_conv2d.py | 18 ++++++++++++++++++ python/paddle/static/nn/common.py | 5 +++++ 2 files changed, 23 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_deform_conv2d.py b/python/paddle/fluid/tests/unittests/test_deform_conv2d.py index 7a5093f872ced..d484e140b6e1d 100644 --- a/python/paddle/fluid/tests/unittests/test_deform_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_deform_conv2d.py @@ -726,5 +726,23 @@ def setUp(self): self.no_bias = False +class TestDeformConv2DError(unittest.TestCase): + def test_input_error(self): + def test_input_rank_error(): + paddle.enable_static() + x = paddle.static.data(name='error_x_1', shape=[0], dtype='float32') + offset = paddle.static.data( + name='error_offset_1', shape=[0], dtype='float32' + ) + mask = paddle.static.data( + name='error_mask_1', shape=[0, 0, 0], dtype='float32' + ) + out = paddle.static.nn.deform_conv2d( + x, offset, mask, 0, 0, deformable_groups=0 + ) + + self.assertRaises(ValueError, test_input_rank_error) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index c43385a8e9140..1581f299214df 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -2244,6 +2244,11 @@ def deformable_conv( mask, 'mask', (paddle.static.Variable, type(None)), 'deformable_conv' ) + if input.ndim != 4: + raise ValueError( + f'The input should be of [N, C, H, W] format, but received {input.shape}' + ) + num_channels = input.shape[1] assert param_attr is not False, "param_attr should not be False here." From 9fa2eb387429c25f2ccbdf6969ab8886ed5ef6dc Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Wed, 1 Feb 2023 15:20:42 +0800 Subject: [PATCH 68/89] jit layer support multi thread and fix predictor clone (#50095) * jit layer support multi thread * fix bug * clone prediector not do graph optimizer * format * fix comment and format * fix override and fromat * fix * fix --- .../fluid/inference/api/analysis_predictor.cc | 3 + paddle/fluid/jit/compilation_unit.cc | 8 +++ paddle/fluid/jit/compilation_unit.h | 2 + paddle/fluid/jit/engine/base_engine.h | 2 + paddle/fluid/jit/engine/interpreter_engine.cc | 9 ++- paddle/fluid/jit/engine/interpreter_engine.h | 8 ++- paddle/fluid/jit/engine/predictor_engine.cc | 17 ++++++ paddle/fluid/jit/engine/predictor_engine.h | 13 ++++- paddle/fluid/jit/layer.cc | 12 +++- paddle/fluid/jit/layer.h | 3 + paddle/fluid/jit/layer_test.cc | 56 +++++++++++++++++++ paddle/fluid/jit/serializer.cc | 2 + 12 files changed, 128 insertions(+), 7 deletions(-) diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index e89bcfa2c6a99..49e18f95000ff 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -1086,6 +1086,7 @@ bool AnalysisPredictor::GetFetch(std::vector *outputs, } void AnalysisPredictor::PrepareArgument() { + VLOG(3) << "AnalysisPredictor::PrepareArgument"; // Init std::unique_ptr argument_. 
argument_.reset(new Argument); argument_->SetUseGPU(config_.use_gpu()); @@ -2246,10 +2247,12 @@ AnalysisPredictor::~AnalysisPredictor() { } std::unique_ptr AnalysisPredictor::Clone(void *stream) { + VLOG(3) << "AnalysisPredictor::Clone"; std::lock_guard lk(clone_mutex_); auto *x = new AnalysisPredictor(config_); x->status_is_cloned_ = true; x->root_predictor_id_ = this->root_predictor_id_; + x->config_.apply_optim_ = false; if (config_.use_external_stream_ && stream == nullptr) { PADDLE_THROW(platform::errors::InvalidArgument( "config has been configured to use external stream, but the Clone " diff --git a/paddle/fluid/jit/compilation_unit.cc b/paddle/fluid/jit/compilation_unit.cc index 0f241d864fe07..1a2351048f90a 100644 --- a/paddle/fluid/jit/compilation_unit.cc +++ b/paddle/fluid/jit/compilation_unit.cc @@ -38,5 +38,13 @@ void CompilationUnit::SetEngine(const std::string &name, const jit::EngineMap &CompilationUnit::EngineMap() const { return engine_map_; } +std::shared_ptr CompilationUnit::Clone(void *stream) { + auto x = std::make_shared(); + for (auto &it : engine_map_) { + x->SetEngine(it.first, std::move(it.second->Clone(stream))); + } + return x; +} + } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/compilation_unit.h b/paddle/fluid/jit/compilation_unit.h index b862faa23f978..25e725fe57b9e 100644 --- a/paddle/fluid/jit/compilation_unit.h +++ b/paddle/fluid/jit/compilation_unit.h @@ -36,6 +36,8 @@ class CompilationUnit { const jit::EngineMap &EngineMap() const; + std::shared_ptr Clone(void *stream = nullptr); + private: jit::EngineMap engine_map_; }; diff --git a/paddle/fluid/jit/engine/base_engine.h b/paddle/fluid/jit/engine/base_engine.h index eaf3c1221c8a2..b6571d7ebdd41 100644 --- a/paddle/fluid/jit/engine/base_engine.h +++ b/paddle/fluid/jit/engine/base_engine.h @@ -29,6 +29,8 @@ class BaseEngine { virtual std::vector operator()(const std::vector &inputs) = 0; + virtual std::unique_ptr Clone(void *stream = nullptr) = 0; + virtual ~BaseEngine() {} }; diff --git a/paddle/fluid/jit/engine/interpreter_engine.cc b/paddle/fluid/jit/engine/interpreter_engine.cc index 410fd4dc01bed..36f8a2271d1ef 100644 --- a/paddle/fluid/jit/engine/interpreter_engine.cc +++ b/paddle/fluid/jit/engine/interpreter_engine.cc @@ -28,14 +28,14 @@ namespace jit { InterpreterEngine::InterpreterEngine(const std::shared_ptr &info, const VariableMap ¶ms_dict, const phi::Place &place) - : info_(info), place_(place) { + : info_(info), params_dict_(params_dict), place_(place) { info_->RemoveDescFeedFetch(); PADDLE_ENFORCE_GT( static_cast(info_->ProgramDesc().Block(0).OpSize()), 0, platform::errors::PreconditionNotMet( "There is no operator in ProgramDesc.")); - utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, &scope_); + utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, &scope_); VLOG(6) << framework::GenScopeTreeDebugInfo(&scope_); CreateInterpreterCore(); } @@ -98,5 +98,10 @@ const std::shared_ptr &InterpreterEngine::Info() const { return info_; } +std::unique_ptr InterpreterEngine::Clone(void *stream) { + auto *x = new InterpreterEngine(info_, params_dict_, place_); + return std::unique_ptr(x); +} + } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/engine/interpreter_engine.h b/paddle/fluid/jit/engine/interpreter_engine.h index 8c7f43f297d22..d7aa5d610a50e 100644 --- a/paddle/fluid/jit/engine/interpreter_engine.h +++ b/paddle/fluid/jit/engine/interpreter_engine.h @@ -43,14 +43,18 @@ class InterpreterEngine : public BaseEngine { void 
CreateInterpreterCore(); - std::vector operator()(const std::vector &inputs); + std::vector operator()(const std::vector &inputs) override; - std::vector operator()(const std::vector &inputs); + std::vector operator()( + const std::vector &inputs) override; const std::shared_ptr &Info() const; + std::unique_ptr Clone(void *stream = nullptr) override; + private: std::shared_ptr info_; + VariableMap params_dict_; framework::Scope scope_; phi::Place place_; std::shared_ptr inner_interpreter_; diff --git a/paddle/fluid/jit/engine/predictor_engine.cc b/paddle/fluid/jit/engine/predictor_engine.cc index 6a44c192c16f7..bac6f993b04f6 100644 --- a/paddle/fluid/jit/engine/predictor_engine.cc +++ b/paddle/fluid/jit/engine/predictor_engine.cc @@ -55,6 +55,17 @@ PredictorEngine::PredictorEngine(const std::shared_ptr &info, scope_, std::make_shared(info_->ProgramDesc())); } +PredictorEngine::PredictorEngine( + const std::shared_ptr &info, + const std::shared_ptr &scope, + const phi::Place &place, + const std::shared_ptr &predictor) + : info_(info), + scope_(scope), + place_(place), + predictor_(std::dynamic_pointer_cast( + predictor)) {} + std::vector PredictorEngine::operator()( const std::vector &inputs) { auto dense_tensors = utils::ToDenseTensors(inputs); @@ -188,5 +199,11 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, return true; } +std::unique_ptr PredictorEngine::Clone(void *stream) { + auto *x = new PredictorEngine( + info_, scope_, place_, std::move(predictor_->Clone(stream))); + return std::unique_ptr(x); +} + } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/engine/predictor_engine.h b/paddle/fluid/jit/engine/predictor_engine.h index 026b012cbfb02..ad07a7a7ffbf5 100644 --- a/paddle/fluid/jit/engine/predictor_engine.h +++ b/paddle/fluid/jit/engine/predictor_engine.h @@ -20,6 +20,7 @@ namespace paddle { class AnalysisPredictor; +class PaddlePredictor; namespace framework { class Scope; @@ -33,11 +34,19 @@ class PredictorEngine : public BaseEngine { const VariableMap ¶ms_dict, const phi::Place &place); + PredictorEngine(const std::shared_ptr &info, + const std::shared_ptr &scope, + const phi::Place &place, + const std::shared_ptr &predictor); + ~PredictorEngine() noexcept {} - std::vector operator()(const std::vector &inputs); + std::vector operator()(const std::vector &inputs) override; + + std::vector operator()( + const std::vector &inputs) override; - std::vector operator()(const std::vector &inputs); + std::unique_ptr Clone(void *stream = nullptr) override; private: std::shared_ptr info_; diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc index 75a7e282e6be8..332c53a8e3649 100644 --- a/paddle/fluid/jit/layer.cc +++ b/paddle/fluid/jit/layer.cc @@ -30,7 +30,10 @@ Layer::Layer(const VariableMap& params_map, const VariableMap& attrs_map, const FunctionInfoMap& info_map, const phi::Place& place) - : params_map_(params_map), attrs_map_(attrs_map), info_map_(info_map) { + : params_map_(params_map), + attrs_map_(attrs_map), + info_map_(info_map), + place_(place) { unit_.reset(new CompilationUnit()); } @@ -94,5 +97,12 @@ PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector) PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector) PD_SPECIALZE_ATTRIBUTE_TYPE(std::vector) +std::shared_ptr Layer::Clone(void* stream) { + std::shared_ptr x = + std::make_shared(params_map_, attrs_map_, info_map_, place_); + x->unit_ = unit_->Clone(stream); + return x; +} + } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/layer.h b/paddle/fluid/jit/layer.h index 
dd5ff5d9f91cd..ed8b739a0b72f 100644 --- a/paddle/fluid/jit/layer.h +++ b/paddle/fluid/jit/layer.h @@ -67,10 +67,13 @@ class Layer { std::vector FunctionNames() const; + std::shared_ptr Clone(void* stream = nullptr); + private: VariableMap params_map_; VariableMap attrs_map_; FunctionInfoMap info_map_; + phi::Place place_; std::shared_ptr unit_; }; diff --git a/paddle/fluid/jit/layer_test.cc b/paddle/fluid/jit/layer_test.cc index 4e367d8cc1b51..c163f3c50d9dd 100644 --- a/paddle/fluid/jit/layer_test.cc +++ b/paddle/fluid/jit/layer_test.cc @@ -20,6 +20,7 @@ #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/framework/variable.h" +#include "paddle/fluid/platform/timer.h" #include "paddle/phi/api/include/api.h" #include "paddle/phi/core/dense_tensor.h" #include "paddle/phi/core/kernel_registry.h" @@ -78,7 +79,11 @@ TEST(CpuLayerTest, Function) { TEST(CpuLayerTest, Construct) { auto place = phi::CPUPlace(); std::string path = "./multi_program_load/export"; + paddle::platform::Timer timer; + timer.Start(); auto layer = jit::Load(path, place); + timer.Pause(); + std::cout << "jit::Load coast" << timer.ElapsedMS() << std::endl; float fbias = layer.Attribute("fbias"); EXPECT_FLOAT_EQ(fbias, 1.4); @@ -119,6 +124,41 @@ TEST(CpuLayerTest, Construct) { EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6); } +TEST(CpuLayerTest, Clone) { + auto place = phi::CPUPlace(); + std::string path = "./multi_program_load/export"; + + paddle::platform::Timer timer; + timer.Start(); + auto layer = jit::Load(path, place); + timer.Pause(); + std::cout << "jit::Load cost " << timer.ElapsedMS() << " ms" << std::endl; + + timer.Start(); + auto layer2 = layer.Clone(); + timer.Pause(); + std::cout << "jit::Layer::Clone cost " << timer.ElapsedMS() << " ms" + << std::endl; + + float fbias = layer2->Attribute("fbias"); + EXPECT_FLOAT_EQ(fbias, 1.4); + + auto inputs = PrepareInputs(place); + auto outs = layer2->forward(inputs); + auto out_data = outs[0].data(); + EXPECT_NEAR(out_data[0], 0.02194316, 1e-6); + + auto func = layer2->Function("infer"); + EXPECT_TRUE(func.IsValid()); + outs = func(inputs); + out_data = outs[0].data(); + EXPECT_NEAR(out_data[0], 1.41562390, 1e-6); + auto pow_out = + paddle::experimental::pow(outs[0], paddle::experimental::Scalar(2)); + out_data = pow_out.data(); + EXPECT_NEAR(out_data[0], pow(1.41562390, 2.0), 1e-6); +} + #if defined(PADDLE_WITH_CUDA) TEST(GpuLayerTest, Construct) { auto place = phi::GPUPlace(); @@ -147,6 +187,22 @@ TEST(GpuLayerTest, Construct) { out_data = cpu_tensor.data(); EXPECT_NEAR(out_data[0], sqrt(1.41562390), 1e-6); } + +TEST(GpuLayerTest, Clone) { + auto place = phi::GPUPlace(); + + std::string path = "./multi_program_load/export"; + auto layer = jit::Load(path, place); + auto inputs = PrepareInputs(place); + + auto layer2 = layer.Clone(); + auto outs = layer2->forward(inputs); + auto gpu_tensor = outs[0]; + auto cpu_tensor = + paddle::experimental::copy_to(gpu_tensor, phi::CPUPlace(), true); + auto out_data = cpu_tensor.data(); + EXPECT_NEAR(out_data[0], 0.02194316, 1e-6); +} #endif } // namespace jit diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc index 0a7fdc0e3525a..436717a8dc389 100644 --- a/paddle/fluid/jit/serializer.cc +++ b/paddle/fluid/jit/serializer.cc @@ -30,8 +30,10 @@ DECLARE_string(jit_engine_type); namespace paddle { namespace jit { + using FunctionInfoMap = std::unordered_map>; + Layer Deserializer::operator()(const std::string& path, const phi::Place& place) { const auto& pdmodel_paths = 
utils::PdmodelFilePaths(path); From e03718f5b0049c7cddc4729d3aa786398c6873ce Mon Sep 17 00:00:00 2001 From: Wang Bojun <105858416+wwbitejotunn@users.noreply.github.com> Date: Wed, 1 Feb 2023 15:22:31 +0800 Subject: [PATCH 69/89] Preln fix (#49802) * preln_residual 2 fused_bias_residual * skip layernorm fix and ut * code refine * code style refine * fix ut * fix output * add trt layer fall back info * refine op teller and ut * DropoutMaskOut output fix --- .../ir/preln_residual_bias_fuse_pass.cc | 51 +++++++++++++++---- .../ir/trt_skip_layernorm_fuse_pass.cc | 2 +- .../fluid/inference/api/analysis_predictor.cc | 2 +- .../tensorrt/convert/preln_residual_bias.cc | 27 +++++----- paddle/fluid/inference/tensorrt/op_teller.cc | 20 ++++++-- ...sed_bias_dropout_residual_layer_norm_op.cc | 9 ++-- ...sed_bias_dropout_residual_layer_norm_op.cu | 8 ++- .../fused_layernorm_residual_dropout_bias.h | 7 +-- .../unittests/ir/inference/CMakeLists.txt | 9 ---- .../test_trt_convert_preln_residual_bias.py | 15 +++++- ...test_trt_convert_preln_residual_no_bias.py | 16 +++++- .../test_ir_preln_residual_bias_fuse_pass.py | 4 +- 12 files changed, 117 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc b/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc index 13b7b4ac72f96..48baf1f4b102f 100644 --- a/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc +++ b/paddle/fluid/framework/ir/preln_residual_bias_fuse_pass.cc @@ -129,6 +129,24 @@ void PrelnResidualBias::operator()(PDNode *x, PDNode *y) { } // namespace patterns +void setIntermediateOut(OpDesc *desc, + const std::string &out_name, + const std::string &scope_name) { + std::string new_name = scope_name + "/at." + out_name + ".new"; + desc->SetOutput(out_name, {new_name}); +} + +void addIntermediateOut(Node *op_node, + const std::string &out_name, + const std::string &scope_name, + Graph *graph) { + std::string new_name = scope_name + "/at." + out_name + ".new"; + VarDesc out_var(new_name); + out_var.SetPersistable(false); + auto *node_var = graph->CreateVarNode(&out_var); + IR_NODE_LINK_TO(op_node, node_var); +} + int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, bool with_bias) const { PADDLE_ENFORCE_NOT_NULL( @@ -207,7 +225,7 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, // on each other, so we make below check to ensure only one // PrelnResidualBias pattern is delalted with. 
for (auto op : elementwise1_out->inputs) { - if (op->Name() == "preln_residual_bias") return; + if (op->Name() == "fused_bias_dropout_residual_layer_norm") return; } if (!IsCompat(subgraph, graph)) { @@ -218,31 +236,37 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, std::unordered_set del_node_set; // Create an PrelnResidualBias op node OpDesc new_desc; - new_desc.SetType("preln_residual_bias"); + new_desc.SetType("fused_bias_dropout_residual_layer_norm"); // inputs new_desc.SetInput("X", {subgraph.at(x)->Name()}); - new_desc.SetInput("Y", {subgraph.at(y)->Name()}); - new_desc.SetInput("Scale", {layer_norm_scale->Name()}); - new_desc.SetInput("Bias", {layer_norm_bias->Name()}); + new_desc.SetInput("Residual", {subgraph.at(y)->Name()}); + new_desc.SetInput("LnScale", {layer_norm_scale->Name()}); + new_desc.SetInput("LnBias", {layer_norm_bias->Name()}); if (with_bias) { - new_desc.SetInput("EleBias", {elementwise_bias->Name()}); + new_desc.SetInput("Bias", {elementwise_bias->Name()}); } // outputs - new_desc.SetOutput("Out_0", {layer_norm_out->Name()}); - new_desc.SetOutput("Out_1", {elementwise1_out->Name()}); + new_desc.SetOutput("Y", {layer_norm_out->Name()}); + new_desc.SetOutput("BiasDropoutResidualOut", {elementwise1_out->Name()}); + new_desc.SetOutput("LnMean", {layer_norm_mean->Name()}); + new_desc.SetOutput("LnVariance", {layer_norm_variance->Name()}); + setIntermediateOut(&new_desc, "DropoutMaskOut", "preln_residual_bias_fuse"); // attrs - new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon")); + new_desc.SetAttr("ln_epsilon", layer_norm->Op()->GetAttr("epsilon")); + new_desc.SetAttr("dropout_rate", 0.0f); + new_desc.SetAttr("is_test", true); new_desc.SetAttr("begin_norm_axis", layer_norm->Op()->GetAttr("begin_norm_axis")); auto fused_node = graph->CreateOpNode(&new_desc); // OpDesc will be copied. 
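To make the rewiring above easier to follow, the matched subgraph corresponds roughly to the eager-mode computation sketched below (illustrative names, not part of the pass); after this pass it is served by a single fused_bias_dropout_residual_layer_norm op whose dropout_rate is fixed to 0:

import paddle
import paddle.nn.functional as F

hidden = 768
x = paddle.rand([4, 128, hidden])          # e.g. a projection output
residual = paddle.rand([4, 128, hidden])
ele_bias = paddle.zeros([hidden])
ln_scale = paddle.ones([hidden])
ln_bias = paddle.zeros([hidden])

pre_ln = x + ele_bias + residual           # elementwise0 + elementwise1 in the pattern
y = F.layer_norm(pre_ln, hidden, ln_scale, ln_bias, epsilon=1e-5)
# The fused op produces both `y` (output "Y") and `pre_ln`
# (output "BiasDropoutResidualOut"), matching the two outputs linked below.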
+ addIntermediateOut( + fused_node, "DropoutMaskOut", "preln_residual_bias_fuse", graph); + if (with_bias) { del_node_set.insert(elementwise0); del_node_set.insert(elementwise0_out); } del_node_set.insert(elementwise1); del_node_set.insert(layer_norm); - del_node_set.insert(layer_norm_mean); - del_node_set.insert(layer_norm_variance); GraphSafeRemoveNodes(graph, del_node_set); IR_NODE_LINK_TO(subgraph.at(x), fused_node); IR_NODE_LINK_TO(subgraph.at(y), fused_node); @@ -253,6 +277,9 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, IR_NODE_LINK_TO(layer_norm_bias, fused_node); IR_NODE_LINK_TO(fused_node, layer_norm_out); IR_NODE_LINK_TO(fused_node, elementwise1_out); + IR_NODE_LINK_TO(fused_node, layer_norm_mean); + IR_NODE_LINK_TO(fused_node, layer_norm_variance); + found_subgraph_count++; }; @@ -261,6 +288,8 @@ int PrelnResidualBiasFusePass::ApplyPattern(ir::Graph *graph, } void PrelnResidualBiasFusePass::ApplyImpl(ir::Graph *graph) const { + VLOG(1) << "Fuse PrelnResidualBias into " + "fused_bias_dropout_residual_layer_norm op with dropout rate = 0"; PADDLE_ENFORCE_NOT_NULL( graph, platform::errors::PreconditionNotMet("graph should not be null.")); FusePassBase::Init("preln_residual_bias_fuse", graph); diff --git a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc index db023746ac4c7..18ea8850dc5bf 100644 --- a/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc +++ b/paddle/fluid/framework/ir/trt_skip_layernorm_fuse_pass.cc @@ -170,7 +170,7 @@ void TrtSkipLayerNormFusePass::ApplyImpl(ir::Graph *graph) const { // attrs new_desc.SetAttr("epsilon", layer_norm->Op()->GetAttr("epsilon")); - if (new_desc.HasAttr("begin_norm_axis")) { + if (layer_norm->Op()->HasAttr("begin_norm_axis")) { int32_t begin_norm_axis = PADDLE_GET_CONST( int32_t, layer_norm->Op()->GetAttr("begin_norm_axis")); int32_t input_rank = diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc index 49e18f95000ff..bd49153f6b85e 100644 --- a/paddle/fluid/inference/api/analysis_predictor.cc +++ b/paddle/fluid/inference/api/analysis_predictor.cc @@ -2464,7 +2464,7 @@ USE_TRT_CONVERTER(rsqrt); USE_TRT_CONVERTER(fused_preln_embedding_eltwise_layernorm) USE_TRT_CONVERTER(fused_embedding_eltwise_layernorm); USE_TRT_CONVERTER(preln_skip_layernorm) -USE_TRT_CONVERTER(preln_residual_bias) +USE_TRT_CONVERTER(fused_bias_dropout_residual_layer_norm) USE_TRT_CONVERTER(c_allreduce_sum) USE_TRT_CONVERTER(roll) USE_TRT_CONVERTER(strided_slice) diff --git a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc index 28847aa5b7a30..85f9106b01148 100644 --- a/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc +++ b/paddle/fluid/inference/tensorrt/convert/preln_residual_bias.cc @@ -26,16 +26,12 @@ class PrelnResidualBiasOpConverter : public OpConverter { void operator()(const framework::proto::OpDesc& op, const framework::Scope& scope, bool test_mode) override { - VLOG(4) << "convert fused preln_residual_bias op to tensorrt layer"; - if (!engine_->with_dynamic_shape()) { - PADDLE_THROW( - platform::errors::Fatal("Unsupported static graph mode. 
Please set " - "dynamic shape of inputs.")); - } + VLOG(4) << "convert fused_bias_dropout_residual_layer_norm op with " + "drop_rate = 0 to preln_residual_bias tensorrt layer"; framework::OpDesc op_desc(op, nullptr); // Declare inputs auto* input1 = engine_->GetITensor(op_desc.Input("X")[0]); - auto* input2 = engine_->GetITensor(op_desc.Input("Y")[0]); + auto* input2 = engine_->GetITensor(op_desc.Input("Residual")[0]); std::vector inputs; inputs.push_back(input1); inputs.push_back(input2); @@ -50,18 +46,18 @@ class PrelnResidualBiasOpConverter : public OpConverter { return temp_data; }; framework::DDim bias_dims, scale_dims, ele_bias_dims; - auto* bias = get_persistable_data("Bias", &bias_dims); - auto* scale = get_persistable_data("Scale", &scale_dims); + auto* bias = get_persistable_data("LnBias", &bias_dims); + auto* scale = get_persistable_data("LnScale", &scale_dims); auto const& vars = op_desc.Inputs(false); - bool has_bias = vars.find("EleBias") != vars.end(); + bool has_bias = vars.find("Bias") != vars.end(); float* ele_bias = - has_bias ? get_persistable_data("EleBias", &ele_bias_dims) : nullptr; + has_bias ? get_persistable_data("Bias", &ele_bias_dims) : nullptr; int bias_size = phi::product(bias_dims); int scale_size = phi::product(scale_dims); int ele_bias_size = has_bias ? phi::product(ele_bias_dims) : 0; - float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("epsilon")); + float epsilon = PADDLE_GET_CONST(float, op_desc.GetAttr("ln_epsilon")); bool with_fp16 = engine_->WithFp16() && !engine_->disable_trt_plugin_fp16(); if (engine_->precision() == AnalysisConfig::Precision::kInt8) { with_fp16 = true; @@ -102,8 +98,8 @@ class PrelnResidualBiasOpConverter : public OpConverter { plugin_inputs.emplace_back(input2); layer = engine_->AddDynamicPlugin(plugin_inputs.data(), 2, plugin); std::vector output_names; - output_names.push_back(op_desc.Output("Out_0")[0]); - output_names.push_back(op_desc.Output("Out_1")[0]); + output_names.push_back(op_desc.Output("Y")[0]); + output_names.push_back(op_desc.Output("BiasDropoutResidualOut")[0]); RreplenishLayerAndOutput( layer, "preln_residual_bias", output_names, test_mode); } @@ -113,4 +109,5 @@ class PrelnResidualBiasOpConverter : public OpConverter { } // namespace inference } // namespace paddle -REGISTER_TRT_OP_CONVERTER(preln_residual_bias, PrelnResidualBiasOpConverter); +REGISTER_TRT_OP_CONVERTER(fused_bias_dropout_residual_layer_norm, + PrelnResidualBiasOpConverter); diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc index 0075c64759333..e9c34408bb6bf 100644 --- a/paddle/fluid/inference/tensorrt/op_teller.cc +++ b/paddle/fluid/inference/tensorrt/op_teller.cc @@ -1495,7 +1495,21 @@ struct SimpleOpTypeSetTeller : public Teller { return false; } } - + if (op_type == "fused_bias_dropout_residual_layer_norm") { + if (!with_dynamic_shape) { + VLOG(3) << "fused_bias_dropout_residual_layer_norm should run on " + "dynamic shape mode."; + return false; + } + float dropout_rate = + PADDLE_GET_CONST(float, desc.GetAttr("dropout_rate")); + if (dropout_rate != 0.0f) { + VLOG(4) << "preln_residual_bias trt layer can not work with " + "fused_bias_dropout_residual_layer_norm op in which the " + "dropout_rate != 0, stop convert"; + return false; + } + } if (op_type == "fused_preln_embedding_eltwise_layernorm") { if (!with_dynamic_shape) { VLOG(3) << "fused_preln_embedding_eltwise_layernorm should run on " @@ -2594,7 +2608,7 @@ struct SimpleOpTypeSetTeller : public Teller { "slice", 
"strided_slice", "fused_preln_embedding_eltwise_layernorm", - "preln_residual_bias", + "fused_bias_dropout_residual_layer_norm", "c_allreduce_sum", "c_allreduce_min", "c_allreduce_max", @@ -2744,7 +2758,7 @@ struct SimpleOpTypeSetTeller : public Teller { "strided_slice", "fused_preln_embedding_eltwise_layernorm", "preln_skip_layernorm", - "preln_residual_bias", + "fused_bias_dropout_residual_layer_norm", "c_allreduce_sum", "c_allreduce_min", "c_allreduce_max", diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc index a6fa80a493972..7f877867050ed 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cc @@ -35,16 +35,17 @@ class FusedBiasDropoutResidualLnOp : public framework::OperatorWithKernel { "Output", "LnVariance", "FusedBiasDropoutResidualLnOp"); - OP_INOUT_CHECK(ctx->HasOutput("BiasDropoutResidualOut"), - "Output", - "BiasDropoutResidualOut", - "FusedBiasDropoutResidualLnOp"); OP_INOUT_CHECK(ctx->HasOutput("DropoutMaskOut"), "Output", "DropoutMaskOut", "FusedBiasDropoutResidualLnOp"); + OP_INOUT_CHECK(ctx->HasOutput("BiasDropoutResidualOut"), + "Output", + "BiasDropoutResidualOut", + "FusedBiasDropoutResidualLnOp"); OP_INOUT_CHECK( ctx->HasOutput("Y"), "Output", "Y", "FusedBiasDropoutResidualLnOp"); + auto x_dim = ctx->GetInputDim("X"); int left = 1; for (int i = 0; i < x_dim.size() - 1; i++) { diff --git a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu index 2562c2cc22575..01a233950b279 100644 --- a/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu +++ b/paddle/fluid/operators/fused/fused_bias_dropout_residual_layer_norm_op.cu @@ -54,8 +54,12 @@ class FusedBiasDropoutResidualLnOpKernel : public framework::OpKernel { auto *ln_mean_data = dev_ctx.Alloc(ln_mean, ln_mean->numel() * sizeof(U)); auto *ln_var_data = dev_ctx.Alloc(ln_var, ln_var->numel() * sizeof(U)); - auto *dropout_mask_out_data = dev_ctx.Alloc( - dropout_mask_out, dropout_mask_out->numel() * sizeof(uint8_t)); + auto *dropout_mask_out_data = + (dropout_mask_out == nullptr) + ? 
nullptr + : dev_ctx.Alloc( + dropout_mask_out, + dropout_mask_out->numel() * sizeof(uint8_t)); auto *y_data = dev_ctx.Alloc(y, y->numel() * sizeof(T)); const auto input_x_dims = input_x->dims(); diff --git a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h index c65364d2818d1..0c4e10fa156f9 100644 --- a/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h +++ b/paddle/fluid/operators/fused/fused_layernorm_residual_dropout_bias.h @@ -854,9 +854,10 @@ void LaunchLayernormResidualDropoutBias( residual, rows * cols * sizeof(T), ctx.stream()); - PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync( - mask_data, 0, rows * cols * sizeof(MaskType), ctx.stream())); - + if (mask_data != nullptr) { + PADDLE_ENFORCE_GPU_SUCCESS(cudaMemsetAsync( + mask_data, 0, rows * cols * sizeof(MaskType), ctx.stream())); + } // call layernorm forward switch (GetDesiredBlockDim(cols)) { FIXED_BLOCK_DIM_CASE( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt index bdcf6ab951022..2dd35d10d5ff0 100755 --- a/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/ir/inference/CMakeLists.txt @@ -18,15 +18,6 @@ string(REPLACE ".py" "" TEST_TRT_CONVERTER "${TEST_TRT_CONVERTER}") if(NOT WITH_DISTRIBUTE) list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_delete_c_identity_op_pass") - list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES - "test_trt_convert_preln_residual_bias") - list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_preln_residual_bias") - list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_preln_residual_bias") - list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES - "test_trt_convert_preln_residual_no_bias") - list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_preln_residual_no_bias") - list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_preln_residual_no_bias") - list(REMOVE_ITEM TEST_INFERENCE_IR_PASSES "test_trt_convert_c_allreduce") list(REMOVE_ITEM TEST_TRT_IR_PASSES "test_trt_convert_c_allreduce") list(REMOVE_ITEM TEST_TRT_CONVERTER "test_trt_convert_c_allreduce") diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_bias.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_bias.py index 9e17b83ab9c1e..a45ddfcae189e 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_bias.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_bias.py @@ -158,11 +158,24 @@ def clear_dynamic_shape(): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, dynamic_shape): - return 1, 4 + if dynamic_shape: + return 1, 4 + else: + return 0, 5 attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] + # for static_shape, fall back to fluid fused op + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-2 # atol=1e-2 while rtol is 1e-8 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-2 # atol=1e-2 while rtol is 1e-8 # just support dynamic_shape generate_dynamic_shape(attrs) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_no_bias.py 
b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_no_bias.py index aef2142bf3e8e..fd3bdb64c7ede 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_no_bias.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_convert_preln_residual_no_bias.py @@ -146,12 +146,26 @@ def clear_dynamic_shape(): self.dynamic_shape.opt_input_shape = {} def generate_trt_nodes_num(attrs, dynamic_shape): - return 1, 4 + if dynamic_shape: + return 1, 4 + else: + return 0, 5 attrs = [ program_config.ops[i].attrs for i in range(len(program_config.ops)) ] + # for static_shape, fall back to fluid fused op + clear_dynamic_shape() + self.trt_param.precision = paddle_infer.PrecisionType.Float32 + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-2 # atol=1e-2 while rtol is 1e-8 + self.trt_param.precision = paddle_infer.PrecisionType.Half + yield self.create_inference_config(), generate_trt_nodes_num( + attrs, False + ), 1e-2 # atol=1e-2 while rtol is 1e-8 + # just support dynamic_shape generate_dynamic_shape(attrs) self.trt_param.precision = paddle_infer.PrecisionType.Float32 diff --git a/python/paddle/fluid/tests/unittests/ir/test_ir_preln_residual_bias_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/test_ir_preln_residual_bias_fuse_pass.py index 8f74ceebb6586..c66ee86453288 100644 --- a/python/paddle/fluid/tests/unittests/ir/test_ir_preln_residual_bias_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/test_ir_preln_residual_bias_fuse_pass.py @@ -38,7 +38,7 @@ def setUp(self): self.fetch_list = [out, elementwise_out] self.pass_names = "preln_residual_bias_fuse_pass" - self.fused_op_type = "preln_residual_bias" + self.fused_op_type = "fused_bias_dropout_residual_layer_norm" self.num_fused_ops = 1 # self.graph_attrs = { # "embedding_eltwise_layernorm_fuse_pass_flag": True, @@ -72,7 +72,7 @@ def setUp(self): self.fetch_list = [out, elementwise_out] self.pass_names = "preln_residual_bias_fuse_pass" - self.fused_op_type = "preln_residual_bias" + self.fused_op_type = "fused_bias_dropout_residual_layer_norm" self.num_fused_ops = 1 def test_check_program(self): From 776021c10278d5e3e714237943022e8118fad526 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Wed, 1 Feb 2023 15:34:42 +0800 Subject: [PATCH 70/89] fix the NullPointerError of matrix_power (#50015) --- python/paddle/fluid/tests/unittests/test_matrix_power_op.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py index 6381aeeca9868..8296aa320f59b 100644 --- a/python/paddle/fluid/tests/unittests/test_matrix_power_op.py +++ b/python/paddle/fluid/tests/unittests/test_matrix_power_op.py @@ -317,6 +317,12 @@ def test_errors(self): input = fluid.data(name="input_4", shape=[1, 1, 0, 0], dtype="float32") self.assertRaises(ValueError, paddle.linalg.matrix_power, input, 2) + # The size of input should not be 0 + input = fluid.data(name="input_5", shape=[0, 0], dtype="float32") + self.assertRaises( + ValueError, paddle.linalg.matrix_power, input, -956301312 + ) + class TestMatrixPowerSingularAPI(unittest.TestCase): def setUp(self): From 3a73d3488410ffac3cc049fdce7d9fbf7a3a62f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=BC=A0=E6=98=A5=E4=B9=94?= <83450930+Liyulingyue@users.noreply.github.com> Date: Wed, 1 Feb 2023 15:36:01 +0800 Subject: [PATCH 
71/89] fix the div 0 error of sparse_embedding (#49948) * fix the div 0 error of sparse_embedding * add unittest --- python/paddle/fluid/contrib/layers/nn.py | 3 ++ .../unittests/test_sparse_embedding_op.py | 37 +++++++++++++++++++ 2 files changed, 40 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_sparse_embedding_op.py diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index b836dfa451c33..d2aff8bfcf659 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -793,6 +793,9 @@ def sparse_embedding( 'paddle.static.nn.sparse_embedding', ) + if input.size == 0: + raise ValueError("input size should not be 0") + w = helper.create_parameter( attr=helper.param_attr, shape=size, diff --git a/python/paddle/fluid/tests/unittests/test_sparse_embedding_op.py b/python/paddle/fluid/tests/unittests/test_sparse_embedding_op.py new file mode 100644 index 0000000000000..0e0beda67971e --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_sparse_embedding_op.py @@ -0,0 +1,37 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import paddle + + +class TestSparseEmbeddingAPIError(unittest.TestCase): + def test_errors(self): + with paddle.fluid.dygraph.guard(): + # The size of input in sparse_embedding should not be 0. + def test_0_size(): + input = paddle.to_tensor([], dtype='int64') + paddle.static.nn.sparse_embedding( + input, + [2097152, 2097152, 2097152, 2097152], + padding_idx=2097152, + ) + + self.assertRaises(ValueError, test_0_size) + + +if __name__ == '__main__': + paddle.enable_static() + unittest.main() From 0361903789ea754079a95fe7df5876196fdf9ed7 Mon Sep 17 00:00:00 2001 From: Guanghua Yu <742925032@qq.com> Date: Wed, 1 Feb 2023 16:33:04 +0800 Subject: [PATCH 72/89] Skip the int input operator when inserting a quant node & fix some bug (#49926) --- .../static/quantization/quantization_pass.py | 86 ++++++++++++------- 1 file changed, 53 insertions(+), 33 deletions(-) diff --git a/python/paddle/static/quantization/quantization_pass.py b/python/paddle/static/quantization/quantization_pass.py index 83587563c4930..c9094998dfe24 100644 --- a/python/paddle/static/quantization/quantization_pass.py +++ b/python/paddle/static/quantization/quantization_pass.py @@ -2890,6 +2890,19 @@ def apply(self, graph): ) if in_node.persistable(): continue + + if in_node.dtype() not in [ + paddle.float64, + paddle.float32, + paddle.float16, + ]: + _logger.warning( + "Since the {} contains an input of type INT, the quantization of this layer is skipped.".format( + op_node.name() + ) + ) + break + if arg_name in dequantized_vars_map: dequant_var_node = dequantized_vars_map[arg_name] else: @@ -3137,7 +3150,7 @@ def __init__( self._save_int_weight = save_int_weight assert self._scope is not None, "scope must not be None." assert self._place is not None, "place must not be None." 
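As an illustrative aside (not part of this patch): the change just below swaps the pass's _quantized_ops set for a dict, so a persistable weight that feeds several ops is quantized once and every later consumer is re-linked to the node cached the first time it was seen. A minimal sketch of that name-to-node cache, where quantized_ops, get_or_quantize and quantize_fn are hypothetical stand-ins for the real IR graph calls:

    # Hypothetical stand-in for the pass's weight cache (not the real IR API).
    quantized_ops = {}  # weight name -> quantized weight node

    def get_or_quantize(name, quantize_fn):
        # Quantize a shared weight only on first sight; later consumers
        # reuse the cached node instead of quantizing the weight again.
        if name not in quantized_ops:
            quantized_ops[name] = quantize_fn(name)
        return quantized_ops[name]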
- self._quantized_ops = set() + self._quantized_ops = {} def apply(self, graph): assert isinstance( @@ -3176,7 +3189,6 @@ def apply(self, graph): quant_axis = _op.op().attr("quant_axis") bits_length = _op.op().attr("bit_length") if x_node.name() not in self._quantized_ops: - self._quantized_ops.add(x_node.name()) quantized_param_v = utils.quant_tensor( param_v.copy(), scale_v, @@ -3211,10 +3223,13 @@ def apply(self, graph): self._scope, self._place, ) + self._quantized_ops[x_node.name()] = quant_weight_node for next_op_node in out_node.outputs: graph.update_input_link( - out_node, quant_weight_node, next_op_node + out_node, + self._quantized_ops[x_node.name()], + next_op_node, ) graph.safe_remove_nodes(_op) self._remove_unused_var_nodes(graph) @@ -3298,9 +3313,9 @@ def apply(self, graph): op_node.outputs, var_name ) if out_node.dtype() not in [ - core.VarDesc.VarType.FP64, - core.VarDesc.VarType.FP32, - core.VarDesc.VarType.FP16, + paddle.float64, + paddle.float32, + paddle.float16, ]: continue if var_name in dequantized_vars_map: @@ -3319,7 +3334,10 @@ def apply(self, graph): else: var_names = utils._get_op_input_var_names(op_node) for var_name in var_names: - if var_name in dequant_node_map: + if ( + var_name in dequant_node_map + and dequant_node_map[var_name] + ): in_node = graph._find_node_by_name( op_node.inputs, var_name ) @@ -3345,39 +3363,41 @@ def _insert_quant_dequant_op(self, graph, var_node): shape=var_node.shape(), var_dtype=var_node.dtype(), ) - if not self._calibration_range_dict: - try: - scale_var_node = graph._find_node_by_name( - graph.all_persistable_nodes(), self._scale_name(var_name) + + try: + scale_var_node = graph._find_node_by_name( + graph.all_persistable_nodes(), self._scale_name(var_name) + ) + except: + if ( + self._calibration_range_dict + and var_name in self._calibration_range_dict + ): + scale_value = self._calibration_range_dict[var_name] + scale_var_node = graph.create_persistable_node( + name=self._scale_name(var_name), + var_type=var_node.type(), + shape=[1], + var_dtype=var_node.dtype(), ) - except: + data_type = ( + 'float64' + if var_node.dtype() == core.VarDesc.VarType.FP64 + else 'float32' + ) + _init_var_node( + scale_var_node, + np.array(scale_value, dtype=data_type), + self._scope, + self._place, + ) + else: _logger.warning( "Cannot find the target node {} in scope, so skip adding quant node.".format( var_name ) ) return None - elif var_name in self._calibration_range_dict: - scale_value = self._calibration_range_dict[var_name] - scale_var_node = graph.create_persistable_node( - name=self._scale_name(var_name), - var_type=var_node.type(), - shape=[1], - var_dtype=var_node.dtype(), - ) - data_type = ( - 'float64' - if var_node.dtype() == core.VarDesc.VarType.FP64 - else 'float32' - ) - _init_var_node( - scale_var_node, - np.array(scale_value, dtype=data_type), - self._scope, - self._place, - ) - else: - return None try: zero_point_node = graph._find_node_by_name( graph.all_persistable_nodes(), From 6f0ae156ec3f342a0fd841fad9fedb3c9c8049be Mon Sep 17 00:00:00 2001 From: PuQing Date: Wed, 1 Feb 2023 16:52:53 +0800 Subject: [PATCH 73/89] [Numpy]Fix NumpyScaler2Tensor dtype error (#50018) * fix numpyScaler2Tensor type error * fix to_tensor docs, test=document_fix --- .../unittests/test_npscaler_to_tensor.py | 95 +++++++++++++++++++ python/paddle/tensor/creation.py | 17 ++++ 2 files changed, 112 insertions(+) create mode 100644 python/paddle/fluid/tests/unittests/test_npscaler_to_tensor.py diff --git 
a/python/paddle/fluid/tests/unittests/test_npscaler_to_tensor.py b/python/paddle/fluid/tests/unittests/test_npscaler_to_tensor.py new file mode 100644 index 0000000000000..da6569d7d2973 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_npscaler_to_tensor.py @@ -0,0 +1,95 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle + +DTYPE_MAP = { + paddle.bool: np.bool_, + paddle.int32: np.int32, + paddle.int64: np.int64, + paddle.float16: np.float16, + paddle.float32: np.float32, + paddle.float64: np.float64, + paddle.complex64: np.complex64, +} + + +class NumpyScaler2Tensor(unittest.TestCase): + def setUp(self): + self.dtype = np.float32 + self.x_np = np.array([1], dtype=self.dtype)[0] + + def test_dynamic_scaler2tensor(self): + paddle.disable_static() + x = paddle.to_tensor(self.x_np) + self.assertEqual(DTYPE_MAP[x.dtype], self.dtype) + self.assertEqual(x.numpy(), self.x_np) + if self.dtype in [ + np.bool_ + ]: # bool is not supported convert to 0D-Tensor + return + self.assertEqual(len(x.shape), 0) + + def test_static_scaler2tensor(self): + if self.dtype in [np.float16, np.complex64]: + return + paddle.enable_static() + x = paddle.to_tensor(self.x_np) + self.assertEqual(DTYPE_MAP[x.dtype], self.dtype) + if self.dtype in [ + np.bool_, + np.float64, + ]: # bool is not supported convert to 0D-Tensor and float64 not supported in static mode + return + self.assertEqual(len(x.shape), 0) + + +class NumpyScaler2TensorBool(NumpyScaler2Tensor): + def setUp(self): + self.dtype = np.bool_ + self.x_np = np.array([1], dtype=self.dtype)[0] + + +class NumpyScaler2TensorFloat16(NumpyScaler2Tensor): + def setUp(self): + self.dtype = np.float16 + self.x_np = np.array([1], dtype=self.dtype)[0] + + +class NumpyScaler2TensorFloat64(NumpyScaler2Tensor): + def setUp(self): + self.dtype = np.float64 + self.x_np = np.array([1], dtype=self.dtype)[0] + + +class NumpyScaler2TensorInt32(NumpyScaler2Tensor): + def setUp(self): + self.dtype = np.int32 + self.x_np = np.array([1], dtype=self.dtype)[0] + + +class NumpyScaler2TensorInt64(NumpyScaler2Tensor): + def setUp(self): + self.dtype = np.int64 + self.x_np = np.array([1], dtype=self.dtype)[0] + + +class NumpyScaler2TensorComplex64(NumpyScaler2Tensor): + def setUp(self): + self.dtype = np.complex64 + self.x_np = np.array([1], dtype=self.dtype)[0] diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 9ad83ba74b7f5..7523845c2b8b2 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -533,6 +533,9 @@ def logspace(start, stop, num, base=10.0, dtype=None, name=None): def _to_tensor_non_static(data, dtype=None, place=None, stop_gradient=True): + if isinstance(data, np.number): # Special case for numpy scalars + data = np.array(data) + if not isinstance(data, np.ndarray): def _handle_dtype(data, dtype): @@ -627,6 +630,8 @@ def _to_tensor_static(data, dtype=None, stop_gradient=None): if 
isinstance(data, Variable) and (dtype is None or dtype == data.dtype): output = data else: + if isinstance(data, np.number): # Special case for numpy scalars + data = np.array(data) if not isinstance(data, np.ndarray): if np.isscalar(data) and not isinstance(data, str): @@ -690,6 +695,18 @@ def to_tensor(data, dtype=None, place=None, stop_gradient=True): If the ``data`` is already a Tensor, copy will be performed and return a new tensor. If you only want to change stop_gradient property, please call ``Tensor.stop_gradient = stop_gradient`` directly. + .. code-block:: text + + We use the dtype conversion rules following this: + Keep dtype + np.number ───────────► paddle.Tensor + (0D-Tensor) + default_dtype + Python Number ───────────────► paddle.Tensor + (1D-Tensor) + Keep dtype + np.ndarray ───────────► paddle.Tensor + Args: data(scalar|tuple|list|ndarray|Tensor): Initial data for the tensor. Can be a scalar, list, tuple, numpy\.ndarray, paddle\.Tensor. From 71f247b14a1d07eddb2c1a661c5973bee7f8835c Mon Sep 17 00:00:00 2001 From: YUNSHEN XIE <1084314248@qq.com> Date: Wed, 1 Feb 2023 16:56:41 +0800 Subject: [PATCH 74/89] run infer ut in A10 (#48535) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * run infer ut in A10 * 增加cuda11.2-cudnn8-trt8.4镜像 * add paddle_coverage_new.sh --- paddle/scripts/paddle_build.sh | 29 +- tools/coverage/paddle_coverage_new.sh | 287 ++++++++++++++++++ tools/dockerfile/build_scripts/install_trt.sh | 5 + tools/dockerfile/ci_dockerfile.sh | 28 ++ 4 files changed, 340 insertions(+), 9 deletions(-) create mode 100644 tools/coverage/paddle_coverage_new.sh diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 4c48154b80a4b..2c83897b16678 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ -1326,6 +1326,17 @@ function card_test() { cardnumber=$2 parallel_level_base=${CTEST_PARALLEL_LEVEL:-1} + # run ut based on the label + if [[ "${UT_RUN_TYPE_SETTING}" == "INFER" ]];then + run_label_mode="-L (RUN_TYPE=INFER)" + elif [[ "${UT_RUN_TYPE_SETTING}" == "DIST" ]];then + run_label_mode="-L (RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE)" + elif [[ "${UT_RUN_TYPE_SETTING}" == "WITHOUT_INFER" ]];then + run_label_mode="-LE (RUN_TYPE=INFER)" + elif [[ "${UT_RUN_TYPE_SETTING}" == "OTHER" ]];then + run_label_mode="-LE (RUN_TYPE=INFER|RUN_TYPE=DIST|RUN_TYPE=EXCLUSIVE)" + fi + # get the CUDA device count, XPU device count is one if [ "${WITH_XPU}" == "ON" ];then CUDA_DEVICE_COUNT=1 @@ -1375,15 +1386,15 @@ function card_test() { tmpfile=$tmp_dir/$tmpfile_rand"_"$i if [ ${TESTING_DEBUG_MODE:-OFF} == "ON" ] ; then if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then - (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" -V --timeout 120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} -V --timeout 120 -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & else - (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 -V -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & fi else if [[ $cardnumber == $CUDA_DEVICE_COUNT ]]; then - (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" 
--timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & else - (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & + (env CUDA_VISIBLE_DEVICES=$cuda_list ctest -I $i,,$NUM_PROC -R "($testcases)" -E "($disable_ut_quickly)" ${run_label_mode} --timeout 120 --output-on-failure -j $parallel_job | tee $tmpfile; test ${PIPESTATUS[0]} -eq 0) & fi fi done @@ -2364,7 +2375,7 @@ set +x if [[ "${failed_test_lists}" == "" ]];then break else - retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) + retry_unittests=$( echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) fi fi echo "=========================================" @@ -2687,10 +2698,10 @@ set +x if [ ${TIMEOUT_DEBUG_HELP:-OFF} == "ON" ];then bash $PADDLE_ROOT/tools/timeout_debug_help.sh "$failed_test_lists" # cat logs for tiemout uts which killed by ctest fi - read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) + need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) need_retry_ut_arr=(${need_retry_ut_str}) need_retry_ut_count=${#need_retry_ut_arr[@]} - read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) + retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) while ( [ $exec_times -lt $retry_time ] ) do if [[ "${exec_times}" == "0" ]] ;then @@ -2700,7 +2711,7 @@ set +x is_retry_execuate=1 fi elif [[ "${exec_times}" == "1" ]] ;then - read need_retry_ut_str <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) + need_retry_ut_str=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) need_retry_ut_arr=(${need_retry_ut_str}) need_retry_ut_count=${#need_retry_ut_arr[@]} if [ $need_retry_ut_count -lt $exec_retry_threshold ];then @@ -2718,7 +2729,7 @@ set +x if [[ "${failed_test_lists}" == "" ]];then break else - read retry_unittests <<< $(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) + retry_unittests=$(echo "$failed_test_lists" | grep -oEi "\-.+\(.+\)" | sed 's/(.\+)//' | sed 's/- //' ) fi fi echo "=========================================" diff --git a/tools/coverage/paddle_coverage_new.sh b/tools/coverage/paddle_coverage_new.sh new file mode 100644 index 0000000000000..98de591fd154b --- /dev/null +++ b/tools/coverage/paddle_coverage_new.sh @@ -0,0 +1,287 @@ +#!/usr/bin/env bash + +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +set -xe + +PADDLE_ROOT="$( cd "$( dirname "${BASH_SOURCE[0]}")/../../" && pwd )" + +function lcov_init(){ + # install lcov + if [ ! -f "/root/.cache/lcov-1.14.tar.gz" ];then + wget -P /home https://paddle-ci.gz.bcebos.com/coverage/lcov-1.14.tar.gz --no-proxy --no-check-certificate || exit 101 + cp /home/lcov-1.14.tar.gz /root/.cache/lcov-1.14.tar.gz + else + cp /root/.cache/lcov-1.14.tar.gz /home/lcov-1.14.tar.gz + fi + tar -xf /home/lcov-1.14.tar.gz -C / + cd /lcov-1.14 + make install +} + +function gen_cpp_covinfo(){ + # run paddle coverage + cd /paddle/build + python3.7 ${PADDLE_ROOT}/tools/coverage/gcda_clean.py ${GIT_PR_ID} || exit 101 + lcov --capture -d ./ -o coverage.info --rc lcov_branch_coverage=0 +} + + +# full html report + +function gen_full_html_report() { + lcov --extract coverage.info \ + '/paddle/paddle/fluid/framework/*' \ + '/paddle/paddle/fluid/imperative/*' \ + '/paddle/paddle/fluid/inference/*' \ + '/paddle/paddle/fluid/memory/*' \ + '/paddle/paddle/fluid/operators/*' \ + '/paddle/paddle/fluid/recordio/*' \ + '/paddle/paddle/fluid/string/*' \ + '/paddle/paddle/fluid/eager/*' \ + '/paddle/paddle/phi/*' \ + '/paddle/paddle/utils/*' \ + -o coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f coverage-full.tmp coverage-full.info + + lcov --remove coverage-full.info \ + '/paddle/paddle/fluid/framework/*_test*' \ + '/paddle/paddle/fluid/*/*test*' \ + '/paddle/paddle/fluid/*/*/*test*' \ + '/paddle/paddle/fluid/inference/tests/*' \ + '/paddle/paddle/fluid/inference/api/demo_ci/*' \ + '/paddle/paddle/fluid/eager/tests/*' \ + '/paddle/paddle/phi/tests/*' \ + -o coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f coverage-full.tmp coverage-full.info +} + +function gen_full_html_report_xpu() { + lcov --extract coverage.info \ + '/paddle/paddle/fluid/operators/*xpu*' \ + '/paddle/paddle/phi/kernels/xpu/*' \ + -o coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f coverage-full.tmp coverage-full.info + + lcov --remove coverage-full.info \ + '/paddle/paddle/fluid/framework/*_test*' \ + '/paddle/paddle/fluid/*/*test*' \ + '/paddle/paddle/fluid/*/*/*test*' \ + '/paddle/paddle/fluid/inference/tests/*' \ + '/paddle/paddle/fluid/inference/api/demo_ci/*' \ + -o coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f coverage-full.tmp coverage-full.info +} + +function gen_full_html_report_npu() { + lcov --extract coverage.info \ + '/paddle/paddle/fluid/operators/*npu*' \ + -o coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f coverage-full.tmp coverage-full.info + + lcov --remove coverage-full.info \ + '/paddle/paddle/fluid/framework/*_test*' \ + '/paddle/paddle/fluid/*/*test*' \ + '/paddle/paddle/fluid/*/*/*test*' \ + '/paddle/paddle/fluid/inference/tests/*' \ + '/paddle/paddle/fluid/inference/api/demo_ci/*' \ + -o coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f coverage-full.tmp coverage-full.info +} + +# if [ ${WITH_XPU:-OFF} == "ON" ]; then +# gen_full_html_report_xpu || true +# elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then +# gen_full_html_report_npu || true +# else +# gen_full_html_report || true +# fi + +# diff html report + +function gen_diff_html_report() { + if [ "${GIT_PR_ID}" != "" ]; then + + COVERAGE_DIFF_PATTERN="`python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`" + + python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > git-diff.out + fi + + lcov --extract 
coverage-full.info \ + ${COVERAGE_DIFF_PATTERN} \ + -o coverage-diff.info \ + --rc lcov_branch_coverage=0 + + python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_diff.py coverage-diff.info git-diff.out > coverage-diff.tmp + + mv -f coverage-diff.tmp coverage-diff.info + + genhtml -o coverage-diff -t 'Diff Coverage' --no-function-coverage --no-branch-coverage coverage-diff.info +} + +# gen_diff_html_report || true + +function gen_py_covinfo(){ + # python coverage + + export COVERAGE_FILE=/paddle/build/python-coverage.data + coverage combine `$(ls python-coverage.data.*)` || NO_PYTHON_COVERAGE_DATA=1 + `$(coverage xml -i -o python-coverage.xml)` || [[ "${NO_PYTHON_COVERAGE_DATA}" == "1" ]] + sed -i 's/mnt\/paddle/paddle/g' python-coverage.xml + `$(python ${PADDLE_ROOT}/tools/coverage/python_coverage.py > python-coverage.info)` || [[ "${NO_PYTHON_COVERAGE_DATA}" == "1" ]] +} + + +# python full html report +# +function gen_python_full_html_report() { + lcov --extract python-coverage.info \ + '/paddle/python/*' \ + -o python-coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f python-coverage-full.tmp python-coverage-full.info + + lcov --remove python-coverage-full.info \ + '/*/tests/*' \ + -o python-coverage-full.tmp \ + --rc lcov_branch_coverage=0 + + mv -f python-coverage-full.tmp python-coverage-full.info +} + +# gen_python_full_html_report || true + +# python diff html report + +function gen_python_diff_html_report() { + if [ "${GIT_PR_ID}" != "" ]; then + COVERAGE_DIFF_PATTERN="`python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py files ${GIT_PR_ID}`" + + python3.7 ${PADDLE_ROOT}/tools/coverage/pull_request.py diff ${GIT_PR_ID} > python-git-diff.out + fi + + lcov --extract python-coverage-full.info \ + ${COVERAGE_DIFF_PATTERN} \ + -o python-coverage-diff.info \ + --rc lcov_branch_coverage=0 + + python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_diff.py python-coverage-diff.info python-git-diff.out > python-coverage-diff.tmp + + mv -f python-coverage-diff.tmp python-coverage-diff.info + + genhtml -o python-coverage-diff \ + -t 'Python Diff Coverage' \ + --no-function-coverage \ + --no-branch-coverage \ + --ignore-errors source \ + python-coverage-diff.info +} + +# gen_python_diff_html_report || true + +# assert coverage lines + +function covinfo_combine_full(){ + if [ -f "other-coverage.info" ];then + if [ -f "infer-coverage.info" ];then + lcov -a other-coverage.info -a infer-coverage.info -o coverage.info + else + mv other-coverage.info coverage.info + fi + elif [ -f "infer-coverage.info" ];then + mv infer-coverage.info coverage.info + else + echo "Cannot found coverage.info" + fi + + if [ -f "other-python-coverage-full.info" ];then + if [ -f "infer-python-coverage-full.info" ];then + lcov -a other-python-coverage-full.info -a infer-python-coverage-full.info -o python-coverage-full.info + else + mv other-python-coverage-full.info python-coverage-full.info + fi + elif [ -f "infer-coverage.info" ];then + mv infer-python-coverage-full.info python-coverage-full.info + else + echo "Cannot found python coverage.info" + fi +} + +function cov_rate_judge(){ + echo "Assert CPP Diff Coverage" + python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py coverage-diff.info 0.9 || COVERAGE_LINES_ASSERT=1 + + echo "Assert Python Diff Coverage" + + if [ ${WITH_XPU:-OFF} == "ON" ]; then + echo "XPU has no python coverage!" + elif [ ${WITH_ASCEND_CL:-OFF} == "ON" ]; then + echo "NPU has no python coverage!" 
+ else + if [[ python-coverage-diff.info ]];then + python3.7 ${PADDLE_ROOT}/tools/coverage/coverage_lines.py python-coverage-diff.info 0.9 || PYTHON_COVERAGE_LINES_ASSERT=1 + fi + fi + if [ "$COVERAGE_LINES_ASSERT" = "1" ] || [ "$PYTHON_COVERAGE_LINES_ASSERT" = "1" ]; then + echo "exit 9" > /tmp/paddle_coverage.result + exit 9 + fi +} + +function print_usage() { + echo -e "\n${RED}Usage${NONE}: + ${BOLD}${SCRIPT_NAME}${NONE} [OPTION]" + + echo -e "\n${RED}Options${NONE}: + ${BLUE}gen_cov_info${NONE}: generate coverage info + ${BLUE}test${NONE}: coverage info combine + " +} + +function main () { + local CMD=$1 + lcov_init + case $CMD in + gen_cov_info) + gen_cpp_covinfo + gen_py_covinfo + ;; + combine_cov_info) + covinfo_combine_full + gen_diff_html_report + gen_python_diff_html_report + cov_rate_judge + ;; + *) + print_usage + exit 1 + ;; + esac +} + +main $@ diff --git a/tools/dockerfile/build_scripts/install_trt.sh b/tools/dockerfile/build_scripts/install_trt.sh index 490b0af5289c5..2129f92adbac3 100644 --- a/tools/dockerfile/build_scripts/install_trt.sh +++ b/tools/dockerfile/build_scripts/install_trt.sh @@ -36,6 +36,11 @@ elif [[ "$1" == "trt8406" ]];then tar -zxf TensorRT-8.4.0.6.Linux.x86_64-gnu.cuda-11.6.cudnn8.3.tar.gz -C /usr/local cp -rf /usr/local/TensorRT-8.4.0.6/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.4.0.6/lib/* /usr/lib/ rm -f TensorRT-8.4.0.6.Linux.x86_64-gnu.cuda-11.6.cudnn8.3.tar.gz +elif [[ "$1" == "trt8431" ]];then + wget -q https://paddle-ci.gz.bcebos.com/TRT/TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz --no-check-certificate --no-proxy + tar -zxf TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz -C /usr/local + cp -rf /usr/local/TensorRT-8.4.3.1/include/* /usr/include/ && cp -rf /usr/local/TensorRT-8.4.3.1/lib/* /usr/lib/ + rm -f TensorRT-8.4.3.1.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz elif [[ "$VERSION" == "11.2" ]];then wget -q --no-proxy https://paddle-ci.gz.bcebos.com/TRT/TensorRT7-cuda11.1-cudnn8.1.tar.gz --no-check-certificate tar -zxf TensorRT7-cuda11.1-cudnn8.1.tar.gz -C /usr/local diff --git a/tools/dockerfile/ci_dockerfile.sh b/tools/dockerfile/ci_dockerfile.sh index 17ba5b3ee4c32..16471f2edd8f2 100644 --- a/tools/dockerfile/ci_dockerfile.sh +++ b/tools/dockerfile/ci_dockerfile.sh @@ -166,6 +166,33 @@ function make_unbuntu18_cu117_dockerfile(){ sed -i 's# && rm /etc/apt/sources.list.d/nvidia-ml.list##g' ${dockerfile_name} } +function make_ubuntu18_cu112_dockerfile(){ + dockerfile_name="Dockerfile.cuda11.2_cudnn8.1_trt8.4_gcc8.2_ubuntu18" + sed "s##nvidia/cuda:11.2.0-cudnn8-devel-ubuntu18.04#g" ./Dockerfile.ubuntu18 >${dockerfile_name} + sed -i "s##ENV LD_LIBRARY_PATH=/usr/local/cuda-11.2/targets/x86_64-linux/lib:\$LD_LIBRARY_PATH #g" ${dockerfile_name} + sed -i "s#liblzma-dev#liblzma-dev openmpi-bin openmpi-doc libopenmpi-dev#g" ${dockerfile_name} + dockerfile_line=$(wc -l ${dockerfile_name}|awk '{print $1}') + sed -i 's#RUN bash /build_scripts/install_trt.sh#RUN bash /build_scripts/install_trt.sh trt8431#g' ${dockerfile_name} + sed -i "${dockerfile_line}i RUN wget --no-check-certificate -q https://paddle-edl.bj.bcebos.com/hadoop-2.7.7.tar.gz \&\& \ + tar -xzf hadoop-2.7.7.tar.gz && mv hadoop-2.7.7 /usr/local/" ${dockerfile_name} + sed -i "${dockerfile_line}i RUN apt remove git -y \&\& apt install -y libsndfile1 zstd pigz libcurl4-openssl-dev gettext zstd ninja-build \&\& wget -q https://paddle-ci.gz.bcebos.com/git-2.17.1.tar.gz \&\& \ + tar -xvf git-2.17.1.tar.gz \&\& \ + cd git-2.17.1 \&\& \ + ./configure 
--with-openssl --with-curl --prefix=/usr/local \&\& \ + make -j8 \&\& make install " ${dockerfile_name} + sed -i "${dockerfile_line}i RUN pip install wheel \&\& pip3 install PyGithub wheel \&\& pip3.7 install PyGithub distro \&\& pip3.8 install PyGithub distro" ${dockerfile_name} + sed -i 's###g' ${dockerfile_name} + sed -i "s##WORKDIR /usr/bin \\ + COPY tools/dockerfile/build_scripts /build_scripts \\ + RUN bash /build_scripts/install_gcc.sh gcc82 \&\& rm -rf /build_scripts \\ + RUN cp gcc gcc.bak \&\& cp g++ g++.bak \&\& rm gcc \&\& rm g++ \\ + RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc \\ + RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++ \\ + RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc \\ + RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++ \\ + ENV PATH=/usr/local/gcc-8.2/bin:\$PATH #g" ${dockerfile_name} +} + function main() { make_ubuntu_dockerfile make_ubuntu_trt7_dockerfile @@ -173,6 +200,7 @@ function main() { make_cinn_dockerfile make_ce_framework_dockcerfile make_unbuntu18_cu117_dockerfile + make_ubuntu18_cu112_dockerfile } main "$@" From e4e94a889a7e172ca92b9d0c4aca8c3c08a39fea Mon Sep 17 00:00:00 2001 From: Zhong Hui Date: Wed, 1 Feb 2023 17:01:31 +0800 Subject: [PATCH 75/89] [Zero-Dim] Fix 0-dim tensor for arg_min_max op. (#49570) * fix 0-d tensor for arg_min_max op. * fix xpu. * fix zero dims * fix * Update arg_min_max_kernel.cc * Update arg_min_max_kernel.cc * Update arg_min_max_kernel.cc * Update test_zero_dim_tensor.py * Update test_zero_dim_tensor_xpu.py * Update test_zero_dim_tensor.py * Update arg_min_max_kernel.cc * Update arg_min_max_kernel.cc * Update arg_min_max_kernel.cc --- paddle/phi/infermeta/unary.cc | 47 +++++++++++++------ paddle/phi/kernels/cpu/arg_min_max_kernel.cc | 6 +++ paddle/phi/kernels/gpu/arg_min_max_kernel.cu | 7 +++ paddle/phi/kernels/xpu/arg_min_max_kernel.cc | 9 ++++ .../tests/unittests/test_zero_dim_tensor.py | 15 ++++-- .../unittests/xpu/test_zero_dim_tensor_xpu.py | 5 +- 6 files changed, 68 insertions(+), 21 deletions(-) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index eb05437ada8a5..2b35545db1cd8 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -160,22 +160,34 @@ void ArgMinMaxInferMeta(const MetaTensor& x, auto int_axis = axis.to(); const auto& x_dims = x.dims(); - PADDLE_ENFORCE_GE( - int_axis, - -x_dims.size(), - phi::errors::InvalidArgument("'axis'(%d) must be greater than or equal to" - " -Rank(X)(%d).", - int_axis, - -x_dims.size())); - PADDLE_ENFORCE_LT(int_axis, - x_dims.size(), - phi::errors::InvalidArgument( - "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", - int_axis, - x_dims.size())); + auto x_rank = x.dims().size(); + if (x_rank > 0) { + PADDLE_ENFORCE_GE(int_axis, + -x_rank, + phi::errors::InvalidArgument( + "'axis'(%d) must be greater than or equal to" + " -Rank(X)(%d).", + int_axis, + -x_rank)); + PADDLE_ENFORCE_LT( + int_axis, + x_rank, + phi::errors::InvalidArgument( + "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", + int_axis, + x_rank)); + } else { + // 0-dim tensor + PADDLE_ENFORCE_EQ((int_axis == 0 || int_axis == -1) && flatten, + true, + phi::errors::InvalidArgument( + "'axis'(%d) must be 0 or -1 if input tensor is " + "0-dim. 
and flatten should be true.", + int_axis)); + } - auto x_rank = x_dims.size(); if (int_axis < 0) int_axis += x_rank; + if (config.is_runtime) { if (dtype == phi::TransToProtoVarType(DataType::INT32)) { int64_t all_element_num = 0; @@ -195,8 +207,12 @@ void ArgMinMaxInferMeta(const MetaTensor& x, INT_MAX)); } } + std::vector vec; - if (flatten) { + + if (x_rank == 0) { + // vec is set to empty + } else if (flatten) { vec.emplace_back(static_cast(1)); } else { for (int64_t i = 0; i < int_axis; i++) vec.emplace_back(x_dims[i]); @@ -205,6 +221,7 @@ void ArgMinMaxInferMeta(const MetaTensor& x, } for (int64_t i = int_axis + 1; i < x_rank; i++) vec.emplace_back(x_dims[i]); } + out->set_dims(phi::make_ddim(vec)); if (dtype == 2) { out->set_dtype(DataType::INT32); diff --git a/paddle/phi/kernels/cpu/arg_min_max_kernel.cc b/paddle/phi/kernels/cpu/arg_min_max_kernel.cc index 61d20ac32f15a..694698050a0c0 100644 --- a/paddle/phi/kernels/cpu/arg_min_max_kernel.cc +++ b/paddle/phi/kernels/cpu/arg_min_max_kernel.cc @@ -96,6 +96,12 @@ struct VisitDataArgMinMaxFunctor { if (axis < 0) new_axis = axis + x_dims.size(); } + // For 0D Tensor + if (x.dims().size() == 0) { + phi::funcs::set_constant(dev_ctx, out, 0); + return; + } + #define CALL_ARG_MINMAX_FUNCTOR(rank) \ ArgMinMaxFunctor functor##rank; \ functor##rank(dev_ctx, x, out, x_dims, new_axis, new_keepdims) diff --git a/paddle/phi/kernels/gpu/arg_min_max_kernel.cu b/paddle/phi/kernels/gpu/arg_min_max_kernel.cu index affd36a95ef8b..199ecc8e5b989 100644 --- a/paddle/phi/kernels/gpu/arg_min_max_kernel.cu +++ b/paddle/phi/kernels/gpu/arg_min_max_kernel.cu @@ -30,6 +30,7 @@ namespace cub = hipcub; #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace phi { namespace { // NOLINT @@ -180,6 +181,12 @@ struct VisitDataCudaArgMinMaxFunctor { x_dims = x.dims(); if (axis < 0) new_axis = axis + x.dims().size(); } + // For 0D Tensor + if (x.dims().size() == 0) { + dev_ctx.template Alloc(out); + phi::funcs::set_constant(dev_ctx, out, 0); + return; + } int64_t numel = x.numel(); int64_t groups = numel / x_dims[new_axis]; diff --git a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc index 3513b64bc600e..ebf13142345ce 100644 --- a/paddle/phi/kernels/xpu/arg_min_max_kernel.cc +++ b/paddle/phi/kernels/xpu/arg_min_max_kernel.cc @@ -18,6 +18,7 @@ #include "paddle/phi/core/ddim.h" #include "paddle/phi/core/kernel_registry.h" #include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/math_function.h" namespace phi { @@ -39,7 +40,15 @@ void ArgMaxKernel(const Context& dev_ctx, DataType::INT64, DataType::INT32, dtype)); + // TODO(ZHUI): fix dtype of out dev_ctx.template Alloc(out); + if (x.dims().size() == 0) { + xpu::constant(dev_ctx.x_context(), + out->data(), + x.numel(), + static_cast(0)); + return; + } DDim x_dims; int axis_val = axis.to(); diff --git a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py index 2d07ab31334df..fcc171674deab 100644 --- a/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_zero_dim_tensor.py @@ -189,6 +189,8 @@ def test_static_unary(self): paddle.logsumexp, paddle.all, paddle.any, + paddle.argmax, + paddle.argmin, ] @@ -208,12 +210,13 @@ def test_dygraph_reduce(self): out.retain_grads() out.backward() - out_empty_list = api(x, []) - self.assertEqual(out_empty_list, out) - 
self.assertEqual(x.shape, []) self.assertEqual(out.shape, []) - np.testing.assert_allclose(out.numpy(), x.numpy()) + if api not in [paddle.argmax, paddle.argmin]: + np.testing.assert_allclose(out.numpy(), x.numpy()) + out_empty_list = api(x, []) + self.assertEqual(out_empty_list, out) + if x.grad is not None: self.assertEqual(x.grad.shape, []) self.assertEqual(out.grad.shape, []) @@ -250,7 +253,9 @@ def test_static_reduce(self): res = exe.run(main_prog, fetch_list=fetch_list) self.assertEqual(res[0].shape, ()) self.assertEqual(res[1].shape, ()) - np.testing.assert_allclose(res[0], res[1]) + if api not in [paddle.argmax, paddle.argmin]: + np.testing.assert_allclose(res[0], res[1]) + if len(res) > 2: self.assertEqual(res[2].shape, ()) self.assertEqual(res[3].shape, ()) diff --git a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py index f6f64aefe9db7..35e98e3cdaa75 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_zero_dim_tensor_xpu.py @@ -132,6 +132,8 @@ def test_dygraph_unary(self): paddle.logsumexp, paddle.all, paddle.any, + paddle.argmax, + paddle.argmin, ] @@ -153,7 +155,8 @@ def test_dygraph_reduce(self): self.assertEqual(x.shape, []) self.assertEqual(out.shape, []) - np.testing.assert_allclose(out.numpy(), x.numpy()) + if api not in [paddle.argmax, paddle.argmin]: + np.testing.assert_allclose(out.numpy(), x.numpy()) if x.grad is not None: self.assertEqual(x.grad.shape, []) self.assertEqual(out.grad.shape, []) From 3ab6faa8b311fdfcd9458641cd30eb8faf8379d8 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 17:08:09 +0800 Subject: [PATCH 76/89] Fix div 0 error of case11: paddle.nn.functional.max_pool1d/max_pool2d/max_pool3d (#50010) * add stride check for MaxPool * add unittests --- paddle/fluid/operators/pool_with_index_op.cc | 5 +++++ paddle/phi/kernels/funcs/pooling.h | 5 +++++ .../fluid/tests/unittests/test_pool1d_api.py | 14 +++++++++++++- .../fluid/tests/unittests/test_pool2d_api.py | 12 ++++++++++++ .../fluid/tests/unittests/test_pool3d_api.py | 12 ++++++++++++ 5 files changed, 47 insertions(+), 1 deletion(-) diff --git a/paddle/fluid/operators/pool_with_index_op.cc b/paddle/fluid/operators/pool_with_index_op.cc index 74b98069bf647..79262db30fafb 100644 --- a/paddle/fluid/operators/pool_with_index_op.cc +++ b/paddle/fluid/operators/pool_with_index_op.cc @@ -27,6 +27,11 @@ inline int MaxPoolOutputSize(int input_size, int filter_size, int padding, int stride) { + PADDLE_ENFORCE_NE( + stride, + 0, + phi::errors::InvalidArgument( + "The stride of MaxPool shall not be 0, but received %d.", stride)); int output_size = (input_size - filter_size + 2 * padding) / stride + 1; return output_size; } diff --git a/paddle/phi/kernels/funcs/pooling.h b/paddle/phi/kernels/funcs/pooling.h index 17b87a0e17d51..c0741672a458e 100644 --- a/paddle/phi/kernels/funcs/pooling.h +++ b/paddle/phi/kernels/funcs/pooling.h @@ -402,6 +402,11 @@ inline int MaxPoolOutputSize(int input_size, int filter_size, int padding, int stride) { + PADDLE_ENFORCE_NE( + stride, + 0, + phi::errors::InvalidArgument( + "The stride of MaxPool shall not be 0, but received %d.", stride)); int output_size = (input_size - filter_size + 2 * padding) / stride + 1; return output_size; } diff --git a/python/paddle/fluid/tests/unittests/test_pool1d_api.py b/python/paddle/fluid/tests/unittests/test_pool1d_api.py index 2c191bf4892b7..3816822e8f3ec 100644 --- 
a/python/paddle/fluid/tests/unittests/test_pool1d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool1d_api.py @@ -274,7 +274,7 @@ def test_pool1d(self): self.check_max_dygraph_return_index_results(place) -class TestPool2DError_API(unittest.TestCase): +class TestPool1DError_API(unittest.TestCase): def test_error_api(self): def run1(): with fluid.dygraph.guard(): @@ -417,6 +417,18 @@ def run_stride_out_of_range(): self.assertRaises(ValueError, run_stride_out_of_range) + def run_zero_stride(): + with fluid.dygraph.guard(): + array = np.array([1], dtype=np.float32) + x = paddle.to_tensor( + np.reshape(array, [1, 1, 1]), dtype='float32' + ) + out = F.max_pool1d( + x, 1, stride=0, padding=1, return_mask=True, ceil_mode=True + ) + + self.assertRaises(ValueError, run_zero_stride) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool2d_api.py b/python/paddle/fluid/tests/unittests/test_pool2d_api.py index 44ef18605ed2a..c55ea337c41b7 100644 --- a/python/paddle/fluid/tests/unittests/test_pool2d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool2d_api.py @@ -597,6 +597,18 @@ def run_stride_out_of_range(): self.assertRaises(ValueError, run_stride_out_of_range) + def run_zero_stride(): + with fluid.dygraph.guard(): + array = np.array([1], dtype=np.float32) + x = paddle.to_tensor( + np.reshape(array, [1, 1, 1, 1]), dtype='float32' + ) + out = max_pool2d( + x, 1, stride=0, padding=1, return_mask=True, ceil_mode=True + ) + + self.assertRaises(ValueError, run_zero_stride) + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_pool3d_api.py b/python/paddle/fluid/tests/unittests/test_pool3d_api.py index 961f0b5c569f0..30a03ab220bcc 100644 --- a/python/paddle/fluid/tests/unittests/test_pool3d_api.py +++ b/python/paddle/fluid/tests/unittests/test_pool3d_api.py @@ -563,6 +563,18 @@ def run_size_out_of_range(): self.assertRaises(ValueError, run_size_out_of_range) + def run_zero_stride(): + with fluid.dygraph.guard(): + array = np.array([1], dtype=np.float32) + x = paddle.to_tensor( + np.reshape(array, [1, 1, 1, 1, 1]), dtype='float32' + ) + out = max_pool3d( + x, 1, stride=0, padding=1, return_mask=True, ceil_mode=True + ) + + self.assertRaises(ValueError, run_zero_stride) + if __name__ == '__main__': unittest.main() From ccf8d96c5da5d3c6df8d1c15cbb812d15dfc81f9 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Wed, 1 Feb 2023 17:08:34 +0800 Subject: [PATCH 77/89] support cuda11.7 manylinux (#44217) --- tools/dockerfile/build_scripts/install_cudnn.sh | 8 ++++++++ tools/dockerfile/build_scripts/install_nccl2.sh | 2 +- tools/dockerfile/centos7_manylinux.sh | 9 +++++++++ 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/tools/dockerfile/build_scripts/install_cudnn.sh b/tools/dockerfile/build_scripts/install_cudnn.sh index 0817634fa91af..2310370f223c8 100644 --- a/tools/dockerfile/build_scripts/install_cudnn.sh +++ b/tools/dockerfile/build_scripts/install_cudnn.sh @@ -45,4 +45,12 @@ elif [[ "$1" == "cudnn821" && "$VERSION" == "11.2" ]]; then cp -r lib64 /usr && cd ../ && \ rm -f cudnn-11.3-linux-x64-v8.2.1.32.tgz && \ rm -rf cuda +elif [[ "$1" == "cudnn841" && "$VERSION" == "11.7" ]]; then + wget -q https://paddle-ci.gz.bcebos.com/cudnn/cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz --no-check-certificate + tar xJvf cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz && \ + cd cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive && \ + cp -r include /usr && \ + cp -r lib /usr && cd ../ && \ + rm -f 
cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive.tar.xz && \ + rm -rf cudnn-linux-x86_64-8.4.1.50_cuda11.6-archive fi diff --git a/tools/dockerfile/build_scripts/install_nccl2.sh b/tools/dockerfile/build_scripts/install_nccl2.sh index d39e74f3cf537..c21267807976d 100644 --- a/tools/dockerfile/build_scripts/install_nccl2.sh +++ b/tools/dockerfile/build_scripts/install_nccl2.sh @@ -17,7 +17,7 @@ VERSION=$(nvcc --version | grep release | grep -oEi "release ([0-9]+)\.([0-9])"| sed "s/release //") if [ "$VERSION" == "10.0" ]; then DEB="nccl-repo-ubuntu1604-2.4.7-ga-cuda10.0_1-1_amd64.deb" -elif [ "$VERSION" == "10.2" ] || [ "$VERSION" == "10.1" ] || [ "$VERSION" == "11.0" ] || [ "$VERSION" == "11.2" ] || [ "$VERSION" == "11.3" ] || [ "$VERSION" == "11.4" ] || [ "$VERSION" == "11.5" ] || [ "$VERSION" == "11.6" ] || [ "$VERSION" == "11.8" ]; then +elif [ "$VERSION" == "10.2" ] || [ "$VERSION" == "10.1" ] || [ "$VERSION" == "11.0" ] || [ "$VERSION" == "11.2" ] || [ "$VERSION" == "11.3" ] || [ "$VERSION" == "11.4" ] || [ "$VERSION" == "11.5" ] || [ "$VERSION" == "11.6" ] || [ "$VERSION" == "11.7" ] || [ "$VERSION" == "11.8" ]; then if [ -f "/etc/redhat-release" ];then rm -f /usr/local/lib/libnccl.so wget --no-check-certificate -q https://nccl2-deb.cdn.bcebos.com/libnccl-2.10.3-1+cuda11.4.x86_64.rpm diff --git a/tools/dockerfile/centos7_manylinux.sh b/tools/dockerfile/centos7_manylinux.sh index 7b21a51045618..4beb8b3a592ad 100755 --- a/tools/dockerfile/centos7_manylinux.sh +++ b/tools/dockerfile/centos7_manylinux.sh @@ -128,6 +128,12 @@ function make_cuda116cudnn840trt8406gcc82() { sed -i '/CMD/iRUN ldconfig' Dockerfile.tmp } +function make_cuda117cudnn841() { + sed 's//11.7.0-devel-centos7/g' Dockerfile.centos >Dockerfile.tmp + sed -i "s#RUN bash build_scripts/build.sh#RUN bash build_scripts/install_gcc.sh gcc82 \nRUN mv /usr/bin/cc /usr/bin/cc.bak \&\& ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/cc \nENV PATH=/usr/local/gcc-8.2/bin:\$PATH \nRun bash build_scripts/install_cudnn.sh cudnn841 \nENV CUDNN_VERSION=8.4.1 \nRUN bash build_scripts/build.sh#g" Dockerfile.tmp + sed -i '/CMD/iRUN ldconfig' Dockerfile.tmp +} + function main() { local CMD=$1 case $CMD in @@ -188,6 +194,9 @@ function main() { cuda116cudnn840trt8406gcc82) make_cuda116cudnn840trt8406gcc82 ;; + cuda117cudnn841) + make_cuda117cudnn841 + ;; *) echo "Make dockerfile error, Without this paramet." 
exit 1 From 838dc660e51f6b1479274d706434d992ba4a44a6 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Wed, 1 Feb 2023 17:09:03 +0800 Subject: [PATCH 78/89] [dockerfile] support python3.10 in manylinux dockerfile (#42126) * support python3.10 in manylinux dockerfile * fix python3.10 ssl * do not check for python3.10 --- tools/dockerfile/Dockerfile.centos | 7 +++++-- tools/dockerfile/build_scripts/build.sh | 17 ++++++++++------- tools/dockerfile/build_scripts/build_utils.sh | 7 +++++++ 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/tools/dockerfile/Dockerfile.centos b/tools/dockerfile/Dockerfile.centos index 1bc7727f304af..3452db468b311 100644 --- a/tools/dockerfile/Dockerfile.centos +++ b/tools/dockerfile/Dockerfile.centos @@ -50,17 +50,20 @@ RUN wget https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/python/re RUN LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install setuptools -U && \ LD_LIBRARY_PATH=/opt/_internal/cpython-3.8.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.8.0/bin/pip3 install setuptools -U && \ - LD_LIBRARY_PATH=/opt/_internal/cpython-3.9.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.9.0/bin/pip3 install setuptools -U + LD_LIBRARY_PATH=/opt/_internal/cpython-3.9.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.9.0/bin/pip3 install setuptools -U && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.10.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.10.0/bin/pip3 install setuptools -U RUN LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install -r /root/requirements.txt && \ LD_LIBRARY_PATH=/opt/_internal/cpython-3.8.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.8.0/bin/pip3 install -r /root/requirements.txt && \ LD_LIBRARY_PATH=/opt/_internal/cpython-3.9.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.9.0/bin/pip3 install -r /root/requirements.txt && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.10.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.10.0/bin/pip3 install -r /root/requirements.txt && \ go get github.com/Masterminds/glide && \ rm -rf /root/requirements.txt RUN LD_LIBRARY_PATH=/opt/_internal/cpython-3.7.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.7.0/bin/pip3 install pre-commit 'ipython==5.3.0' && \ LD_LIBRARY_PATH=/opt/_internal/cpython-3.8.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.8.0/bin/pip3 install pre-commit 'ipython==5.3.0' && \ - LD_LIBRARY_PATH=/opt/_internal/cpython-3.9.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.9.0/bin/pip3 install pre-commit 'ipython==5.3.0' + LD_LIBRARY_PATH=/opt/_internal/cpython-3.9.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.9.0/bin/pip3 install pre-commit 'ipython==5.3.0' && \ + LD_LIBRARY_PATH=/opt/_internal/cpython-3.10.0/lib/:${LD_LIBRARY_PATH} /opt/_internal/cpython-3.10.0/bin/pip3 install pre-commit 'ipython==5.3.0' RUN wget -O /opt/swig-2.0.12.tar.gz --no-check-certificate https://sourceforge.net/projects/swig/files/swig/swig-2.0.12/swig-2.0.12.tar.gz/download && \ cd /opt && tar xzf swig-2.0.12.tar.gz && cd /opt/swig-2.0.12 && ./configure && make && make install && cd /opt && rm swig-2.0.12.tar.gz diff --git a/tools/dockerfile/build_scripts/build.sh b/tools/dockerfile/build_scripts/build.sh index 4bbe8198556e3..5822fa10160b7 100644 --- a/tools/dockerfile/build_scripts/build.sh +++ b/tools/dockerfile/build_scripts/build.sh @@ -24,7 +24,7 @@ set -ex # remove others to expedite build and reduce docker image size. 
The original # manylinux docker image project builds many python versions. # NOTE We added back 3.5.1, since auditwheel requires python 3.3+ -CPYTHON_VERSIONS="3.9.0 3.8.0 3.7.0" +CPYTHON_VERSIONS="3.10.0 3.9.0 3.8.0 3.7.0" # openssl version to build, with expected sha256 hash of .tar.gz # archive @@ -80,11 +80,12 @@ build_cpythons $CPYTHON_VERSIONS PY37_BIN=/opt/python/cp37-cp37m/bin PY38_BIN=/opt/python/cp38-cp38m/bin PY39_BIN=/opt/python/cp39-cp39m/bin +PY310_BIN=/opt/python/cp310-cp310m/bin # NOTE Since our custom manylinux image builds pythons with shared # libpython, we need to add libpython's dir to LD_LIBRARY_PATH before running # python. ORIGINAL_LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" -LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY37_BIN})/lib:$(dirname ${PY38_BIN})/lib:$(dirname ${PY39_BIN})/lib" +LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname ${PY37_BIN})/lib:$(dirname ${PY38_BIN})/lib:$(dirname ${PY39_BIN})/lib:$(dirname ${PY310_BIN})/lib" # Our openssl doesn't know how to find the system CA trust store # (https://github.com/pypa/manylinux/issues/53) @@ -136,11 +137,13 @@ for PYTHON in /opt/python/*/bin/python; do # Add matching directory of libpython shared library to library lookup path LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" - # Smoke test to make sure that our Pythons work, and do indeed detect as - # being manylinux compatible: - LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" $PYTHON $MY_DIR/manylinux1-check.py - # Make sure that SSL cert checking works - LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" $PYTHON $MY_DIR/ssl-check.py + if [ "$(dirname $(dirname ${PYTHON}))" != "/opt/python/cp310-cp310" ]; then + # Smoke test to make sure that our Pythons work, and do indeed detect as + # being manylinux compatible: + LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" $PYTHON $MY_DIR/manylinux1-check.py + # Make sure that SSL cert checking works + LD_LIBRARY_PATH="${ORIGINAL_LD_LIBRARY_PATH}:$(dirname $(dirname ${PYTHON}))/lib" $PYTHON $MY_DIR/ssl-check.py + fi done # Restore LD_LIBRARY_PATH diff --git a/tools/dockerfile/build_scripts/build_utils.sh b/tools/dockerfile/build_scripts/build_utils.sh index 70071a9ccb07b..76ad518ae24cc 100755 --- a/tools/dockerfile/build_scripts/build_utils.sh +++ b/tools/dockerfile/build_scripts/build_utils.sh @@ -92,7 +92,14 @@ function do_cpython_build { if [ -e ${prefix}/bin/python3.9 ]; then ln -s python3.9 ${prefix}/bin/python fi + if [ -e ${prefix}/bin/python3.10 ]; then + ln -s python3.10 ${prefix}/bin/python + fi # NOTE Make libpython shared library visible to python calls below + if [ -e ${prefix}/bin/python3.10 ]; then + LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python -m pip config set global.trusted-host mirrors.aliyun.com + LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python -m pip config set global.index-url http://mirrors.aliyun.com/pypi/simple/ + fi LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/python get-pip.py LD_LIBRARY_PATH="/usr/local/ssl/lib:${prefix}/lib" ${prefix}/bin/pip install wheel==0.32.2 cd / From 1346cd3516b0965590691449d587c306bba10285 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Wed, 1 Feb 2023 17:09:43 +0800 Subject: [PATCH 79/89] [dockerfile] fix python3.7 setuptools bug in release18 dockerfile (#42575) * fix release dockerfile * fix GPG error in ubuntu18 * fix cpu * fix --- 
tools/dockerfile/Dockerfile.release18 | 6 ++++-- tools/dockerfile/ubuntu18_release.sh | 6 +++++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/dockerfile/Dockerfile.release18 b/tools/dockerfile/Dockerfile.release18 index cf343873d943a..42b24030c00a8 100644 --- a/tools/dockerfile/Dockerfile.release18 +++ b/tools/dockerfile/Dockerfile.release18 @@ -17,7 +17,9 @@ ENV HOME /root # Add bash enhancements COPY paddle/scripts/docker/root/ /root/ -RUN apt-get update && \ +RUN chmod 777 /tmp + +RUN apt-get update --allow-unauthenticated && \ apt-get install -y software-properties-common && add-apt-repository ppa:deadsnakes/ppa && \ apt-get update && \ apt-get install -y curl wget vim git unzip unrar tar xz-utils libssl-dev bzip2 gzip \ @@ -48,7 +50,7 @@ ENV PATH=/home/cmake-3.16.0-Linux-x86_64/bin:$PATH RUN apt-get update && \ - apt-get install -y python3.7 python3.7-dev && \ + apt-get install -y python3.7 python3.7-dev python3.7-distutils && \ mv /usr/bin/python /usr/bin/python.bak && ln -s /usr/bin/python3.7 /usr/bin/python && \ mv /usr/bin/python3 /usr/bin/python3.bak && ln -s /usr/bin/python3.7 /usr/bin/python3 diff --git a/tools/dockerfile/ubuntu18_release.sh b/tools/dockerfile/ubuntu18_release.sh index 2c12d4b74c073..4c4cc780ce8be 100755 --- a/tools/dockerfile/ubuntu18_release.sh +++ b/tools/dockerfile/ubuntu18_release.sh @@ -80,7 +80,11 @@ function install_whl(){ function set_cuda_env(){ if [[ ${WITH_GPU} == "ON" ]]; then - sed -i "s##ENV LD_LIBRARY_PATH=/usr/local/cuda-${ref_CUDA_MAJOR}/targets/x86_64-linux/lib:\$LD_LIBRARY_PATH #g" Dockerfile.tmp + sed -i "s##ENV LD_LIBRARY_PATH=/usr/local/cuda-${ref_CUDA_MAJOR}/targets/x86_64-linux/lib:\$LD_LIBRARY_PATH \\ +\\ +RUN apt-key del 7fa2af80 \\ +RUN rm /etc/apt/sources.list.d/cuda.list \&\& rm /etc/apt/sources.list.d/nvidia-ml.list \\ +RUN apt-key adv --fetch-keys https://developer.download.nvidia.cn/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub #g" Dockerfile.tmp else sed -i 's###g' Dockerfile.tmp fi From 3cf50f91472544b7560241b1a2ee9d9155175997 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 17:11:01 +0800 Subject: [PATCH 80/89] =?UTF-8?q?Fix=20=E7=A9=BA=E6=8C=87=E9=92=88=20(Null?= =?UTF-8?q?=20pointer)=20of=20case8:=20paddle.slice=20(#49979)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add check for input of slice * add unittest --- paddle/phi/infermeta/unary.cc | 15 +++++++++++++ .../fluid/tests/unittests/test_slice_op.py | 21 +++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 2b35545db1cd8..f2fcb3162081f 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -3395,6 +3395,21 @@ void SliceRawInferMeta(const MetaTensor& input, } } + PADDLE_ENFORCE_EQ( + axes.size(), + starts_arr.size(), + phi::errors::InvalidArgument( + "The length of axes (%d) and length of starts (%d) should be same.", + axes.size(), + starts_arr.size())); + PADDLE_ENFORCE_EQ( + axes.size(), + ends_arr.size(), + phi::errors::InvalidArgument( + "The length of axes (%d) and length of ends (%d) should be same.", + axes.size(), + ends_arr.size())); + // 2.1 Check attrs. 
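As an illustrative aside (not part of this patch): the two checks added above require axes, starts and ends to have the same length, so a mismatch is reported as a ValueError up front rather than as the null-pointer failure this patch fixes. A minimal sketch of the behaviour, mirroring the new unit test below (0-size float32 input, real paddle APIs):

    import numpy as np
    import paddle

    x = paddle.to_tensor(np.zeros([0], dtype=np.float32))
    out = paddle.slice(x, axes=[0], starts=[0], ends=[0])  # lengths match: returns an empty tensor
    # paddle.slice(x, axes=[0], starts=[0], ends=[])       # length mismatch: now raises ValueError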
std::vector starts = starts_arr.GetData(); std::vector ends = ends_arr.GetData(); diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py index 19aa669badf5c..157818e794301 100644 --- a/python/paddle/fluid/tests/unittests/test_slice_op.py +++ b/python/paddle/fluid/tests/unittests/test_slice_op.py @@ -852,6 +852,27 @@ def test_axis_less_than_zero(self): paddle.slice(x, 0, starts, ends) +class TestSliceOpError(unittest.TestCase): + def test_dismatch_shape(self): + with fluid.dygraph.guard(): + with self.assertRaises(ValueError): + array = np.array([], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [0]), dtype='float32') + paddle.slice(x, axes=[0], starts=[], ends=[]) + + with self.assertRaises(ValueError): + array = np.array([], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [0]), dtype='float32') + paddle.slice(x, axes=[0], starts=[0], ends=[]) + + # if shape match, pass + array = np.array([], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [0]), dtype='float32') + out = paddle.slice(x, axes=[0], starts=[0], ends=[0]) + self.assertEqual(out.numel(), 0) + # self.assertEqual(out.shape) + + @unittest.skipIf( not core.is_compiled_with_cuda(), "core is not compiled with CUDA" ) From fd5b8eea463ba14d86c12c327deae0475aa10f0f Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 17:15:42 +0800 Subject: [PATCH 81/89] Fix Python IndexError of case2-3 (#49986) * add shape check for fused_multi_head_attention * use raise for coverage test * add unittest * remove unnecessary pass * add unittest --- .../test_fused_attention_no_dropout.py | 12 +++++++++ .../test_fused_multi_transformer_op.py | 26 +++++++++++++++++++ .../nn/functional/fused_transformer.py | 5 ++++ 3 files changed, 43 insertions(+) diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_no_dropout.py b/python/paddle/fluid/tests/unittests/test_fused_attention_no_dropout.py index c459f2dbb22e6..4f18abd79e0fe 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_no_dropout.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_no_dropout.py @@ -192,5 +192,17 @@ def set_configs(self): self.normalize_before = True +class TestFusedAttentionAPIError(unittest.TestCase): + def test_invalid_x_rank(self): + def test_x_rank_1(): + with paddle.fluid.dygraph.guard(): + layer = FusedMultiHeadAttention(embed_dim=1, num_heads=1) + array = np.array([1.9], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [1]), dtype='float32') + out = layer(x) + + self.assertRaises(ValueError, test_x_rank_1) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py index e3da925a01e42..8068387cfdcba 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py @@ -1051,5 +1051,31 @@ def test_fused_multi_transformer_op(self): ) +class TestFusedMultiAttentionAPIError(unittest.TestCase): + def test_errors(self): + def test_invalid_input_dim(): + array = np.array([1.9], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [1]), dtype='float32') + layer = paddle.incubate.nn.FusedMultiHeadAttention( + embed_dim=1, num_heads=1 + ) + out = layer(x) + + self.assertRaises(ValueError, test_invalid_input_dim) + + +class TestFusedMultiTransformerAPIError(unittest.TestCase): + def 
test_errors(self): + def test_invalid_input_dim(): + array = np.array([], dtype=np.float32) + x = paddle.to_tensor(np.reshape(array, [0]), dtype='int32') + layer = paddle.incubate.nn.FusedTransformerEncoderLayer( + 108, 108, 108, 0.0, 'relu' + ) + out = layer(x) + + self.assertRaises(ValueError, test_invalid_input_dim) + + if __name__ == "__main__": unittest.main() diff --git a/python/paddle/incubate/nn/functional/fused_transformer.py b/python/paddle/incubate/nn/functional/fused_transformer.py index 01d2161b22342..19ec0ad2458d7 100644 --- a/python/paddle/incubate/nn/functional/fused_transformer.py +++ b/python/paddle/incubate/nn/functional/fused_transformer.py @@ -615,6 +615,11 @@ def fused_multi_head_attention( 'downgrade_in_infer' if mode == 'downscale_in_infer' else mode ) # semantic transfer + if x.ndim != 3: + raise ValueError( + f"The rank of the x should be 3, but received {x.ndim}." + ) + if _non_static_mode(): if default_main_program().random_seed != 0: seed = default_main_program().random_seed From 34bf3d09b125cc6590edfd98bcb5bdeaf98f7ad3 Mon Sep 17 00:00:00 2001 From: RedContritio Date: Wed, 1 Feb 2023 18:00:53 +0800 Subject: [PATCH 82/89] =?UTF-8?q?Fix=20UFA=E9=9D=9E=E6=B3=95=E5=9C=B0?= =?UTF-8?q?=E5=9D=80=E8=AE=BF=E9=97=AE(UFA=20illegal=20address=20access)?= =?UTF-8?q?=20of=20case3:=20paddle.crop=20(#49994)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add range check for crop_kernel * remove shape negative check * add unittest --- paddle/phi/kernels/impl/crop_kernel_impl.h | 10 ++++++++++ python/paddle/fluid/tests/unittests/test_crop_op.py | 7 +++++++ 2 files changed, 17 insertions(+) diff --git a/paddle/phi/kernels/impl/crop_kernel_impl.h b/paddle/phi/kernels/impl/crop_kernel_impl.h index d3cb672104d67..5aa951d4da09d 100644 --- a/paddle/phi/kernels/impl/crop_kernel_impl.h +++ b/paddle/phi/kernels/impl/crop_kernel_impl.h @@ -100,6 +100,16 @@ void CropTensorFunction(const Context& dev_ctx, out->Resize(out_dims); dev_ctx.template Alloc(out); for (size_t i = 0; i < offsets_vec.size(); ++i) { + PADDLE_ENFORCE_GE( + offsets_vec[i], + 0, + errors::InvalidArgument("The offsets (%d) of the %uth elements of" + " Op(crop_tensor) " + "should be greater than or " + "equal to 0.", + offsets_vec[i], + i)); + PADDLE_ENFORCE_LE(offsets_vec[i] + shape_vec[i], x_dims[i], errors::InvalidArgument( diff --git a/python/paddle/fluid/tests/unittests/test_crop_op.py b/python/paddle/fluid/tests/unittests/test_crop_op.py index 1050fb0ad5c57..f5886edc3350c 100644 --- a/python/paddle/fluid/tests/unittests/test_crop_op.py +++ b/python/paddle/fluid/tests/unittests/test_crop_op.py @@ -149,6 +149,13 @@ def test_crop_none_shape(self): self.assertEqual(crop.shape, (3, 6, 6)) +class TestCropError(unittest.TestCase): + def test_neg_offset_error(self): + with self.assertRaises(ValueError): + x = fluid.data(name='input2', shape=[1], dtype="float32") + out = paddle.crop(x, offsets=[-1]) + + if __name__ == '__main__': paddle.enable_static() unittest.main() From e6d29e0020ac7057b2bed5fef9d689d5f18642be Mon Sep 17 00:00:00 2001 From: risemeup1 <62429225+risemeup1@users.noreply.github.com> Date: Wed, 1 Feb 2023 19:58:46 +0800 Subject: [PATCH 83/89] add information of build_size (#49397) --- paddle/scripts/paddle_build.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh index 2c83897b16678..d389b76cfe2c9 100644 --- a/paddle/scripts/paddle_build.sh +++ b/paddle/scripts/paddle_build.sh @@ 
-459,7 +459,7 @@ EOF if ls ${PADDLE_ROOT}/build/python/dist/*whl >/dev/null 2>&1; then PR_whlSize=$($com ${PADDLE_ROOT}/build/python/dist |awk '{print $1}') elif ls ${PADDLE_ROOT}/dist/*whl >/dev/null 2>&1; then - PR_whlSize=$($com ${PADDLE_ROOT}/build/python/dist |awk '{print $1}') + PR_whlSize=$($com ${PADDLE_ROOT}/dist |awk '{print $1}') fi echo "PR whl Size: $PR_whlSize" echo "ipipe_log_param_PR_whl_Size: $PR_whlSize" >> ${PADDLE_ROOT}/build/build_summary.txt @@ -3763,6 +3763,8 @@ function run_setup(){ exit 7; fi + build_size + endTime_s=`date +%s` [ -n "$startTime_firstBuild" ] && startTime_s=$startTime_firstBuild echo "Build Time: $[ $endTime_s - $startTime_s ]s" From 2b848aef55e47c514cef216af51ba7bcad1e43b7 Mon Sep 17 00:00:00 2001 From: Yuang Liu Date: Wed, 1 Feb 2023 20:10:35 +0800 Subject: [PATCH 84/89] Fused attention pass fwd, create the fused_attention op. (#50125) --- .../framework/ir/fused_attention_pass.cc | 246 +++++++++++++++--- .../fluid/framework/ir/fused_attention_pass.h | 24 +- .../unittests/test_fused_attention_pass.py | 14 +- 3 files changed, 231 insertions(+), 53 deletions(-) diff --git a/paddle/fluid/framework/ir/fused_attention_pass.cc b/paddle/fluid/framework/ir/fused_attention_pass.cc index 72fa90db9b15a..7b0f469ff87cd 100644 --- a/paddle/fluid/framework/ir/fused_attention_pass.cc +++ b/paddle/fluid/framework/ir/fused_attention_pass.cc @@ -22,7 +22,6 @@ namespace patterns { PDNode* FusedAttentionPattern::operator()(PDNode* x, bool pre_layer_norm, - bool post_layer_norm, bool has_attn_mask, bool do_dropout, bool add_residual) { @@ -259,7 +258,7 @@ PDNode* FusedAttentionPattern::operator()(PDNode* x, out_linear_dropout_node->LinksFrom({out_linear_ele_add_out_node}) .LinksTo({out_linear_dropout_mask_node, out_linear_dropout_out_node}); - if (!add_residual && !post_layer_norm) { + if (!add_residual && pre_layer_norm) { return out_linear_dropout_out_node; } @@ -276,7 +275,7 @@ PDNode* FusedAttentionPattern::operator()(PDNode* x, residual_ele_add_node->LinksFrom({x, out_linear_dropout_out_node}) .LinksTo({residual_ele_add_out_node}); - if (!post_layer_norm) { + if (pre_layer_norm) { return residual_ele_add_out_node; } } @@ -323,13 +322,12 @@ PDNode* FusedAttentionPattern::operator()(PDNode* x, PDNode* FusedAttentionGradPattern::operator()(PDNode* x, bool pre_layer_norm, - bool post_layer_norm, bool has_attn_mask, bool do_dropout, bool add_residual) { // post layer norm PDNode* post_layer_norm_grad_out_node{nullptr}; - if (post_layer_norm) { + if (!pre_layer_norm) { auto* post_layer_norm_grad_node = pattern->NewNode(post_layer_norm_grad_op_repr()) ->assert_is_op("layer_norm_grad"); @@ -375,7 +373,7 @@ PDNode* FusedAttentionGradPattern::operator()(PDNode* x, PDNode* residual_ele_add_grad_x_grad_node{nullptr}; if (add_residual) { PDNode* ele_add_grad_input = x; - if (post_layer_norm) { + if (!pre_layer_norm) { ele_add_grad_input = post_layer_norm_grad_out_node; } auto* residual_ele_add_grad_node = @@ -404,7 +402,7 @@ PDNode* FusedAttentionGradPattern::operator()(PDNode* x, // get the real input x for dropout grad PDNode* out_linear_grad_input_node = x; - if (post_layer_norm && !add_residual) { + if (!pre_layer_norm && !add_residual) { out_linear_grad_input_node = post_layer_norm_grad_out_node; } else if (add_residual) { out_linear_grad_input_node = residual_ele_add_grad_out_node; @@ -769,11 +767,11 @@ PDNode* FusedAttentionGradPattern::operator()(PDNode* x, void FusedAttentionsPass::ApplyImpl(Graph* graph) const { FusePassBase::Init(name_scope_, graph); - graph = 
PreMaskDropResPostFwd(graph); - graph = PreMaskDropResPostBwd(graph); + graph = PreMaskDropResFwd(graph); + graph = PreMaskDropResBwd(graph); } -ir::Graph* FusedAttentionsPass::PreMaskDropResPostFwd(Graph* graph) const { +ir::Graph* FusedAttentionsPass::PreMaskDropResFwd(Graph* graph) const { GraphPatternDetector gpd; auto* x = gpd.mutable_pattern() ->NewNode(patterns::PDNodeName(name_scope_, "x")) @@ -784,7 +782,6 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostFwd(Graph* graph) const { fused_attention_pattern(x, /* pre_layer_norm */ true, - /* post_layer_norm */ true, /* has_attn_mask */ true, /* do_dropout */ true, /* add_residual */ true); @@ -835,10 +832,191 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostFwd(Graph* graph) const { fused_attention_pattern); GET_IR_NODE_FROM_SUBGRAPH( residual_ele_add_op_node, residual_ele_add_op, fused_attention_pattern); + + OpDesc fused_attention_op_desc(pre_layer_norm_op_node->Op()->Block()); + fused_attention_op_desc.SetType("fused_attention"); + fused_attention_op_desc.SetInput("X", {subgraph.at(x)->Name()}); + + fused_attention_op_desc.SetAttr("pre_layer_norm", true); + GET_IR_NODE_FROM_SUBGRAPH(pre_layer_norm_scale_node, + pre_layer_norm_scale, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + pre_layer_norm_bias_node, pre_layer_norm_bias, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + pre_layer_norm_out_node, pre_layer_norm_out, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + pre_layer_norm_mean_node, pre_layer_norm_mean, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(pre_layer_norm_variance_node, + pre_layer_norm_variance, + fused_attention_pattern); + fused_attention_op_desc.SetInput("LnScale", + {pre_layer_norm_scale_node->Name()}); + fused_attention_op_desc.SetInput("LnBias", + {pre_layer_norm_bias_node->Name()}); + fused_attention_op_desc.SetOutput("LnOut", + {pre_layer_norm_out_node->Name()}); + fused_attention_op_desc.SetOutput("LnMean", + {pre_layer_norm_mean_node->Name()}); + fused_attention_op_desc.SetOutput("LnVariance", + {pre_layer_norm_variance_node->Name()}); + fused_attention_op_desc.SetAttr( + "epsilon", + PADDLE_GET_CONST(float, + pre_layer_norm_op_node->Op()->GetAttr("epsilon"))); + + fused_attention_op_desc.SetAttr("transpose_qkv_wb", true); + std::vector shape = PADDLE_GET_CONST( + std::vector, fuse_qkv_reshape_op_node->Op()->GetAttr("shape")); + fused_attention_op_desc.SetAttr("num_heads", shape[2]); GET_IR_NODE_FROM_SUBGRAPH( - post_layer_norm_op_node, post_layer_norm_op, fused_attention_pattern); + fuse_qkv_matmul_w_node, fuse_qkv_matmul_w, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + fuse_qkv_matmul_out_node, fuse_qkv_matmul_out, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_ele_add_bias_node, + fuse_qkv_ele_add_bias, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_ele_add_out_node, + fuse_qkv_ele_add_out, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(fuse_qkv_transpose_out_node, + fuse_qkv_transpose_out, + fused_attention_pattern); + fused_attention_op_desc.SetInput("QKVW", {fuse_qkv_matmul_w_node->Name()}); + fused_attention_op_desc.SetInput("QKVBias", + {fuse_qkv_ele_add_bias_node->Name()}); + fused_attention_op_desc.SetOutput("QKVOut", + {fuse_qkv_matmul_out_node->Name()}); + fused_attention_op_desc.SetOutput("QKVBiasOut", + {fuse_qkv_ele_add_out_node->Name()}); + fused_attention_op_desc.SetOutput("TransposeOut2", + {fuse_qkv_transpose_out_node->Name()}); - // TODO(Yuang Liu): finish the handler + 
GET_IR_NODE_FROM_SUBGRAPH( + qk_matmul_out_node, qk_matmul_out, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(add_mask_ele_add_mask_node, + add_mask_ele_add_mask, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(add_mask_ele_add_out_node, + add_mask_ele_add_out, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + qk_softmax_out_node, qk_softmax_out, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + attn_dropout_out_node, attn_dropout_out, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + attn_dropout_mask_node, attn_dropout_mask, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + qkv_matmul_out_node, qkv_matmul_out, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH( + qkv_reshape_out_node, qkv_reshape_out, fused_attention_pattern); + fused_attention_op_desc.SetOutput("QKOut", {qk_matmul_out_node->Name()}); + fused_attention_op_desc.SetInput("SrcMask", + {add_mask_ele_add_mask_node->Name()}); + fused_attention_op_desc.SetOutput("SrcMaskOut", + {add_mask_ele_add_out_node->Name()}); + fused_attention_op_desc.SetOutput("SoftmaxOut", + {qk_softmax_out_node->Name()}); + fused_attention_op_desc.SetAttr( + "attn_dropout_rate", + PADDLE_GET_CONST(float, + attn_dropout_op_node->Op()->GetAttr("dropout_prob"))); + fused_attention_op_desc.SetAttr( + "is_test", + PADDLE_GET_CONST(bool, attn_dropout_op_node->Op()->GetAttr("is_test"))); + fused_attention_op_desc.SetAttr( + "attn_dropout_fix_seed", + PADDLE_GET_CONST(bool, + attn_dropout_op_node->Op()->GetAttr("fix_seed"))); + fused_attention_op_desc.SetAttr( + "attn_dropout_seed", + PADDLE_GET_CONST(int, attn_dropout_op_node->Op()->GetAttr("seed"))); + fused_attention_op_desc.SetAttr( + "attn_dropout_implementation", + PADDLE_GET_CONST( + std::string, + attn_dropout_op_node->Op()->GetAttr("dropout_implementation"))); + fused_attention_op_desc.SetOutput("AttnDropoutMaskOut", + {attn_dropout_mask_node->Name()}); + fused_attention_op_desc.SetOutput("AttnDropoutOut", + {attn_dropout_out_node->Name()}); + fused_attention_op_desc.SetOutput("QKTVOut", {qkv_matmul_out_node->Name()}); + fused_attention_op_desc.SetOutput("FMHAOut", + {qkv_reshape_out_node->Name()}); + + GET_IR_NODE_FROM_SUBGRAPH( + out_linear_matmul_w_node, out_linear_matmul_w, fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_matmul_out_node, + out_linear_matmul_out, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_ele_add_bias_node, + out_linear_ele_add_bias, + fused_attention_pattern); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_ele_add_out_node, + out_linear_ele_add_out, + fused_attention_pattern); + fused_attention_op_desc.SetInput("OutLinearW", + {out_linear_matmul_w_node->Name()}); + fused_attention_op_desc.SetInput("OutLinearBias", + {out_linear_ele_add_bias_node->Name()}); + fused_attention_op_desc.SetOutput("OutLinearOut", + {out_linear_matmul_out_node->Name()}); + GET_IR_NODE_FROM_SUBGRAPH(out_linear_dropout_mask_node, + out_linear_dropout_mask, + fused_attention_pattern); + fused_attention_op_desc.SetAttr( + "dropout_rate", + PADDLE_GET_CONST( + float, out_linear_dropout_op_node->Op()->GetAttr("dropout_prob"))); + fused_attention_op_desc.SetAttr( + "dropout_fix_seed", + PADDLE_GET_CONST( + bool, out_linear_dropout_op_node->Op()->GetAttr("fix_seed"))); + fused_attention_op_desc.SetAttr( + "dropout_seed", + PADDLE_GET_CONST(int, + out_linear_dropout_op_node->Op()->GetAttr("seed"))); + fused_attention_op_desc.SetAttr( + "dropout_implementation", + PADDLE_GET_CONST(std::string, + 
out_linear_dropout_op_node->Op()->GetAttr( + "dropout_implementation"))); + fused_attention_op_desc.SetOutput("DropoutMaskOut", + {out_linear_dropout_mask_node->Name()}); + + GET_IR_NODE_FROM_SUBGRAPH(residual_ele_add_out_node, + residual_ele_add_out, + fused_attention_pattern); + fused_attention_op_desc.SetAttr("add_residual", true); + fused_attention_op_desc.SetOutput("Y", {residual_ele_add_out_node->Name()}); + + auto fused_attention_node = g->CreateOpNode(&fused_attention_op_desc); + + IR_NODE_LINK_TO(subgraph.at(x), fused_attention_node); + IR_NODE_LINK_TO(pre_layer_norm_scale_node, fused_attention_node); + IR_NODE_LINK_TO(pre_layer_norm_bias_node, fused_attention_node); + IR_NODE_LINK_TO(fuse_qkv_matmul_w_node, fused_attention_node); + IR_NODE_LINK_TO(fuse_qkv_ele_add_bias_node, fused_attention_node); + IR_NODE_LINK_TO(add_mask_ele_add_mask_node, fused_attention_node); + IR_NODE_LINK_TO(out_linear_matmul_w_node, fused_attention_node); + IR_NODE_LINK_TO(out_linear_ele_add_bias_node, fused_attention_node); + + IR_NODE_LINK_TO(fused_attention_node, pre_layer_norm_out_node); + IR_NODE_LINK_TO(fused_attention_node, pre_layer_norm_mean_node); + IR_NODE_LINK_TO(fused_attention_node, pre_layer_norm_variance_node); + IR_NODE_LINK_TO(fused_attention_node, fuse_qkv_matmul_out_node); + IR_NODE_LINK_TO(fused_attention_node, fuse_qkv_ele_add_out_node); + IR_NODE_LINK_TO(fused_attention_node, fuse_qkv_transpose_out_node); + IR_NODE_LINK_TO(fused_attention_node, qk_matmul_out_node); + IR_NODE_LINK_TO(fused_attention_node, add_mask_ele_add_out_node); + IR_NODE_LINK_TO(fused_attention_node, qk_softmax_out_node); + IR_NODE_LINK_TO(fused_attention_node, attn_dropout_mask_node); + IR_NODE_LINK_TO(fused_attention_node, attn_dropout_out_node); + IR_NODE_LINK_TO(fused_attention_node, qkv_matmul_out_node); + IR_NODE_LINK_TO(fused_attention_node, qkv_reshape_out_node); + IR_NODE_LINK_TO(fused_attention_node, out_linear_matmul_out_node); + IR_NODE_LINK_TO(fused_attention_node, out_linear_dropout_mask_node); + IR_NODE_LINK_TO(fused_attention_node, residual_ele_add_out_node); GraphSafeRemoveNodes(g, {pre_layer_norm_op_node, @@ -858,8 +1036,7 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostFwd(Graph* graph) const { out_linear_matmul_op_node, out_linear_ele_add_op_node, out_linear_dropout_op_node, - residual_ele_add_op_node, - post_layer_norm_op_node}); + residual_ele_add_op_node}); found_fused_attention++; }; @@ -869,18 +1046,17 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostFwd(Graph* graph) const { return graph; } -ir::Graph* FusedAttentionsPass::PreMaskDropResPostBwd(Graph* graph) const { +ir::Graph* FusedAttentionsPass::PreMaskDropResBwd(Graph* graph) const { GraphPatternDetector gpd; auto* x = gpd.mutable_pattern() ->NewNode(patterns::PDNodeName(name_scope_, "x")) ->AsInput() - ->assert_is_op_input("layer_norm_grad", "Y@GRAD"); + ->assert_is_op_input("elementwise_add_grad", "Out@GRAD"); patterns::FusedAttentionGradPattern fused_attention_grad_pattern( gpd.mutable_pattern(), "fused_attention_grad_pattern"); fused_attention_grad_pattern(x, /* pre_layer_norm */ true, - /* post_layer_norm */ true, /* has_attn_mask */ true, /* do_dropout */ true, /* add_residual */ true); @@ -891,9 +1067,6 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostBwd(Graph* graph) const { Graph* g) { VLOG(3) << "handle FusedMultiHeadAttention backward pass's fusion"; - GET_IR_NODE_FROM_SUBGRAPH(post_layer_norm_grad_op_node, - post_layer_norm_grad_op, - fused_attention_grad_pattern); 
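    // With the pattern now fixed to pre layer norm (the post_layer_norm flag
    // was removed), the backward handler no longer fetches a
    // post_layer_norm_grad node; matching starts from the residual
    // elementwise_add_grad fetched below.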
GET_IR_NODE_FROM_SUBGRAPH(residual_ele_add_grad_op_node, residual_ele_add_grad_op, fused_attention_grad_pattern); @@ -953,17 +1126,26 @@ ir::Graph* FusedAttentionsPass::PreMaskDropResPostBwd(Graph* graph) const { // TODO(Yuang Liu): finish the handler - GraphSafeRemoveNodes( - g, {post_layer_norm_grad_op_node, residual_ele_add_grad_op_node, - out_linear_dropout_grad_op_node, out_linear_ele_add_grad_op_node, - out_linear_matmul_grad_op_node, qkv_reshape_grad_op_node, - qkv_transpose_grad_op_node, qkv_matmul_grad_op_node, - attn_dropout_grad_op_node, qk_softmax_grad_op_node, - add_mask_ele_add_grad_op_node, qk_scale_grad_op_node, - qk_matmul_grad_op_node, fuse_qkv_split_grad_op_node, - fuse_qkv_transpose_grad_op_node, fuse_qkv_reshape_grad_op_node, - fuse_qkv_ele_add_grad_op_node, fuse_qkv_matmul_grad_op_node, - pre_layer_norm_grad_op_node, grad_accumulation_sum_op_node}); + GraphSafeRemoveNodes(g, + {residual_ele_add_grad_op_node, + out_linear_dropout_grad_op_node, + out_linear_ele_add_grad_op_node, + out_linear_matmul_grad_op_node, + qkv_reshape_grad_op_node, + qkv_transpose_grad_op_node, + qkv_matmul_grad_op_node, + attn_dropout_grad_op_node, + qk_softmax_grad_op_node, + add_mask_ele_add_grad_op_node, + qk_scale_grad_op_node, + qk_matmul_grad_op_node, + fuse_qkv_split_grad_op_node, + fuse_qkv_transpose_grad_op_node, + fuse_qkv_reshape_grad_op_node, + fuse_qkv_ele_add_grad_op_node, + fuse_qkv_matmul_grad_op_node, + pre_layer_norm_grad_op_node, + grad_accumulation_sum_op_node}); found_fused_attention++; }; diff --git a/paddle/fluid/framework/ir/fused_attention_pass.h b/paddle/fluid/framework/ir/fused_attention_pass.h index d360f7f6520d1..41a90bd59960f 100644 --- a/paddle/fluid/framework/ir/fused_attention_pass.h +++ b/paddle/fluid/framework/ir/fused_attention_pass.h @@ -28,7 +28,7 @@ namespace patterns { // Declare patterns for multi head attention. // Can detect: -// 1. Pre layer norm, post layer norm or sandwich layer norm. +// 1. Pre layer norm or post layer norm. // 2. Add attn mask for qk product before the softmax or not. // 3. Do attn dropout or not. // 4. Add residual to the out linear result or not. 
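// For reference, the forward pattern is now built with four flags (the
// separate post_layer_norm flag is gone; post layer norm is simply
// pre_layer_norm == false), e.g.:
//   fused_attention_pattern(x,
//                           /* pre_layer_norm */ true,
//                           /* has_attn_mask */ true,
//                           /* do_dropout */ true,
//                           /* add_residual */ true);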
@@ -37,11 +37,10 @@ struct FusedAttentionPattern : public PatternBase { : PatternBase(pattern, name_scope, "fused_attention_pattern") {} PDNode* operator()(PDNode* x, - bool pre_layer_norm, // do pre ln or not - bool post_layer_norm, // do post ln or not - bool has_attn_mask, // add attn mask to qk or not - bool do_dropout, // dropout the softmax(qk) or not - bool add_residual); // add residual to out linear or not + bool pre_layer_norm, // do pre ln or not + bool has_attn_mask, // add attn mask to qk or not + bool do_dropout, // dropout the softmax(qk) or not + bool add_residual); // add residual to out linear or not // pre layer norm PATTERN_DECL_NODE(pre_layer_norm_op); @@ -134,11 +133,10 @@ struct FusedAttentionGradPattern : public PatternBase { : PatternBase(pattern, name_scope, "fused_attention_pattern") {} PDNode* operator()(PDNode* x, - bool pre_layer_norm, // pre ln - bool post_layer_norm, // post ln - bool has_attn_mask, // add attn mask to qk or not - bool do_dropout, // dropout the softmax(qk) or not - bool add_residual); // add residual to out linear or not + bool pre_layer_norm, // pre ln + bool has_attn_mask, // add attn mask to qk or not + bool do_dropout, // dropout the softmax(qk) or not + bool add_residual); // add residual to out linear or not // post layer norm grad PATTERN_DECL_NODE(post_layer_norm_grad_op); @@ -275,9 +273,9 @@ class FusedAttentionsPass : public FusePassBase { // If true, the function name will have an abbreviation part. // If false, the function name won't contain an abbreviation for it. - ir::Graph* PreMaskDropResPostFwd(Graph* graph) const; + ir::Graph* PreMaskDropResFwd(Graph* graph) const; - ir::Graph* PreMaskDropResPostBwd(Graph* graph) const; + ir::Graph* PreMaskDropResBwd(Graph* graph) const; }; } // namespace ir diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py index cce05d8747cdf..12366a574db21 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_pass.py @@ -31,7 +31,6 @@ def __init__( num_heads, add_residual=True, pre_ln=True, - post_ln=False, attn_dropout=True, ): super(MultiHeadAttention, self).__init__() @@ -42,7 +41,6 @@ def __init__( self.add_residual = add_residual self.pre_ln = pre_ln - self.post_ln = post_ln self.attn_dropout = attn_dropout self.head_dim = embed_dim // num_heads @@ -90,7 +88,7 @@ def forward(self, x, attn_mask=None): if self.add_residual: out = residual + out - if self.post_ln: + if not self.pre_ln: # post layer norm out = self.norm2(out) @@ -104,7 +102,6 @@ class TestFusedAttentionPass(unittest.TestCase): def setUp(self): self.add_residual = True self.pre_ln = True - self.post_ln = True self.attn_dropout = True self.add_mask = True @@ -120,6 +117,7 @@ def test_pass(self): ).astype('float32') main_prog = paddle.static.Program() + main_prog.random_seed = 1234 startup_prog = paddle.static.Program() with paddle.static.program_guard(main_prog, startup_prog): @@ -142,7 +140,6 @@ def test_pass(self): num_heads, add_residual=self.add_residual, pre_ln=self.pre_ln, - post_ln=self.post_ln, attn_dropout=self.attn_dropout, ) @@ -157,13 +154,14 @@ def test_pass(self): pass_manager.apply([main_prog], [startup_prog]) ops = main_prog.global_block().ops - assert ops[2].type == 'reduce_mean' - assert ops[4].type == 'reduce_mean_grad' + assert ops[2].type == 'fused_attention' + assert ops[3].type == 'reduce_mean' + assert ops[5].type == 'reduce_mean_grad' 
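        # After the pass runs, the new fused_attention op replaces the matched
        # attention subgraph at index 2, so the indices of the remaining ops
        # (reduce_mean, reduce_mean_grad, sgd) shift by one compared with the
        # unfused program.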
# two ops for linear, one op for reduce mean # one fill constant # one op for reduce mean grad, two ops for linear bwd # the eighth op should be the optimizer - assert ops[7].type == 'sgd' + assert ops[8].type == 'sgd' if __name__ == "__main__": From 6edc7bba6ab192595ec860c3b5034e6bed92110a Mon Sep 17 00:00:00 2001 From: zqw_1997 <118182234+zhengqiwen1997@users.noreply.github.com> Date: Wed, 1 Feb 2023 21:38:27 +0800 Subject: [PATCH 85/89] remove fluid.initializer.UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, BilinearInitializer, MSRAInitializer, NumpyArrayInitializer and calculate_gain.. (#49498) * move UniformInitializer and ConstantInitializer * more modify * circular import resolved * another circular import resolved? * more circular import 2 * circular import 3 * change import paddle in metric.py * BuildStrategy import from fluid * modify the framework import path in common.py * change rnn.py import, from static to original framework * change import static in the nn folder * default_main_program should import from common_ops_import * add import paddle in param_attr.py * use core not paddle module for using VarDesc * another old uniform * mistake that use Uniform instead of UniformInitializer * modify UniformInitializer doc * move fluid.NormalInitializer to nn.initializer.NormalInitializer * remove import of Normal in fluid.layers.nn.py * remove more import of old Normal * remove more import of old Normal * sample code modify and tests modify import * is_listen_failed passing arg should be log file * problem solved * a mistake solved * comments resoleved and remove paddle.fluid.initializer.TruncatedNormalInitializer * remove paddle.fluid.initializer.XavierInitializer and paddle.fluid.initializer.MSRAInitializer * remove paddle.fluid.initializer.BilinearInitializer NumpyArrayInitializer and set_global_initializer * change fluid to static * change static to fluid to avoid circular import in distributed_strategy.py * fix example code and test_initializer * ValueType * sample code fix * change set_global_initializer back to fluid * put paddle.static.BuildStrategy.ReduceStrategy into the fuction to avoid circular import * remove calculate_gain, delete BilinearInitializer and revert set_global_initializer * change the time of using UniformInitializer, ConstantInitializer, NormalInitializer, TruncatedNormalInitializer, XavierInitializer, MSRAInitializer, NumpyArrayInitializer as few as possible * fix argument incampatible * fix more arg incompatible * fix test_prelu_op_xpu.py Constant * fix inaccurate doc * more doc fix: default value --- python/paddle/common_ops_import.py | 1 - .../fleet/base/distributed_strategy.py | 3 +- .../distributed/fleet/layers/mpu/random.py | 2 +- .../fleet/meta_optimizers/dgc_optimizer.py | 4 +- .../distributed/fleet/metrics/metric.py | 2 +- python/paddle/fluid/compiler.py | 6 +- .../paddle/fluid/contrib/layers/metric_op.py | 6 +- python/paddle/fluid/contrib/layers/nn.py | 32 +- python/paddle/fluid/evaluator.py | 1 - .../incubate/fleet/tests/fleet_deep_ctr.py | 6 +- python/paddle/fluid/initializer.py | 1218 +---------------- python/paddle/fluid/install_check.py | 5 +- python/paddle/fluid/layer_helper.py | 7 +- python/paddle/fluid/layers/io.py | 2 +- python/paddle/fluid/layers/nn.py | 8 +- python/paddle/fluid/metrics.py | 1 - python/paddle/fluid/optimizer.py | 20 +- python/paddle/fluid/param_attr.py | 13 +- .../unittests/auto_parallel_autoconvert.py | 9 +- .../unittests/auto_parallel_save_load.py | 5 +- 
.../collective/column_parallel_linear_api.py | 8 +- .../fleet/parallel_dygraph_transformer.py | 10 +- .../collective/fleet/pipeline_mnist.py | 8 +- .../fleet/pipeline_mnist_multi_device.py | 8 +- .../fleet/pipeline_mnist_one_device.py | 6 +- .../fleet/static_model_parallel_by_col.py | 6 +- .../fleet/static_model_parallel_by_row.py | 8 +- .../fleet/static_model_parallel_embedding.py | 6 +- .../multinode/dygraph_hybrid_dpppmp.py | 2 +- .../multinode/dygraph_hybrid_fp16.py | 2 +- .../multinode/dygraph_hybrid_recompute.py | 2 +- .../collective/parallel_embedding_api.py | 4 +- .../collective/row_parallel_linear_api.py | 4 +- .../tests/unittests/dist_allreduce_op.py | 6 +- .../paddle/fluid/tests/unittests/dist_ctr.py | 6 +- .../fluid/tests/unittests/dist_fleet_ctr.py | 6 +- .../dist_fleet_heter_pipeline_ctr.py | 6 +- .../dist_fleet_raw_program_optimizer.py | 6 +- ...et_raw_program_optimizer_fuse_allreduce.py | 6 +- .../tests/unittests/dist_fleet_simnet_bow.py | 16 +- .../dist_fleet_sparse_embedding_ctr.py | 10 +- .../fluid/tests/unittests/dist_mnist.py | 6 +- .../fluid/tests/unittests/dist_se_resnext.py | 8 +- .../unittests/dist_text_classification.py | 8 +- .../fluid/tests/unittests/dist_transformer.py | 16 +- .../fluid/tests/unittests/dist_word2vec.py | 12 +- .../test_auto_parallel_gradient_merge_pass.py | 9 +- .../dygraph_to_static/bert_dygraph_model.py | 10 +- .../unittests/dygraph_to_static/darknet.py | 6 +- .../seq2seq_dygraph_model.py | 6 +- .../dygraph_to_static/simnet_dygraph_model.py | 5 +- .../simnet_dygraph_model_v2.py | 2 +- .../test_basic_api_transformation.py | 14 +- .../unittests/dygraph_to_static/test_bmn.py | 4 +- .../dygraph_to_static/test_convert_call.py | 4 +- .../dygraph_to_static/test_cycle_gan.py | 20 +- .../unittests/dygraph_to_static/test_lac.py | 12 +- .../dygraph_to_static/test_mobile_net.py | 7 +- .../dygraph_to_static/test_ptb_lm.py | 14 +- .../dygraph_to_static/test_resnet.py | 2 +- .../dygraph_to_static/test_se_resnet.py | 6 +- .../dygraph_to_static/test_word2vec.py | 4 +- .../transformer_dygraph_model.py | 10 +- .../unittests/dygraph_to_static/yolov3.py | 4 +- .../unittests/fleet_heter_ps_training.py | 6 +- .../unittests/ir/inference/program_config.py | 3 +- .../test_mkldnn_conv_bias_fuse_pass.py | 12 +- .../ir/inference/test_trt_subgraph_pass.py | 4 +- .../mlu/test_batch_norm_op_mlu_v2.py | 6 +- .../tests/unittests/npu/test_adam_op_npu.py | 4 +- .../unittests/npu/test_run_program_op_npu.py | 4 +- .../parallel_dygraph_sparse_embedding.py | 6 +- .../fluid/tests/unittests/simple_nets.py | 4 +- .../static_model_parallel_fused_attention.py | 6 +- ...static_model_parallel_fused_feedforward.py | 6 +- ..._model_parallel_fused_multi_transformer.py | 6 +- .../fluid/tests/unittests/test_adam_op.py | 6 +- .../unittests/test_auto_parallel_mapper.py | 17 +- .../test_avoid_twice_initialization.py | 5 +- .../fluid/tests/unittests/test_base_layer.py | 2 +- .../tests/unittests/test_batch_norm_op_v2.py | 6 +- .../tests/unittests/test_calc_gradient.py | 4 +- .../tests/unittests/test_communicator_geo.py | 2 +- .../tests/unittests/test_conv2d_layer.py | 5 +- .../unittests/test_conv2d_transpose_layer.py | 5 +- .../tests/unittests/test_conv3d_layer.py | 5 +- .../unittests/test_conv3d_transpose_layer.py | 5 +- .../tests/unittests/test_cuda_random_seed.py | 8 +- .../unittests/test_decoupled_py_reader.py | 2 +- .../fluid/tests/unittests/test_desc_clone.py | 4 +- .../fluid/tests/unittests/test_detach.py | 4 +- ..._dist_fleet_a_sync_optimizer_auto_async.py | 2 +- 
.../test_dist_fleet_heter_program.py | 18 +- .../unittests/test_dist_fleet_minimize.py | 12 +- .../tests/unittests/test_dist_fleet_ps.py | 12 +- .../tests/unittests/test_dist_fleet_ps11.py | 12 +- .../tests/unittests/test_dist_fleet_ps12.py | 12 +- .../tests/unittests/test_dist_fleet_ps13.py | 12 +- .../tests/unittests/test_dist_fleet_ps2.py | 12 +- .../tests/unittests/test_dist_fleet_ps3.py | 12 +- .../tests/unittests/test_dist_fleet_ps4.py | 12 +- .../tests/unittests/test_dist_fleet_ps5.py | 12 +- .../tests/unittests/test_dist_fleet_ps6.py | 12 +- .../test_dist_fleet_sparse_embedding_ctr.py | 6 +- .../tests/unittests/test_dist_fleet_spmt.py | 12 +- .../unittests/test_dist_sparse_load_ps0.py | 8 +- .../fluid/tests/unittests/test_dist_train.py | 12 +- .../tests/unittests/test_dist_transpiler.py | 20 +- .../test_eager_deletion_delete_vars.py | 2 +- .../test_eager_deletion_padding_rnn.py | 14 +- .../test_eager_deletion_recurrent_op.py | 4 +- .../tests/unittests/test_egr_python_api.py | 6 +- .../tests/unittests/test_functional_conv2d.py | 9 +- .../test_functional_conv2d_transpose.py | 9 +- .../tests/unittests/test_functional_conv3d.py | 9 +- .../test_functional_conv3d_transpose.py | 9 +- .../tests/unittests/test_fuse_bn_act_pass.py | 4 +- .../unittests/test_fuse_bn_add_act_pass.py | 14 +- .../test_fused_multi_transformer_op.py | 10 +- .../unittests/test_generator_dataloader.py | 2 +- .../fluid/tests/unittests/test_hsigmoid_op.py | 15 +- .../tests/unittests/test_imperative_deepcf.py | 2 +- ..._imperative_lod_tensor_to_selected_rows.py | 4 +- .../test_imperative_ocr_attention_model.py | 9 +- .../unittests/test_imperative_ptb_rnn.py | 14 +- .../tests/unittests/test_imperative_resnet.py | 2 +- .../unittests/test_imperative_save_load_v2.py | 14 +- ..._imperative_selected_rows_to_lod_tensor.py | 6 +- ..._imperative_transformer_sorted_gradient.py | 10 +- .../fluid/tests/unittests/test_initializer.py | 73 +- .../tests/unittests/test_ir_inplace_pass.py | 2 +- .../fluid/tests/unittests/test_layers.py | 66 +- .../fluid/tests/unittests/test_linear.py | 4 +- .../unittests/test_lookup_table_bf16_op.py | 2 +- .../unittests/test_lookup_table_v2_bf16_op.py | 2 +- .../unittests/test_lookup_table_v2_op.py | 4 +- ...cess_dataloader_iterable_dataset_static.py | 4 +- .../test_multiprocess_dataloader_static.py | 4 +- .../paddle/fluid/tests/unittests/test_nce.py | 5 +- .../test_nn_functional_embedding_static.py | 4 +- .../tests/unittests/test_optimizer_grad.py | 6 +- .../test_optimizer_in_control_flow.py | 8 +- .../unittests/test_parallel_executor_mnist.py | 4 +- .../fluid/tests/unittests/test_parameter.py | 3 +- .../fluid/tests/unittests/test_prelu_op.py | 4 +- .../unittests/test_program_prune_backward.py | 2 +- .../fluid/tests/unittests/test_prune.py | 6 +- .../fluid/tests/unittests/test_py_func_op.py | 2 +- .../fluid/tests/unittests/test_random_seed.py | 8 +- .../tests/unittests/test_recurrent_op.py | 8 +- .../fluid/tests/unittests/test_row_conv_op.py | 2 +- .../tests/unittests/test_run_program_op.py | 6 +- .../tests/unittests/test_set_bool_attr.py | 4 +- .../fluid/tests/unittests/test_sgd_op_bf16.py | 2 +- .../tests/unittests/test_static_save_load.py | 14 +- .../tests/unittests/test_tdm_child_op.py | 4 +- .../tests/unittests/test_tdm_sampler_op.py | 6 +- .../tests/unittests/test_uniform_random_op.py | 2 +- .../unittests/test_weight_normalization.py | 3 +- .../tests/unittests/transformer_model.py | 25 +- .../unittests/xpu/test_batch_norm_op_xpu.py | 2 +- .../test_fused_resnet_basic_block_op_xpu.py | 36 +- 
.../tests/unittests/xpu/test_prelu_op_xpu.py | 2 +- .../fluid/transpiler/distribute_transpiler.py | 4 +- python/paddle/incubate/asp/asp.py | 5 +- python/paddle/nn/decode.py | 2 +- python/paddle/nn/functional/common.py | 2 +- python/paddle/nn/functional/conv.py | 2 +- python/paddle/nn/functional/extension.py | 2 +- python/paddle/nn/functional/input.py | 2 +- python/paddle/nn/functional/loss.py | 2 +- python/paddle/nn/functional/vision.py | 2 +- python/paddle/nn/initializer/Bilinear.py | 182 +++ python/paddle/nn/initializer/__init__.py | 13 +- python/paddle/nn/initializer/assign.py | 120 +- python/paddle/nn/initializer/constant.py | 65 +- python/paddle/nn/initializer/dirac.py | 2 +- python/paddle/nn/initializer/initializer.py | 159 +++ python/paddle/nn/initializer/kaiming.py | 188 ++- python/paddle/nn/initializer/normal.py | 193 ++- python/paddle/nn/initializer/orthogonal.py | 2 +- python/paddle/nn/initializer/uniform.py | 133 +- python/paddle/nn/initializer/xavier.py | 182 ++- python/paddle/nn/layer/rnn.py | 9 +- python/paddle/optimizer/optimizer.py | 9 +- python/paddle/static/nn/common.py | 14 +- python/paddle/static/nn/loss.py | 4 +- python/paddle/static/nn/metric.py | 5 +- python/paddle/tensor/array.py | 2 +- python/paddle/tensor/attribute.py | 2 +- python/paddle/tensor/creation.py | 8 +- .../paddle/tensor/layer_function_generator.py | 2 +- python/paddle/tensor/linalg.py | 2 +- python/paddle/tensor/logic.py | 2 +- python/paddle/tensor/manipulation.py | 3 +- python/paddle/tensor/math.py | 2 +- python/paddle/tensor/random.py | 2 +- python/paddle/tensor/stat.py | 2 +- python/paddle/vision/ops.py | 4 +- 199 files changed, 1927 insertions(+), 1925 deletions(-) create mode 100644 python/paddle/nn/initializer/Bilinear.py create mode 100644 python/paddle/nn/initializer/initializer.py mode change 100755 => 100644 python/paddle/tensor/logic.py diff --git a/python/paddle/common_ops_import.py b/python/paddle/common_ops_import.py index 91a3f49cdbba2..1ec54064eb64e 100644 --- a/python/paddle/common_ops_import.py +++ b/python/paddle/common_ops_import.py @@ -32,7 +32,6 @@ dygraph_only, in_dygraph_mode, ) -from paddle.fluid.initializer import Constant # noqa: F401 from paddle.fluid.layer_helper import LayerHelper # noqa: F401 from paddle.fluid.layers import fill_constant, utils # noqa: F401 from paddle.fluid.layers.layer_function_generator import ( # noqa: F401 diff --git a/python/paddle/distributed/fleet/base/distributed_strategy.py b/python/paddle/distributed/fleet/base/distributed_strategy.py index efa94862b5246..fbe391b45f055 100755 --- a/python/paddle/distributed/fleet/base/distributed_strategy.py +++ b/python/paddle/distributed/fleet/base/distributed_strategy.py @@ -104,7 +104,6 @@ def _set_distributed_strategy(self, dist_strategy): self.job_info.strategy = dist_strategy -ReduceStrategyFluid = paddle.static.BuildStrategy.ReduceStrategy ReduceStrategyFleet = int @@ -261,7 +260,7 @@ def build_strategy(self): for f in fields: value = getattr(self.strategy.build_strategy, f.name) if f.name == 'reduce_strategy': - value = ReduceStrategyFluid(value) + value = paddle.static.BuildStrategy.ReduceStrategy(value) setattr(build_strategy, f.name, value) return build_strategy diff --git a/python/paddle/distributed/fleet/layers/mpu/random.py b/python/paddle/distributed/fleet/layers/mpu/random.py index 7b89330d951c8..718c85e855734 100644 --- a/python/paddle/distributed/fleet/layers/mpu/random.py +++ b/python/paddle/distributed/fleet/layers/mpu/random.py @@ -18,11 +18,11 @@ import paddle from paddle import 
_legacy_C_ops +from paddle.common_ops_import import Variable from paddle.fluid import core from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import in_dygraph_mode from paddle.framework import LayerHelper -from paddle.static import Variable __all__ = [] diff --git a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py index 9dce0d540a16f..98d131822fe36 100644 --- a/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py +++ b/python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py @@ -171,7 +171,7 @@ def _add_auto_increment_var(self, counter_name, begin, step=1): if is_new_var: helper.set_variable_initializer( counter, - initializer=paddle.fluid.initializer.Constant( + initializer=paddle.nn.initializer.ConstantInitializer( value=float(begin - 1), force_cpu=True ), ) @@ -194,7 +194,7 @@ def _add_nranks_var(self, name, value=-1): if is_new_var: helper.set_variable_initializer( counter, - initializer=paddle.fluid.initializer.Constant( + initializer=paddle.nn.initializer.ConstantInitializer( value=float(value), force_cpu=True ), ) diff --git a/python/paddle/distributed/fleet/metrics/metric.py b/python/paddle/distributed/fleet/metrics/metric.py index 999ab6f0af126..d2f72b0c7d047 100644 --- a/python/paddle/distributed/fleet/metrics/metric.py +++ b/python/paddle/distributed/fleet/metrics/metric.py @@ -18,7 +18,7 @@ import numpy as np import paddle -from paddle.static import Variable +from paddle.common_ops_import import Variable __all__ = [] diff --git a/python/paddle/fluid/compiler.py b/python/paddle/fluid/compiler.py index e8393c63b1053..609bfa3d93e53 100644 --- a/python/paddle/fluid/compiler.py +++ b/python/paddle/fluid/compiler.py @@ -586,7 +586,6 @@ def convert_concrete_program( """ from ..fluid.dygraph.base import switch_to_static_graph from ..fluid import backward - from ..fluid.initializer import Constant from ..fluid.framework import device_guard import paddle @@ -645,7 +644,10 @@ def append_backward_desc(): device = optimizer._get_device_for_param(param_name) with device_guard(device): optimizer.helper.set_variable_initializer( - var, initializer=Constant(value=0.0) + var, + initializer=paddle.nn.initializer.Constant( + value=0.0 + ), ) param_or_lr_tensor = scope.find_var( var_tmp.name diff --git a/python/paddle/fluid/contrib/layers/metric_op.py b/python/paddle/fluid/contrib/layers/metric_op.py index 414fcf5b6cd51..07d6b464ddb11 100755 --- a/python/paddle/fluid/contrib/layers/metric_op.py +++ b/python/paddle/fluid/contrib/layers/metric_op.py @@ -17,7 +17,6 @@ import warnings from paddle.fluid.layer_helper import LayerHelper -from paddle.fluid.initializer import Normal, Constant from paddle.fluid.framework import Variable from paddle.fluid.param_attr import ParamAttr from paddle.fluid.layers import tensor @@ -147,7 +146,10 @@ def ctr_metric_bundle(input, label, ins_tag_weight=None): local_ins_num, ]: helper.set_variable_initializer( - var, Constant(value=0.0, force_cpu=True) + var, + paddle.nn.initializer.ConstantInitializer( + value=0.0, force_cpu=True + ), ) helper.append_op( diff --git a/python/paddle/fluid/contrib/layers/nn.py b/python/paddle/fluid/contrib/layers/nn.py index d2aff8bfcf659..9064e4f9f09dd 100644 --- a/python/paddle/fluid/contrib/layers/nn.py +++ b/python/paddle/fluid/contrib/layers/nn.py @@ -24,7 +24,6 @@ from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers import utils from ... 
import unique_name -from paddle.fluid.initializer import Normal, Constant, NumpyArrayInitializer from paddle.fluid.data_feeder import ( check_variable_and_dtype, check_type, @@ -896,8 +895,10 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) tree_info = [[0,0,0,1,2], [0,1,0,3,4],[0,1,0,5,6], @@ -908,7 +909,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): child_nums = 2 child, leaf_mask = fluid.contrib.layers.tdm_child(x, node_nums, child_nums, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( tree_info_np))) place = fluid.CPUPlace() exe = fluid.Executor(place) @@ -925,7 +926,7 @@ def tdm_child(x, node_nums, child_nums, param_attr=None, dtype='int32'): attr=helper.param_attr, shape=[node_nums, 3 + child_nums], dtype=dtype, - default_initializer=Constant(0), + default_initializer=paddle.nn.initializer.Constant(0), ) tree_info.stop_gradient = True @@ -1003,8 +1004,10 @@ def tdm_sampler( Examples: .. code-block:: python + import paddle import paddle.fluid as fluid import numpy as np + paddle.enable_static() x = fluid.data(name="x", shape=[None, 1], dtype="int32", lod_level=1) travel_list = [[1, 3], [1, 4], [2, 5], [2, 6]] # leaf node's travel path, shape(leaf_node_num, layer_num) layer_list_flat = [[1], [2], [3], [4], [5], [6]] # shape(node_nums, 1) @@ -1022,10 +1025,10 @@ def tdm_sampler( layer_node_num_list, leaf_node_num, tree_travel_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( travel_array)), tree_layer_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( layer_array)), output_positive=True, output_list=True, @@ -1089,7 +1092,7 @@ def tdm_sampler( attr=tree_travel_attr, shape=travel_shape, dtype=tree_dtype, - default_initializer=Constant(0), + default_initializer=paddle.nn.initializer.Constant(0), ) layer_shape = [node_nums, 1] @@ -1097,7 +1100,7 @@ def tdm_sampler( attr=tree_layer_attr, shape=layer_shape, dtype=tree_dtype, - default_initializer=Constant(0), + default_initializer=paddle.nn.initializer.Constant(0), ) out = helper.create_variable_for_type_inference(dtype=dtype) @@ -1640,7 +1643,7 @@ def build_program(main_program, startup_program): attr=helper.param_attr, shape=param_shape, dtype=bn_param_dtype, - default_initializer=Constant(1.0), + default_initializer=paddle.nn.initializer.Constant(1.0), ) bias = helper.create_parameter( attr=helper.bias_attr, @@ -1650,7 +1653,9 @@ def build_program(main_program, startup_program): ) mean = helper.create_parameter( attr=ParamAttr( - name=moving_mean_name, initializer=Constant(0.0), trainable=False + name=moving_mean_name, + initializer=paddle.nn.initializer.Constant(0.0), + trainable=False, ), shape=param_shape, dtype=bn_param_dtype, @@ -1659,7 +1664,7 @@ def build_program(main_program, startup_program): variance = helper.create_parameter( attr=ParamAttr( name=moving_variance_name, - initializer=Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, ), shape=param_shape, @@ -1723,13 +1728,16 @@ def pow2_decay_with_linear_warmup( helper = LayerHelper("pow2_decay_with_linear_warmup", **locals()) lr = helper.create_global_variable(persistable=True, 
dtype=dtype, shape=[1]) helper.set_variable_initializer( - lr, Constant(value=float(base_lr) / warmup_steps) + lr, + paddle.nn.initializer.Constant(value=float(base_lr) / warmup_steps), ) step = helper.create_global_variable( persistable=True, dtype='int64', shape=[1] ) - helper.set_variable_initializer(step, Constant(value=0)) + helper.set_variable_initializer( + step, paddle.nn.initializer.Constant(value=0) + ) assert ( warmup_steps <= total_steps ), "warmup_steps cannot be larger than total_steps" diff --git a/python/paddle/fluid/evaluator.py b/python/paddle/fluid/evaluator.py index 472bcbd3cac4b..a4d80ecbfed25 100644 --- a/python/paddle/fluid/evaluator.py +++ b/python/paddle/fluid/evaluator.py @@ -20,7 +20,6 @@ from .framework import Program, Variable, program_guard from . import unique_name from .layer_helper import LayerHelper -from .initializer import Constant def _clone_var_(block, var): diff --git a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py index 23f5a44fe139e..9fc9182017ec4 100644 --- a/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py +++ b/python/paddle/fluid/incubate/fleet/tests/fleet_deep_ctr.py @@ -109,7 +109,7 @@ def model(): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -121,7 +121,7 @@ def model(): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -134,7 +134,7 @@ def model(): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) diff --git a/python/paddle/fluid/initializer.py b/python/paddle/fluid/initializer.py index 38650856b0720..6eb88d8f8ef3d 100644 --- a/python/paddle/fluid/initializer.py +++ b/python/paddle/fluid/initializer.py @@ -30,1139 +30,24 @@ from paddle import _C_ops, _legacy_C_ops import paddle -__all__ = [ - 'Constant', - 'Uniform', - 'Normal', - 'TruncatedNormal', - 'Xavier', - 'Bilinear', - 'MSRA', - 'ConstantInitializer', - 'UniformInitializer', - 'NormalInitializer', - 'TruncatedNormalInitializer', - 'XavierInitializer', - 'BilinearInitializer', - 'MSRAInitializer', - 'NumpyArrayInitializer', - 'set_global_initializer', -] +__all__ = ['set_global_initializer'] _global_weight_initializer_ = None _global_bias_initializer_ = None -class Initializer: - """Base class for variable initializers - - Defines the common interface of variable initializers. - They add operations to the init program that are used - to initialize variables. Users should not use this class - directly, but need to use one of its implementations. 
- """ - - def __init__(self): - pass - - def __call__(self, param, block=None): - if not lazy_init_helper().state: - return self.forward(param, block) - - return self._lazy_init(param, block) - - def forward(self, param, block=None): - """Add corresponding initialization operations to the network""" - raise NotImplementedError() - - def _lazy_init(self, param, block=None): - """ - Apply lazy initialization - """ - assert in_dygraph_mode() - - def init_op_creator(forward, param, block): - new_var = param._to_static_var(True, block=block) - # Record initializer operator - with lazy_init_helper(): - forward(new_var, block) - - # Add hook function for initializing param in dygraph mode - param.set_init_func(functools.partial(self.forward, param, block)) - param._init_op_creator = functools.partial( - init_op_creator, self.forward, param - ) - - return param - - def _check_block(self, block): - if block is None: - block = default_main_program().global_block() - - return block - - def _compute_fans(self, var): - """Compute the fan_in and the fan_out for layers - - This method computes the fan_in and the fan_out - for neural network layers, if not specified. It is - not possible to perfectly estimate fan_in and fan_out. - This method will estimate it correctly for matrix multiply and - convolutions. - - Args: - var: variable for which fan_in and fan_out have to be computed - - Returns: - tuple of two integers (fan_in, fan_out) - """ - shape = var.shape - if not shape or len(shape) == 0: - fan_in = fan_out = 1 - elif len(shape) == 1: - fan_in = fan_out = shape[0] - elif len(shape) == 2: - # This is the case for simple matrix multiply - fan_in = shape[0] - fan_out = shape[1] - else: - # Assume this to be a convolutional kernel - # In PaddlePaddle, the shape of the kernel is like: - # [num_filters, num_filter_channels, ...] where the remaining - # dimensions are the filter_size - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - - return (fan_in, fan_out) - - -class ConstantInitializer(Initializer): - """Implements the constant initializer - - Args: - value (float32): constant value to initialize the variable - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32") - fc = paddle.static.nn.fc( - x, - size=10, - weight_attr=fluid.initializer.Constant(value=2.0)) - - """ - - def __init__(self, value=0.0, force_cpu=False): - assert value is not None - super().__init__() - self._value = value - self._force_cpu = force_cpu - - def forward(self, var, block=None): - """Initialize the input tensor with constant. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) or isinstance( - var, framework.EagerParamBase - ) - assert isinstance(block, framework.Block) - - if in_dygraph_mode(): - place = _current_expected_place() - if self._force_cpu: - place = core.CPUPlace() - _C_ops.full_( - var, var.shape, str(float(self._value)), var.dtype, place - ) - return None - else: - op = block.append_op( - type="fill_constant", - outputs={"Out": var}, - attrs={ - "shape": var.shape, - "dtype": int(var.dtype), - "value": float(self._value), - 'str_value': str(float(self._value)), - 'force_cpu': self._force_cpu, - }, - stop_gradient=True, - ) - - var.op = op - return op - - -class UniformInitializer(Initializer): - """Implements the random uniform distribution initializer - - Args: - low (float): lower boundary of the uniform distribution - high (float): upper boundary of the uniform distribution - seed (int): random seed - diag_num (int): the number of diagonal elements to initialize. - If set to 0, diagonal initialization will be not performed. - diag_step (int): Step size between two diagonal elements, - which is generally the width of the square matrix. - diag_val (float): the value of the diagonal element to be initialized, - default 1.0. It takes effect only if the diag_num is greater than 0. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name='x', shape=[None, 1], dtype='float32') - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5)) - """ - - def __init__( - self, low=-1.0, high=1.0, seed=0, diag_num=0, diag_step=0, diag_val=1.0 - ): - assert low is not None - assert high is not None - assert high >= low - assert seed is not None - assert diag_num is not None - assert diag_step is not None - assert diag_val is not None - if diag_num > 0 or diag_step > 0: - assert diag_num > 0 and diag_step > 0 - super().__init__() - self._low = low - self._high = high - self._seed = seed - self._diag_num = diag_num - self._diag_step = diag_step - self._diag_val = diag_val - - def forward(self, var, block=None): - """Initialize the input tensor with Uniform distribution. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(block, framework.Block) - if not in_dygraph_mode(): - check_variable_and_dtype( - var, - "Out", - ["uint16", "float16", "float32", "float64"], - "uniform_random", - ) - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initializers - if var.dtype == VarDesc.VarType.FP16: - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['uniform_random', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - out_var = _C_ops.uniform( - var.shape, - out_dtype, - self._low, - self._high, - self._seed, - _current_expected_place(), - ) - if var.dtype == VarDesc.VarType.FP16: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": var.shape, - "dtype": out_dtype, - "min": self._low, - "max": self._high, - "seed": self._seed, - "diag_num": self._diag_num, - "diag_step": self._diag_step, - "diag_val": self._diag_val, - }, - stop_gradient=True, - ) - - if var.dtype == VarDesc.VarType.FP16: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class NormalInitializer(Initializer): - """Implements the Random Normal(Gaussian) distribution initializer - - Args: - loc (float): mean of the normal distribution - scale (float): standard deviation of the normal distribution - seed (int): random seed - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name="data", shape=[None, 32, 32], dtype="float32") - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.Normal(loc=0.0, scale=2.0)) - - """ - - def __init__(self, loc=0.0, scale=1.0, seed=0): - assert loc is not None - assert scale is not None - assert seed is not None - super().__init__() - self._mean = loc - self._std_dev = scale - self._seed = seed - - def forward(self, var, block=None): - """Initialize the input tensor with Normal distribution. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(block, framework.Block) - - if self._seed == 0: - self._seed = block.program.random_seed - - if in_dygraph_mode(): - place = _current_expected_place() - out_var = _C_ops.gaussian( - var.shape, - self._mean, - self._std_dev, - self._seed, - var.dtype, - place, - ) - out_var._share_underline_tensor_to(var) - return None - - else: - check_variable_and_dtype( - var, - "Out", - ["uint16", "float16", "float32", "float64"], - "guassian_random", - ) - op = block.append_op( - type="gaussian_random", - outputs={"Out": var}, - attrs={ - "shape": var.shape, - "dtype": var.dtype, - "mean": self._mean, - "std": self._std_dev, - "seed": self._seed, - "use_mkldnn": False, - }, - stop_gradient=True, - ) - var.op = op - return op - - -class TruncatedNormalInitializer(Initializer): - """Implements the Random TruncatedNormal(Gaussian) distribution initializer - - Args: - loc (float): mean of the normal distribution - scale (float): standard deviation of the normal distribution - seed (int): random seed - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name='x', shape=[None, 1], dtype='float32') - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0)) - """ - - def __init__(self, loc=0.0, scale=1.0, seed=0): - assert loc is not None - assert scale is not None - assert seed is not None - super().__init__() - self._mean = loc - self._std_dev = scale - self._seed = seed - - def forward(self, var, block=None): - """Initialize the input tensor with TruncatedNormal distribution. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['truncated_gaussian_random', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - out_var = _C_ops.truncated_gaussian_random( - var.shape, - self._mean, - self._std_dev, - self._seed, - out_dtype, - _current_expected_place(), - ) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - - else: - op = block.append_op( - type="truncated_gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": var.shape, - "dtype": out_dtype, - "mean": self._mean, - "std": self._std_dev, - "seed": self._seed, - }, - stop_gradient=True, - ) - - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - var.op = op - return op - - -class XavierInitializer(Initializer): - r""" - This class implements the Xavier weight initializer from the paper - `Understanding the difficulty of training deep feedforward neural - networks `_ - by Xavier Glorot and Yoshua Bengio. - - This initializer is designed to keep the scale of the gradients - approximately same in all the layers. In case of Uniform distribution, - the range is [-x, x], where - - .. math:: - - x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}} - - In case of Normal distribution, the mean is 0 and the standard deviation - is - - .. math:: - - \sqrt{\\frac{2.0}{fan\_in + fan\_out}} - - - Args: - uniform (bool,default True): whether to use uniform ,if False use normal distribution - fan_in (float,default None): fan_in for Xavier initialization. If None, it is - inferred from the variable. - fan_out (float,default None): fan_out for Xavier initialization. If None, it is - inferred from the variable. - seed (int): random seed - - Note: - It is recommended to set fan_in and fan_out to None for most cases. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - queries = fluid.data(name='x', shape=[None,1], dtype='float32') - fc = paddle.static.nn.fc( - x=queries, size=10, - weight_attr=fluid.initializer.Xavier(uniform=False)) - - """ - - def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): - assert uniform is not None - assert seed is not None - super().__init__() - self._uniform = uniform - self._fan_in = fan_in - self._fan_out = fan_out - self._seed = seed - - def forward(self, var, block=None): - """Initialize the input tensor with Xavier initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. 
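The Xavier formulas quoted above are what the forward pass below computes before sampling; a standalone plain-Python sketch of the two bounds (the fan values are illustrative):

    import math

    def xavier_bounds(fan_in, fan_out):
        # Uniform branch samples from [-limit, limit]; normal branch uses std.
        limit = math.sqrt(6.0 / float(fan_in + fan_out))
        std = math.sqrt(2.0 / float(fan_in + fan_out))
        return limit, std

    print(xavier_bounds(32, 10))  # approx (0.378, 0.218)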
- - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(block, framework.Block) - if not in_dygraph_mode(): - check_variable_and_dtype( - var, - "Out", - ["uint16", "float16", "float32", "float64"], - "xavier_init", - ) - - f_in, f_out = self._compute_fans(var) - - # If fan_in and fan_out are passed, use them - fan_in = f_in if self._fan_in is None else self._fan_in - fan_out = f_out if self._fan_out is None else self._fan_out - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['xavier_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - if self._uniform: - limit = math.sqrt(6.0 / float(fan_in + fan_out)) - out_var = _C_ops.uniform( - out_var.shape, - out_dtype, - -limit, - limit, - self._seed, - _current_expected_place(), - ) - else: - std = math.sqrt(2.0 / float(fan_in + fan_out)) - - place = _current_expected_place() - out_var = _C_ops.gaussian( - out_var.shape, 0.0, std, self._seed, out_dtype, place - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - if self._uniform: - limit = math.sqrt(6.0 / float(fan_in + fan_out)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": out_dtype, - "min": -limit, - "max": limit, - "seed": self._seed, - }, - stop_gradient=True, - ) - else: - std = math.sqrt(2.0 / float(fan_in + fan_out)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": out_var.dtype, - "mean": 0.0, - "std": std, - "seed": self._seed, - }, - stop_gradient=True, - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class MSRAInitializer(Initializer): - r"""Implements the MSRA initializer a.k.a. Kaiming Initializer - - This class implements the weight initialization from the paper - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification `_ - by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a - robust initialization method that particularly considers the rectifier - nonlinearities. In case of Uniform distribution, the range is [-x, x], where - - .. math:: - - x = gain \times \sqrt{\frac{3}{fan\_in}} - - In case of Normal distribution, the mean is 0 and the standard deviation - is - - .. math:: - - \frac{gain}{\sqrt{{fan\_in}}} - - Args: - uniform (bool, optional): whether to use uniform or normal distribution - fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. default is None. 
- seed (int32, optional): random seed. - negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0. - nonlinearity(str, optional): the non-linear function. default is relu. - - Note: - It is recommended to set fan_in to None for most cases. - - Examples: - .. code-block:: python - - import paddle - import paddle.fluid as fluid - paddle.enable_static() - x = fluid.data(name="data", shape=[8, 32, 32], dtype="float32") - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.MSRA(uniform=False)) - +def _global_weight_initializer(): """ - - def __init__( - self, - uniform=True, - fan_in=None, - seed=0, - negative_slope=0, - nonlinearity='relu', - ): - """Constructor for MSRAInitializer""" - assert uniform is not None - assert seed is not None - super().__init__() - self._uniform = uniform - self._fan_in = fan_in - self._seed = seed - self._negative_slope = negative_slope - self._nonlinearity = nonlinearity - - def forward(self, var, block=None): - """Initialize the input tensor with MSRA initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - f_in, f_out = self._compute_fans(var) - - # If fan_in is passed, use it - fan_in = f_in if self._fan_in is None else self._fan_in - - if fan_in == 0: - if self._fan_in is None: - raise ValueError( - "The in_features of the Tensor contain zero, can not initialize the Tensor." - ) - else: - raise ValueError( - "fan_in should not be zero, can not initialize the Tensor." 
- ) - - if self._seed == 0: - self._seed = block.program.random_seed - - # to be compatible of fp16 initalizers - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['masra_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if in_dygraph_mode(): - if self._uniform: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - limit = gain * math.sqrt(3.0 / float(fan_in)) - out_var = _C_ops.uniform( - var.shape, - out_dtype, - -limit, - limit, - self._seed, - _current_expected_place(), - ) - else: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - std = gain / math.sqrt(float(fan_in)) - place = _current_expected_place() - out_var = _C_ops.gaussian( - out_var.shape, 0.0, std, self._seed, out_dtype, place - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - if self._uniform: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - limit = gain * math.sqrt(3.0 / float(fan_in)) - op = block.append_op( - type="uniform_random", - inputs={}, - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "min": -limit, - "max": limit, - "seed": self._seed, - }, - stop_gradient=True, - ) - - else: - gain = calculate_gain(self._nonlinearity, self._negative_slope) - std = gain / math.sqrt(float(fan_in)) - op = block.append_op( - type="gaussian_random", - outputs={"Out": out_var}, - attrs={ - "shape": out_var.shape, - "dtype": int(out_dtype), - "mean": 0.0, - "std": std, - "seed": self._seed, - }, - stop_gradient=True, - ) - - if var.dtype == VarDesc.VarType.FP16 or ( - var.dtype == VarDesc.VarType.BF16 and not self._uniform - ): - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class BilinearInitializer(Initializer): + Return the global weight initializer, The user doesn't need to use it. """ - This initializer can be used in transposed convolution operator to - act as upsampling. Users can upsample a feature map with shape of - (B, C, H, W) by any integer factor. The usage is: - - Examples: - - .. code-block:: python - - import math - - import paddle - import paddle.nn as nn - from paddle.regularizer import L2Decay - - factor = 2 - C = 2 - B = 8 - H = W = 32 - w_attr = paddle.ParamAttr(learning_rate=0., - regularizer=L2Decay(0.), - initializer=nn.initializer.Bilinear()) - data = paddle.rand([B, 3, H, W], dtype='float32') - conv_up = nn.Conv2DTranspose(3, - out_channels=C, - kernel_size=2 * factor - factor % 2, - padding=int( - math.ceil((factor - 1) / 2.)), - stride=factor, - weight_attr=w_attr, - bias_attr=False) - x = conv_up(data) + return _global_weight_initializer_ - Where, `out_channels=C` and `groups=C` means this is channel-wise transposed - convolution. The filter shape will be (C, 1, K, K) where K is `kernel_size`, - This initializer will set a (K, K) interpolation kernel for every channel - of the filter identically. 
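Stepping back to the MSRA (Kaiming) branches removed just above: both reduce to closed-form bounds driven by calculate_gain. A standalone plain-Python sketch (the fan_in value is illustrative):

    import math

    def kaiming_bounds(fan_in, gain):
        # Uniform branch samples from [-limit, limit]; normal branch uses std.
        limit = gain * math.sqrt(3.0 / float(fan_in))
        std = gain / math.sqrt(float(fan_in))
        return limit, std

    relu_gain = math.sqrt(2.0)            # calculate_gain('relu')
    print(kaiming_bounds(64, relu_gain))  # approx (0.306, 0.177)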
The resulting shape of the output feature map - will be (B, C, factor * H, factor * W). Note that the learning rate and the - weight decay are set to 0 in order to keep coefficient values of bilinear - interpolation unchanged during training. +def _global_bias_initializer(): """ - - def __init__(self): - """Constructor for BilinearInitializer.""" - super().__init__() - - def forward(self, var, block=None): - """Initialize the input tensor with Bilinear initialization. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - if not isinstance(var, framework.Variable): - raise ValueError("var must be framework.Variable.") - - if not isinstance(block, framework.Block): - raise ValueError("block must be framework.Block.") - - shape = var.shape - if len(shape) != 4: - raise ValueError("the length of shape must be 4.") - if shape[2] != shape[3]: - raise ValueError("shape[2] must be equal to shape[3].") - - weight = np.zeros(np.prod(var.shape), dtype='float32') - size = shape[3] - # factor - f = np.ceil(size / 2.0) - # center - c = (2 * f - 1 - f % 2) / (2.0 * f) - for i in range(np.prod(shape)): - x = i % size - y = (i / size) % size - weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) - weight = np.reshape(weight, shape) - - # to be compatible of fp16 initalizers - if var.dtype in [ - VarDesc.VarType.FP16, - VarDesc.VarType.BF16, - VarDesc.VarType.FP64, - ]: - out_dtype = VarDesc.VarType.FP32 - out_var = block.create_var( - name=unique_name.generate( - ".".join(['bilinear_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_dtype = var.dtype - out_var = var - - if out_dtype == VarDesc.VarType.FP32: - value_name = "fp32_values" - values = [float(v) for v in weight.flat] - else: - raise TypeError("Unsupported dtype %s", var.dtype) - - if np.prod(shape) > 1024 * 1024: - raise ValueError("The size of input is too big. ") - - if in_dygraph_mode(): - _C_ops.assign_value_( - out_var, - list(shape), - out_dtype, - values, - _current_expected_place(), - ) - if var.dtype in [ - VarDesc.VarType.FP16, - VarDesc.VarType.BF16, - VarDesc.VarType.FP64, - ]: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - op = block.append_op( - type='assign_value', - outputs={'Out': [out_var]}, - attrs={ - 'dtype': out_dtype, - 'shape': list(shape), - value_name: values, - }, - ) - - if var.dtype in [ - VarDesc.VarType.FP16, - VarDesc.VarType.BF16, - VarDesc.VarType.FP64, - ]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op - - -class NumpyArrayInitializer(Initializer): - """Init an parameter with an numpy array - This op initialize the variable by numpy array. - - Args: - value (numpy): numpy array to initialize the variable - - Returns: - A Tensor variable initialized by numpy. - - Examples: - .. 
code-block:: python - - import paddle - import paddle.fluid as fluid - import numpy - paddle.enable_static() - x = fluid.data(name="x", shape=[2, 1], dtype='float32') - fc = paddle.static.nn.fc(x, size=10, - weight_attr=fluid.initializer.NumpyArrayInitializer(numpy.array([1,2]))) + Return the global weight initializer, The user doesn't need to use it. """ - - def __init__(self, value): - import numpy - - assert isinstance(value, numpy.ndarray) - super().__init__() - self._value = value - - def forward(self, var, block=None): - """Initialize the input tensor with Numpy array. - - Args: - var(Tensor): Tensor that needs to be initialized. - block(Block, optional): The block in which initialization ops - should be added. Used in static graph only, default None. - - Returns: - The initialization op - """ - block = self._check_block(block) - - assert isinstance(var, framework.Variable) - assert isinstance(block, framework.Block) - - # to be compatible of fp16 initalizers - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - out_dtype = VarDesc.VarType.FP32 - np_value = self._value.astype("float32") - out_var = block.create_var( - name=unique_name.generate( - ".".join(['numpy_array_init', var.name, 'tmp']) - ), - shape=var.shape, - dtype=out_dtype, - type=VarDesc.VarType.LOD_TENSOR, - persistable=False, - ) - else: - out_var = var - out_dtype = var.dtype - np_value = self._value - - if out_dtype == VarDesc.VarType.FP32: - value_name = "fp32_values" - values = [float(v) for v in np_value.flat] - elif out_dtype == VarDesc.VarType.INT32: - value_name = "int32_values" - values = [int(v) for v in np_value.flat] - else: - raise ValueError("Unsupported dtype %s", self._value.dtype) - if self._value.size > 1024 * 1024 * 1024: - raise ValueError( - "The size of input is too big. Please consider " - "saving it to file and 'load_op' to load it" - ) - - if in_dygraph_mode(): - _C_ops.assign_value_( - out_var, - list(self._value.shape), - out_dtype, - values, - _current_expected_place(), - ) - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - var_tmp = _C_ops.cast(out_var, var.dtype) - var_tmp._share_underline_tensor_to(var) - else: - out_var._share_underline_tensor_to(var) - return None - else: - op = block.append_op( - type='assign_value', - outputs={'Out': out_var}, - attrs={ - 'dtype': out_dtype, - 'shape': list(self._value.shape), - value_name: values, - }, - stop_gradient=True, - ) - - if var.dtype in [VarDesc.VarType.FP16, VarDesc.VarType.BF16]: - block.append_op( - type="cast", - inputs={"X": out_var}, - outputs={"Out": var}, - attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, - ) - - var.op = op - return op + return _global_bias_initializer_ def set_global_initializer(weight_init, bias_init=None): @@ -1218,7 +103,7 @@ def set_global_initializer(weight_init, bias_init=None): check_type( weight_init, 'weight_init', - (Initializer, type(None)), + (paddle.nn.initializer.Initializer, type(None)), 'set_global_initializer', ) global _global_weight_initializer_ @@ -1227,93 +112,8 @@ def set_global_initializer(weight_init, bias_init=None): check_type( bias_init, 'bias_init', - (Initializer, type(None)), + (paddle.nn.initializer.Initializer, type(None)), 'set_global_initializer', ) global _global_bias_initializer_ _global_bias_initializer_ = bias_init - - -def _global_weight_initializer(): - """ - Return the global weight initializer, The user doesn't need to use it. 
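The numpy-array initializer removed here is replaced throughout the test updates below by paddle.nn.initializer.Assign, which takes the array directly. A short usage sketch (the array shape and layer sizes are illustrative):

    import numpy as np
    import paddle

    # Assign plays the role of the removed NumpyArrayInitializer.
    arr = np.random.normal(0, 0.02, size=(8, 4)).astype('float32')
    w_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(arr))
    linear = paddle.nn.Linear(8, 4, weight_attr=w_attr)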
- """ - return _global_weight_initializer_ - - -def _global_bias_initializer(): - """ - Return the global weight initializer, The user doesn't need to use it. - """ - return _global_bias_initializer_ - - -def calculate_gain(nonlinearity, param=None): - """ - Get the recommended ``gain`` value of some nonlinearity function. ``gain`` value can be used in some - ``paddle.nn.initializer`` api to adjust the initialization value. - - Args: - nonlinearity(str): name of nonlinearity activation function. If it is a linear function, such as: - `linear/conv1d/conv2d/conv3d/conv1d_transpose/conv2d_transpose/conv3d_transpose` , 1.0 will be returned. - param(bool|int|float, optional): optional parameter for somme nonlinearity function. Now, it only applies to - 'leaky_relu'. Default: None, it will be calculated as 0.01 in the formula. - - Returns: - A float value, which is the recommended gain for this nonlinearity function. - - Examples: - .. code-block:: python - - import paddle - gain = paddle.nn.initializer.calculate_gain('tanh') # 5.0 / 3 - gain = paddle.nn.initializer.calculate_gain('leaky_relu', param=1.0) # 1.0 = math.sqrt(2.0 / (1+param^2)) - initializer = paddle.nn.initializer.Orthogonal(gain) - - """ - if param is None: - param = 0.01 - else: - assert isinstance(param, (bool, int, float)) - param = float(param) - recommended_gain = { - 'sigmoid': 1, - 'linear': 1, - 'conv1d': 1, - 'conv2d': 1, - 'conv3d': 1, - 'conv1d_transpose': 1, - 'conv2d_transpose': 1, - 'conv3d_transpose': 1, - 'tanh': 5.0 / 3, - 'relu': math.sqrt(2.0), - 'leaky_relu': math.sqrt(2.0 / (1 + param**2)), - 'selu': 3.0 / 4, - } - if nonlinearity in recommended_gain.keys(): - return recommended_gain[nonlinearity] - else: - raise ValueError( - "nonlinearity function {} is not suppported now.".format( - nonlinearity - ) - ) - - -# We short the class name, since users will use the initializer with the package -# name. The sample code: -# -# import paddle -# import paddle.fluid as fluid -# -# hidden = paddle.static.nn.fc(..., -# weight_attr=ParamAttr(fluid.initializer.Xavier())) -# -# It is no need to add an `Initializer` as the class suffix -Constant = ConstantInitializer -Uniform = UniformInitializer -Normal = NormalInitializer -TruncatedNormal = TruncatedNormalInitializer -Xavier = XavierInitializer -MSRA = MSRAInitializer -Bilinear = BilinearInitializer diff --git a/python/paddle/fluid/install_check.py b/python/paddle/fluid/install_check.py index 241dd71e200ab..ce93a25ccef9a 100644 --- a/python/paddle/fluid/install_check.py +++ b/python/paddle/fluid/install_check.py @@ -22,7 +22,6 @@ cpu_places, ) from .param_attr import ParamAttr -from .initializer import Constant from . import layers from . import backward from .dygraph import Layer @@ -42,7 +41,9 @@ def __init__(self, input_size): self._linear1 = paddle.nn.Linear( input_size, 3, - weight_attr=ParamAttr(initializer=Constant(value=0.1)), + weight_attr=ParamAttr( + initializer=paddle.nn.initializer.Constant(value=0.1) + ), ) def forward(self, inputs): diff --git a/python/paddle/fluid/layer_helper.py b/python/paddle/fluid/layer_helper.py index 9c3de1ba49862..0342017822cfd 100644 --- a/python/paddle/fluid/layer_helper.py +++ b/python/paddle/fluid/layer_helper.py @@ -13,7 +13,7 @@ # limitations under the License. import copy - +import paddle from .framework import ( Parameter, dtype_is_floating, @@ -22,7 +22,6 @@ _global_flags, ) from . import unique_name -from paddle.fluid.initializer import Constant, Xavier from .param_attr import ParamAttr from . 
import core @@ -178,10 +177,10 @@ def append_activation(self, input_var): # TODO (jiabin): should we remove this since it has never be used def _get_default_initializer(self, dtype): if dtype is None or dtype_is_floating(dtype) is True: - return Xavier() + return paddle.nn.initializer.XavierUniform() else: # For integer and boolean types, initialize with all zeros - return Constant() + return paddle.nn.initializer.Constant() # TODO (jiabin): reconstruct this in LayerObjHelper and avoid dependency of kwargs def is_instance(self, param_name, cls): diff --git a/python/paddle/fluid/layers/io.py b/python/paddle/fluid/layers/io.py index 994fc98038086..eb4d227f914ff 100644 --- a/python/paddle/fluid/layers/io.py +++ b/python/paddle/fluid/layers/io.py @@ -92,7 +92,7 @@ class ListenAndServ: shape=[32, 32], dtype='float32', name="X") - fluid.initializer.Constant(value=1.0)(x, main.global_block()) + paddle.nn.initializer.Constant(value=1.0)(x, main.global_block()) paddle.scale(x=x, scale=10.0, out=out_var) exe = fluid.Executor(place) diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py index fa0f49d01b997..1dd819df41168 100644 --- a/python/paddle/fluid/layers/nn.py +++ b/python/paddle/fluid/layers/nn.py @@ -22,7 +22,6 @@ import paddle from ..layer_helper import LayerHelper -from ..initializer import Normal, Constant from ..framework import ( Variable, OpProtoHolder, @@ -240,7 +239,7 @@ def embedding( w_param_attrs = fluid.ParamAttr( name="emb_weight", learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer(weight_data), + initializer=paddle.nn.initializer.Assign(weight_data), trainable=True) emb_2 = fluid.layers.embedding(input=data, size=(128, 100), param_attr=w_param_attrs, dtype='float32') """ @@ -673,7 +672,10 @@ def autoincreased_step_counter(counter_name=None, begin=1, step=1): ) if is_new_var: helper.set_variable_initializer( - counter, initializer=Constant(value=begin - 1, force_cpu=True) + counter, + initializer=paddle.nn.initializer.ConstantInitializer( + value=begin - 1, force_cpu=True + ), ) helper.main_program.global_block()._prepend_op( type='increment', diff --git a/python/paddle/fluid/metrics.py b/python/paddle/fluid/metrics.py index 5d702b8e521bf..b04611db66866 100644 --- a/python/paddle/fluid/metrics.py +++ b/python/paddle/fluid/metrics.py @@ -19,7 +19,6 @@ import copy from .layer_helper import LayerHelper -from .initializer import Constant from . import unique_name from .framework import Program, Variable, program_guard from . 
import layers diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index c5aa80c749027..d7ab914f80ffc 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -39,7 +39,6 @@ _get_no_grad_set_name, ) from .framework import program_guard -from .initializer import Constant from .layer_helper import LayerHelper from .dygraph import base as imperative_base from .dygraph import no_grad @@ -397,7 +396,8 @@ def _create_global_learning_rate(self): lr_value = float(self._learning_rate()) self.helper.set_variable_initializer( - lr_var, initializer=Constant(value=lr_value) + lr_var, + initializer=paddle.nn.initializer.Constant(value=lr_value), ) return @@ -713,7 +713,10 @@ def _add_accumulator( device = self._get_device_for_param(param.name) with device_guard(device): self.helper.set_variable_initializer( - var, initializer=Constant(value=float(fill_value)) + var, + initializer=paddle.nn.initializer.Constant( + value=float(fill_value) + ), ) if in_dygraph_mode(): @@ -774,7 +777,10 @@ def _add_global_accumulator( device = 'cpu' with device_guard(device): self.helper.set_variable_initializer( - var, initializer=Constant(value=float(fill_value)) + var, + initializer=paddle.nn.initializer.Constant( + value=float(fill_value) + ), ) if in_dygraph_mode(): @@ -1225,10 +1231,12 @@ def flatten_param_grads(self, params_grads): # NOTE(zhiqiu): the initializer should be set after coalesce_tensor op, # so the shape of flatten_param and flatten_grad will be inferred. self.helper.set_variable_initializer( - flatten_param, initializer=Constant(0.0) + flatten_param, + initializer=paddle.nn.initializer.Constant(0.0), ) self.helper.set_variable_initializer( - flatten_grad, initializer=Constant(0.0) + flatten_grad, + initializer=paddle.nn.initializer.Constant(0.0), ) return [(flatten_param, flatten_grad)] diff --git a/python/paddle/fluid/param_attr.py b/python/paddle/fluid/param_attr.py index f251a654a992b..6fdadd7904bd4 100644 --- a/python/paddle/fluid/param_attr.py +++ b/python/paddle/fluid/param_attr.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from .initializer import Initializer, Xavier, Constant +import paddle from .regularizer import WeightDecayRegularizer from paddle.fluid.data_feeder import check_type @@ -88,7 +88,10 @@ def __init__( check_type(do_model_average, "do_model_average", (bool), "ParamAttr") check_type(need_clip, "need_clip", (bool), "ParamAttr") check_type( - initializer, "initializer", (Initializer, type(None)), "ParamAttr" + initializer, + "initializer", + (paddle.nn.initializer.Initializer, type(None)), + "ParamAttr", ) check_type( regularizer, @@ -139,7 +142,7 @@ def _set_default_param_initializer(self): Returns: None. """ - self._set_default_initializer(Xavier()) + self._set_default_initializer(paddle.nn.initializer.XavierUniform()) def _set_default_bias_initializer(self): """ @@ -151,7 +154,7 @@ def _set_default_bias_initializer(self): Returns: None. 
""" - self._set_default_initializer(Constant(0.0)) + self._set_default_initializer(paddle.nn.initializer.Constant(0.0)) @staticmethod def _to_attr(arg): @@ -177,7 +180,7 @@ def _to_attr(arg): return arg elif isinstance(arg, str): return ParamAttr(name=arg) - elif isinstance(arg, Initializer): + elif isinstance(arg, paddle.nn.initializer.Initializer): return ParamAttr(initializer=arg) elif isinstance(arg, WeightDecayRegularizer): return ParamAttr(regularizer=arg) diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py index 593d79998a2d1..80ebe78963287 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_autoconvert.py @@ -36,7 +36,6 @@ save_distributed_checkpoint, ) from paddle.distributed.fleet import auto -from paddle.fluid.initializer import NumpyArrayInitializer paddle.enable_static() _global_parallel_strategy = None @@ -55,8 +54,12 @@ def __init__( np.random.seed(2021) arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr1 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) - weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) - weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) + weight_attr0 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr0) + ) + weight_attr1 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr1) + ) bias_attr = None self.linear0 = nn.Linear( d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py index 875536af57a35..1cb2a3e9bf1fe 100644 --- a/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py +++ b/python/paddle/fluid/tests/unittests/auto_parallel_save_load.py @@ -30,7 +30,6 @@ save_distributed_checkpoint, ) from paddle.distributed.fleet import auto -from paddle.fluid.initializer import NumpyArrayInitializer paddle.enable_static() _global_parallel_strategy = None @@ -48,7 +47,9 @@ def __init__( dim_feedforward = intermediate_size np.random.seed(2021) arr = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) - weight_attr = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr)) + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr) + ) bias_attr = None self.linear0 = nn.Linear( diff --git a/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py index 59eee4cfeee2f..c1ed3175e100e 100644 --- a/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/collective/column_parallel_linear_api.py @@ -38,15 +38,11 @@ def get_model(self, main_prog, startup_program, rank): paddle.distributed.broadcast(data, src=0) if rank == 0: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( - np_array[:, 0:8] - ), + initializer=paddle.nn.initializer.Assign(np_array[:, 0:8]), ) else: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( - np_array[:, 8:16] - ), + initializer=paddle.nn.initializer.Assign(np_array[:, 8:16]), ) linear_out = paddle.distributed.split( diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py 
b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py index a4d20264e7301..ca4ad63066ee8 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/parallel_dygraph_transformer.py @@ -242,10 +242,10 @@ def __init__(self, d_model, process_cmd, shape_len=None): self._layer_norm = paddle.nn.LayerNorm( normalized_shape=d_model, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), ) @@ -513,7 +513,9 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), + initializer=paddle.nn.initializer.Normal( + 0.0, src_emb_dim**-0.5 + ), ), ) @@ -527,7 +529,7 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name=pos_enc_param_name, - initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), + initializer=paddle.nn.initializer.Assign(pos_inp), trainable=False, ), ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py index 2eb0951756a59..1fff26b20b191 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) # To cover @RENAMED@GRADIENT @@ -74,7 +74,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) predict += predict2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py index e094d932d33e4..a1d8688fd41c3 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_multi_device.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) # To cover @RENAMED@GRADIENT @@ -74,7 +74,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) predict += predict2 diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py index 7e442f1914b2c..74c3c1a7269e4 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/pipeline_mnist_one_device.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -64,7 +64,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py index 7f247abc6d9cd..035a174775bd5 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_col.py @@ -33,11 +33,9 @@ def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py index b63e2065f431b..a480993e8ec50 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_by_row.py @@ -33,11 +33,9 @@ def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr @@ -65,7 +63,7 @@ def create_model(data, rank): data, size=OUT_SIZE, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(np_weight) + initializer=paddle.nn.initializer.Assign(np_weight) ), bias_attr=bias_attr, ) diff --git a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py 
b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py index 914ee0852a043..689b068f025f2 100644 --- a/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py +++ b/python/paddle/fluid/tests/unittests/collective/fleet/static_model_parallel_embedding.py @@ -44,9 +44,7 @@ def create_model(data, rank): axis=0, num_partitions=MODEL_PARALLEL_SIZE, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - np_weight_part - ) + initializer=paddle.nn.initializer.Assign(np_weight_part) ), bias_attr=False, ) @@ -55,7 +53,7 @@ def create_model(data, rank): data, size=OUT_SIZE, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(np_weight) + initializer=paddle.nn.initializer.Assign(np_weight) ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py index de839e2c5eea4..ec864a1e40f9e 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_dpppmp.py @@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024): else: step = shape[0] // mp.nranks _w = w[mp.rank * step : mp.rank * step + step, :] - return paddle.fluid.initializer.NumpyArrayInitializer(_w) + return paddle.nn.initializer.Assign(_w) class Criterion(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py index 160ed85cc9424..3f9527209134f 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_fp16.py @@ -35,7 +35,7 @@ def weight_init(mp, shape, col=True, seed=1024): else: step = shape[0] // mp.nranks _w = w[mp.rank * step : mp.rank * step + step, :] - return paddle.fluid.initializer.NumpyArrayInitializer(_w) + return paddle.nn.initializer.Assign(_w) class Criterion(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py index 31daee3262291..af2b1b616d132 100644 --- a/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py +++ b/python/paddle/fluid/tests/unittests/collective/multinode/dygraph_hybrid_recompute.py @@ -36,7 +36,7 @@ def weight_init(mp, shape, col=True, seed=1024): else: step = shape[0] // mp.nranks _w = w[mp.rank * step : mp.rank * step + step, :] - return paddle.fluid.initializer.NumpyArrayInitializer(_w) + return paddle.nn.initializer.Assign(_w) class Criterion(nn.Layer): diff --git a/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py b/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py index 9dd3bade93aee..f89643e7bff5e 100644 --- a/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py +++ b/python/paddle/fluid/tests/unittests/collective/parallel_embedding_api.py @@ -42,13 +42,13 @@ def get_model(self, main_prog, startup_program, rank): per_part_size = size[0] // 2 if rank == 0: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[0:per_part_size, :] ), ) else: param_attr = 
paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[per_part_size : size[0], :] ), ) diff --git a/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py b/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py index afbb3f3334364..6c3817da5ae90 100644 --- a/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py +++ b/python/paddle/fluid/tests/unittests/collective/row_parallel_linear_api.py @@ -39,13 +39,13 @@ def get_model(self, main_prog, startup_program, rank): data = paddle.split(data, 2, axis=1)[rank] if rank == 0: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[0:500, :] ), ) else: param_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( np_array[500:1000, :] ), ) diff --git a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py index 30bcea4cb5cb2..044c6d78cac10 100644 --- a/python/paddle/fluid/tests/unittests/dist_allreduce_op.py +++ b/python/paddle/fluid/tests/unittests/dist_allreduce_op.py @@ -38,7 +38,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -49,7 +49,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -63,7 +63,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_ctr.py b/python/paddle/fluid/tests/unittests/dist_ctr.py index deb4cb921c1f3..dc9bd59df52fd 100644 --- a/python/paddle/fluid/tests/unittests/dist_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_ctr.py @@ -60,7 +60,7 @@ def get_model(self, batch_size=2): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=IS_SPARSE, ) @@ -74,7 +74,7 @@ def get_model(self, batch_size=2): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -87,7 +87,7 @@ def get_model(self, batch_size=2): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=IS_SPARSE, ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py index 8e9341f9c5b1a..527ba34bae614 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_ctr.py @@ -107,7 +107,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - 
initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, padding_idx=0, @@ -122,7 +122,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -135,7 +135,7 @@ def net(self, args, is_train=True, batch_size=4, lr=0.01): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, padding_idx=0, diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py index 3e71a1cb6054d..de0f32e3110a5 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_heter_pipeline_ctr.py @@ -78,7 +78,7 @@ def net(self, args, batch_size=4, lr=0.01): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -94,7 +94,7 @@ def net(self, args, batch_size=4, lr=0.01): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -109,7 +109,7 @@ def net(self, args, batch_size=4, lr=0.01): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py index dc0a7022b3434..453b715b50394 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer.py @@ -40,7 +40,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -51,7 +51,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py index ac1a4c632fd49..25f8663c7406a 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_raw_program_optimizer_fuse_allreduce.py @@ -40,7 +40,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + 
initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -51,7 +51,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -65,7 +65,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py index b673bfeae16e2..bd4fc90fd244f 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_simnet_bow.py @@ -124,7 +124,8 @@ def train_network( is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__emb__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__emb__", ), is_sparse=is_sparse, ) @@ -137,7 +138,7 @@ def train_network( x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -149,7 +150,7 @@ def train_network( is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -164,7 +165,8 @@ def train_network( x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__fc__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__fc__", ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) @@ -175,7 +177,8 @@ def train_network( is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__emb__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__emb__", ), is_sparse=is_sparse, ) @@ -188,7 +191,8 @@ def train_network( x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), name="__fc__" + initializer=paddle.nn.initializer.Constant(value=0.01), + name="__fc__", ), bias_attr=fluid.ParamAttr(name="__fc_b__"), ) diff --git a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py index a9a2d7be0ba41..1780e7dfe2dde 100644 --- a/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/dist_fleet_sparse_embedding_ctr.py @@ -86,11 +86,11 @@ def net(self, args, batch_size=4, lr=0.01): inference = bool(int(os.getenv("INFERENCE", "0"))) if initializer == 0: - init = fluid.initializer.Constant(value=0.01) + init = paddle.nn.initializer.Constant(value=0.01) elif initializer == 1: - init = fluid.initializer.Uniform() + init = paddle.nn.initializer.Uniform() elif initializer == 2: - init = fluid.initializer.Normal() + init = paddle.nn.initializer.Normal() else: raise ValueError("error initializer code: {}".format(initializer)) @@ -113,7 +113,7 @@ def net(self, args, batch_size=4, lr=0.01): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -127,7 +127,7 @@ def net(self, args, batch_size=4, lr=0.01): entry=entry, param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), ) diff --git a/python/paddle/fluid/tests/unittests/dist_mnist.py b/python/paddle/fluid/tests/unittests/dist_mnist.py index 87eb22dceac1c..30c1130e33c85 100644 --- a/python/paddle/fluid/tests/unittests/dist_mnist.py +++ b/python/paddle/fluid/tests/unittests/dist_mnist.py @@ -39,7 +39,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) conv_pool_2 = fluid.nets.simple_img_conv_pool( @@ -50,7 +50,7 @@ def cnn_model(data): pool_stride=2, act="relu", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -64,7 +64,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/dist_se_resnext.py b/python/paddle/fluid/tests/unittests/dist_se_resnext.py index db3318d67d88a..6482ac53b09d8 100644 --- a/python/paddle/fluid/tests/unittests/dist_se_resnext.py +++ b/python/paddle/fluid/tests/unittests/dist_se_resnext.py @@ -121,7 +121,7 @@ def net(self, input, class_dim=1000): size=class_dim, activation='softmax', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), ) return out @@ -174,7 +174,7 @@ def conv_bn_layer( act=None, # avoid pserver CPU init differs from GPU param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), bias_attr=False, ) @@ -187,7 +187,7 @@ def squeeze_excitation(self, input, num_channels, reduction_ratio): x=pool, size=num_channels // reduction_ratio, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), activation='relu', ) @@ -196,7 +196,7 @@ def squeeze_excitation(self, input, num_channels, reduction_ratio): x=squeeze, size=num_channels, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.05) + initializer=paddle.nn.initializer.Constant(value=0.05) ), activation='sigmoid', ) diff --git a/python/paddle/fluid/tests/unittests/dist_text_classification.py b/python/paddle/fluid/tests/unittests/dist_text_classification.py index a287bd8a6c878..d29997ef8a08e 100644 --- a/python/paddle/fluid/tests/unittests/dist_text_classification.py +++ b/python/paddle/fluid/tests/unittests/dist_text_classification.py @@ -59,7 +59,7 @@ def conv_net( size=[dict_dim, emb_dim], is_sparse=False, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -70,7 +70,7 @@ def conv_net( act="tanh", pool_type="max", param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -78,7 +78,7 @@ def conv_net( x=[conv_3], size=fc0_dim, 
weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) @@ -87,7 +87,7 @@ def conv_net( size=class_dim, activation="softmax", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), ) diff --git a/python/paddle/fluid/tests/unittests/dist_transformer.py b/python/paddle/fluid/tests/unittests/dist_transformer.py index 5b0343bd81c24..e9ce91c197c1a 100644 --- a/python/paddle/fluid/tests/unittests/dist_transformer.py +++ b/python/paddle/fluid/tests/unittests/dist_transformer.py @@ -29,7 +29,9 @@ import paddle.fluid.layers as layers import paddle.nn.functional as F -const_para_attr = fluid.ParamAttr(initializer=fluid.initializer.Constant(0.001)) +const_para_attr = fluid.ParamAttr( + initializer=paddle.nn.initializer.Constant(0.001) +) const_bias_attr = const_para_attr # Fix seed for test @@ -1253,8 +1255,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout_rate=0.0): out = layers.layer_norm( out, begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.initializer.Constant(1.0), - bias_attr=fluid.initializer.Constant(0.0), + param_attr=paddle.nn.initializer.Constant(1.0), + bias_attr=paddle.nn.initializer.Constant(0.0), ) elif cmd == "d": # add dropout if dropout_rate: @@ -1292,7 +1294,7 @@ def prepare_encoder( size=[src_vocab_size, src_emb_dim], param_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.ConstantInitializer(0.001), + initializer=paddle.nn.initializer.Constant(0.001), ), ) else: @@ -1301,7 +1303,9 @@ def prepare_encoder( size=[src_vocab_size, src_emb_dim], param_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), + initializer=paddle.nn.initializer.Normal( + 0.0, src_emb_dim**-0.5 + ), ), ) @@ -1312,7 +1316,7 @@ def prepare_encoder( param_attr=fluid.ParamAttr( name=pos_enc_param_name, trainable=False, - initializer=fluid.initializer.ConstantInitializer(0.001), + initializer=paddle.nn.initializer.Constant(0.001), ), ) src_pos_enc.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/dist_word2vec.py b/python/paddle/fluid/tests/unittests/dist_word2vec.py index e10131667c745..f5de20385f26b 100644 --- a/python/paddle/fluid/tests/unittests/dist_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dist_word2vec.py @@ -41,7 +41,7 @@ def __network__(words): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) embed_second = fluid.layers.embedding( @@ -51,7 +51,7 @@ def __network__(words): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) embed_third = fluid.layers.embedding( @@ -61,7 +61,7 @@ def __network__(words): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) embed_forth = fluid.layers.embedding( @@ -71,7 +71,7 @@ def __network__(words): is_sparse=IS_SPARSE, param_attr=fluid.ParamAttr( name='shared_w', - initializer=fluid.initializer.Constant(value=0.1), + initializer=paddle.nn.initializer.Constant(value=0.1), ), ) @@ -84,7 +84,7 @@ def __network__(words): size=HIDDEN_SIZE, activation='sigmoid', 
weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) predict_word = paddle.static.nn.fc( @@ -92,7 +92,7 @@ def __network__(words): size=dict_size, activation='softmax', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.1) + initializer=paddle.nn.initializer.Constant(value=0.1) ), ) cost = paddle.nn.functional.cross_entropy( diff --git a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py index a99b56974a8ae..9b9d45db082c0 100644 --- a/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py +++ b/python/paddle/fluid/tests/unittests/distributed_passes/test_auto_parallel_gradient_merge_pass.py @@ -26,7 +26,6 @@ import paddle.static as static import paddle.utils as utils from paddle.distributed.fleet import auto -from paddle.fluid.initializer import NumpyArrayInitializer logging.getLogger().setLevel(logging.INFO) paddle.enable_static() @@ -42,8 +41,12 @@ def __init__( np.random.seed(2021) arr0 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) - weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) - weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) + weight_attr0 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr0) + ) + weight_attr1 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr1) + ) bias_attr = None self.linear0 = nn.Linear( d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py index 1ea69dfbb1569..8629a3e185297 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/bert_dygraph_model.py @@ -203,8 +203,8 @@ def __init__(self, config, return_pooled_out=True, use_fp16=False): self._sent_emb_name = "sent_embedding" self._dtype = "float16" if use_fp16 else "float32" - self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range'] + self._param_initializer = paddle.nn.initializer.TruncatedNormal( + std=config['initializer_range'] ) paddle.set_default_dtype(self._dtype) self._src_emb = paddle.nn.Embedding( @@ -317,8 +317,8 @@ def __init__( self._prepostprocess_dropout = config['hidden_dropout_prob'] self._word_emb_name = "word_embedding" - self._param_initializer = fluid.initializer.TruncatedNormal( - scale=config['initializer_range'] + self._param_initializer = paddle.nn.initializer.TruncatedNormal( + std=config['initializer_range'] ) self._weight_sharing = weight_sharing self.use_fp16 = use_fp16 @@ -343,7 +343,7 @@ def __init__( self.mask_lm_out_bias_attr = fluid.ParamAttr( name="mask_lm_out_fc.b_0", - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) if not self._weight_sharing: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py index 783dfff262e8f..1e7950c29e222 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py +++ 
b/python/paddle/fluid/tests/unittests/dygraph_to_static/darknet.py @@ -41,7 +41,7 @@ def __init__( padding=padding, groups=groups, weight_attr=ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02) + initializer=paddle.nn.initializer.Normal(0.0, 0.02) ), bias_attr=False, ) @@ -49,11 +49,11 @@ def __init__( num_channels=ch_out, is_test=is_test, param_attr=ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02), + initializer=paddle.nn.initializer.Normal(0.0, 0.02), regularizer=L2Decay(0.0), ), bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), regularizer=L2Decay(0.0), ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py index 99d90a7f1eaa5..88581c023f3d9 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/seq2seq_dygraph_model.py @@ -26,10 +26,8 @@ INF = 1.0 * 1e5 alpha = 0.6 -uniform_initializer = lambda x: fluid.initializer.UniformInitializer( - low=-x, high=x -) -zero_constant = fluid.initializer.Constant(0.0) +uniform_initializer = lambda x: paddle.nn.initializer.Uniform(low=-x, high=x) +zero_constant = paddle.nn.initializer.Constant(0.0) class BasicLSTMUnit(Layer): diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py index c76b4dba9cb8e..7f93c83b91433 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model.py @@ -17,9 +17,9 @@ import paddle import paddle.fluid as fluid import paddle.fluid.param_attr as attr +from paddle.common_ops_import import Variable from paddle.fluid.dygraph import Layer from paddle.jit.api import to_static -from paddle.static import Variable class EmbeddingLayer: @@ -48,7 +48,8 @@ def ops(self): sparse=True, padding_idx=self.padding_idx, weight_attr=attr.ParamAttr( - name=self.name, initializer=fluid.initializer.Xavier() + name=self.name, + initializer=paddle.nn.initializer.XavierUniform(), ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py index 99fe330c69241..d8c5956357827 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/simnet_dygraph_model_v2.py @@ -15,7 +15,7 @@ from functools import reduce import paddle -from paddle.static import Variable +from paddle.common_ops_import import Variable class EmbeddingLayer: diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py index 1f589b8d6fc8b..e1aaeabd48b8f 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_basic_api_transformation.py @@ -118,10 +118,10 @@ def dyfunc_BilinearTensorProduct(layer1, layer2): 4, 1000, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) @@ -138,10 +138,10 @@ def dyfunc_Conv2D(input): out_channels=2, kernel_size=3, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) res = conv2d(input) @@ -170,10 +170,10 @@ def dyfunc_Conv2DTranspose(input): 12, 12, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) ret = conv2dTranspose(input) @@ -222,7 +222,7 @@ def dyfunc_Pool2D(input): def dyfunc_Prelu(input): prelu0 = paddle.nn.PReLU( weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), ) res = prelu0(input) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py index a6a9d7281208d..55a93f769e25c 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_bmn.py @@ -115,11 +115,11 @@ def __init__( k = 1.0 / math.sqrt(fan_in) param_attr = ParamAttr( name=prefix + "_w", - initializer=fluid.initializer.Uniform(low=-k, high=k), + initializer=paddle.nn.initializer.Uniform(low=-k, high=k), ) bias_attr = ParamAttr( name=prefix + "_b", - initializer=fluid.initializer.Uniform(low=-k, high=k), + initializer=paddle.nn.initializer.Uniform(low=-k, high=k), ) self._conv2d = paddle.nn.Conv2D( diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py index c14631c35b6b4..59df33e5aa9e7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_convert_call.py @@ -125,10 +125,10 @@ def __init__(self): out_channels=2, kernel_size=3, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=paddle.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py index 0701750e3011a..b3556f0810197 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_cycle_gan.py @@ -352,7 +352,7 @@ def __init__( con_bias_attr = False else: con_bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) self.conv = paddle.nn.Conv2D( @@ -362,9 +362,7 @@ def __init__( stride=stride, padding=padding, weight_attr=paddle.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=stddev - ) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev) ), bias_attr=con_bias_attr, ) @@ -378,10 +376,10 @@ def __init__( use_global_stats=True, # set True to use deterministic algorithm 
num_channels=num_filters, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer(1.0, 0.02) + initializer=paddle.nn.initializer.Normal(1.0, 0.02) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), trainable_statistics=True, ) @@ -421,7 +419,7 @@ def __init__( de_bias_attr = False else: de_bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) self._deconv = paddle.nn.Conv2DTranspose( @@ -431,9 +429,7 @@ def __init__( stride=stride, padding=padding, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=stddev - ) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=stddev) ), bias_attr=de_bias_attr, ) @@ -444,10 +440,10 @@ def __init__( use_global_stats=True, # set True to use deterministic algorithm num_channels=num_filters, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NormalInitializer(1.0, 0.02) + initializer=paddle.nn.initializer.Normal(1.0, 0.02) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), trainable_statistics=True, ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py index 94e1dba49313a..0d108b40406ba 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_lac.py @@ -98,7 +98,7 @@ def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): in_features=input_dim, out_features=grnn_hidden_dim * 3, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -111,7 +111,7 @@ def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): size=grnn_hidden_dim, h_0=h_0, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -124,7 +124,7 @@ def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): in_features=input_dim, out_features=grnn_hidden_dim * 3, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -138,7 +138,7 @@ def __init__(self, input_dim, grnn_hidden_dim, init_bound, h_0=None): is_reverse=True, h_0=h_0, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-init_bound, high=init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( @@ -375,7 +375,7 @@ def __init__(self, args, length=None): weight_attr=fluid.ParamAttr( learning_rate=self.emb_lr, name="word_emb", - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-self.init_bound, high=self.init_bound ), ), @@ -415,7 +415,7 @@ def __init__(self, args, length=None): in_features=self.grnn_hidden_dim * 2, out_features=self.num_labels, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform( + initializer=paddle.nn.initializer.Uniform( low=-self.init_bound, high=self.init_bound ), regularizer=fluid.regularizer.L2DecayRegularizer( diff --git 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py index d708dc1eadfed..72f3dd7c33190 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_mobile_net.py @@ -22,7 +22,6 @@ import paddle import paddle.fluid as fluid -from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr from paddle.jit.api import to_static from paddle.jit.translated_layer import INFER_MODEL_SUFFIX, INFER_PARAMS_SUFFIX @@ -61,7 +60,8 @@ def __init__( padding=padding, groups=num_groups, weight_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "_weights" + initializer=paddle.nn.initializer.KaimingUniform(), + name=self.full_name() + "_weights", ), bias_attr=False, ) @@ -259,7 +259,8 @@ def __init__(self, scale=1.0, class_dim=1000): int(1024 * scale), class_dim, weight_attr=ParamAttr( - initializer=MSRA(), name=self.full_name() + "fc7_weights" + initializer=paddle.nn.initializer.KaimingUniform(), + name=self.full_name() + "fc7_weights", ), bias_attr=ParamAttr(name="fc7_offset"), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py index 53687ca6c1ea5..1099f2dad667a 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_ptb_lm.py @@ -49,26 +49,26 @@ def __init__( for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -158,7 +158,7 @@ def __init__( sparse=False, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -167,7 +167,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -175,7 +175,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py index 911ca2ec9016f..407e11349c2de 100644 --- 
a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_resnet.py @@ -194,7 +194,7 @@ def __init__(self, layers=50, class_dim=102): self.pool2d_avg_output, class_dim, weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py index e01b77af7655b..723a7c742c198 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_se_resnet.py @@ -131,7 +131,7 @@ def __init__(self, num_channels, reduction_ratio): num_channels, num_channels // reduction_ratio, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) stdv = 1.0 / math.sqrt(num_channels / 16.0 * 1.0) @@ -139,7 +139,7 @@ def __init__(self, num_channels, reduction_ratio): num_channels // reduction_ratio, num_channels, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) @@ -316,7 +316,7 @@ def __init__(self, layers=50, class_dim=102): self.pool2d_avg_output, class_dim, weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py index 560132565907e..5eb7cfc1080c7 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/test_word2vec.py @@ -230,7 +230,7 @@ def __init__(self, name_scope, vocab_size, embedding_size, init_scale=0.1): self.embedding_size, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-0.5 / self.embedding_size, high=0.5 / self.embedding_size, ), @@ -242,7 +242,7 @@ def __init__(self, name_scope, vocab_size, embedding_size, init_scale=0.1): self.embedding_size, weight_attr=fluid.ParamAttr( name='embedding_out_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-0.5 / self.embedding_size, high=0.5 / self.embedding_size, ), diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py index 18afc4a4ab9d5..3928c715a6288 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/transformer_dygraph_model.py @@ -62,10 +62,10 @@ def __init__(self, process_cmd, d_model, dropout_rate): paddle.nn.LayerNorm( normalized_shape=d_model, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), ), ) @@ -295,7 +295,7 @@ def __init__(self, vocab_size, emb_dim, bos_idx=0): vocab_size, emb_dim, weight_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Normal(0.0, emb_dim**-0.5) + initializer=paddle.nn.initializer.Normal(0.0, emb_dim**-0.5) ), ) @@ -330,7 +330,7 @@ def __init__( max_length, self.emb_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( position_encoding_init(max_length, self.emb_dim) ), trainable=False, @@ -522,7 +522,7 @@ def __init__( max_length, self.emb_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( position_encoding_init(max_length, self.emb_dim) ), trainable=False, diff --git a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py index 10df42faa2373..dbfc43cfc2432 100644 --- a/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py +++ b/python/paddle/fluid/tests/unittests/dygraph_to_static/yolov3.py @@ -253,10 +253,10 @@ def __init__(self, ch_in, is_train=True, use_random=False): stride=1, padding=0, weight_attr=ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02) + initializer=paddle.nn.initializer.Normal(0.0, 0.02) ), bias_attr=ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), regularizer=L2Decay(0.0), ), ), diff --git a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py index 917beec752d2a..895f71c4858e9 100644 --- a/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py +++ b/python/paddle/fluid/tests/unittests/fleet_heter_ps_training.py @@ -70,7 +70,7 @@ def net(batch_size=4, lr=0.01): size=[dnn_input_dim, dnn_layer_dims[0]], param_attr=fluid.ParamAttr( name="deep_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -86,7 +86,7 @@ def net(batch_size=4, lr=0.01): size=[lr_input_dim, 1], param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) @@ -99,7 +99,7 @@ def net(batch_size=4, lr=0.01): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) diff --git a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py index d48091f6c10c1..1d2b442d2dd05 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/program_config.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/program_config.py @@ -27,7 +27,6 @@ Operator, convert_np_dtype_to_dtype_, ) -from paddle.fluid.initializer import NumpyArrayInitializer from paddle.static.quantization import ( QuantizationFreezePass, QuantizationTransformPass, @@ -305,7 +304,7 @@ def create_fake_model(program_config): shape=tensor_config.shape, type=core.VarDesc.VarType.LOD_TENSOR, name=name, - initializer=NumpyArrayInitializer(tensor_config.data), + initializer=paddle.nn.initializer.Assign(tensor_config.data), ) in_vars = [] for name in sorted(save_var_map.keys()): diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py index 1f32de177e3ee..536f6c4d606d0 100644 --- 
a/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_mkldnn_conv_bias_fuse_pass.py @@ -30,7 +30,7 @@ def setUp(self): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -63,7 +63,7 @@ def setUp(self): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -89,7 +89,7 @@ def setUp(self): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -114,7 +114,7 @@ def setUp(self): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -145,7 +145,7 @@ def setUp(self): name="data", shape=[-1, 3, 100, 100], dtype="float32" ) param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d( @@ -173,7 +173,7 @@ def setUp(self): with fluid.program_guard(self.main_program, self.startup_program): data = fluid.data(name="data", shape=[-1, 3, 5, 5], dtype="float32") param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv_out = paddle.static.nn.conv2d_transpose( diff --git a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py index da9a86725c008..0e7eb56da9133 100644 --- a/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py +++ b/python/paddle/fluid/tests/unittests/ir/inference/test_trt_subgraph_pass.py @@ -180,11 +180,11 @@ def setUp(self): ) param_attr = fluid.ParamAttr( name='instance_norm_w', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name='instance_norm_b', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) out = paddle.static.nn.instance_norm( input=data, param_attr=param_attr, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py index 17672d668d38a..590ebbf63efa5 100644 --- a/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py +++ b/python/paddle/fluid/tests/unittests/mlu/test_batch_norm_op_mlu_v2.py @@ -114,11 +114,11 @@ def compute_v3(x, is_test, trainable_statistics): shape[1], is_test=is_test, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0),
trainable=False, ), trainable_statistics=trainable_statistics, @@ -262,7 +262,7 @@ def test_global_stats(self): net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), use_global_stats=self.use_global_stats, trainable_statistics=self.trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py index 5bf239b5bc77d..646466e9504d4 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_adam_op_npu.py @@ -322,12 +322,12 @@ def _test( label_np = np.random.randint(2, size=(2, 1)).astype('int64') weight_attr1 = paddle.ParamAttr( name="weight1", - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), trainable=True, ) weight_attr2 = paddle.ParamAttr( name="weight2", - initializer=fluid.initializer.Constant(value=2.0), + initializer=paddle.nn.initializer.Constant(value=2.0), trainable=True, ) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) diff --git a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py index 69769bbdc1f08..5369f4d410bda 100644 --- a/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py +++ b/python/paddle/fluid/tests/unittests/npu/test_run_program_op_npu.py @@ -298,7 +298,7 @@ def build_model(self): weight_attr = fluid.ParamAttr( name=self.input_names['Params'][0], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][0]] ), trainable=True, @@ -306,7 +306,7 @@ def build_model(self): bias_attr = fluid.ParamAttr( name=self.input_names['Params'][1], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][1]] ), trainable=True, diff --git a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py index 11aacd02439e9..96b0b734a174c 100644 --- a/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py +++ b/python/paddle/fluid/tests/unittests/parallel_dygraph_sparse_embedding.py @@ -41,7 +41,7 @@ def __init__( self.hidden_size, sparse=is_sparse, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ) ), @@ -50,7 +50,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -58,7 +58,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/simple_nets.py b/python/paddle/fluid/tests/unittests/simple_nets.py index a3ff2b6865744..cf4372818ba1e 100644 --- a/python/paddle/fluid/tests/unittests/simple_nets.py +++ b/python/paddle/fluid/tests/unittests/simple_nets.py @@ -26,7 +26,7 @@ def simple_fc_net_with_inputs(img, 
label, class_num=10): size=100, activation='relu', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc( @@ -53,7 +53,7 @@ def batchnorm_fc_with_inputs(img, label, class_num=10): size=200, activation='relu', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py index d018c52506bff..37048d7cd256b 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_attention.py @@ -25,11 +25,9 @@ def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py index 89cd0453d747b..4fca47635a1de 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_feedforward.py @@ -30,11 +30,9 @@ def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py index 047bd3ae8ad27..9c863d6d3be8b 100644 --- a/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py +++ b/python/paddle/fluid/tests/unittests/static_model_parallel_fused_multi_transformer.py @@ -25,11 +25,9 @@ def get_param_attr(weight, bias): weight_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(weight) - ) - bias_attr = paddle.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(bias) + initializer=paddle.nn.initializer.Assign(weight) ) + bias_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.Assign(bias)) return weight_attr, bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py index d7052c94720a4..2983e5ca1958e 100644 --- a/python/paddle/fluid/tests/unittests/test_adam_op.py +++ b/python/paddle/fluid/tests/unittests/test_adam_op.py @@ -767,12 +767,12 @@ def _test( label_np = np.random.randint(2, size=(2, 1)).astype('int64') weight_attr1 = paddle.ParamAttr( name="weight1", - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), trainable=True, ) weight_attr2 = paddle.ParamAttr( name="weight2", - initializer=fluid.initializer.Constant(value=2.0), + 
initializer=paddle.nn.initializer.Constant(value=2.0), trainable=True, ) clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0) @@ -926,7 +926,7 @@ def test_adam_flatten_param_grads_with_regularizer(self): main = fluid.Program() weight_attr = paddle.ParamAttr( name="weight1", - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), regularizer=fluid.regularizer.L1DecayRegularizer( regularization_coeff=0.1 ), diff --git a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py index 25e4ab9aa8b4a..c15f647a380fe 100644 --- a/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py +++ b/python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py @@ -39,7 +39,6 @@ from paddle.distributed.auto_parallel.reshard import Resharder from paddle.distributed.fleet import auto from paddle.fluid import core -from paddle.fluid.initializer import NumpyArrayInitializer if os.getenv("CUDA_VISIBLE_DEVICES") is not None: os.environ["CUDA_VISIBLE_DEVICES"] = "" @@ -373,10 +372,18 @@ def __init__( arr1 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) arr2 = np.random.normal(0, 0.02, size=(d_model, dim_feedforward)) arr3 = np.random.normal(0, 0.02, size=(dim_feedforward, d_model)) - weight_attr0 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr0)) - weight_attr1 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr1)) - weight_attr2 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr2)) - weight_attr3 = paddle.ParamAttr(initializer=NumpyArrayInitializer(arr3)) + weight_attr0 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr0) + ) + weight_attr1 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr1) + ) + weight_attr2 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr2) + ) + weight_attr3 = paddle.ParamAttr( + initializer=paddle.nn.initializer.Assign(arr3) + ) bias_attr = None self.linear0 = nn.Linear( d_model, dim_feedforward, weight_attr0, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py index 77062eee5a376..efbf4a538e009 100644 --- a/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py +++ b/python/paddle/fluid/tests/unittests/test_avoid_twice_initialization.py @@ -14,6 +14,7 @@ import unittest +import paddle import paddle.fluid as fluid @@ -22,7 +23,7 @@ def test_avoid_twice_initialization(self): cur_program = fluid.Program() cur_block = cur_program.current_block() var = cur_block.create_parameter( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), shape=[2, 2], dtype='float32', name='var_a', @@ -40,7 +41,7 @@ def test_avoid_twice_initialization(self): attrs={'ring_id': 0}, ) var2 = cur_block.create_parameter( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), shape=[2, 2], dtype='float32', name='var_a', diff --git a/python/paddle/fluid/tests/unittests/test_base_layer.py b/python/paddle/fluid/tests/unittests/test_base_layer.py index 05d9b71c1e437..7414c3732b18f 100644 --- a/python/paddle/fluid/tests/unittests/test_base_layer.py +++ b/python/paddle/fluid/tests/unittests/test_base_layer.py @@ -26,7 +26,7 @@ class L1(fluid.Layer): def __init__(self): super().__init__() self._param_attr = fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.1) + initializer=paddle.nn.initializer.Constant(value=0.1) ) self.w1 = self.create_parameter( attr=self._param_attr, shape=[2, 2], dtype='float32', is_bias=False diff --git a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py index ece07889df4e9..d6127ff5dd78a 100644 --- a/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py +++ b/python/paddle/fluid/tests/unittests/test_batch_norm_op_v2.py @@ -183,11 +183,11 @@ def compute_v3(x, is_test, trainable_statistics): shape[1], is_test=is_test, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), trainable=False, ), trainable_statistics=trainable_statistics, @@ -378,7 +378,7 @@ def test_global_stats(self): net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), use_global_stats=self.use_global_stats, trainable_statistics=self.trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/test_calc_gradient.py b/python/paddle/fluid/tests/unittests/test_calc_gradient.py index d8d20e41aac26..38a1284f0ae4a 100644 --- a/python/paddle/fluid/tests/unittests/test_calc_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_calc_gradient.py @@ -48,7 +48,7 @@ def test1(self): name='x', shape=[1], dtype='float32', - default_initializer=fluid.initializer.Constant(3), + default_initializer=paddle.nn.initializer.Constant(3), ) (grad1,) = fluid.gradients(net(x), x) # 2x = 6 z = net(x - grad1) @@ -69,7 +69,7 @@ def test2(self): name='x', shape=[1], dtype='float32', - default_initializer=fluid.initializer.Constant(1), + default_initializer=paddle.nn.initializer.Constant(1), ) y = x * x (dx1,) = fluid.gradients(y, x) diff --git a/python/paddle/fluid/tests/unittests/test_communicator_geo.py b/python/paddle/fluid/tests/unittests/test_communicator_geo.py index 0e836dca1c2e5..a82c0e023c6c0 100644 --- a/python/paddle/fluid/tests/unittests/test_communicator_geo.py +++ b/python/paddle/fluid/tests/unittests/test_communicator_geo.py @@ -41,7 +41,7 @@ def net(self): size=[10000, 10], param_attr=fluid.ParamAttr( name="embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) diff --git a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py index edd3d718c437e..7cb0a066141db 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_layer.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -110,11 +109,11 @@ def fluid_layer(self, place): else (-1, self.num_channels, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) if self.padding_mode != 'zeros': x_var = F.pad( x_var, diff --git 
a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py index 9eeb4fc82dfb1..50c80c3aa32d6 100644 --- a/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv2d_transpose_layer.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -101,11 +100,11 @@ def fluid_layer(self, place): else (-1, self.num_channels, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) y_var = paddle.static.nn.conv2d_transpose( x_var, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py index 78276fbf76db1..8ef86daf69a03 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_layer.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -97,11 +96,11 @@ def fluid_layer(self, place): else (-1, self.num_channels, -1, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) y_var = paddle.static.nn.conv3d( x_var, self.num_filters, diff --git a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py index 1ea071142c6c7..82c08348f4bf1 100644 --- a/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py +++ b/python/paddle/fluid/tests/unittests/test_conv3d_transpose_layer.py @@ -18,7 +18,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid, nn @@ -99,11 +98,11 @@ def fluid_layer(self, place): else (-1, self.num_channels, -1, -1, -1) ) x_var = fluid.data("input", input_shape, dtype=self.dtype) - weight_attr = I.NumpyArrayInitializer(self.weight) + weight_attr = paddle.nn.initializer.Assign(self.weight) if self.bias is None: bias_attr = False else: - bias_attr = I.NumpyArrayInitializer(self.bias) + bias_attr = paddle.nn.initializer.Assign(self.bias) y_var = paddle.static.nn.conv3d_transpose( x_var, self.num_filters, diff --git a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py index 32f77ab290b88..b7a0c981bacd6 100644 --- a/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_cuda_random_seed.py @@ -120,15 +120,15 @@ def test_gen_TruncatedNormal_initializer(self): result_1 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) result_2 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, 
scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) diff --git a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py index e02282cb9bee1..a5f193daa4c50 100644 --- a/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py +++ b/python/paddle/fluid/tests/unittests/test_decoupled_py_reader.py @@ -61,7 +61,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): size=hidden_size, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_desc_clone.py b/python/paddle/fluid/tests/unittests/test_desc_clone.py index ecb49c3172fb9..2bf68add10281 100644 --- a/python/paddle/fluid/tests/unittests/test_desc_clone.py +++ b/python/paddle/fluid/tests/unittests/test_desc_clone.py @@ -58,9 +58,7 @@ def cnn_model(data): size=SIZE, activation="softmax", weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - loc=0.0, scale=scale - ) + initializer=paddle.nn.initializer.Normal(mean=0.0, std=scale) ), ) return predict diff --git a/python/paddle/fluid/tests/unittests/test_detach.py b/python/paddle/fluid/tests/unittests/test_detach.py index 1a01b7667feb1..d3622bd042de9 100644 --- a/python/paddle/fluid/tests/unittests/test_detach.py +++ b/python/paddle/fluid/tests/unittests/test_detach.py @@ -120,7 +120,7 @@ def detach_multi(self): initializer=paddle.nn.initializer.Constant(5.0) ) linear_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(6.0) + initializer=paddle.nn.initializer.Constant(6.0) ) linear = Linear( 4, @@ -132,7 +132,7 @@ def detach_multi(self): initializer=paddle.nn.initializer.Constant(7.0) ) linear1_b_param_attrs = fluid.ParamAttr( - initializer=fluid.initializer.Constant(8.0) + initializer=paddle.nn.initializer.Constant(8.0) ) linear1 = Linear( 10, diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py index c560dfa8dbb0b..f0f85e1645124 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_a_sync_optimizer_auto_async.py @@ -58,7 +58,7 @@ def test_a_sync_optimizer3(self): size=[1000000000, 100000], param_attr=paddle.fluid.ParamAttr( name="embedding", - initializer=paddle.fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), is_sparse=True, ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py index bc17b0d67f990..69b341a026762 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_heter_program.py @@ -89,7 +89,7 @@ def embedding_layer(input): size=[100001, 10], param_attr=fluid.ParamAttr( name="SparseFeatFactors", - initializer=fluid.initializer.Uniform(), + initializer=paddle.nn.initializer.Uniform(), ), ) @@ -103,8 +103,8 @@ def embedding_layer(input): size=400, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(concated.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(concated.shape[1]) ) ), name="fc1", @@
-116,8 +116,8 @@ def embedding_layer(input): size=400, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc1.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(fc1.shape[1]) ) ), name="fc2", @@ -129,8 +129,8 @@ def embedding_layer(input): size=400, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc2.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(fc2.shape[1]) ) ), name="fc3", @@ -142,8 +142,8 @@ def embedding_layer(input): size=2, activation="softmax", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( - scale=1 / math.sqrt(fc3.shape[1]) + initializer=paddle.nn.initializer.Normal( + std=1 / math.sqrt(fc3.shape[1]) ) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py index b60ff0db63e7d..da63b75f50fa4 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_minimize.py @@ -77,7 +77,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py index a330b45b52228..ea30485e5aba0 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps.py @@ -78,7 +78,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -93,7 +93,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", 
learning_rate=base_lr, ), @@ -110,7 +110,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -141,7 +141,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -156,7 +156,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py index 2143dc94d39e0..861e015568370 100755 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps11.py @@ -77,7 +77,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py index bee3cd9eb2239..1ab2d5178241b 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps12.py @@ -80,7 +80,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + 
initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -94,7 +94,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -124,7 +124,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -139,7 +139,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -153,7 +153,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py index 58248d325b145..b17451098f405 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps13.py @@ -81,7 +81,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -95,7 +95,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -111,7 +111,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -140,7 +140,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -154,7 +154,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py index e207fb859de54..c9e6cb2035d69 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py +++ 
b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps2.py @@ -80,7 +80,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -95,7 +95,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -111,7 +111,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -140,7 +140,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -154,7 +154,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py index 4093fc34cc998..2a5f845b93b64 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps3.py @@ -78,7 +78,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -93,7 +93,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -141,7 +141,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -156,7 +156,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + 
initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py index 025b3e90b37d4..094ea32967205 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps4.py @@ -77,7 +77,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py index 51bf54b3241b4..40abc45e0ab32 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps5.py @@ -78,7 +78,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -93,7 +93,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -110,7 +110,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -125,7 +125,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -141,7 +141,7 @@ def get_loss(cos_q_pt, cos_q_nt): is_distributed=is_distributed, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__tmp_", learning_rate=emb_lr, ), @@ -156,7 +156,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py index 165a8b6240aaf..a5811d4e0f12a 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_ps6.py @@ -77,7 +77,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -91,7 +91,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -107,7 +107,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -121,7 +121,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -136,7 +136,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -150,7 +150,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py index 517232fa54eb8..fae692f8fd57c 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_sparse_embedding_ctr.py @@ -211,7 +211,7 @@ def net(): datas = [dnn_data, lr_data, label] inference = True - init = fluid.initializer.Uniform() + init = paddle.nn.initializer.Uniform() dnn_layer_dims = [128, 64, 32] dnn_embedding = fluid.contrib.layers.sparse_embedding( @@ -232,7 +232,7 @@ def net(): size=dim, activation="relu", weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01) + initializer=paddle.nn.initializer.Constant(value=0.01) ), name='dnn-fc-%d' % i, ) @@ -245,7 +245,7 @@ def net(): is_test=inference, param_attr=fluid.ParamAttr( name="wide_embedding", - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), ), ) diff --git a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py index ba6e67a035095..ebcbfb9e4c4a6 100644 --- 
a/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py +++ b/python/paddle/fluid/tests/unittests/test_dist_fleet_spmt.py @@ -75,7 +75,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=q, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -89,7 +89,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=q_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__q_fc__", learning_rate=base_lr, ), @@ -105,7 +105,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=pt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -119,7 +119,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=pt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), @@ -134,7 +134,7 @@ def get_loss(cos_q_pt, cos_q_nt): input=nt, size=[dict_dim, emb_dim], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__emb__", learning_rate=emb_lr, ), @@ -148,7 +148,7 @@ def get_loss(cos_q_pt, cos_q_nt): x=nt_ss, size=hid_dim, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.01), + initializer=paddle.nn.initializer.Constant(value=0.01), name="__fc__", learning_rate=base_lr, ), diff --git a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py index 368be77fdbbfb..5ab7ad21dbdc9 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py +++ b/python/paddle/fluid/tests/unittests/test_dist_sparse_load_ps0.py @@ -38,9 +38,7 @@ def net(self, emb_array, fc_array): size=[10, 10], param_attr=fluid.ParamAttr( name="embedding", - initializer=fluid.initializer.NumpyArrayInitializer( - emb_array - ), + initializer=paddle.nn.initializer.Assign(emb_array), ), ) @@ -50,9 +48,7 @@ def net(self, emb_array, fc_array): activation="relu", weight_attr=fluid.ParamAttr( name='fc', - initializer=fluid.initializer.NumpyArrayInitializer( - fc_array - ), + initializer=paddle.nn.initializer.Assign(fc_array), ), ) loss = paddle.mean(fc1) diff --git a/python/paddle/fluid/tests/unittests/test_dist_train.py b/python/paddle/fluid/tests/unittests/test_dist_train.py index 828b07baf7bbc..548f2bf8a0c83 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_train.py +++ b/python/paddle/fluid/tests/unittests/test_dist_train.py @@ -87,7 +87,9 @@ def init_serv(self, place): dtype='float32', name="X", ) - fluid.initializer.Constant(value=1.0)(x, main.global_block()) + paddle.nn.initializer.Constant(value=1.0)( + x, main.global_block() + ) ops._scale(x=x, scale=10.0, out=out_var) self.server_exe = fluid.Executor(place) @@ -108,7 +110,7 @@ def init_client(self, place, port): x = paddle.static.data(shape=[32, 32], dtype='float32', name='X') x.persistable = True - fluid.initializer.Constant(value=2.3)(x, main.global_block()) + paddle.nn.initializer.Constant(value=2.3)(x, main.global_block()) get_var = main.global_block().create_var( name="scale_0.tmp_0", # server side var @@ -116,7 +118,9 @@ def init_client(self, place, port): 
persistable=False, shape=[32, 32], ) - fluid.initializer.Constant(value=2.3)(get_var, main.global_block()) + paddle.nn.initializer.Constant(value=2.3)( + get_var, main.global_block() + ) # NOTE(zjl): `Send` is async send, which means that the sent # variable would be needed even though `Send` op runs. @@ -135,7 +139,7 @@ def run_local(self, place): main = fluid.Program() with fluid.program_guard(main): x = paddle.static.data(shape=[32, 32], dtype='float32', name='X') - fluid.initializer.Constant(value=2.3)(x, main.global_block()) + paddle.nn.initializer.Constant(value=2.3)(x, main.global_block()) o = paddle.scale(x=x, scale=10.0) exe = fluid.Executor(place) self.local_out = exe.run(main, fetch_list=[o]) diff --git a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py index e9b8f773c743b..e79a2f7276c00 100644 --- a/python/paddle/fluid/tests/unittests/test_dist_transpiler.py +++ b/python/paddle/fluid/tests/unittests/test_dist_transpiler.py @@ -356,7 +356,9 @@ def net_conf(self): size=[dict_size, embedding_size], param_attr=fluid.ParamAttr( name='emb', - initializer=fluid.initializer.Uniform(-init_width, init_width), + initializer=paddle.nn.initializer.Uniform( + -init_width, init_width + ), ), ) @@ -365,7 +367,8 @@ def net_conf(self): is_sparse=True, size=[dict_size, embedding_size], param_attr=fluid.ParamAttr( - name='emb_w', initializer=fluid.initializer.Constant(value=0.0) + name='emb_w', + initializer=paddle.nn.initializer.Constant(value=0.0), ), ) @@ -374,7 +377,8 @@ def net_conf(self): is_sparse=True, size=[dict_size, 1], param_attr=fluid.ParamAttr( - name='emb_b', initializer=fluid.initializer.Constant(value=0.0) + name='emb_b', + initializer=paddle.nn.initializer.Constant(value=0.0), ), ) @@ -1327,7 +1331,7 @@ def network_with_table(self, is_sparse, is_distributed): shape=[num_total_classes, 10], dtype='float32', name='nce_w', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) b_param = ( @@ -1337,7 +1341,7 @@ def network_with_table(self, is_sparse, is_distributed): shape=[num_total_classes, 1], dtype='float32', name='nce_b', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) @@ -1405,7 +1409,7 @@ def network_with_table(self, is_sparse, is_distributed): shape=[num_total_classes, 10], dtype='float32', name='hs_w', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) b_param = ( @@ -1415,7 +1419,7 @@ def network_with_table(self, is_sparse, is_distributed): shape=[3, 1], dtype='float32', name='hs_b', - initializer=fluid.initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) @@ -1424,7 +1428,7 @@ def network_with_table(self, is_sparse, is_distributed): is_sparse=is_sparse, size=[3, 3], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal( + initializer=paddle.nn.initializer.Normal( scale=1 / math.sqrt(num_total_classes) ) ), diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py index a12a17636bfc1..46977b13d7700 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_delete_vars.py @@ -41,7 +41,7 @@ def simple_fc_net(): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py index 5657eb174c303..05df1e96d7505 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_padding_rnn.py @@ -129,7 +129,7 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) @@ -138,7 +138,7 @@ def padding_rnn(input_embedding, len=3, init_hidden=None, init_cell=None): [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) bias_arr.append(bias_1) @@ -250,7 +250,7 @@ def encoder_static( [hidden_size * 2, hidden_size * 4], dtype="float32", name="fc_weight1_" + str(i), - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) @@ -259,7 +259,7 @@ def encoder_static( [hidden_size * 4], dtype="float32", name="fc_bias1_" + str(i), - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) bias_arr.append(bias_1) @@ -368,7 +368,7 @@ def encoder_static( is_sparse=False, param_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -406,7 +406,7 @@ def encoder_static( [hidden_size, vocab_size], dtype="float32", name="softmax_weight", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) @@ -414,7 +414,7 @@ def encoder_static( [vocab_size], dtype="float32", name='softmax_bias', - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py index bd4e08819570f..d9ce93c913017 100644 --- a/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_eager_deletion_recurrent_op.py @@ -312,7 +312,7 @@ def create_rnn_op(self): size=self.input_dim, weight_attr=ParamAttr( name='W', - initializer=fluid.initializer.ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, ) @@ -321,7 +321,7 @@ def create_rnn_op(self): size=self.input_dim, weight_attr=ParamAttr( name='U', - initializer=fluid.initializer.ConstantInitializer(0.0), + initializer=paddle.nn.initializer.Constant(0.0), ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index 9f05d354c463d..3fb03ac89f0d7 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ 
-911,19 +911,19 @@ def func_fp16_initilaizer(self): 1, 3, bias_attr=False, - weight_attr=paddle.fluid.initializer.Uniform(), + weight_attr=paddle.nn.initializer.Uniform(), ) linear3 = paddle.nn.Linear( 1, 3, bias_attr=False, - weight_attr=paddle.fluid.initializer.TruncatedNormalInitializer(), + weight_attr=paddle.nn.initializer.TruncatedNormal(), ) linear4 = paddle.nn.Linear( 1, 3, bias_attr=False, - weight_attr=paddle.fluid.initializer.MSRAInitializer(), + weight_attr=paddle.nn.initializer.KaimingUniform(), ) res = [ linear1.weight.numpy(), diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py index ce83ba62acb97..00cc6c07aac8b 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -97,10 +96,10 @@ def static_graph_case_1(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=self.act, data_format=self.data_format, ) @@ -515,10 +514,10 @@ def static_graph_case(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py index f45cf48afbf0d..2981748cf6178 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv2d_transpose.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -99,10 +98,10 @@ def static_graph_case_1(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), data_format=self.data_format, ) exe = fluid.Executor(self.place) @@ -523,10 +522,10 @@ def static_graph_case(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py index bdd8360f97174..62322f8e3dc8f 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer 
as I import paddle.nn.functional as F from paddle import fluid @@ -97,10 +96,10 @@ def static_graph_case_1(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=self.act, data_format=self.data_format, ) @@ -490,10 +489,10 @@ def static_graph_case(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py index ae402c874e639..7a8549b1240aa 100644 --- a/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py +++ b/python/paddle/fluid/tests/unittests/test_functional_conv3d_transpose.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid.dygraph as dg -import paddle.fluid.initializer as I import paddle.nn.functional as F from paddle import fluid @@ -99,10 +98,10 @@ def static_graph_case_1(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.weight), + param_attr=paddle.nn.initializer.Assign(self.weight), bias_attr=False if self.no_bias - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=self.act, data_format=self.data_format, ) @@ -548,10 +547,10 @@ def static_graph_case(self): padding=self.padding, dilation=self.dilation, groups=self.groups, - param_attr=I.NumpyArrayInitializer(self.filter), + param_attr=paddle.nn.initializer.Assign(self.filter), bias_attr=False if self.bias is None - else I.NumpyArrayInitializer(self.bias), + else paddle.nn.initializer.Assign(self.bias), act=None, data_format=self.data_format, ) diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py index 4d7fb60d4660e..83574bae6b462 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_act_pass.py @@ -37,11 +37,11 @@ def build_program(self, main_program, startup_program, use_cuda, seed=1): ) param_attr = fluid.ParamAttr( name='batch_norm_w', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name='batch_norm_b', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) hidden2 = paddle.static.nn.batch_norm( input=hidden1, diff --git a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py index d981ccbe14ccb..c00f10d91d4b4 100644 --- a/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py +++ b/python/paddle/fluid/tests/unittests/test_fuse_bn_add_act_pass.py @@ -31,33 +31,33 @@ class TestFusedBnAddActAPI(unittest.TestCase): def setUp(self): self.conv_param_attr1 = fluid.ParamAttr( name='conv2d_1.weight', - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) 
self.conv_param_attr2 = fluid.ParamAttr( name='conv2d_2.weight', - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) self.bn_param_attr1 = fluid.ParamAttr( name='batch_norm_w_1', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) self.bn_bias_attr1 = fluid.ParamAttr( name='batch_norm_b_1', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) self.bn_param_attr2 = fluid.ParamAttr( name='batch_norm_w_2', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) self.bn_bias_attr2 = fluid.ParamAttr( name='batch_norm_b_2', - initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) self.fc_param_attr = fluid.ParamAttr( name='fc.weight', - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ) def build_fused_program( diff --git a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py index 8068387cfdcba..9264c8f2e77c6 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_multi_transformer_op.py @@ -53,7 +53,7 @@ def setUp(self): self.__class__.no_need_check_grad = False bias_attr = paddle.fluid.ParamAttr( - initializer=paddle.fluid.initializer.Constant(value=0.0005) + initializer=paddle.nn.initializer.Constant(value=0.0005) ) self.q_proj = Linear( self.embed_dim, @@ -1027,16 +1027,16 @@ def config(self): self.has_attn_mask = False self.x_type = np.float32 self.weight_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) self.bias_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(0.0005) + initializer=paddle.nn.initializer.Constant(0.0005) ) self.ln_w_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ) self.ln_b_attr = paddle.ParamAttr( - initializer=paddle.fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ) def test_fused_multi_transformer_op(self): diff --git a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py index fcbc91edee31e..b0625050b889f 100644 --- a/python/paddle/fluid/tests/unittests/test_generator_dataloader.py +++ b/python/paddle/fluid/tests/unittests/test_generator_dataloader.py @@ -62,7 +62,7 @@ def simple_fc_net(places, use_legacy_py_reader, use_double_buffer): size=hidden_size, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py index 75e5d1ee2ee15..abf0ba0ac2650 100644 --- a/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py +++ b/python/paddle/fluid/tests/unittests/test_hsigmoid_op.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid -import paddle.fluid.initializer as I import paddle.nn.functional as F paddle.enable_static() @@ -302,7 +301,7 @@ def hs_net_conf(self, is_sparse):
is_sparse=is_sparse, size=[3, 3], param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Normal(scale=1 / math.sqrt(3)) + initializer=paddle.nn.initializer.Normal(std=1 / math.sqrt(3)) ), ) @@ -555,8 +554,8 @@ def test_dygraph_api(self): x, labels, self.num_classes, weight, bias, path_table, path_code ) - weight_attr = I.NumpyArrayInitializer(self.weight_np) - bias_attr = I.NumpyArrayInitializer(self.bias_np) + weight_attr = paddle.nn.initializer.Assign(self.weight_np) + bias_attr = paddle.nn.initializer.Assign(self.bias_np) m = paddle.nn.HSigmoidLoss( self.feature_size, self.num_classes, @@ -593,10 +592,10 @@ def test_static_api(self): ) weight_attr = paddle.framework.ParamAttr( - initializer=I.NumpyArrayInitializer(self.weight_np) + initializer=paddle.nn.initializer.Assign(self.weight_np) ) bias_attr = paddle.framework.ParamAttr( - initializer=I.NumpyArrayInitializer(self.bias_np) + initializer=paddle.nn.initializer.Assign(self.bias_np) ) m = paddle.nn.HSigmoidLoss( self.feature_size, @@ -636,8 +635,8 @@ def test_fluid_api(self): if self.is_custom: path_table = fluid.data('path_table', [-1, -1], 'int64') path_code = fluid.data('path_code', [-1, -1], 'int64') - weight_attr = I.NumpyArrayInitializer(self.weight_np) - bias_attr = I.NumpyArrayInitializer(self.bias_np) + weight_attr = paddle.nn.initializer.Assign(self.weight_np) + bias_attr = paddle.nn.initializer.Assign(self.bias_np) loss = paddle.nn.HSigmoidLoss( feature_size=x.shape[1], num_classes=self.num_classes, diff --git a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py index f34c8d6a2a858..51e32c5259f45 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_deepcf.py @@ -123,7 +123,7 @@ def __init__(self, num_users, num_items, matrix): shape=matrix.shape, dtype=matrix.dtype, is_bias=False, - default_initializer=fluid.initializer.NumpyArrayInitializer(matrix), + default_initializer=paddle.nn.initializer.Assign(matrix), ) self._rating_matrix.stop_gradient = True diff --git a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py index 0eb037bc6a02e..af6e32ac6b897 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_lod_tensor_to_selected_rows.py @@ -47,7 +47,7 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -56,7 +56,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py index 12be3af2d9cf9..5c48252cb0b7f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ocr_attention_model.py @@ -77,12 +77,12 @@ def __init__( filter_size = 3 conv_std_0 = (2.0 / (filter_size**2 * channels[0])) ** 0.5 conv_param_0 = fluid.ParamAttr( - 
initializer=fluid.initializer.Normal(0.0, conv_std_0) + initializer=paddle.nn.initializer.Normal(0.0, conv_std_0) ) conv_std_1 = (2.0 / (filter_size**2 * channels[1])) ** 0.5 conv_param_1 = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, conv_std_1) + initializer=paddle.nn.initializer.Normal(0.0, conv_std_1) ) self.conv_0_layer = paddle.nn.Conv2D( @@ -200,10 +200,11 @@ def __init__( super().__init__() self.rnn_hidden_size = rnn_hidden_size para_attr = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02) + initializer=paddle.nn.initializer.Normal(0.0, 0.02) ) bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0 + initializer=paddle.nn.initializer.Normal(0.0, 0.02), + learning_rate=2.0, ) if fluid.framework._non_static_mode(): h_0 = np.zeros( diff --git a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py index 6eb0c9d6e6c03..8917230d52c4e 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_ptb_rnn.py @@ -51,26 +51,26 @@ def _create_parameter(self): for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -176,7 +176,7 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -185,7 +185,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -193,7 +193,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py index bc46ad12d3df0..2936b0730386f 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_resnet.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_resnet.py @@ -226,7 +226,7 @@ def __init__(self, layers=50, class_dim=102, use_cudnn=True): self.pool2d_avg_output, class_dim, weight_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv) + initializer=paddle.nn.initializer.Uniform(-stdv, stdv) ), ) diff --git 
a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py index 7fd322d358366..2ef0b8afcc5c7 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_save_load_v2.py @@ -48,26 +48,26 @@ def __init__( for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -172,7 +172,7 @@ def __init__( sparse=False, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -182,7 +182,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -190,7 +190,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py index 647710fba61f1..fb833c6525846 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_selected_rows_to_lod_tensor.py @@ -48,7 +48,7 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -57,7 +57,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size, self.hidden_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -65,7 +65,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size], dtype=dtype, - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py index e171899289aa4..46bd8890d21da 100644 --- 
a/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_transformer_sorted_gradient.py @@ -399,10 +399,10 @@ def __init__(self, d_model, process_cmd, shape_len=None): self._layer_norm = paddle.nn.LayerNorm( normalized_shape=d_model, weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.0) + initializer=paddle.nn.initializer.Constant(0.0) ), ) @@ -662,7 +662,9 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name=word_emb_param_name, - initializer=fluid.initializer.Normal(0.0, src_emb_dim**-0.5), + initializer=paddle.nn.initializer.Normal( + 0.0, src_emb_dim**-0.5 + ), ), ) @@ -676,7 +678,7 @@ def __init__( sparse=is_sparse, weight_attr=fluid.ParamAttr( name=pos_enc_param_name, - initializer=fluid.initializer.NumpyArrayInitializer(pos_inp), + initializer=paddle.nn.initializer.Assign(pos_inp), trainable=False, ), ) diff --git a/python/paddle/fluid/tests/unittests/test_initializer.py b/python/paddle/fluid/tests/unittests/test_initializer.py index 07d9d7b48c29f..f87e62cb02098 100644 --- a/python/paddle/fluid/tests/unittests/test_initializer.py +++ b/python/paddle/fluid/tests/unittests/test_initializer.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.framework as framework -import paddle.fluid.initializer as initializer from paddle.fluid.core import VarDesc from paddle.regularizer import L2Decay @@ -67,7 +66,7 @@ def test_constant_initializer_default_value(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) num_ops = 1 self.assertEqual(len(block.ops), num_ops) @@ -86,7 +85,7 @@ def test_constant_initializer(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.ConstantInitializer(2.3), + initializer=paddle.nn.initializer.Constant(2.3), ) num_ops = 1 self.assertEqual(len(block.ops), num_ops) @@ -119,7 +118,7 @@ def test_uniform_initializer_default_value(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.UniformInitializer(), + initializer=paddle.nn.initializer.Uniform(), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -141,14 +140,14 @@ def test_uniform_initializer_random_seed(self): shape=[5, 10], lod_level=0, name="param1", - initializer=initializer.UniformInitializer(), + initializer=paddle.nn.initializer.Uniform(), ) block.create_parameter( dtype="float32", shape=[5, 10], lod_level=0, name="param2", - initializer=initializer.UniformInitializer(seed=456), + initializer=paddle.nn.initializer.UniformInitializer(seed=456), ) init_op = block.ops[1] self.assertEqual(init_op.attr("seed"), 456) @@ -165,7 +164,9 @@ def test_uniform_initializer(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.UniformInitializer(-4.2, 3.1, 123), + initializer=paddle.nn.initializer.UniformInitializer( + -4.2, 3.1, 123 + ), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -186,7 +187,9 @@ def test_uniform_initializer_two_op(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.UniformInitializer(-4.2, float(i), 123), + initializer=paddle.nn.initializer.UniformInitializer( + -4.2, float(i), 123 + ), ) 
num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -226,7 +229,7 @@ def test_normal_initializer_default_value(self): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.NormalInitializer(), + initializer=paddle.nn.initializer.Normal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -245,7 +248,9 @@ def test_normal_initializer(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.NormalInitializer(2.3, 1.9, 123), + initializer=paddle.nn.initializer.NormalInitializer( + 2.3, 1.9, 123 + ), ) num_ops = 1 self.assertEqual(len(block.ops), num_ops) @@ -278,7 +283,7 @@ def test_uniform_xavier_initializer(self): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.XavierInitializer(), + initializer=paddle.nn.initializer.XavierUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -300,7 +305,7 @@ def test_uniform_xavier_initializer_conv(self): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.XavierInitializer(), + initializer=paddle.nn.initializer.XavierUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -325,7 +330,7 @@ def test_normal_xavier_initializer(self): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.XavierInitializer(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -347,7 +352,7 @@ def test_normal_xavier_initializer_conv(self): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.XavierInitializer(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -372,7 +377,7 @@ def test_xavier_initializer_supplied_arguments( shape=[5, 10], lod_level=0, name="param", - initializer=initializer.XavierInitializer( + initializer=paddle.nn.initializer.XavierInitializer( uniform=uniform, fan_in=12, fan_out=23, seed=134 ), ) @@ -421,7 +426,7 @@ def test_uniform_msra_initializer(self): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(), + initializer=paddle.nn.initializer.KaimingUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -443,7 +448,7 @@ def test_uniform_msra_initializer_conv(self): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(), + initializer=paddle.nn.initializer.KaimingUniform(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -466,7 +471,7 @@ def test_normal_msra_initializer(self): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(uniform=False), + initializer=paddle.nn.initializer.KaimingNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -488,7 +493,7 @@ def test_normal_msra_initializer_conv(self): shape=[5, 10, 15, 20], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(uniform=False), + initializer=paddle.nn.initializer.KaimingNormal(), ) self.assertEqual(len(block.ops), 1) init_op = block.ops[0] @@ -509,7 +514,9 @@ def test_msra_initializer_supplied_arguments(self, dtype="float32"): shape=[5, 10], lod_level=0, name="param", - initializer=initializer.MSRAInitializer(fan_in=12, seed=134), + initializer=paddle.nn.initializer.MSRAInitializer( + fan_in=12, seed=134 + ), ) num_ops = 2 if dtype == "float16" else 1 self.assertEqual(len(block.ops), num_ops) @@ -542,7 +549,7 @@ def test_bilinear_initializer(self, 
dtype="float32"): shape=[8, 1, 3, 3], lod_level=0, name="param", - initializer=initializer.BilinearInitializer(), + initializer=paddle.nn.initializer.Bilinear(), ) num_ops = 2 if dtype in ["float16", "uint16", "float64"] else 1 self.assertEqual(len(block.ops), num_ops) @@ -576,7 +583,7 @@ def func_test_case(self): w_attr = paddle.ParamAttr( learning_rate=0.0, regularizer=L2Decay(0.0), - initializer=initializer.BilinearInitializer(), + initializer=paddle.nn.initializer.Bilinear(), ) data = paddle.rand([B, 3, H, W], dtype='float32') conv_up = paddle.nn.Conv2DTranspose( @@ -597,7 +604,7 @@ def func_test_case_fp16(self): w_attr = paddle.ParamAttr( learning_rate=0.0, regularizer=L2Decay(0.0), - initializer=initializer.BilinearInitializer(), + initializer=paddle.nn.initializer.Bilinear(), ) conv2d = paddle.nn.Conv2D(1, 2, 3, weight_attr=w_attr) paddle.set_default_dtype("float32") @@ -632,7 +639,7 @@ def test_numpy_array_initializer(self, dtype="float32"): shape=np_array.shape, lod_level=0, name="param", - initializer=initializer.NumpyArrayInitializer(np_array), + initializer=paddle.nn.initializer.Assign(np_array), ) num_ops = 2 if dtype in ["float16", "uint16"] else 1 self.assertEqual(len(block.ops), num_ops) @@ -657,7 +664,9 @@ def test_set_global_weight_initilizer(self): """Test Set Global Param initilizer with UniformInitializer""" main_prog = framework.Program() startup_prog = framework.Program() - fluid.set_global_initializer(initializer.Uniform(low=-0.5, high=0.5)) + fluid.set_global_initializer( + paddle.nn.initializer.Uniform(low=-0.5, high=0.5) + ) with fluid.program_guard(main_prog, startup_prog): x = fluid.data(name="x", shape=[1, 3, 32, 32]) # default initilizer of param in layers.conv2d is NormalInitializer @@ -683,8 +692,8 @@ def test_set_global_bias_initilizer(self): main_prog = framework.Program() startup_prog = framework.Program() fluid.set_global_initializer( - initializer.Uniform(low=-0.5, high=0.5), - bias_init=initializer.Normal(loc=0.0, scale=2.0), + paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_init=paddle.nn.initializer.Normal(0.0, 2.0), ) with fluid.program_guard(main_prog, startup_prog): x = fluid.data(name="x", shape=[1, 3, 32, 32]) @@ -746,9 +755,7 @@ def test_xvarier_initializer(self, dtype="float32"): tensor = paddle.zeros([1024, 1024, 16]) tensor.stop_gradient = False - xavier_ = paddle.fluid.initializer.XavierInitializer( - uniform=False, fan_in=3, fan_out=5 - ) + xavier_ = paddle.nn.initializer.XavierNormal(fan_in=3, fan_out=5) xavier_(tensor) hist, _ = output_hist(tensor.numpy()) @@ -771,9 +778,7 @@ def test_msra_initializer(self, dtype="float32"): tensor = paddle.zeros([1024, 1024, 16]) tensor.stop_gradient = False - msra_ = paddle.fluid.initializer.MSRAInitializer( - uniform=False, fan_in=4 - ) + msra_ = paddle.nn.initializer.KaimingNormal(fan_in=4) msra_(tensor) hist, _ = output_hist(tensor.numpy()) @@ -1188,7 +1193,7 @@ def func_kaiminguniform_initializer_fan_in_zero(self): def test_type_error(self): self.assertRaises( - ValueError, self.func_kaiminguniform_initializer_fan_in_zero + ZeroDivisionError, self.func_kaiminguniform_initializer_fan_in_zero ) diff --git a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py index bc4ef3d386ccb..7dcf964c41e31 100644 --- a/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py +++ b/python/paddle/fluid/tests/unittests/test_ir_inplace_pass.py @@ -34,7 +34,7 @@ def fc_with_batchnorm(use_feed): size=200, activation='tanh', 
bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py index 51715e2ae1ce2..192585e6c16db 100644 --- a/python/paddle/fluid/tests/unittests/test_layers.py +++ b/python/paddle/fluid/tests/unittests/test_layers.py @@ -138,7 +138,9 @@ def test_linear(self): name='data', shape=[3, 32, 32], dtype='float32' ) linear = paddle.nn.Linear( - 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) + 32, + 4, + bias_attr=paddle.nn.initializer.Constant(value=1), ) ret = linear(t) static_ret = self.get_static_graph_result( @@ -147,7 +149,9 @@ def test_linear(self): with self.dynamic_graph(): t = base.to_variable(inp) linear = paddle.nn.Linear( - 32, 4, bias_attr=fluid.initializer.ConstantInitializer(value=1) + 32, + 4, + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_ret = linear(t) dy_ret_value = dy_ret.numpy() @@ -162,7 +166,7 @@ def test_Variable(): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret1 = linear(inp) @@ -175,7 +179,7 @@ def test_type(): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret2 = linear(inp) @@ -248,7 +252,7 @@ def test_Variable(): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret1 = linear(inp) @@ -261,7 +265,7 @@ def test_type(): linear = paddle.nn.Linear( 32, 4, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) linear_ret2 = linear(inp) @@ -396,7 +400,7 @@ def test_conv2d_transpose(self): num_filters=10, filter_size=27, act='sigmoid', - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) static_rlt = self.get_static_graph_result( feed={'pixel': inp_np}, fetch_list=[out] @@ -409,7 +413,7 @@ def test_conv2d_transpose(self): 3, 10, 27, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) out = conv2d_transpose(img) out = paddle.nn.functional.sigmoid(out) @@ -421,7 +425,7 @@ def test_conv2d_transpose(self): 3, 10, 27, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_rlt = conv2d_transpose(base.to_variable(inp_np)) dy_rlt = paddle.nn.functional.sigmoid(dy_rlt) @@ -433,9 +437,7 @@ def test_conv2d_transpose(self): images = np.ones([2, 3, 5, 5], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) conv2d1 = paddle.nn.Conv2DTranspose(3, 3, [2, 2]) conv2d2 = paddle.nn.Conv2DTranspose( @@ -503,7 +505,7 @@ def test_bilinear_tensor_product(self): data_x, data_y, 6, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), act='sigmoid', ) @@ -518,7 +520,7 @@ def test_bilinear_tensor_product(self): 3, 3, 6, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) out = btp(data_x, data_y) out = 
paddle.nn.functional.sigmoid(out) @@ -530,7 +532,7 @@ def test_bilinear_tensor_product(self): 3, 3, 6, - bias_attr=fluid.initializer.ConstantInitializer(value=1), + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_rlt = btp(base.to_variable(inp_np_x), base.to_variable(inp_np_y)) dy_rlt = paddle.nn.functional.sigmoid(dy_rlt) @@ -566,9 +568,7 @@ def test_bilinear_tensor_product(self): with self.dynamic_graph(): custom_weight = np.random.randn(6, 3, 3).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) btp1 = paddle.nn.Bilinear(3, 3, 6) btp2 = paddle.nn.Bilinear(3, 3, 6, weight_attr=weight_attr) @@ -641,9 +641,7 @@ def test_embeding(self): with self.dynamic_graph(): custom_weight = np.random.randn(dict_size, 32).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) emb1 = paddle.nn.Embedding(dict_size, 32, sparse=False) emb2 = paddle.nn.Embedding( @@ -741,9 +739,7 @@ def test_conv3d(self): images = np.ones([2, 3, 6, 6, 6], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2, 2).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) conv3d1 = paddle.nn.Conv3D( in_channels=3, out_channels=3, kernel_size=2 @@ -798,8 +794,8 @@ def test_group_norm(self): ret = paddle.static.nn.group_norm( input=X, groups=2, - param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), - bias_attr=fluid.initializer.ConstantInitializer(value=1), + param_attr=paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_attr=paddle.nn.initializer.Constant(value=1), ) static_ret = self.get_static_graph_result( feed={ @@ -818,8 +814,8 @@ def test_group_norm(self): groupNorm = paddle.nn.GroupNorm( num_channels=shape[1], num_groups=2, - weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), - bias_attr=fluid.initializer.ConstantInitializer(value=1), + weight_attr=paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_attr=paddle.nn.initializer.Constant(value=1), ) ret = groupNorm(X) static_ret2 = self.get_static_graph_result( @@ -836,8 +832,8 @@ def test_group_norm(self): groupNorm = paddle.nn.GroupNorm( num_channels=shape[1], num_groups=2, - weight_attr=fluid.initializer.Uniform(low=-0.5, high=0.5), - bias_attr=fluid.initializer.ConstantInitializer(value=1), + weight_attr=paddle.nn.initializer.Uniform(low=-0.5, high=0.5), + bias_attr=paddle.nn.initializer.Constant(value=1), ) dy_ret = groupNorm(base.to_variable(input)) dy_rlt_value = dy_ret.numpy() @@ -990,9 +986,7 @@ def test_conv3d_transpose(self): images = np.ones([2, 3, 6, 6, 6], dtype='float32') custom_weight = np.random.randn(3, 3, 2, 2, 2).astype("float32") weight_attr = fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - custom_weight - ) + initializer=paddle.nn.initializer.Assign(custom_weight) ) conv3d1 = paddle.nn.Conv3DTranspose( in_channels=3, @@ -2213,13 +2207,13 @@ def test_batch_fc(self): param_attr=fluid.ParamAttr( learning_rate=1.0, name="w_0", - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ), bias_size=[16, 10], bias_attr=fluid.ParamAttr( learning_rate=1.0, name="b_0", - initializer=fluid.initializer.Xavier(uniform=False), + 
initializer=paddle.nn.initializer.XavierNormal(), ), act="relu", ) @@ -2238,7 +2232,7 @@ def test_rank_attention(self): rank_param_attr=fluid.ParamAttr( learning_rate=1.0, name="ubm_rank_param.w_0", - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), ), max_rank=3, ) diff --git a/python/paddle/fluid/tests/unittests/test_linear.py b/python/paddle/fluid/tests/unittests/test_linear.py index 71f5c831ae4b6..36496004b18d5 100644 --- a/python/paddle/fluid/tests/unittests/test_linear.py +++ b/python/paddle/fluid/tests/unittests/test_linear.py @@ -50,14 +50,14 @@ def paddle_nn_layer(self, place): learning_rate=1.0, trainable=False, regularizer=None, - initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name="linear_bias", learning_rate=1.0, trainable=False, regularizer=None, - initializer=paddle.fluid.initializer.ConstantInitializer(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) linear = paddle.nn.Linear( 2, 2, weight_attr=weight_attr, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py index cc11e96f5a915..649a2e5937c3c 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_bf16_op.py @@ -217,7 +217,7 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): """ def set_initializer(self): - self.initializer = fluid.initializer.Constant(value=self.value) + self.initializer = paddle.nn.initializer.Constant(value=self.value) def setUp(self): self.ids_shape = [4, 1] diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py index 8cbc6242b3af9..0f6affcd26c07 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_bf16_op.py @@ -84,7 +84,7 @@ class TestEmbeddingLayerBF16ConstantInitializer(unittest.TestCase): """ def set_initializer(self): - self.initializer = fluid.initializer.Constant(value=self.value) + self.initializer = paddle.nn.initializer.Constant(value=self.value) def setUp(self): self.op_type = "lookup_table_v2" diff --git a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py index 74b6eec7198c6..6aea5ef118c11 100644 --- a/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py +++ b/python/paddle/fluid/tests/unittests/test_lookup_table_v2_op.py @@ -209,9 +209,7 @@ def get_w_grad(self, is_sparse): param_attr=fluid.ParamAttr( name="emb_weight", learning_rate=10, - initializer=fluid.initializer.NumpyArrayInitializer( - self.w_data - ), + initializer=paddle.nn.initializer.Assign(self.w_data), ), is_sparse=is_sparse, ) diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py index a4dc9f33279db..bdc4af3bdcd32 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_iterable_dataset_static.py @@ -58,10 +58,10 @@ def simple_fc_net_static(): label = fluid.data(name='label', shape=[None, 1], 
dtype='int64') hidden = image param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + initializer=paddle.nn.initializer.Constant(value=0.8) ) bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ) for hidden_size in [10, 20, 30]: hidden = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py index 24c008a60271f..a38c77386a67a 100644 --- a/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py +++ b/python/paddle/fluid/tests/unittests/test_multiprocess_dataloader_static.py @@ -58,10 +58,10 @@ def simple_fc_net_static(): label = fluid.data(name='label', shape=[None, 1], dtype='int64') hidden = image param_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + initializer=paddle.nn.initializer.Constant(value=0.8) ) bias_attr = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ) for hidden_size in [10, 20, 30]: hidden = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_nce.py b/python/paddle/fluid/tests/unittests/test_nce.py index e2923da7113df..80787e7fd3f38 100644 --- a/python/paddle/fluid/tests/unittests/test_nce.py +++ b/python/paddle/fluid/tests/unittests/test_nce.py @@ -19,7 +19,6 @@ import paddle import paddle.fluid as fluid -import paddle.fluid.initializer as initializer from paddle.fluid import Program, program_guard @@ -199,7 +198,7 @@ def train_network( shape=[num_total_classes, 10], dtype='float32', name='nce_w', - initializer=initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) b_param = ( @@ -209,7 +208,7 @@ def train_network( shape=[num_total_classes, 1], dtype='float32', name='nce_b', - initializer=initializer.ConstantInitializer(), + initializer=paddle.nn.initializer.Constant(), ) ) diff --git a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py index d89af631baa45..95df8aa0be0ac 100644 --- a/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py +++ b/python/paddle/fluid/tests/unittests/test_nn_functional_embedding_static.py @@ -27,7 +27,7 @@ def test_1(self): with fluid.program_guard(prog): def test_bad_x(): - initializer = fluid.initializer.NumpyArrayInitializer( + initializer = paddle.nn.initializer.Assign( np.random.random(size=(128, 100)) ) @@ -59,7 +59,7 @@ def test_2(self): with fluid.program_guard(prog): def test_bad_x(): - initializer = fluid.initializer.NumpyArrayInitializer( + initializer = paddle.nn.initializer.Assign( np.random.random(size=(128, 100)) ) diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py index fc5fbec82cd0a..626521577d173 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_grad.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_grad.py @@ -81,20 +81,20 @@ def build_net(self, cond_i, use_bf16=False): dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_x"), - default_initializer=fluid.initializer.NumpyArrayInitializer(self.x), + default_initializer=paddle.nn.initializer.Assign(self.x), ) param_y = paddle.create_parameter( dtype="float32", shape=self.shape, 
attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_y"), - default_initializer=fluid.initializer.NumpyArrayInitializer(self.y), + default_initializer=paddle.nn.initializer.Assign(self.y), ) param_z = paddle.create_parameter( dtype="float32", shape=self.shape, attr=fluid.ParamAttr(learning_rate=self.param_lr, name="param_z"), - default_initializer=fluid.initializer.NumpyArrayInitializer(self.z), + default_initializer=paddle.nn.initializer.Assign(self.z), ) sum_xy = paddle.add(param_x, param_y, name='sum_xy') diff --git a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py index 3b32c9ca4ee78..ab9b99d8cb249 100644 --- a/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py +++ b/python/paddle/fluid/tests/unittests/test_optimizer_in_control_flow.py @@ -51,10 +51,10 @@ def double_fc_net(image): size=FC_SIZE, activation='relu', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.99) + initializer=paddle.nn.initializer.Constant(value=0.99) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.5) + initializer=paddle.nn.initializer.Constant(value=0.5) ), name="hidden", ) @@ -64,10 +64,10 @@ def double_fc_net(image): size=CLASS_NUM, activation='softmax', weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.2) + initializer=paddle.nn.initializer.Constant(value=1.2) ), bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.8) + initializer=paddle.nn.initializer.Constant(value=0.8) ), name="prediction", ) diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py index 850ddc379c609..1f6429620f689 100644 --- a/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py +++ b/python/paddle/fluid/tests/unittests/test_parallel_executor_mnist.py @@ -33,7 +33,7 @@ def simple_fc_net(use_feed): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') @@ -56,7 +56,7 @@ def fc_with_batchnorm(use_feed): size=200, activation='tanh', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_parameter.py b/python/paddle/fluid/tests/unittests/test_parameter.py index 5ce6f31318395..909feb2a48ff3 100644 --- a/python/paddle/fluid/tests/unittests/test_parameter.py +++ b/python/paddle/fluid/tests/unittests/test_parameter.py @@ -23,7 +23,6 @@ from paddle.fluid.dygraph import guard from paddle.fluid.executor import Executor from paddle.fluid.framework import ParamBase, Variable, default_main_program -from paddle.fluid.initializer import ConstantInitializer paddle.enable_static() main_program = default_main_program() @@ -38,7 +37,7 @@ def test_parameter(self): name='fc.w', shape=shape, dtype='float32', - initializer=ConstantInitializer(val), + initializer=paddle.nn.initializer.Constant(val), ) self.assertIsNotNone(param) self.assertEqual('fc.w', param.name) diff --git a/python/paddle/fluid/tests/unittests/test_prelu_op.py b/python/paddle/fluid/tests/unittests/test_prelu_op.py index 9c95d5b946ce4..4a4d5921bbb94 100644 --- a/python/paddle/fluid/tests/unittests/test_prelu_op.py +++ 
b/python/paddle/fluid/tests/unittests/test_prelu_op.py @@ -153,7 +153,7 @@ def test_dygraph_api(self): x = paddle.to_tensor(self.x_np) m = paddle.nn.PReLU( weight_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(0.5) + initializer=paddle.nn.initializer.Constant(0.5) ) ) out = m(x) @@ -438,7 +438,7 @@ def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): shape=alpha_shape, dtype='float32', is_bias=False, - default_initializer=fluid.initializer.ConstantInitializer(0.25), + default_initializer=paddle.nn.initializer.Constant(0.25), ) out = helper.create_variable_for_type_inference(dtype) helper.append_op( diff --git a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py index 5364dcaa6e14a..885c8fa829aa9 100755 --- a/python/paddle/fluid/tests/unittests/test_program_prune_backward.py +++ b/python/paddle/fluid/tests/unittests/test_program_prune_backward.py @@ -40,7 +40,7 @@ def simple_fc_net_with_accuracy(use_feed): size=200, activation='relu', bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax') diff --git a/python/paddle/fluid/tests/unittests/test_prune.py b/python/paddle/fluid/tests/unittests/test_prune.py index 30e3aefe0a738..a93516da417a4 100644 --- a/python/paddle/fluid/tests/unittests/test_prune.py +++ b/python/paddle/fluid/tests/unittests/test_prune.py @@ -170,7 +170,7 @@ def net1(self): w_param_attrs = fluid.ParamAttr( name="fc_weight", learning_rate=0.5, - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=True, ) y = paddle.static.nn.fc( @@ -198,13 +198,13 @@ def net2(self): w1_param_attrs = fluid.ParamAttr( name="fc_weight1", learning_rate=0.5, - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=True, ) w2_param_attrs = fluid.ParamAttr( name="fc_weight2", learning_rate=0.5, - initializer=fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=True, ) y1 = paddle.static.nn.fc( diff --git a/python/paddle/fluid/tests/unittests/test_py_func_op.py b/python/paddle/fluid/tests/unittests/test_py_func_op.py index a90e37a4755c2..526e08e9d5940 100644 --- a/python/paddle/fluid/tests/unittests/test_py_func_op.py +++ b/python/paddle/fluid/tests/unittests/test_py_func_op.py @@ -79,7 +79,7 @@ def simple_fc_net(img, label, use_py_func_op): hidden, size=200, bias_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ), ) if not use_py_func_op: diff --git a/python/paddle/fluid/tests/unittests/test_random_seed.py b/python/paddle/fluid/tests/unittests/test_random_seed.py index 856b2be783d36..0798fa8864f42 100644 --- a/python/paddle/fluid/tests/unittests/test_random_seed.py +++ b/python/paddle/fluid/tests/unittests/test_random_seed.py @@ -378,15 +378,15 @@ def test_gen_TruncatedNormal_initializer(self): result_1 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) result_2 = paddle.static.nn.fc( x, size=10, - weight_attr=fluid.initializer.TruncatedNormal( - loc=0.0, scale=2.0 + weight_attr=paddle.nn.initializer.TruncatedNormal( + mean=0.0, std=2.0 ), ) diff --git 
a/python/paddle/fluid/tests/unittests/test_recurrent_op.py b/python/paddle/fluid/tests/unittests/test_recurrent_op.py index 4ea5ed0e0d35f..8991b143846c4 100644 --- a/python/paddle/fluid/tests/unittests/test_recurrent_op.py +++ b/python/paddle/fluid/tests/unittests/test_recurrent_op.py @@ -301,7 +301,7 @@ def create_rnn_op(self): size=self.input_dim, weight_attr=ParamAttr( name='W', - initializer=fluid.initializer.ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, ) @@ -310,7 +310,7 @@ def create_rnn_op(self): size=self.input_dim, weight_attr=ParamAttr( name='U', - initializer=fluid.initializer.ConstantInitializer(0.0), + initializer=paddle.nn.initializer.Constant(0.0), ), bias_attr=False, ) @@ -686,7 +686,7 @@ def create_rnn_op(self): size=self.input_dim, weight_attr=ParamAttr( name="W", - initializer=fluid.initializer.ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, ) @@ -695,7 +695,7 @@ def create_rnn_op(self): size=self.input_dim, weight_attr=ParamAttr( name="U", - initializer=fluid.initializer.ConstantInitializer(0.0), + initializer=paddle.nn.initializer.Constant(0.0), ), bias_attr=False, ) diff --git a/python/paddle/fluid/tests/unittests/test_row_conv_op.py b/python/paddle/fluid/tests/unittests/test_row_conv_op.py index d160a9982577f..408a5f8a7405e 100644 --- a/python/paddle/fluid/tests/unittests/test_row_conv_op.py +++ b/python/paddle/fluid/tests/unittests/test_row_conv_op.py @@ -197,7 +197,7 @@ def check_identity(self): out = paddle.static.nn.row_conv( x, self.context_length, - param_attr=fluid.initializer.NumpyArrayInitializer(self.w), + param_attr=paddle.nn.initializer.Assign(self.w), ) place = fluid.CPUPlace() exe = fluid.Executor(place) diff --git a/python/paddle/fluid/tests/unittests/test_run_program_op.py b/python/paddle/fluid/tests/unittests/test_run_program_op.py index fe012ded3993e..73ad833a3efe2 100644 --- a/python/paddle/fluid/tests/unittests/test_run_program_op.py +++ b/python/paddle/fluid/tests/unittests/test_run_program_op.py @@ -403,7 +403,7 @@ def build_model(self): weight_attr = fluid.ParamAttr( name=self.input_names['Params'][0], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][0]] ), trainable=True, @@ -411,7 +411,7 @@ def build_model(self): bias_attr = fluid.ParamAttr( name=self.input_names['Params'][1], learning_rate=0.5, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][1]] ), trainable=True, @@ -469,7 +469,7 @@ def build_model(self): param_attr=fluid.ParamAttr( name="emb_weight", learning_rate=10, - initializer=fluid.initializer.NumpyArrayInitializer( + initializer=paddle.nn.initializer.Assign( self.inputs['Params'][self.input_names['Params'][0]] ), ), diff --git a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py index c599f08ae2bf9..3424d393952b3 100644 --- a/python/paddle/fluid/tests/unittests/test_set_bool_attr.py +++ b/python/paddle/fluid/tests/unittests/test_set_bool_attr.py @@ -26,11 +26,11 @@ def test_set_bool_attr(self): ) param_attr = fluid.ParamAttr( name='batch_norm_w', - initializer=fluid.initializer.Constant(value=1.0), + initializer=paddle.nn.initializer.Constant(value=1.0), ) bias_attr = fluid.ParamAttr( name='batch_norm_b', - 
initializer=fluid.initializer.Constant(value=0.0), + initializer=paddle.nn.initializer.Constant(value=0.0), ) bn = paddle.static.nn.batch_norm( input=x, param_attr=param_attr, bias_attr=bias_attr diff --git a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py index 89515c931c250..c63be2c6f2be8 100644 --- a/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py +++ b/python/paddle/fluid/tests/unittests/test_sgd_op_bf16.py @@ -322,7 +322,7 @@ def _check_output( print(e) def _set_initializer(self): - self.initializer = fluid.initializer.Constant(value=self.value) + self.initializer = paddle.nn.initializer.Constant(value=self.value) def _data_reader(self): for sample in range(self.sample_count): diff --git a/python/paddle/fluid/tests/unittests/test_static_save_load.py b/python/paddle/fluid/tests/unittests/test_static_save_load.py index d043e3785c498..0ac2644d90a11 100644 --- a/python/paddle/fluid/tests/unittests/test_static_save_load.py +++ b/python/paddle/fluid/tests/unittests/test_static_save_load.py @@ -59,26 +59,26 @@ def __init__( for i in range(self._num_layers): weight_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 2, self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ), ) self.weight_1_arr.append(self.add_parameter('w_%d' % i, weight_1)) bias_1 = self.create_parameter( attr=fluid.ParamAttr( - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-self._init_scale, high=self._init_scale ) ), shape=[self._hidden_size * 4], dtype="float32", - default_initializer=fluid.initializer.Constant(0.0), + default_initializer=paddle.nn.initializer.Constant(0.0), ) self.bias_arr.append(self.add_parameter('b_%d' % i, bias_1)) @@ -184,7 +184,7 @@ def __init__( embedding_dim=hidden_size, weight_attr=fluid.ParamAttr( name='embedding_para', - initializer=fluid.initializer.UniformInitializer( + initializer=paddle.nn.initializer.Uniform( low=-init_scale, high=init_scale ), ), @@ -193,7 +193,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.hidden_size, self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) @@ -201,7 +201,7 @@ def __init__( attr=fluid.ParamAttr(), shape=[self.vocab_size], dtype="float32", - default_initializer=fluid.initializer.UniformInitializer( + default_initializer=paddle.nn.initializer.Uniform( low=-self.init_scale, high=self.init_scale ), ) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py index 2481a48f01793..eaa139714660f 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_child_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_child_op.py @@ -151,9 +151,7 @@ def test_shape(self): node_nums=26, child_nums=2, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - tree_info_np - ) + initializer=paddle.nn.initializer.Assign(tree_info_np) ), ) diff --git a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py index 
217d84b4b9f8a..c54c6c0c9de02 100644 --- a/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py +++ b/python/paddle/fluid/tests/unittests/test_tdm_sampler_op.py @@ -290,12 +290,10 @@ def test_shape(self): layer_node_num_list, leaf_node_num, tree_travel_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer( - travel_array - ) + initializer=paddle.nn.initializer.Assign(travel_array) ), tree_layer_attr=fluid.ParamAttr( - initializer=fluid.initializer.NumpyArrayInitializer(layer_array) + initializer=paddle.nn.initializer.Assign(layer_array) ), output_positive=True, output_list=True, diff --git a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py index c31d763dbff7c..407d70b4dadf3 100644 --- a/python/paddle/fluid/tests/unittests/test_uniform_random_op.py +++ b/python/paddle/fluid/tests/unittests/test_uniform_random_op.py @@ -286,7 +286,7 @@ def test_api(self): y = paddle.static.nn.fc( x, size=16, - weight_attr=fluid.initializer.Uniform( + weight_attr=paddle.nn.initializer.UniformInitializer( low=-0.5, high=0.5, seed=10, diff --git a/python/paddle/fluid/tests/unittests/test_weight_normalization.py b/python/paddle/fluid/tests/unittests/test_weight_normalization.py index 17a05bdb01caa..f649fe1a28152 100644 --- a/python/paddle/fluid/tests/unittests/test_weight_normalization.py +++ b/python/paddle/fluid/tests/unittests/test_weight_normalization.py @@ -20,7 +20,6 @@ import paddle import paddle.fluid as fluid import paddle.fluid.core as core -from paddle.fluid.initializer import ConstantInitializer from paddle.fluid.param_attr import WeightNormParamAttr @@ -44,7 +43,7 @@ def set_program(cls): weight_attr=WeightNormParamAttr( dim=None, name='weight_norm_param', - initializer=ConstantInitializer(1.0), + initializer=paddle.nn.initializer.Constant(1.0), ), bias_attr=False, activation=None, diff --git a/python/paddle/fluid/tests/unittests/transformer_model.py b/python/paddle/fluid/tests/unittests/transformer_model.py index 42436b6e242b4..d847ac9ee4433 100644 --- a/python/paddle/fluid/tests/unittests/transformer_model.py +++ b/python/paddle/fluid/tests/unittests/transformer_model.py @@ -76,8 +76,8 @@ def __compute_qkv(queries, keys, values, n_head, d_key, d_value): q = paddle.static.nn.fc( x=queries, size=d_key * n_head, - weight_attr=fluid.initializer.Xavier( - uniform=False, fan_in=d_model * d_key, fan_out=n_head * d_key + weight_attr=paddle.nn.initializer.XavierNormal( + fan_in=d_model * d_key, fan_out=n_head * d_key ), bias_attr=False, num_flatten_dims=2, @@ -85,8 +85,8 @@ def __compute_qkv(queries, keys, values, n_head, d_key, d_value): k = paddle.static.nn.fc( x=keys, size=d_key * n_head, - weight_attr=fluid.initializer.Xavier( - uniform=False, fan_in=d_model * d_key, fan_out=n_head * d_key + weight_attr=paddle.nn.initializer.XavierNormal( + fan_in=d_model * d_key, fan_out=n_head * d_key ), bias_attr=False, num_flatten_dims=2, @@ -94,8 +94,7 @@ def __compute_qkv(queries, keys, values, n_head, d_key, d_value): v = paddle.static.nn.fc( x=values, size=d_value * n_head, - weight_attr=fluid.initializer.Xavier( - uniform=False, + weight_attr=paddle.nn.initializer.XavierNormal( fan_in=d_model * d_value, fan_out=n_head * d_value, ), @@ -187,7 +186,7 @@ def __softmax(x, eps=1e-9): proj_out = paddle.static.nn.fc( x=out, size=d_model, - weight_attr=fluid.initializer.Xavier(uniform=False), + weight_attr=paddle.nn.initializer.XavierNormal(), bias_attr=False, num_flatten_dims=2, ) @@ -204,7 +203,7 @@ def 
positionwise_feed_forward(x, d_inner_hid, d_hid): x, size=d_inner_hid, num_flatten_dims=2, - weight_attr=fluid.initializer.Uniform( + weight_attr=paddle.nn.initializer.Uniform( low=-(d_hid**-0.5), high=(d_hid**-0.5) ), activation="relu", @@ -213,7 +212,7 @@ def positionwise_feed_forward(x, d_inner_hid, d_hid): x=hidden, size=d_hid, num_flatten_dims=2, - weight_attr=fluid.initializer.Uniform( + weight_attr=paddle.nn.initializer.Uniform( low=-(d_inner_hid**-0.5), high=(d_inner_hid**-0.5) ), ) @@ -235,8 +234,8 @@ def pre_post_process_layer(prev_out, out, process_cmd, dropout=0.0): out = paddle.static.nn.layer_norm( out, begin_norm_axis=len(out.shape) - 1, - param_attr=fluid.initializer.Constant(1.0), - bias_attr=fluid.initializer.Constant(0.0), + param_attr=paddle.nn.initializer.Constant(1.0), + bias_attr=paddle.nn.initializer.Constant(0.0), ) elif cmd == "d": # add dropout if dropout: @@ -269,7 +268,7 @@ def prepare_encoder( src_word, size=[src_vocab_size, src_emb_dim], padding_idx=src_pad_idx, - param_attr=fluid.initializer.Normal(0.0, 1.0), + param_attr=paddle.nn.initializer.Normal(0.0, 1.0), ) src_pos_enc = layers.embedding( src_pos, @@ -587,7 +586,7 @@ def transformer( x=paddle.static.nn.fc( x=dec_output, size=trg_vocab_size, - weight_attr=fluid.initializer.Xavier(uniform=False), + weight_attr=paddle.nn.initializer.XavierNormal(), bias_attr=False, num_flatten_dims=2, ), diff --git a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py index d3909193cd6ce..3ee0469b6145d 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_batch_norm_op_xpu.py @@ -369,7 +369,7 @@ def test_global_stats(self): net1 = paddle.nn.BatchNorm( 6, param_attr=fluid.ParamAttr( - initializer=fluid.initializer.Constant(1.0) + initializer=paddle.nn.initializer.Constant(1.0) ), use_global_stats=self.use_global_stats, trainable_statistics=self.trainable_statistics, diff --git a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py index 3518083d75678..1764400403f26 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_fused_resnet_basic_block_op_xpu.py @@ -73,34 +73,34 @@ def Base(self): paddle.disable_static() conv1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) conv3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) bn1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) bn1_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) bn2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) bn2_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) bn3_weight = fluid.ParamAttr( - 
initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) bn3_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) self.conv1 = nn.Conv2D( @@ -173,34 +173,34 @@ def FusedResNetBasicBlock(self): paddle.disable_static() fused_conv1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) fused_conv2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) fused_conv3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Xavier(uniform=False), + initializer=paddle.nn.initializer.XavierNormal(), learning_rate=0.001, ) fused_bn1_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) fused_bn1_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) fused_bn2_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) fused_bn2_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) fused_bn3_weight = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=1.0) + initializer=paddle.nn.initializer.Constant(value=1.0) ) fused_bn3_bias = fluid.ParamAttr( - initializer=fluid.initializer.Constant(value=0.0) + initializer=paddle.nn.initializer.Constant(value=0.0) ) if self.has_shortcut: diff --git a/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py b/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py index 2f699ca3c026d..666c29f7fcaa8 100644 --- a/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py +++ b/python/paddle/fluid/tests/unittests/xpu/test_prelu_op_xpu.py @@ -163,7 +163,7 @@ def prelu_t(x, mode, param_attr=None, name=None, data_format='NCHW'): shape=alpha_shape, dtype='float32', is_bias=False, - default_initializer=fluid.initializer.ConstantInitializer(0.25), + default_initializer=paddle.nn.initializer.Constant(0.25), ) out = helper.create_variable_for_type_inference(dtype) helper.append_op( diff --git a/python/paddle/fluid/transpiler/distribute_transpiler.py b/python/paddle/fluid/transpiler/distribute_transpiler.py index 29901363dbeef..32486a8dadd2b 100644 --- a/python/paddle/fluid/transpiler/distribute_transpiler.py +++ b/python/paddle/fluid/transpiler/distribute_transpiler.py @@ -39,7 +39,7 @@ import numpy as np from .ps_dispatcher import RoundRobin, PSDispatcher -from .. import core, framework, unique_name, initializer +from .. 
import core, framework, unique_name from ..framework import ( Program, default_main_program, @@ -2856,7 +2856,7 @@ def _get_lr_ops(self): dtype=var.dtype, shape=var.shape, persistable=var.persistable, - initializer=initializer.Constant(1), + initializer=paddle.nn.initializer.Constant(1), ) op_role_attr_name = ( core.op_proto_and_checker_maker.kOpRoleAttrName() diff --git a/python/paddle/incubate/asp/asp.py b/python/paddle/incubate/asp/asp.py index df1c81bffe835..7bf04dc151c7f 100644 --- a/python/paddle/incubate/asp/asp.py +++ b/python/paddle/incubate/asp/asp.py @@ -24,7 +24,6 @@ import paddle from paddle.fluid import core, global_scope, program_guard from paddle.fluid.framework import dygraph_only -from paddle.fluid.initializer import ConstantInitializer from paddle.incubate import asp from .supported_layer_list import ( @@ -882,7 +881,9 @@ def _create_mask_variables(cls, main_program, startup_program, params): name=ASPHelper._get_mask_name(param.name), shape=param.shape, dtype=param.dtype, - default_initializer=ConstantInitializer(value=1.0), + default_initializer=paddle.nn.initializer.Constant( + value=1.0 + ), ) mask_param.stop_gradient = True mask_param.trainable = False diff --git a/python/paddle/nn/decode.py b/python/paddle/nn/decode.py index 4ad72077014f0..4ce504d8f8b66 100644 --- a/python/paddle/nn/decode.py +++ b/python/paddle/nn/decode.py @@ -19,8 +19,8 @@ import numpy as np import paddle +from paddle.common_ops_import import default_main_program from paddle.framework import _non_static_mode -from paddle.static import default_main_program from ..fluid.data_feeder import convert_dtype from ..fluid.layers.utils import flatten, map_structure diff --git a/python/paddle/nn/functional/common.py b/python/paddle/nn/functional/common.py index 57a1e0023d4fc..d8777d2c4779d 100644 --- a/python/paddle/nn/functional/common.py +++ b/python/paddle/nn/functional/common.py @@ -16,10 +16,10 @@ import paddle from paddle import _C_ops, _legacy_C_ops +from paddle.common_ops_import import Variable, default_main_program from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.tensor import fill_constant from paddle.framework import core, in_dynamic_mode -from paddle.static import Variable, default_main_program from paddle.tensor.creation import full from ...fluid.data_feeder import ( diff --git a/python/paddle/nn/functional/conv.py b/python/paddle/nn/functional/conv.py index 74a97e25938ed..82d25747eadb0 100644 --- a/python/paddle/nn/functional/conv.py +++ b/python/paddle/nn/functional/conv.py @@ -22,6 +22,7 @@ from paddle.fluid.framework import _global_flags, in_dygraph_mode from paddle.tensor.math import _add_with_axis +from ...common_ops_import import Variable from ...device import get_cudnn_version from ...fluid.data_feeder import check_dtype, check_variable_and_dtype from ...fluid.layer_helper import LayerHelper @@ -32,7 +33,6 @@ convert_to_list, ) from ...framework import no_grad -from ...static import Variable from ...tensor.manipulation import squeeze, unsqueeze __all__ = [] diff --git a/python/paddle/nn/functional/extension.py b/python/paddle/nn/functional/extension.py index 3b566b3de3044..533bf138a1a49 100644 --- a/python/paddle/nn/functional/extension.py +++ b/python/paddle/nn/functional/extension.py @@ -18,6 +18,7 @@ from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode +from ...common_ops_import import Variable from ...fluid.data_feeder import ( check_dtype, check_type, @@ -26,7 +27,6 @@ from ...fluid.framework import in_dygraph_mode from ...fluid.layer_helper import 
LayerHelper from ...framework import convert_np_dtype_to_dtype_, core -from ...static import Variable from ...tensor.creation import assign from ...tensor.layer_function_generator import templatedoc diff --git a/python/paddle/nn/functional/input.py b/python/paddle/nn/functional/input.py index 8964b69df2a71..eccaffcb729a8 100644 --- a/python/paddle/nn/functional/input.py +++ b/python/paddle/nn/functional/input.py @@ -14,10 +14,10 @@ from paddle import _C_ops +from ...common_ops_import import Variable from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import in_dygraph_mode from ...fluid.layer_helper import LayerHelper -from ...static import Variable __all__ = [] diff --git a/python/paddle/nn/functional/loss.py b/python/paddle/nn/functional/loss.py index 90697cb63476f..001efd74a6733 100644 --- a/python/paddle/nn/functional/loss.py +++ b/python/paddle/nn/functional/loss.py @@ -20,10 +20,10 @@ from paddle.framework import core from paddle.utils import deprecated +from ...common_ops_import import Variable from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import _current_expected_place, in_dygraph_mode from ...fluid.layer_helper import LayerHelper -from ...static import Variable from ...tensor.manipulation import reshape __all__ = [] diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 4f164e991f328..1178928acc2da 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -15,10 +15,10 @@ from paddle import _C_ops, _legacy_C_ops, in_dynamic_mode from paddle.fluid.framework import in_dygraph_mode +from ...common_ops_import import Variable from ...device import get_cudnn_version, is_compiled_with_rocm from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.layer_helper import LayerHelper -from ...static import Variable __all__ = [] diff --git a/python/paddle/nn/initializer/Bilinear.py b/python/paddle/nn/initializer/Bilinear.py new file mode 100644 index 0000000000000..b3a1766d07ccc --- /dev/null +++ b/python/paddle/nn/initializer/Bilinear.py @@ -0,0 +1,182 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import numpy as np + +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer + +__all__ = [] + + +class Bilinear(Initializer): + """ + This initializer can be used in transposed convolution operator to + act as upsampling. Users can upsample a feature map with shape of + (B, C, H, W) by any integer factor. The usage is: + + Examples: + + .. 
code-block:: python + + import math + + import paddle + import paddle.nn as nn + from paddle.regularizer import L2Decay + + factor = 2 + C = 2 + B = 8 + H = W = 32 + w_attr = paddle.ParamAttr(learning_rate=0., + regularizer=L2Decay(0.), + initializer=nn.initializer.Bilinear()) + data = paddle.rand([B, 3, H, W], dtype='float32') + conv_up = nn.Conv2DTranspose(3, + out_channels=C, + kernel_size=2 * factor - factor % 2, + padding=int( + math.ceil((factor - 1) / 2.)), + stride=factor, + weight_attr=w_attr, + bias_attr=False) + x = conv_up(data) + + Where, `out_channels=C` and `groups=C` means this is channel-wise transposed + convolution. The filter shape will be (C, 1, K, K) where K is `kernel_size`, + This initializer will set a (K, K) interpolation kernel for every channel + of the filter identically. The resulting shape of the output feature map + will be (B, C, factor * H, factor * W). Note that the learning rate and the + weight decay are set to 0 in order to keep coefficient values of bilinear + interpolation unchanged during training. + + """ + + def __init__(self): + """Constructor for BilinearInitializer.""" + super().__init__() + + def forward(self, var, block=None): + """Initialize the input tensor with Bilinear initialization. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. + + Returns: + The initialization op + """ + block = self._check_block(block) + + if not isinstance(var, framework.Variable): + raise ValueError("var must be framework.Variable.") + + if not isinstance(block, framework.Block): + raise ValueError("block must be framework.Block.") + + shape = var.shape + if len(shape) != 4: + raise ValueError("the length of shape must be 4.") + if shape[2] != shape[3]: + raise ValueError("shape[2] must be equal to shape[3].") + + weight = np.zeros(np.prod(var.shape), dtype='float32') + size = shape[3] + # factor + f = np.ceil(size / 2.0) + # center + c = (2 * f - 1 - f % 2) / (2.0 * f) + for i in range(np.prod(shape)): + x = i % size + y = (i / size) % size + weight[i] = (1 - abs(x / f - c)) * (1 - abs(y / f - c)) + weight = np.reshape(weight, shape) + + # to be compatible of fp16 initalizers + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP64, + ]: + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['bilinear_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if out_dtype == core.VarDesc.VarType.FP32: + value_name = "fp32_values" + values = [float(v) for v in weight.flat] + else: + raise TypeError("Unsupported dtype %s", var.dtype) + + if np.prod(shape) > 1024 * 1024: + raise ValueError("The size of input is too big. 
") + + if in_dygraph_mode(): + _C_ops.assign_value_( + out_var, + list(shape), + out_dtype, + values, + _current_expected_place(), + ) + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP64, + ]: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + op = block.append_op( + type='assign_value', + outputs={'Out': [out_var]}, + attrs={ + 'dtype': out_dtype, + 'shape': list(shape), + value_name: values, + }, + ) + + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + core.VarDesc.VarType.FP64, + ]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op diff --git a/python/paddle/nn/initializer/__init__.py b/python/paddle/nn/initializer/__init__.py index e078e19ed2b4d..6ef516c8b6af5 100644 --- a/python/paddle/nn/initializer/__init__.py +++ b/python/paddle/nn/initializer/__init__.py @@ -13,9 +13,9 @@ # limitations under the License. # TODO: define the initializers to create a Parameter in neural network -from ...fluid.initializer import Bilinear # noqa: F401 from ...fluid.initializer import set_global_initializer # noqa: F401 -from ...fluid.initializer import calculate_gain # noqa: F401 + +from .Bilinear import Bilinear # noqa: F401 from .constant import Constant # noqa: F401 @@ -36,6 +36,15 @@ from .dirac import Dirac # noqa: F401 +from .initializer import Initializer, calculate_gain # noqa: F401 +from .uniform import UniformInitializer # noqa: F401 +from .constant import ConstantInitializer # noqa: F401 +from .normal import NormalInitializer # noqa: F401 +from .normal import TruncatedNormalInitializer # noqa: F401 +from .xavier import XavierInitializer # noqa: F401 +from .kaiming import MSRAInitializer # noqa: F401 +from .assign import NumpyArrayInitializer # noqa: F401 + __all__ = [ # noqa 'Bilinear', 'Constant', diff --git a/python/paddle/nn/initializer/assign.py b/python/paddle/nn/initializer/assign.py index 052da37af244e..3ab5a896e463a 100644 --- a/python/paddle/nn/initializer/assign.py +++ b/python/paddle/nn/initializer/assign.py @@ -12,20 +12,134 @@ # See the License for the specific language governing permissions and # limitations under the License. import paddle +from paddle import _C_ops +from ...fluid import core, framework, unique_name from ...fluid.data_feeder import check_type -from ...fluid.initializer import NumpyArrayInitializer +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class NumpyArrayInitializer(Initializer): + """Init an parameter with an numpy array + This api initialize the tensor by numpy array. + + Args: + value (numpy): numpy array to initialize the tensor + + Returns: + A Tensor initialized by numpy. + + """ + + def __init__(self, value): + import numpy + + assert isinstance(value, numpy.ndarray) + super().__init__() + self._value = value + + def forward(self, var, block=None): + """Initialize the input tensor with Numpy array. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + + # to be compatible of fp16 initalizers + if var.dtype in [core.VarDesc.VarType.FP16, core.VarDesc.VarType.BF16]: + out_dtype = core.VarDesc.VarType.FP32 + np_value = self._value.astype("float32") + out_var = block.create_var( + name=unique_name.generate( + ".".join(['numpy_array_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_var = var + out_dtype = var.dtype + np_value = self._value + + if out_dtype == core.VarDesc.VarType.FP32: + value_name = "fp32_values" + values = [float(v) for v in np_value.flat] + elif out_dtype == core.VarDesc.VarType.INT32: + value_name = "int32_values" + values = [int(v) for v in np_value.flat] + else: + raise ValueError("Unsupported dtype %s", self._value.dtype) + if self._value.size > 1024 * 1024 * 1024: + raise ValueError( + "The size of input is too big. Please consider " + "saving it to file and 'load_op' to load it" + ) + + if in_dygraph_mode(): + _C_ops.assign_value_( + out_var, + list(self._value.shape), + out_dtype, + values, + _current_expected_place(), + ) + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + op = block.append_op( + type='assign_value', + outputs={'Out': out_var}, + attrs={ + 'dtype': out_dtype, + 'shape': list(self._value.shape), + value_name: values, + }, + stop_gradient=True, + ) + + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class Assign(NumpyArrayInitializer): """Init an parameter with a numpy array, list, or tensor. Args: value (Tensor|numpy.ndarray|list|tuple): numpy array, list, tuple, or tensor to initialize the parameter. - name(str, optional): The default value is None. Normally there is no need for user to set this - property. For more information, please refer to :ref:`api_guide_Name`. + name(str, optional): Normally there is no need for user to set this + property. For more information, please refer to :ref:`api_guide_Name`. Default is None. Returns: A parameter initialized by the input numpy array, list, or tensor. diff --git a/python/paddle/nn/initializer/constant.py b/python/paddle/nn/initializer/constant.py index 637ae6299005c..0016467f117b0 100644 --- a/python/paddle/nn/initializer/constant.py +++ b/python/paddle/nn/initializer/constant.py @@ -12,12 +12,75 @@ # See the License for the specific language governing permissions and # limitations under the License. +from paddle import _C_ops + +from ...fluid import core, framework +from ...fluid.framework import _current_expected_place, in_dygraph_mode + # TODO: define the initializers of Constant in neural network -from ...fluid.initializer import ConstantInitializer +from .initializer import Initializer __all__ = [] +class ConstantInitializer(Initializer): + """Implements the constant initializer + + Args: + value (float32, optional): constant value to initialize the variable. Default: 0.0. 
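A brief usage sketch (editor's addition, not part of this patch) of the replacement pattern the test diffs above apply: the private NumpyArrayInitializer and ConstantInitializer now live under paddle.nn.initializer and are normally reached through their public Assign and Constant aliases. The layer and shapes below are illustrative only.

    import numpy as np
    import paddle

    custom_weight = np.random.randn(32, 4).astype("float32")  # illustrative shape
    weight_attr = paddle.ParamAttr(
        # replaces fluid.initializer.NumpyArrayInitializer(custom_weight)
        initializer=paddle.nn.initializer.Assign(custom_weight)
    )
    bias_attr = paddle.ParamAttr(
        # replaces fluid.initializer.ConstantInitializer(value=1)
        initializer=paddle.nn.initializer.Constant(value=1.0)
    )
    linear = paddle.nn.Linear(32, 4, weight_attr=weight_attr, bias_attr=bias_attr)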
+ + """ + + def __init__(self, value=0.0, force_cpu=False): + assert value is not None + super().__init__() + self._value = value + self._force_cpu = force_cpu + + def forward(self, var, block=None): + """Initialize the input tensor with constant. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. + + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) or isinstance( + var, framework.EagerParamBase + ) + assert isinstance(block, framework.Block) + + if in_dygraph_mode(): + place = _current_expected_place() + if self._force_cpu: + place = core.CPUPlace() + _C_ops.full_( + var, var.shape, str(float(self._value)), var.dtype, place + ) + return None + else: + op = block.append_op( + type="fill_constant", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": int(var.dtype), + "value": float(self._value), + 'str_value': str(float(self._value)), + 'force_cpu': self._force_cpu, + }, + stop_gradient=True, + ) + + var.op = op + return op + + class Constant(ConstantInitializer): """Implement the constant initializer. diff --git a/python/paddle/nn/initializer/dirac.py b/python/paddle/nn/initializer/dirac.py index 0917859415d36..3abcc300bc64e 100644 --- a/python/paddle/nn/initializer/dirac.py +++ b/python/paddle/nn/initializer/dirac.py @@ -20,7 +20,7 @@ from ...fluid.core import VarDesc from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.framework import _current_expected_place -from ...fluid.initializer import Initializer +from .initializer import Initializer __all__ = [] diff --git a/python/paddle/nn/initializer/initializer.py b/python/paddle/nn/initializer/initializer.py new file mode 100644 index 0000000000000..c320fa68cd114 --- /dev/null +++ b/python/paddle/nn/initializer/initializer.py @@ -0,0 +1,159 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import functools +import math + +import numpy as np + +from ...fluid.framework import default_main_program, in_dygraph_mode +from ...fluid.lazy_init import lazy_init_helper + +__all__ = [] + + +class Initializer: + """Base class for parameter initializers + + Defines the common interface of parameter initializers. + They add operations to the init program that are used + to initialize parameter. Users should not use this class + directly, but need to use one of its implementations. 
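A hedged sketch (editor's addition) of the contract the Initializer base class above defines: concrete subclasses implement forward(var, block) and are applied through __call__, which in dygraph mode fills the parameter in place. The shape and value below are illustrative.

    import paddle

    paddle.disable_static()
    p = paddle.create_parameter(shape=[3, 4], dtype="float32")
    init = paddle.nn.initializer.Constant(0.5)  # one concrete implementation
    init(p)                                     # __call__ -> forward(p) in dygraph mode
    print(float(p.mean()))                      # expected to print 0.5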
+ """ + + def __init__(self): + pass + + def __call__(self, param, block=None): + if not lazy_init_helper().state: + return self.forward(param, block) + + return self._lazy_init(param, block) + + def forward(self, param, block=None): + """Add corresponding initialization operations to the network""" + raise NotImplementedError() + + def _lazy_init(self, param, block=None): + """ + Apply lazy initialization + """ + assert in_dygraph_mode() + + def init_op_creator(forward, param, block): + new_var = param._to_static_var(True, block=block) + # Record initializer operator + with lazy_init_helper(): + forward(new_var, block) + + # Add hook function for initializing param in dygraph mode + param.set_init_func(functools.partial(self.forward, param, block)) + param._init_op_creator = functools.partial( + init_op_creator, self.forward, param + ) + + return param + + def _check_block(self, block): + if block is None: + block = default_main_program().global_block() + + return block + + def _compute_fans(self, var): + """Compute the fan_in and the fan_out for layers + + This method computes the fan_in and the fan_out + for neural network layers, if not specified. It is + not possible to perfectly estimate fan_in and fan_out. + This method will estimate it correctly for matrix multiply and + convolutions. + + Args: + var: variable for which fan_in and fan_out have to be computed + + Returns: + tuple of two integers (fan_in, fan_out) + """ + shape = var.shape + if not shape or len(shape) == 0: + fan_in = fan_out = 1 + elif len(shape) == 1: + fan_in = fan_out = shape[0] + elif len(shape) == 2: + # This is the case for simple matrix multiply + fan_in = shape[0] + fan_out = shape[1] + else: + # Assume this to be a convolutional kernel + # In PaddlePaddle, the shape of the kernel is like: + # [num_filters, num_filter_channels, ...] where the remaining + # dimensions are the filter_size + receptive_field_size = np.prod(shape[2:]) + fan_in = shape[1] * receptive_field_size + fan_out = shape[0] * receptive_field_size + + return (fan_in, fan_out) + + +def calculate_gain(nonlinearity, param=None): + """ + Get the recommended ``gain`` value of some nonlinearity function. ``gain`` value can be used in some + ``paddle.nn.initializer`` api to adjust the initialization value. + + Args: + nonlinearity(str): name of nonlinearity activation function. If it is a linear function, such as: + `linear/conv1d/conv2d/conv3d/conv1d_transpose/conv2d_transpose/conv3d_transpose` , 1.0 will be returned. + param(bool|int|float, optional): optional parameter for somme nonlinearity function. Now, it only applies to + 'leaky_relu'. Default: None, it will be calculated as 0.01 in the formula. + + Returns: + A float value, which is the recommended gain for this nonlinearity function. + + Examples: + .. 
code-block:: python + + import paddle + gain = paddle.nn.initializer.calculate_gain('tanh') # 5.0 / 3 + gain = paddle.nn.initializer.calculate_gain('leaky_relu', param=1.0) # 1.0 = math.sqrt(2.0 / (1+param^2)) + initializer = paddle.nn.initializer.Orthogonal(gain) + + """ + if param is None: + param = 0.01 + else: + assert isinstance(param, (bool, int, float)) + param = float(param) + recommended_gain = { + 'sigmoid': 1, + 'linear': 1, + 'conv1d': 1, + 'conv2d': 1, + 'conv3d': 1, + 'conv1d_transpose': 1, + 'conv2d_transpose': 1, + 'conv3d_transpose': 1, + 'tanh': 5.0 / 3, + 'relu': math.sqrt(2.0), + 'leaky_relu': math.sqrt(2.0 / (1 + param**2)), + 'selu': 3.0 / 4, + } + if nonlinearity in recommended_gain.keys(): + return recommended_gain[nonlinearity] + else: + raise ValueError( + "nonlinearity function {} is not suppported now.".format( + nonlinearity + ) + ) diff --git a/python/paddle/nn/initializer/kaiming.py b/python/paddle/nn/initializer/kaiming.py index f214e46fa4b2b..c3a8732315db3 100644 --- a/python/paddle/nn/initializer/kaiming.py +++ b/python/paddle/nn/initializer/kaiming.py @@ -13,11 +13,185 @@ # limitations under the License. # TODO: define the initializers of Kaiming functions in neural network -from ...fluid.initializer import MSRAInitializer +import math + +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer, calculate_gain __all__ = [] +class MSRAInitializer(Initializer): + r"""Implements the MSRA initializer a.k.a. Kaiming Initializer + + This class implements the weight initialization from the paper + `Delving Deep into Rectifiers: Surpassing Human-Level Performance on + ImageNet Classification `_ + by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a + robust initialization method that particularly considers the rectifier + nonlinearities. In case of Uniform distribution, the range is [-x, x], where + + .. math:: + + x = gain \times \sqrt{\frac{3}{fan\_in}} + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. math:: + + \frac{gain}{\sqrt{{fan\_in}}} + + Args: + uniform (bool, optional): whether to use uniform or normal distribution. Default is True. + fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None. + seed (int32, optional): random seed. Default is 0. + negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0. + nonlinearity(str, optional): the non-linear function. Default is relu. + + Note: + It is recommended to set fan_in to None for most cases. + + """ + + def __init__( + self, + uniform=True, + fan_in=None, + seed=0, + negative_slope=0, + nonlinearity='relu', + ): + """Constructor for MSRAInitializer""" + assert uniform is not None + assert seed is not None + super().__init__() + self._uniform = uniform + self._fan_in = fan_in + self._seed = seed + self._negative_slope = negative_slope + self._nonlinearity = nonlinearity + + def forward(self, var, block=None): + """Initialize the input tensor with MSRA initialization. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
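A short usage sketch (editor's addition, not part of this patch): the MSRAInitializer above backs the public KaimingNormal and KaimingUniform wrappers shown later in this file, using the uniform bound gain * sqrt(3 / fan_in) and the normal std gain / sqrt(fan_in) from the class docstring. The conv layer below is illustrative only.

    import paddle

    w_attr = paddle.ParamAttr(
        initializer=paddle.nn.initializer.KaimingUniform()  # bound = gain * sqrt(3 / fan_in)
    )
    conv = paddle.nn.Conv2D(3, 16, kernel_size=3, weight_attr=w_attr)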
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + f_in, f_out = self._compute_fans(var) + + # If fan_in is passed, use it + fan_in = f_in if self._fan_in is None else self._fan_in + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['masra_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + if self._uniform: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + limit = gain * math.sqrt(3.0 / float(fan_in)) + out_var = _C_ops.uniform( + var.shape, + out_dtype, + -limit, + limit, + self._seed, + _current_expected_place(), + ) + else: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + std = gain / math.sqrt(float(fan_in)) + place = _current_expected_place() + out_var = _C_ops.gaussian( + out_var.shape, 0.0, std, self._seed, out_dtype, place + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + if self._uniform: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + limit = gain * math.sqrt(3.0 / float(fan_in)) + op = block.append_op( + type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": int(out_dtype), + "min": -limit, + "max": limit, + "seed": self._seed, + }, + stop_gradient=True, + ) + + else: + gain = calculate_gain(self._nonlinearity, self._negative_slope) + std = gain / math.sqrt(float(fan_in)) + op = block.append_op( + type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": int(out_dtype), + "mean": 0.0, + "std": std, + "seed": self._seed, + }, + stop_gradient=True, + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class KaimingNormal(MSRAInitializer): r"""Implements the Kaiming Normal initializer @@ -36,9 +210,9 @@ class KaimingNormal(MSRAInitializer): \frac{gain}{\sqrt{{fan\_in}}} Args: - fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. default is None. - negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0. - nonlinearity(str, optional): the non-linear function. default is relu. + fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None. + negative_slope (float, optional): negative_slope (only used with leaky_relu). 
Default is 0.0. + nonlinearity(str, optional): the non-linear function. Default is relu. Note: It is recommended to set fan_in to None for most cases. @@ -84,9 +258,9 @@ class KaimingUniform(MSRAInitializer): x = gain \times \sqrt{\frac{3}{fan\_in}} Args: - fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. default is None. - negative_slope (float, optional): negative_slope (only used with leaky_relu). default is 0.0. - nonlinearity(str, optional): the non-linear function. default is relu. + fan_in (float32|None, optional): fan_in (in_features) of trainable Tensor, If None, it will be infered automaticly. If you don't want to use in_features of the Tensor, you can set the value of 'fan_in' smartly by yourself. Default is None. + negative_slope (float, optional): negative_slope (only used with leaky_relu). Default is 0.0. + nonlinearity(str, optional): the non-linear function. Default is relu. Note: It is recommended to set fan_in to None for most cases. diff --git a/python/paddle/nn/initializer/normal.py b/python/paddle/nn/initializer/normal.py index 5ead30f4f1e3e..030ec95940db6 100644 --- a/python/paddle/nn/initializer/normal.py +++ b/python/paddle/nn/initializer/normal.py @@ -12,19 +12,99 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.initializer import NormalInitializer, TruncatedNormalInitializer +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class NormalInitializer(Initializer): + """Implements the Random Normal(Gaussian) distribution initializer + + Args: + loc (float, optional): mean of the normal distribution. Default is 0.0. + scale (float, optional): standard deviation of the normal distribution. Default is 1.0. + seed (int, optional): random seed. Default is 0. + + """ + + def __init__(self, loc=0.0, scale=1.0, seed=0): + assert loc is not None + assert scale is not None + assert seed is not None + super().__init__() + self._mean = loc + self._std_dev = scale + self._seed = seed + + def forward(self, var, block=None): + """Initialize the input tensor with Normal distribution. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(block, framework.Block) + + check_variable_and_dtype( + var, + "Out", + ["uint16", "float16", "float32", "float64"], + "guassian_random", + ) + + if self._seed == 0: + self._seed = block.program.random_seed + + if in_dygraph_mode(): + place = _current_expected_place() + out_var = _C_ops.gaussian( + var.shape, + self._mean, + self._std_dev, + self._seed, + var.dtype, + place, + ) + out_var._share_underline_tensor_to(var) + return None + + else: + op = block.append_op( + type="gaussian_random", + outputs={"Out": var}, + attrs={ + "shape": var.shape, + "dtype": var.dtype, + "mean": self._mean, + "std": self._std_dev, + "seed": self._seed, + "use_mkldnn": False, + }, + stop_gradient=True, + ) + var.op = op + return op + + class Normal(NormalInitializer): """The Random Normal (Gaussian) distribution initializer. Args: - mean (float, optional): mean of the normal distribution. The default value is 0.0. - std (float, optional): standard deviation of the normal distribution. The default value is 1.0. + mean (float, optional): mean of the normal distribution. Default is 0.0. + std (float, optional): standard deviation of the normal distribution. Default is 1.0. name(str, optional): The default value is None. Normally there is no need for user to set this - property. For more information, please refer to :ref:`api_guide_Name`. + property. For more information, please refer to :ref:`api_guide_Name`. Default: None. Returns: A parameter initialized by Random Normal (Gaussian) distribution. @@ -58,12 +138,113 @@ def __init__(self, mean=0.0, std=1.0, name=None): super().__init__(loc=mean, scale=std, seed=0) +class TruncatedNormalInitializer(Initializer): + """Implements the Random TruncatedNormal(Gaussian) distribution initializer + + Args: + loc (float, optional): Mean of the normal distribution. Default is :math:`0.0`. + scale (float, optional): Standard deviation of the normal distribution. Default is :math:`1.0`. + seed (int, optional): random seed. Default is 0. + + """ + + def __init__(self, loc=0.0, scale=1.0, seed=0): + assert loc is not None + assert scale is not None + assert seed is not None + super().__init__() + self._mean = loc + self._std_dev = scale + self._seed = seed + + def forward(self, var, block=None): + """Initialize the input tensor with TruncatedNormal distribution. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(var, framework.Variable) + assert isinstance(block, framework.Block) + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype in [core.VarDesc.VarType.FP16, core.VarDesc.VarType.BF16]: + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['truncated_gaussian_random', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + out_var = _C_ops.truncated_gaussian_random( + var.shape, + self._mean, + self._std_dev, + self._seed, + out_dtype, + _current_expected_place(), + ) + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + + else: + op = block.append_op( + type="truncated_gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "mean": self._mean, + "std": self._std_dev, + "seed": self._seed, + }, + stop_gradient=True, + ) + + if var.dtype in [ + core.VarDesc.VarType.FP16, + core.VarDesc.VarType.BF16, + ]: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + var.op = op + return op + + class TruncatedNormal(TruncatedNormalInitializer): """The truncated normal distribution (Gaussian distribution) initializer. Args: - mean (float, optional): Mean of the normal distribution. The default value is :math:`0.0`. - std (float, optional): Standard deviation of the normal distribution. The default value is :math:`1.0`. + mean (float, optional): Mean of the normal distribution. Default is :math:`0.0`. + std (float, optional): Standard deviation of the normal distribution. Default is :math:`1.0`. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: diff --git a/python/paddle/nn/initializer/orthogonal.py b/python/paddle/nn/initializer/orthogonal.py index 0bbfd9eaaaa86..65a496f2b1069 100644 --- a/python/paddle/nn/initializer/orthogonal.py +++ b/python/paddle/nn/initializer/orthogonal.py @@ -18,7 +18,7 @@ from ...fluid import framework from ...fluid.data_feeder import check_variable_and_dtype from ...fluid.dygraph import no_grad -from ...fluid.initializer import Initializer +from .initializer import Initializer __all__ = [] diff --git a/python/paddle/nn/initializer/uniform.py b/python/paddle/nn/initializer/uniform.py index 011cb6eff6dfa..cd64a15b7519e 100644 --- a/python/paddle/nn/initializer/uniform.py +++ b/python/paddle/nn/initializer/uniform.py @@ -12,17 +12,144 @@ # See the License for the specific language governing permissions and # limitations under the License. 
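The Normal and TruncatedNormal initializers rewritten above, like the Uniform initializer in the hunk that follows, are normally consumed through paddle.ParamAttr rather than instantiated and called by hand. A minimal dygraph sketch, assuming only the public paddle.nn.initializer API these hunks re-export (the layer sizes and std values are illustrative, not taken from the patch):

import paddle

# Each parameter picks its own initializer via ParamAttr; the forward()
# paths defined above run when the parameter is created.
linear = paddle.nn.Linear(
    in_features=8,
    out_features=4,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.TruncatedNormal(mean=0.0, std=0.02)
    ),
    bias_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Normal(mean=0.0, std=1.0)
    ),
)
print(linear.weight.shape)  # [8, 4]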
-from ...fluid.initializer import UniformInitializer +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class UniformInitializer(Initializer): + """Implements the random uniform distribution initializer + + Args: + low (float, optional): Lower boundary of the uniform distribution. Default is :math:`-1.0`. + high (float, optional): Upper boundary of the uniform distribution. Default is :math:`1.0`. + seed (int, optional): Random seed. Default is 0. + diag_num (int, optional): the number of diagonal elements to initialize. + If set to 0, diagonal initialization will be not performed. Default is 0. + diag_step (int, optional): Step size between two diagonal elements, + which is generally the width of the square matrix. Default is 0. + diag_val (float, optional): the value of the diagonal element to be initialized, + default 1.0. It takes effect only if the diag_num is greater than 0. Default is :math:`1.0`. + + """ + + def __init__( + self, low=-1.0, high=1.0, seed=0, diag_num=0, diag_step=0, diag_val=1.0 + ): + assert low is not None + assert high is not None + assert high >= low + assert seed is not None + assert diag_num is not None + assert diag_step is not None + assert diag_val is not None + if diag_num > 0 or diag_step > 0: + assert diag_num > 0 and diag_step > 0 + super().__init__() + self._low = low + self._high = high + self._seed = seed + self._diag_num = diag_num + self._diag_step = diag_step + self._diag_val = diag_val + + def forward(self, var, block=None): + """Initialize the input tensor with Uniform distribution. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. 
+ + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(block, framework.Block) + if not in_dygraph_mode(): + check_variable_and_dtype( + var, + "Out", + ["uint16", "float16", "float32", "float64"], + "uniform_random", + ) + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initializers + if var.dtype == core.VarDesc.VarType.FP16: + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['uniform_random', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + out_var = _C_ops.uniform( + var.shape, + out_dtype, + self._low, + self._high, + self._seed, + _current_expected_place(), + ) + if var.dtype == core.VarDesc.VarType.FP16: + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + op = block.append_op( + type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": var.shape, + "dtype": out_dtype, + "min": self._low, + "max": self._high, + "seed": self._seed, + "diag_num": self._diag_num, + "diag_step": self._diag_step, + "diag_val": self._diag_val, + }, + stop_gradient=True, + ) + + if var.dtype == core.VarDesc.VarType.FP16: + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class Uniform(UniformInitializer): """The uniform distribution initializer. Args: - low (float, optional): Lower boundary of the uniform distribution. The default value is :math:`-1.0`. - high (float, optional): Upper boundary of the uniform distribution. The default value is :math:`1.0`. + low (float, optional): Lower boundary of the uniform distribution. Default is :math:`-1.0`. + high (float, optional): Upper boundary of the uniform distribution. Default is :math:`1.0`. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: diff --git a/python/paddle/nn/initializer/xavier.py b/python/paddle/nn/initializer/xavier.py index 35e104edba111..6d17c029f587c 100644 --- a/python/paddle/nn/initializer/xavier.py +++ b/python/paddle/nn/initializer/xavier.py @@ -12,11 +12,183 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.initializer import XavierInitializer +import math + +from paddle import _C_ops + +from ...fluid import core, framework, unique_name +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.framework import _current_expected_place, in_dygraph_mode +from .initializer import Initializer __all__ = [] +class XavierInitializer(Initializer): + r""" + This class implements the Xavier weight initializer from the paper + `Understanding the difficulty of training deep feedforward neural + networks `_ + by Xavier Glorot and Yoshua Bengio. + + This initializer is designed to keep the scale of the gradients + approximately same in all the layers. In case of Uniform distribution, + the range is [-x, x], where + + .. math:: + + x = \sqrt{\\frac{6.0}{fan\_in + fan\_out}} + + In case of Normal distribution, the mean is 0 and the standard deviation + is + + .. 
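A similar sketch for the uniform path, assuming the public Uniform wrapper defined later in this file on top of the UniformInitializer above; the bounds and the seed are illustrative assumptions:

import paddle

paddle.seed(2023)  # illustrative seed so the uniform_random draw is repeatable
conv = paddle.nn.Conv2D(
    in_channels=3,
    out_channels=16,
    kernel_size=3,
    weight_attr=paddle.ParamAttr(
        initializer=paddle.nn.initializer.Uniform(low=-0.1, high=0.1)
    ),
)
# Every element lands inside [low, high].
assert float(conv.weight.abs().max()) <= 0.1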
math:: + + \sqrt{\\frac{2.0}{fan\_in + fan\_out}} + + + Args: + uniform (bool, optional): whether to use uniform ,if False use normal distribution. Default is True. + fan_in (float, optional): fan_in for Xavier initialization. If None, it is + inferred from the variable. Default is None. + fan_out (float, optional): fan_out for Xavier initialization. If None, it is + inferred from the variable. Default is None. + seed (int, optional): Random seed. Default is 0. + + Note: + It is recommended to set fan_in and fan_out to None for most cases. + + """ + + def __init__(self, uniform=True, fan_in=None, fan_out=None, seed=0): + assert uniform is not None + assert seed is not None + super().__init__() + self._uniform = uniform + self._fan_in = fan_in + self._fan_out = fan_out + self._seed = seed + + def forward(self, var, block=None): + """Initialize the input tensor with Xavier initialization. + + Args: + var(Tensor): Tensor that needs to be initialized. + block(Block, optional): The block in which initialization ops + should be added. Used in static graph only, default None. + + Returns: + The initialization op + """ + block = self._check_block(block) + + assert isinstance(block, framework.Block) + check_variable_and_dtype( + var, + "Out", + ["uint16", "float16", "float32", "float64"], + "xavier_init", + ) + + f_in, f_out = self._compute_fans(var) + + # If fan_in and fan_out are passed, use them + fan_in = f_in if self._fan_in is None else self._fan_in + fan_out = f_out if self._fan_out is None else self._fan_out + + if self._seed == 0: + self._seed = block.program.random_seed + + # to be compatible of fp16 initalizers + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + out_dtype = core.VarDesc.VarType.FP32 + out_var = block.create_var( + name=unique_name.generate( + ".".join(['xavier_init', var.name, 'tmp']) + ), + shape=var.shape, + dtype=out_dtype, + type=core.VarDesc.VarType.LOD_TENSOR, + persistable=False, + ) + else: + out_dtype = var.dtype + out_var = var + + if in_dygraph_mode(): + if self._uniform: + limit = math.sqrt(6.0 / float(fan_in + fan_out)) + out_var = _C_ops.uniform( + out_var.shape, + out_dtype, + -limit, + limit, + self._seed, + _current_expected_place(), + ) + else: + std = math.sqrt(2.0 / float(fan_in + fan_out)) + + place = _current_expected_place() + out_var = _C_ops.gaussian( + out_var.shape, 0.0, std, self._seed, out_dtype, place + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + var_tmp = _C_ops.cast(out_var, var.dtype) + var_tmp._share_underline_tensor_to(var) + else: + out_var._share_underline_tensor_to(var) + return None + else: + if self._uniform: + limit = math.sqrt(6.0 / float(fan_in + fan_out)) + op = block.append_op( + type="uniform_random", + inputs={}, + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": out_dtype, + "min": -limit, + "max": limit, + "seed": self._seed, + }, + stop_gradient=True, + ) + else: + std = math.sqrt(2.0 / float(fan_in + fan_out)) + op = block.append_op( + type="gaussian_random", + outputs={"Out": out_var}, + attrs={ + "shape": out_var.shape, + "dtype": out_var.dtype, + "mean": 0.0, + "std": std, + "seed": self._seed, + }, + stop_gradient=True, + ) + + if var.dtype == core.VarDesc.VarType.FP16 or ( + var.dtype == core.VarDesc.VarType.BF16 and not self._uniform + ): + block.append_op( + type="cast", + inputs={"X": out_var}, + outputs={"Out": var}, + attrs={"in_dtype": 
out_var.dtype, "out_dtype": var.dtype}, + ) + + var.op = op + return op + + class XavierNormal(XavierInitializer): r""" This class implements the Xavier weight initializer from the paper @@ -31,9 +203,9 @@ class XavierNormal(XavierInitializer): Args: fan_in (float, optional): fan_in for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. fan_out (float, optional): fan_out for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: @@ -83,9 +255,9 @@ class XavierUniform(XavierInitializer): Args: fan_in (float, optional): fan_in for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. fan_out (float, optional): fan_out for Xavier initialization, which is - inferred from the Tensor. The default value is None. + inferred from the Tensor. Default is None. name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None. Returns: diff --git a/python/paddle/nn/layer/rnn.py b/python/paddle/nn/layer/rnn.py index 2617c76ae6e79..4bf31ca30ea28 100644 --- a/python/paddle/nn/layer/rnn.py +++ b/python/paddle/nn/layer/rnn.py @@ -20,15 +20,20 @@ import paddle from paddle import _C_ops, _legacy_C_ops, framework, in_dynamic_mode +from paddle.common_ops_import import Variable from paddle.fluid.data_feeder import check_type, check_variable_and_dtype -from paddle.fluid.framework import _non_static_mode, in_dygraph_mode +from paddle.fluid.framework import ( + _non_static_mode, + default_startup_program, + in_dygraph_mode, + program_guard, +) from paddle.fluid.layers import control_flow, sequence_lod, utils from paddle.fluid.layers.utils import flatten, map_structure from paddle.framework import core from paddle.nn import Layer from paddle.nn import functional as F from paddle.nn import initializer as I -from paddle.static import Variable, default_startup_program, program_guard from paddle.tensor.manipulation import tensor_array_to_tensor from .container import LayerList diff --git a/python/paddle/optimizer/optimizer.py b/python/paddle/optimizer/optimizer.py index cad226952be41..d9e1cd456042c 100644 --- a/python/paddle/optimizer/optimizer.py +++ b/python/paddle/optimizer/optimizer.py @@ -34,7 +34,6 @@ from ..fluid import framework, unique_name from ..fluid.backward import _get_no_grad_set_name, append_backward from ..fluid.framework import Parameter, program_guard -from ..fluid.initializer import Constant from ..fluid.layer_helper import LayerHelper from .lr import LRScheduler @@ -453,7 +452,8 @@ def _create_global_learning_rate(self): lr_value = float(self._learning_rate()) self.helper.set_variable_initializer( - lr_var, initializer=Constant(value=lr_value) + lr_var, + initializer=paddle.nn.initializer.Constant(value=lr_value), ) elif isinstance(self._learning_rate, float): # only create global lr_var once @@ -726,7 +726,10 @@ def _add_accumulator( else: with device_guard(device): self.helper.set_variable_initializer( - var, initializer=Constant(value=float(fill_value)) + var, + initializer=paddle.nn.initializer.Constant( + value=float(fill_value) + ), ) if framework._non_static_mode(): diff --git a/python/paddle/static/nn/common.py b/python/paddle/static/nn/common.py index 
1581f299214df..ef49b5642a37c 100644 --- a/python/paddle/static/nn/common.py +++ b/python/paddle/static/nn/common.py @@ -28,9 +28,9 @@ from paddle.fluid import core from paddle.fluid.data_feeder import check_dtype from paddle.fluid.framework import Variable, _non_static_mode, static_only -from paddle.fluid.initializer import Constant, Normal from paddle.fluid.layers.layer_function_generator import templatedoc from paddle.fluid.param_attr import ParamAttr +from paddle.nn.initializer import Constant, Normal __all__ = [] @@ -1012,7 +1012,7 @@ def _get_default_param_initializer(): "filter size.".format(filter_elem_num) ) std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) filter_param = helper.create_parameter( attr=helper.param_attr, @@ -1315,7 +1315,7 @@ def _get_default_param_initializer(): ) std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) filter_param = helper.create_parameter( attr=helper.param_attr, @@ -2286,7 +2286,7 @@ def _get_default_param_initializer(): "filter size.".format(filter_elem_num) ) std = (2.0 / filter_elem_num) ** 0.5 - return paddle.nn.initializer.normal.NormalInitializer(0.0, std, 0) + return paddle.nn.initializer.normal.Normal(0.0, std) filter_param = helper.create_parameter( attr=helper.param_attr, @@ -2757,7 +2757,7 @@ def batch_norm( attr=helper.param_attr, shape=param_shape, dtype=dtype, - default_initializer=paddle.fluid.initializer.Constant(1.0), + default_initializer=paddle.nn.initializer.Constant(1.0), ) bias = helper.create_parameter( attr=helper.bias_attr, shape=param_shape, dtype=dtype, is_bias=True @@ -2766,7 +2766,7 @@ def batch_norm( mean = helper.create_parameter( attr=paddle.ParamAttr( name=moving_mean_name, - initializer=paddle.fluid.initializer.Constant(0.0), + initializer=paddle.nn.initializer.Constant(0.0), trainable=False, do_model_average=do_model_average_for_mean_and_var, ), @@ -2778,7 +2778,7 @@ def batch_norm( variance = helper.create_parameter( attr=paddle.ParamAttr( name=moving_variance_name, - initializer=paddle.fluid.initializer.Constant(1.0), + initializer=paddle.nn.initializer.Constant(1.0), trainable=False, do_model_average=do_model_average_for_mean_and_var, ), diff --git a/python/paddle/static/nn/loss.py b/python/paddle/static/nn/loss.py index 20c7641e2d9de..3f464928c289d 100644 --- a/python/paddle/static/nn/loss.py +++ b/python/paddle/static/nn/loss.py @@ -16,12 +16,12 @@ import numpy as np from paddle.fluid.framework import static_only -from paddle.fluid.initializer import NumpyArrayInitializer # TODO: define loss functions of neural network from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers.layer_function_generator import templatedoc from paddle.fluid.param_attr import ParamAttr +from paddle.nn.initializer import Assign from ...fluid.data_feeder import check_variable_and_dtype @@ -209,7 +209,7 @@ def _init_by_numpy_array(numpy_array): attr=ParamAttr(), shape=numpy_array.shape, dtype=numpy_array.dtype, - default_initializer=NumpyArrayInitializer(numpy_array), + default_initializer=Assign(numpy_array), ) ret.stop_gradient = True return ret diff --git a/python/paddle/static/nn/metric.py b/python/paddle/static/nn/metric.py index 7406525b9df0a..bcb3cfc130fcd 100644 --- a/python/paddle/static/nn/metric.py +++ b/python/paddle/static/nn/metric.py @@ -18,9 +18,9 @@ from paddle import _legacy_C_ops from paddle.fluid.data_feeder import check_variable_and_dtype from paddle.fluid.framework import Variable, _non_static_mode, _varbase_creator 
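The static/nn/common.py hunks above swap the removed three-argument fluid Normal(0.0, std, 0) for the two-argument paddle.nn.initializer.Normal, which takes no seed. A hedged sketch of the same default-initializer pattern; the helper name and sizes here are illustrative, not part of the patch:

import paddle

def _default_conv_weight_initializer(kernel_size, in_channels):
    # Mirrors the _get_default_param_initializer pattern above: std is derived
    # from the filter size, and the new public Normal takes mean/std only.
    filter_elem_num = in_channels * kernel_size[0] * kernel_size[1]
    std = (2.0 / filter_elem_num) ** 0.5
    return paddle.nn.initializer.Normal(0.0, std)

weight_attr = paddle.ParamAttr(
    initializer=_default_conv_weight_initializer((3, 3), in_channels=16)
)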
-from paddle.fluid.initializer import Constant from paddle.fluid.layer_helper import LayerHelper from paddle.fluid.layers import tensor +from paddle.nn.initializer import ConstantInitializer __all__ = [] @@ -266,7 +266,8 @@ def auc( for var in [batch_stat_pos, batch_stat_neg, stat_pos, stat_neg]: helper.set_variable_initializer( - var, Constant(value=0.0, force_cpu=False) + var, + ConstantInitializer(value=0.0, force_cpu=False), ) # "InsTagWeight": [ins_tag_weight] diff --git a/python/paddle/tensor/array.py b/python/paddle/tensor/array.py index 70b606c3c6fbe..84fc94b5eec85 100644 --- a/python/paddle/tensor/array.py +++ b/python/paddle/tensor/array.py @@ -14,9 +14,9 @@ # Define functions about array. +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..framework import LayerHelper, core, in_dygraph_mode -from ..static import Variable __all__ = [] diff --git a/python/paddle/tensor/attribute.py b/python/paddle/tensor/attribute.py index 37a1aaf3c86d9..c79c9553c2f08 100644 --- a/python/paddle/tensor/attribute.py +++ b/python/paddle/tensor/attribute.py @@ -19,10 +19,10 @@ import paddle from paddle import _C_ops +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import in_dygraph_mode from ..framework import LayerHelper, core -from ..static import Variable from .creation import _complex_to_real_dtype, assign __all__ = [] diff --git a/python/paddle/tensor/creation.py b/python/paddle/tensor/creation.py index 7523845c2b8b2..808e4d86d6032 100644 --- a/python/paddle/tensor/creation.py +++ b/python/paddle/tensor/creation.py @@ -35,7 +35,6 @@ _in_eager_without_dygraph_check, device_guard, ) -from ..fluid.initializer import Constant, Initializer from ..fluid.layers import utils from ..fluid.param_attr import ParamAttr from ..framework import ( @@ -140,7 +139,10 @@ def create_global_var( stop_gradient=True, ) helper.set_variable_initializer( - var, initializer=Constant(value=float(value), force_cpu=force_cpu) + var, + initializer=paddle.nn.initializer.ConstantInitializer( + value=float(value), force_cpu=force_cpu + ), ) return var @@ -214,7 +216,7 @@ def create_parameter( check_type( default_initializer, 'default_initializer', - (type(None), Initializer), + (type(None), paddle.nn.initializer.Initializer), 'create_parameter', ) diff --git a/python/paddle/tensor/layer_function_generator.py b/python/paddle/tensor/layer_function_generator.py index 299e41d2aea94..6d9c5fe288057 100644 --- a/python/paddle/tensor/layer_function_generator.py +++ b/python/paddle/tensor/layer_function_generator.py @@ -19,6 +19,7 @@ from paddle import _C_ops, _legacy_C_ops +from ..common_ops_import import Variable from ..fluid.data_feeder import check_variable_and_dtype from ..fluid.proto import framework_pb2 from ..framework import ( @@ -28,7 +29,6 @@ core, in_dygraph_mode, ) -from ..static import Variable __all__ = [] diff --git a/python/paddle/tensor/linalg.py b/python/paddle/tensor/linalg.py index 10c8c24a78724..c59202977fde9 100644 --- a/python/paddle/tensor/linalg.py +++ b/python/paddle/tensor/linalg.py @@ -18,13 +18,13 @@ from paddle import _C_ops from paddle.common_ops_import import VarDesc +from ..common_ops_import import Variable from ..fluid.data_feeder import ( check_dtype, check_type, check_variable_and_dtype, ) from ..framework import LayerHelper, in_dygraph_mode -from ..static import Variable from .creation import full from .logic import logical_not from .manipulation 
import cast diff --git a/python/paddle/tensor/logic.py b/python/paddle/tensor/logic.py old mode 100755 new mode 100644 index 375f3614e5e30..ad6c30e319a81 --- a/python/paddle/tensor/logic.py +++ b/python/paddle/tensor/logic.py @@ -16,9 +16,9 @@ import paddle +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import global_var -from ..static import Variable from .layer_function_generator import templatedoc if global_var._in_eager_mode_: diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index b5308e6cee63d..b9feee2fe1dd9 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -20,7 +20,7 @@ from paddle import _C_ops from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only -from ..common_ops_import import fill_constant +from ..common_ops_import import Variable, fill_constant from ..fluid.data_feeder import ( check_dtype, check_type, @@ -35,7 +35,6 @@ dygraph_only, in_dygraph_mode, ) -from ..static import Variable from .creation import _complex_to_real_dtype, _real_to_complex_dtype, zeros __all__ = [] diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 81b092f4c38b4..6f797b82e1d08 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -25,6 +25,7 @@ # TODO: define math functions from paddle.utils.inplace_utils import inplace_apis_in_dygraph_only +from ..common_ops_import import Variable from ..fluid.data_feeder import ( check_dtype, check_type, @@ -38,7 +39,6 @@ core, in_dygraph_mode, ) -from ..static import Variable from .creation import _complex_to_real_dtype from .layer_function_generator import generate_layer_fn, templatedoc from .manipulation import cast diff --git a/python/paddle/tensor/random.py b/python/paddle/tensor/random.py index 59958df236131..ff48780423fd6 100644 --- a/python/paddle/tensor/random.py +++ b/python/paddle/tensor/random.py @@ -16,8 +16,8 @@ import paddle from paddle import _C_ops, _legacy_C_ops +from paddle.common_ops_import import Variable from paddle.fluid.framework import _current_expected_place, in_dygraph_mode -from paddle.static import Variable from ..fluid.data_feeder import ( check_dtype, diff --git a/python/paddle/tensor/stat.py b/python/paddle/tensor/stat.py index cc94aee415541..f9784478393dc 100644 --- a/python/paddle/tensor/stat.py +++ b/python/paddle/tensor/stat.py @@ -18,9 +18,9 @@ from paddle import _C_ops, _legacy_C_ops from paddle.fluid.framework import in_dygraph_mode +from ..common_ops_import import Variable from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..framework import LayerHelper, core -from ..static import Variable from .math import _get_reduce_axis_with_tensor from .search import where diff --git a/python/paddle/vision/ops.py b/python/paddle/vision/ops.py index 0d43bd0fc54ce..2cd582884abf4 100755 --- a/python/paddle/vision/ops.py +++ b/python/paddle/vision/ops.py @@ -19,11 +19,11 @@ from ..fluid.data_feeder import check_type, check_variable_and_dtype from ..fluid.framework import Variable, in_dygraph_mode -from ..fluid.initializer import Normal from ..fluid.layer_helper import LayerHelper from ..fluid.layers import utils from ..framework import _current_expected_place from ..nn import BatchNorm2D, Conv2D, Layer, ReLU, Sequential +from ..nn.initializer import Normal __all__ = [ # noqa 'yolo_loss', @@ -1120,7 +1120,7 @@ def __init__( def _get_default_param_initializer(): filter_elem_num = 
np.prod(self._kernel_size) * self._in_channels std = (2.0 / filter_elem_num) ** 0.5 - return Normal(0.0, std, 0) + return Normal(0.0, std) self.weight = self.create_parameter( shape=filter_shape, From ec6e0a2c117d0763fe5e6d0eeff238bf4bd5b97b Mon Sep 17 00:00:00 2001 From: Hui Zhang Date: Thu, 2 Feb 2023 09:43:40 +0800 Subject: [PATCH 86/89] jit layer optimzer model param memory usage (#50135) * jit layer support multi thread --- paddle/fluid/jit/engine/interpreter_engine.cc | 7 ++--- paddle/fluid/jit/engine/interpreter_engine.h | 4 +-- paddle/fluid/jit/engine/predictor_engine.cc | 26 +++++++++++-------- paddle/fluid/jit/engine/predictor_engine.h | 3 ++- paddle/fluid/jit/function_utils.cc | 6 ++--- paddle/fluid/jit/function_utils.h | 4 +-- paddle/fluid/jit/layer.cc | 8 +++--- paddle/fluid/jit/layer.h | 8 +++--- paddle/fluid/jit/serializer.cc | 26 +++++++++++-------- paddle/fluid/jit/serializer.h | 4 +-- 10 files changed, 53 insertions(+), 43 deletions(-) diff --git a/paddle/fluid/jit/engine/interpreter_engine.cc b/paddle/fluid/jit/engine/interpreter_engine.cc index 36f8a2271d1ef..b16d0c98dee81 100644 --- a/paddle/fluid/jit/engine/interpreter_engine.cc +++ b/paddle/fluid/jit/engine/interpreter_engine.cc @@ -25,9 +25,10 @@ namespace paddle { namespace jit { -InterpreterEngine::InterpreterEngine(const std::shared_ptr &info, - const VariableMap ¶ms_dict, - const phi::Place &place) +InterpreterEngine::InterpreterEngine( + const std::shared_ptr &info, + const std::shared_ptr ¶ms_dict, + const phi::Place &place) : info_(info), params_dict_(params_dict), place_(place) { info_->RemoveDescFeedFetch(); PADDLE_ENFORCE_GT( diff --git a/paddle/fluid/jit/engine/interpreter_engine.h b/paddle/fluid/jit/engine/interpreter_engine.h index d7aa5d610a50e..367bc1b86dcc6 100644 --- a/paddle/fluid/jit/engine/interpreter_engine.h +++ b/paddle/fluid/jit/engine/interpreter_engine.h @@ -36,7 +36,7 @@ using InterpreterCore = framework::InterpreterCore; class InterpreterEngine : public BaseEngine { public: InterpreterEngine(const std::shared_ptr &info, - const VariableMap ¶ms_dict, + const std::shared_ptr ¶ms_dict, const phi::Place &place); ~InterpreterEngine() noexcept {} @@ -54,7 +54,7 @@ class InterpreterEngine : public BaseEngine { private: std::shared_ptr info_; - VariableMap params_dict_; + std::shared_ptr params_dict_; framework::Scope scope_; phi::Place place_; std::shared_ptr inner_interpreter_; diff --git a/paddle/fluid/jit/engine/predictor_engine.cc b/paddle/fluid/jit/engine/predictor_engine.cc index bac6f993b04f6..d18f4f487dbe2 100644 --- a/paddle/fluid/jit/engine/predictor_engine.cc +++ b/paddle/fluid/jit/engine/predictor_engine.cc @@ -27,11 +27,15 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, DenseTensor *t, const platform::Place &place); -PredictorEngine::PredictorEngine(const std::shared_ptr &info, - const VariableMap ¶ms_dict, - const phi::Place &place) - : info_(info), scope_(new framework::Scope()), place_(place) { - utils::ShareParamsIntoScope(info_->ParamNames(), params_dict, scope_.get()); +PredictorEngine::PredictorEngine( + const std::shared_ptr &info, + const std::shared_ptr ¶ms_dict, + const phi::Place &place) + : info_(info), + params_dict_(params_dict), + scope_(new framework::Scope()), + place_(place) { + utils::ShareParamsIntoScope(info_->ParamNames(), params_dict_, scope_.get()); VLOG(6) << framework::GenScopeTreeDebugInfo(scope_.get()); // TODO(Aurelius84): Expose AnalysisConfig to user. 
@@ -66,6 +70,12 @@ PredictorEngine::PredictorEngine( predictor_(std::dynamic_pointer_cast( predictor)) {} +std::unique_ptr PredictorEngine::Clone(void *stream) { + auto *x = new PredictorEngine( + info_, scope_, place_, std::move(predictor_->Clone(stream))); + return std::unique_ptr(x); +} + std::vector PredictorEngine::operator()( const std::vector &inputs) { auto dense_tensors = utils::ToDenseTensors(inputs); @@ -199,11 +209,5 @@ static bool PaddleTensorToDenseTensor(const PaddleTensor &pt, return true; } -std::unique_ptr PredictorEngine::Clone(void *stream) { - auto *x = new PredictorEngine( - info_, scope_, place_, std::move(predictor_->Clone(stream))); - return std::unique_ptr(x); -} - } // namespace jit } // namespace paddle diff --git a/paddle/fluid/jit/engine/predictor_engine.h b/paddle/fluid/jit/engine/predictor_engine.h index ad07a7a7ffbf5..b2da6f4210a37 100644 --- a/paddle/fluid/jit/engine/predictor_engine.h +++ b/paddle/fluid/jit/engine/predictor_engine.h @@ -31,7 +31,7 @@ namespace jit { class PredictorEngine : public BaseEngine { public: PredictorEngine(const std::shared_ptr &info, - const VariableMap ¶ms_dict, + const std::shared_ptr ¶ms_dict, const phi::Place &place); PredictorEngine(const std::shared_ptr &info, @@ -50,6 +50,7 @@ class PredictorEngine : public BaseEngine { private: std::shared_ptr info_; + std::shared_ptr params_dict_; std::shared_ptr scope_; phi::Place place_; std::shared_ptr predictor_; diff --git a/paddle/fluid/jit/function_utils.cc b/paddle/fluid/jit/function_utils.cc index b67b5ba5b0518..3bd8c23411315 100644 --- a/paddle/fluid/jit/function_utils.cc +++ b/paddle/fluid/jit/function_utils.cc @@ -71,18 +71,18 @@ void ShareIntoScope(const std::vector &ordered_input_names, } void ShareParamsIntoScope(const std::vector ¶m_names, - const VariableMap ¶ms_dict, + const std::shared_ptr ¶ms_dict, framework::Scope *scope) { for (size_t i = 0; i < param_names.size(); ++i) { std::string name = param_names[i]; - PADDLE_ENFORCE_EQ(params_dict.count(name), + PADDLE_ENFORCE_EQ(params_dict->count(name), 1, phi::errors::InvalidArgument( "Parameter named %s is not existed in params_dict. 
" "Please check that your model was saved correctly", name)); - auto ¶m = params_dict.find(name)->second; + auto ¶m = params_dict->find(name)->second; auto &dense_tensor = param->Get(); auto *var = scope->Var(name); auto *dst_tensor = var->GetMutable(); diff --git a/paddle/fluid/jit/function_utils.h b/paddle/fluid/jit/function_utils.h index d61b720cec88f..5daa5ada200f4 100644 --- a/paddle/fluid/jit/function_utils.h +++ b/paddle/fluid/jit/function_utils.h @@ -51,14 +51,14 @@ void ShareIntoScope(const std::vector &ordered_input_names, framework::Scope *scope); void ShareParamsIntoScope(const std::vector ¶m_names, - const VariableMap ¶ms_dict, + const std::shared_ptr ¶ms_dict, framework::Scope *scope); void RemoveFeedFetch(framework::ProgramDesc *program_desc); template std::shared_ptr MakeEngine(const std::shared_ptr &info, - const VariableMap ¶ms_dict, + const std::shared_ptr ¶ms_dict, const phi::Place &place) { return std::make_shared(info, params_dict, place); } diff --git a/paddle/fluid/jit/layer.cc b/paddle/fluid/jit/layer.cc index 332c53a8e3649..2e8dba0f5a731 100644 --- a/paddle/fluid/jit/layer.cc +++ b/paddle/fluid/jit/layer.cc @@ -26,8 +26,8 @@ namespace paddle { namespace jit { -Layer::Layer(const VariableMap& params_map, - const VariableMap& attrs_map, +Layer::Layer(const std::shared_ptr& params_map, + const std::shared_ptr& attrs_map, const FunctionInfoMap& info_map, const phi::Place& place) : params_map_(params_map), @@ -80,12 +80,12 @@ std::vector Layer::FunctionNames() const { #define PD_SPECIALZE_ATTRIBUTE_TYPE(T) \ template <> \ T Layer::Attribute(const std::string& name) const { \ - if (attrs_map_.find(name) == attrs_map_.end()) { \ + if (attrs_map_->find(name) == attrs_map_->end()) { \ PADDLE_THROW(phi::errors::NotFound( \ "Attribute can not found %s, please check if it exists.")); \ return T(); \ } \ - auto var = attrs_map_.at(name); \ + auto var = attrs_map_->at(name); \ T ret = var->Get(); \ return ret; \ } diff --git a/paddle/fluid/jit/layer.h b/paddle/fluid/jit/layer.h index ed8b739a0b72f..4f76a41d06f3e 100644 --- a/paddle/fluid/jit/layer.h +++ b/paddle/fluid/jit/layer.h @@ -43,8 +43,8 @@ using FunctionInfoMap = class Layer { public: - Layer(const VariableMap& params_map, - const VariableMap& attrs_map_, + Layer(const std::shared_ptr& params_map, + const std::shared_ptr& attrs_map_, const FunctionInfoMap& info_map, const phi::Place& place); @@ -70,8 +70,8 @@ class Layer { std::shared_ptr Clone(void* stream = nullptr); private: - VariableMap params_map_; - VariableMap attrs_map_; + std::shared_ptr params_map_; + std::shared_ptr attrs_map_; FunctionInfoMap info_map_; phi::Place place_; std::shared_ptr unit_; diff --git a/paddle/fluid/jit/serializer.cc b/paddle/fluid/jit/serializer.cc index 436717a8dc389..21a187ad67100 100644 --- a/paddle/fluid/jit/serializer.cc +++ b/paddle/fluid/jit/serializer.cc @@ -58,12 +58,12 @@ Layer Deserializer::operator()(const std::string& path, info_map[func_name]->SetProgramFilePath(it.second); } - VariableMap params_dict; - VariableMap attrs_dict; - ReadTensorData(path + PDPARAMS_SUFFIX, param_names_set, place, ¶ms_dict); + auto params_dict = std::make_shared(); + auto attrs_dict = std::make_shared(); + ReadTensorData(path + PDPARAMS_SUFFIX, param_names_set, place, params_dict); if (utils::FileExists(path + PROPERTY_SUFFIX)) { - ReadAttributeData(path + PROPERTY_SUFFIX, &attrs_dict); + ReadAttributeData(path + PROPERTY_SUFFIX, attrs_dict); VLOG(3) << "Read Property Success!"; } @@ -90,10 +90,11 @@ Layer Deserializer::operator()(const 
std::string& path, return layer; } -void Deserializer::ReadTensorData(const std::string& file_name, - const std::set& var_name, - const phi::Place& place, - VariableMap* params_dict) const { +void Deserializer::ReadTensorData( + const std::string& file_name, + const std::set& var_name, + const phi::Place& place, + std::shared_ptr params_dict) const { VLOG(3) << "ReadTensorData from: " << file_name; std::ifstream fin(file_name, std::ios::binary); platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); @@ -108,12 +109,15 @@ void Deserializer::ReadTensorData(const std::string& file_name, } } -void Deserializer::ReadAttributeData(const std::string& file_path, - VariableMap* attrs_dict) const { +void Deserializer::ReadAttributeData( + const std::string& file_path, + std::shared_ptr attrs_dict) const { VLOG(3) << "ReadPropertyData from: " << file_path; Property p; p.Deserialization(file_path); - *attrs_dict = static_cast(p.Values()); + for (auto& it : p.Values()) { + attrs_dict->emplace(it.first, it.second); + } return; } diff --git a/paddle/fluid/jit/serializer.h b/paddle/fluid/jit/serializer.h index b93eaa44fe632..926e9a6afda37 100644 --- a/paddle/fluid/jit/serializer.h +++ b/paddle/fluid/jit/serializer.h @@ -55,11 +55,11 @@ class Deserializer { void ReadTensorData(const std::string& file_name, const std::set& var_name, const phi::Place& place, - VariableMap* params_dict) const; + std::shared_ptr params_dict) const; // property pb void ReadAttributeData(const std::string& file_path, - VariableMap* attrs_dict) const; + std::shared_ptr attrs_dict) const; // void ReadExtraInfo(const std::string& file_name) const; From 14dd68e1d7d05552f0f5de02adb5de76271f71d0 Mon Sep 17 00:00:00 2001 From: liuruyan <44316842+liuruyan@users.noreply.github.com> Date: Thu, 2 Feb 2023 10:15:18 +0800 Subject: [PATCH 87/89] Fix the FP16 precision problem of add_n. (#50129) --- paddle/phi/kernels/gpu/add_n_kernel.cu | 20 +++--- .../fluid/tests/unittests/test_add_n_op.py | 64 +++++++++++++++++++ 2 files changed, 75 insertions(+), 9 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_add_n_op.py diff --git a/paddle/phi/kernels/gpu/add_n_kernel.cu b/paddle/phi/kernels/gpu/add_n_kernel.cu index f32ba597f5b68..69bc248a7e2f2 100644 --- a/paddle/phi/kernels/gpu/add_n_kernel.cu +++ b/paddle/phi/kernels/gpu/add_n_kernel.cu @@ -14,11 +14,10 @@ #include "paddle/phi/kernels/add_n_kernel.h" -#include "paddle/phi/kernels/impl/add_n_kernel_impl.h" - #include "paddle/fluid/memory/malloc.h" #include "paddle/fluid/memory/memcpy.h" - +#include "paddle/phi/common/amp_type_traits.h" +#include "paddle/phi/kernels/impl/add_n_kernel_impl.h" namespace phi { #define CEIL_DIV(x, y) (((x) + (y)-1) / (y)) @@ -38,16 +37,18 @@ __global__ void Sum2CUDAKernel(const T *in_0, template __global__ void SumArrayCUDAKernel( T **in, T *out, int64_t N, size_t in_size, bool read_dst) { + using MPType = typename phi::dtype::MPTypeTrait::Type; int id = blockIdx.x * blockDim.x + threadIdx.x; while (id < N) { - T total(read_dst ? out[id] : static_cast(0)); + MPType total(read_dst ? 
static_cast(out[id]) + : static_cast(0)); for (int i = 0; i < in_size; ++i) { const T *tmp = in[i]; if (tmp) { - total += tmp[id]; + total += static_cast(tmp[id]); } } - out[id] = total; + out[id] = static_cast(total); id += blockDim.x * gridDim.x; } } @@ -116,11 +117,12 @@ void AddNKernel(const Context &dev_ctx, int64_t length_0 = in_0.numel(); int64_t length_1 = in_1.numel(); if (length_0 && length_1 && in_0.IsInitialized() && in_1.IsInitialized()) { + using MPType = typename phi::dtype::MPTypeTrait::Type; auto result = EigenVector::Flatten(*out); auto &place = *dev_ctx.eigen_device(); - auto in_0_e = EigenVector::Flatten(in_0); - auto in_1_e = EigenVector::Flatten(in_1); - result.device(place) = in_0_e + in_1_e; + auto in_0_e = EigenVector::Flatten(in_0).template cast(); + auto in_1_e = EigenVector::Flatten(in_1).template cast(); + result.device(place) = (in_0_e + in_1_e).template cast(); } else if (length_0 && in_0.IsInitialized()) { auto result = EigenVector::Flatten(*out); auto &place = *dev_ctx.eigen_device(); diff --git a/python/paddle/fluid/tests/unittests/test_add_n_op.py b/python/paddle/fluid/tests/unittests/test_add_n_op.py new file mode 100644 index 0000000000000..3ca485b1419fd --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_add_n_op.py @@ -0,0 +1,64 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import unittest + +import numpy as np + +import paddle + + +class TestAddnOp(unittest.TestCase): + def setUp(self): + np.random.seed(20) + l = 32 + self.x_np = np.random.random([l, 16, 256]) + + def check_main(self, x_np, dtype, axis=None): + paddle.disable_static() + x = [] + for i in range(x_np.shape[0]): + val = paddle.to_tensor(x_np[i].astype(dtype)) + val.stop_gradient = False + x.append(val) + y = paddle.add_n(x) + x_g = paddle.grad(y, x) + y_np = y.numpy().astype('float32') + x_g_np = [] + for val in x_g: + x_g_np.append(val.numpy().astype('float32')) + paddle.enable_static() + return y_np, x_g_np + + def test_add_n_fp16(self): + if not paddle.is_compiled_with_cuda(): + return + y_np_16, x_g_np_16 = self.check_main(self.x_np, 'float16') + y_np_32, x_g_np_32 = self.check_main(self.x_np, 'float32') + + np.testing.assert_allclose(y_np_16, y_np_32, rtol=1e-03) + for i in range(len(x_g_np_32)): + np.testing.assert_allclose(x_g_np_16[i], x_g_np_32[i], rtol=1e-03) + + def test_add_n_api(self): + if not paddle.is_compiled_with_cuda(): + return + + y_np_32, x_g_np_32 = self.check_main(self.x_np, 'float32') + y_np_gt = np.sum(self.x_np, axis=0).astype('float32') + + np.testing.assert_allclose(y_np_32, y_np_gt, rtol=1e-06) + + +if __name__ == "__main__": + unittest.main() From e48c882f42fe3bca4c1d707098be99fd7ab04659 Mon Sep 17 00:00:00 2001 From: Leo Chen Date: Thu, 2 Feb 2023 10:43:01 +0800 Subject: [PATCH 88/89] pass PYTHON_EXECUTABLE envs to thirdparty cinn (#50142) --- cmake/external/cinn.cmake | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/cmake/external/cinn.cmake b/cmake/external/cinn.cmake index 3ec194a6bfb37..5e23a0f36f04a 100644 --- a/cmake/external/cinn.cmake +++ b/cmake/external/cinn.cmake @@ -40,7 +40,10 @@ set(CINN_OPTIONAL_ARGS -DWITH_MKL_CBLAS=${WITH_MKL} -DWITH_MKLDNN=${WITH_MKL} -DPUBLISH_LIBS=ON - -DWITH_TESTING=ON) + -DWITH_TESTING=ON + -DPYTHON_EXECUTABLE=${PYTHON_EXECUTABLE} + -DPYTHON_INCLUDE_DIR=${PYTHON_INCLUDE_DIR} + -DPYTHON_LIBRARIES=${PYTHON_LIBRARIES}) set(CINN_BUILD_COMMAND ${CMAKE_COMMAND} --build . --target cinnapi -j) set(CINN_BINARY_DIR ${CINN_PREFIX_DIR}/src/external_cinn-build) set(CINN_LIB_NAME "libcinnapi.so") From 3c557e2fdd1a42d46fa98faadbd1c1664e6c1ad8 Mon Sep 17 00:00:00 2001 From: YuanRisheng Date: Thu, 2 Feb 2023 10:55:23 +0800 Subject: [PATCH 89/89] [BugFix]Fix bugs when compile with OneDNN (#50096) * fix bugs * fix ci bugs --- paddle/fluid/framework/feed_fetch_type.h | 5 +++ paddle/fluid/framework/string_array.h | 12 ++++-- paddle/fluid/operators/controlflow/feed_op.cc | 38 ------------------- paddle/phi/kernels/funcs/CMakeLists.txt | 2 +- 4 files changed, 15 insertions(+), 42 deletions(-) diff --git a/paddle/fluid/framework/feed_fetch_type.h b/paddle/fluid/framework/feed_fetch_type.h index 571667bff47eb..e51f22a2c3c18 100644 --- a/paddle/fluid/framework/feed_fetch_type.h +++ b/paddle/fluid/framework/feed_fetch_type.h @@ -26,6 +26,11 @@ namespace framework { using FeedType = paddle::variant; +template <> +struct PhiVectorType { + const char *type_name = "PhiVectorFeedType"; +}; + using FeedList = paddle::framework::PhiVector; using FetchType = paddle::variant +struct PhiVectorType; + +template <> +struct PhiVectorType { + const char* type_name = "PhiVectorString"; +}; + template class PhiVector : public phi::ExtendedTensor, public phi::TypeInfoTraits> { @@ -129,9 +137,7 @@ class PhiVector : public phi::ExtendedTensor, public: /// \brief Returns the name of the class for type traits. /// \return The name of the class. 
- static const char* name() { - return (std::string("PhiVector_") + std::string(typeid(T).name())).c_str(); - } + static const char* name() { return PhiVectorType().type_name; } size_t size() const { return data_.size(); } diff --git a/paddle/fluid/operators/controlflow/feed_op.cc b/paddle/fluid/operators/controlflow/feed_op.cc index 09684b8d737ba..9d266b81d0bab 100644 --- a/paddle/fluid/operators/controlflow/feed_op.cc +++ b/paddle/fluid/operators/controlflow/feed_op.cc @@ -267,44 +267,6 @@ PD_REGISTER_GENERAL_KERNEL( ALL_LAYOUT, paddle::operators::FeedStringsKernel, ALL_DTYPE) {} -#elif defined(PADDLE_WITH_ASCEND_CL) -PD_REGISTER_GENERAL_KERNEL( - feed_dense_tensor, - npu, - ALL_LAYOUT, - paddle::operators::FeedDenseTensorKernel, - ALL_DTYPE) {} -PD_REGISTER_GENERAL_KERNEL( - feed_sparse_coo_tensor, - npu, - ALL_LAYOUT, - paddle::operators::FeedSparseCooTensorKernel, - ALL_DTYPE) {} -PD_REGISTER_GENERAL_KERNEL( - feed_strings, - npu, - ALL_LAYOUT, - paddle::operators::FeedStringsKernel, - ALL_DTYPE) {} -#elif defined(PADDLE_WITH_MLU) -PD_REGISTER_GENERAL_KERNEL( - feed_dense_tensor, - CustomMLU, - ALL_LAYOUT, - paddle::operators::FeedDenseTensorKernel, - ALL_DTYPE) {} -PD_REGISTER_GENERAL_KERNEL( - feed_sparse_coo_tensor, - CustomMLU, - ALL_LAYOUT, - paddle::operators::FeedSparseCooTensorKernel, - ALL_DTYPE) {} -PD_REGISTER_GENERAL_KERNEL( - feed_strings, - CustomMLU, - ALL_LAYOUT, - paddle::operators::FeedStringsKernel, - ALL_DTYPE) {} #endif #ifdef PADDLE_WITH_CUSTOM_DEVICE namespace paddle { diff --git a/paddle/phi/kernels/funcs/CMakeLists.txt b/paddle/phi/kernels/funcs/CMakeLists.txt index e4f779c807570..da8f47c7bffd4 100644 --- a/paddle/phi/kernels/funcs/CMakeLists.txt +++ b/paddle/phi/kernels/funcs/CMakeLists.txt @@ -27,7 +27,7 @@ math_library(sequence_scale) cc_library( phi_data_layout_transform SRCS data_layout_transform.cc - DEPS tensor) + DEPS tensor blas) if(WITH_GPU OR WITH_ROCM) if(MKL_FOUND AND WITH_ONEMKL)