@@ -69,7 +69,8 @@ void generic_moe_gemm_kernelLauncher(const T* A,
69
69
cudaStream_t stream,
70
70
int * kernel_occupancy = nullptr ) {
71
71
if (gemm_config.split_k_style != SplitKStyle::NO_SPLIT_K) {
72
- PADDLE_FATAL (" [MoeGemm] Grouped gemm does not support split-k" );
72
+ throw std::runtime_error (
73
+ " [MoeGemm] Grouped gemm does not support split-k" );
73
74
}
74
75
75
76
#ifdef PADDLE_CUDA_BF16
@@ -169,7 +170,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
169
170
int occupancy = std::min (2 , GemmGrouped::maximum_active_blocks ());
170
171
171
172
if (occupancy == 0 ) {
172
- PADDLE_FATAL (
173
+ throw std::runtime_error (
173
174
" [MoE Runner] GPU lacks the shared memory resources to run "
174
175
" GroupedGEMM kernel" );
175
176
}
@@ -197,7 +198,7 @@ void generic_moe_gemm_kernelLauncher(const T* A,
197
198
if (can_implement != cutlass::Status::kSuccess ) {
198
199
std::string err_msg = " MoEFC kernel will fail for params. Error: " +
199
200
std::string (cutlassGetStatusString (can_implement));
200
- PADDLE_FATAL (" [MoE Runner] " + err_msg);
201
+ throw std::runtime_error (" [MoE Runner] " + err_msg);
201
202
}
202
203
203
204
auto init_status = gemm.initialize (args);
@@ -243,7 +244,7 @@ struct dispatch_stages {
243
244
std::string err_msg = " Cutlass fpA_intB gemm. Not instantiates for arch " +
244
245
std::to_string (arch::kMinComputeCapability ) +
245
246
" with stages set to " + std::to_string (Stages);
246
- PADDLE_FATAL (" [dispatch_stages::dispatch] " + err_msg);
247
+ throw std::runtime_error (" [dispatch_stages::dispatch] " + err_msg);
247
248
}
248
249
};
249
250
@@ -394,7 +395,8 @@ void dispatch_gemm_config(const T* A,
394
395
default :
395
396
std::string err_msg = " dispatch_gemm_config does not support stages " +
396
397
std::to_string (gemm_config.stages );
397
- PADDLE_FATAL (" [MoE][dispatch_gemm_config] " + err_msg);
398
+ throw std::runtime_error (
399
+ " [MoE][dispatch_gemm_config] " + err_msg);
398
400
break ;
399
401
}
400
402
}
@@ -452,15 +454,16 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
452
454
dispatch_gemm_config_macro (64 , 128 , 64 , 32 , 64 , 64 );
453
455
dispatch_gemm_config_macro (128 , 128 , 64 , 64 , 32 , 64 );
454
456
case CutlassTileConfig::Undefined:
455
- PADDLE_FATAL (" [dispatch_moe_gemm_to_cutlass] gemm config undefined." );
457
+ throw std::runtime_error (
458
+ " [dispatch_moe_gemm_to_cutlass] gemm config undefined." );
456
459
break ;
457
460
case CutlassTileConfig::ChooseWithHeuristic:
458
- PADDLE_FATAL (
461
+ throw std::runtime_error (
459
462
" [dispatch_moe_gemm_to_cutlass] gemm config should have "
460
463
" already been set by heuristic." );
461
464
break ;
462
465
default :
463
- PADDLE_FATAL (
466
+ throw std::runtime_error (
464
467
" [dispatch_moe_gemm_to_cutlass] Config is invalid for same "
465
468
" type MoE tensorop GEMM." );
466
469
break ;
@@ -497,38 +500,44 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
497
500
dispatch_gemm_config_macro (32 , 128 , 64 , 32 , 32 , 64 );
498
501
dispatch_gemm_config_macro (64 , 128 , 64 , 64 , 64 , 64 );
499
502
case CutlassTileConfig::Undefined:
500
- PADDLE_FATAL (" [dispatch_moe_gemm_to_cutlass] gemm config undefined." );
503
+ throw std::runtime_error (
504
+ " [dispatch_moe_gemm_to_cutlass] gemm config undefined." );
501
505
break ;
502
506
case CutlassTileConfig::ChooseWithHeuristic:
503
- PADDLE_FATAL (
507
+ throw std::runtime_error (
504
508
" [dispatch_moe_gemm_to_cutlass] gemm config should have "
505
509
" already been set by heuristic." );
506
510
break ;
507
511
default :
508
- PADDLE_FATAL (
512
+ throw std::runtime_error (
509
513
" [dispatch_moe_gemm_to_cutlass] Config is invalid for "
510
514
" mixed type tensorop GEMM." );
511
515
break ;
512
516
}
513
517
} else {
514
518
switch (gemm_config.tile_config ) {
515
519
dispatch_gemm_config_macro (16 , 128 , 64 , 16 , 32 , 64 );
520
+ dispatch_gemm_config_macro (16 , 256 , 64 , 16 , 64 , 64 );
521
+ dispatch_gemm_config_macro (64 , 64 , 64 , 32 , 32 , 64 );
516
522
dispatch_gemm_config_macro (32 , 128 , 64 , 32 , 32 , 64 );
523
+ dispatch_gemm_config_macro (128 , 64 , 64 , 64 , 32 , 64 );
517
524
dispatch_gemm_config_macro (64 , 128 , 64 , 64 , 64 , 64 );
518
525
dispatch_gemm_config_macro (128 , 128 , 64 , 64 , 64 , 64 );
519
526
dispatch_gemm_config_macro (128 , 128 , 64 , 128 , 32 , 64 );
520
527
dispatch_gemm_config_macro (128 , 256 , 64 , 64 , 64 , 64 );
521
528
dispatch_gemm_config_macro (64 , 128 , 64 , 64 , 32 , 64 );
529
+ dispatch_gemm_config_macro (256 , 128 , 64 , 64 , 64 , 64 );
522
530
case CutlassTileConfig::Undefined:
523
- PADDLE_FATAL (" [dispatch_moe_gemm_to_cutlass] gemm config undefined." );
531
+ throw std::runtime_error (
532
+ " [dispatch_moe_gemm_to_cutlass] gemm config undefined." );
524
533
break ;
525
534
case CutlassTileConfig::ChooseWithHeuristic:
526
- PADDLE_FATAL (
535
+ throw std::runtime_error (
527
536
" [dispatch_moe_gemm_to_cutlass] gemm config should have "
528
537
" already been set by heuristic." );
529
538
break ;
530
539
default :
531
- PADDLE_FATAL (
540
+ throw std::runtime_error (
532
541
" [dispatch_moe_gemm_to_cutlass] Config is invalid for "
533
542
" mixed type tensorop GEMM." );
534
543
break ;
@@ -561,17 +570,17 @@ void dispatch_moe_gemm_to_cutlass(const T* A,
561
570
switch (gemm_config.tile_config ) {
562
571
dispatch_gemm_config_macro (128 , 128 , 8 , 64 , 64 , 8 );
563
572
case CutlassTileConfig::Undefined:
564
- PADDLE_FATAL (
573
+ throw std::runtime_error (
565
574
" [dispatch_moe_gemm_to_cutlass][SIMT] gemm config "
566
575
" undefined." );
567
576
break ;
568
577
case CutlassTileConfig::ChooseWithHeuristic:
569
- PADDLE_FATAL (
578
+ throw std::runtime_error (
570
579
" [dispatch_moe_gemm_to_cutlass][SIMT] gemm config should "
571
580
" have already been set by heuristic." );
572
581
break ;
573
582
default :
574
- PADDLE_FATAL (
583
+ throw std::runtime_error (
575
584
" [dispatch_moe_gemm_to_cutlass][SIMT] Unsupported config "
576
585
" for float MoE gemm." );
577
586
break ;
0 commit comments