
Commit abbed3a

y-sq authored and facebook-github-bot committed
Add the workaround to support rowwise scaled_gemm for fp32 outputs (#2431)
Summary: Running rowwise scaling on fp32 tensors fails with the error below (P1794222725):

```
RuntimeError: Only bf16 high precision output types are supported for row-wise scaling.
```

This PR adds an option to explicitly use bfloat16 as the output of the rowwise-scaled matmul and cast the result back to the original precision. It can be enabled by setting:

```
config = dataclasses.replace(config, convert_dtypes_for_rowwise_scaled_mm=True)
```

Differential Revision: D73552660
1 parent 2025b75 commit abbed3a
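
The workaround itself is just a dtype round-trip: when row-wise scaling is in use and the caller asks for fp16/fp32 output, the matmul is asked to emit bfloat16 and the result is cast back afterwards. Below is a minimal, self-contained sketch of that logic; the helper name and the stand-in tensor are illustrative, not part of this commit.

```python
import torch


def pick_rowwise_mm_dtypes(output_dtype: torch.dtype, is_rowwise_scaling: bool):
    """Return (compute_dtype, final_dtype) mirroring the cast-back workaround.

    torch._scaled_mm only supports bf16 high-precision outputs for row-wise
    scaling, so fp16/fp32 requests compute in bf16 and cast back at the end.
    """
    orig_dtype = output_dtype
    if is_rowwise_scaling and output_dtype in (torch.float16, torch.float32):
        output_dtype = torch.bfloat16
    return output_dtype, orig_dtype


compute_dtype, final_dtype = pick_rowwise_mm_dtypes(torch.float32, is_rowwise_scaling=True)
# stand-in for the scaled-mm result produced in compute_dtype
out = torch.zeros(4, 4, dtype=compute_dtype)
out = out.to(final_dtype)  # cast back to the dtype the caller asked for
assert out.dtype == torch.float32
```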

File tree

2 files changed: +14 −4 lines changed

test/float8/test_base.py

Lines changed: 5 additions & 4 deletions
```diff
@@ -34,9 +34,7 @@
     e5m2_dtype,
 )
 from torchao.float8.float8_linear import Float8Linear
-from torchao.float8.float8_linear_utils import (
-    convert_to_float8_training,
-)
+from torchao.float8.float8_linear_utils import convert_to_float8_training
 from torchao.float8.float8_ops import addmm_float8_unwrapped
 from torchao.float8.float8_scaling_utils import (
     get_maybe_axiswise_dim,
@@ -379,12 +377,16 @@ def test_linear_from_config_params(
     )
     @pytest.mark.parametrize("x_shape", [(16, 16), (2, 16, 16), (3, 2, 16, 16)])
     @pytest.mark.parametrize("linear_bias", [True, False])
+    @pytest.mark.parametrize(
+        "linear_dtype", [torch.bfloat16, torch.float16, torch.float32]
+    )
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA not available")
     @skip_if_rocm("ROCm enablement in progress")
     def test_linear_from_recipe(
         self,
         recipe_name,
         x_shape,
+        linear_dtype: torch.dtype,
         linear_bias: bool,
     ):
         if torch.cuda.get_device_capability() < (9, 0):
@@ -393,7 +395,6 @@ def test_linear_from_recipe(
             )
             pytest.skip()
 
-        linear_dtype = torch.bfloat16
         x = torch.randn(*x_shape, device="cuda", dtype=linear_dtype)
         m_ref = nn.Linear(16, 32, bias=linear_bias, device="cuda", dtype=linear_dtype)
         config = Float8LinearConfig.from_recipe_name(recipe_name)
```

torchao/float8/float8_ops.py

Lines changed: 9 additions & 0 deletions
```diff
@@ -54,6 +54,12 @@ def addmm_float8_unwrapped(
         a_inverse_scale = a_inverse_scale.new_ones(())
         b_inverse_scale = a_inverse_scale.new_ones(())
 
+    # work around torch._scaled_mm not having float32 output type
+    # TODO(pytorch/pytorch#156771): remove this once torch._scaled_mm supports float32 output
+    orig_dtype = output_dtype
+    if orig_dtype in (torch.float16, torch.float32) and is_rowwise_scaling:
+        output_dtype = torch.bfloat16
+
     post_bias = None
     if output_dtype == torch.float32:
         # Bias is not supported by _scaled_mm when output is fp32
@@ -76,6 +82,9 @@ def addmm_float8_unwrapped(
     if post_bias is not None:
         output += post_bias
 
+    if orig_dtype in (torch.float16, torch.float32) and is_rowwise_scaling:
+        output = output.to(orig_dtype)
+
     return output
 
 
```
