diff --git a/test/inductor/test_cpu_repro.py b/test/inductor/test_cpu_repro.py
index 3b1ff31c80178..0bff745ca823c 100644
--- a/test/inductor/test_cpu_repro.py
+++ b/test/inductor/test_cpu_repro.py
@@ -2534,6 +2534,36 @@ def forward(self, x):
             )
             assert metrics.generated_kernel_count == 0
 
+    def test_group_norm_vec(self):  # GroupNorm must yield exactly 2 cpp-vec kernels
+        class M(torch.nn.Module):
+            def __init__(self):
+                super().__init__()
+                self.group_norm = torch.nn.GroupNorm(32, 32)  # num_groups=32, num_channels=32
+
+            def forward(self, x):
+                return self.group_norm(x)
+
+        metrics.reset()
+        mod = M().eval()
+        x = torch.randn(2, 32, 32, 32)  # dim 1 (size 32) matches GroupNorm's num_channels
+        with torch.no_grad():
+            self.common(mod, (x,))
+            # Expect 2 vectorized kernels: one for var_mean, the other for the result.
+            assert metrics.generated_cpp_vec_kernel_count == 2
+
+    def test_int_div_vec(self):  # integer torch.div must currently yield no cpp-vec kernel
+        def fn(x, y, mode):
+            return torch.div(x, y, rounding_mode=mode)
+
+        x = torch.randint(1, 100, (32, 32))  # integer tensor, values in [1, 100)
+        y = torch.randint(1, 100, (32, 32))  # divisor drawn from [1, 100), so never zero
+        for mode in [None, "trunc", "floor"]:  # the rounding modes torch.div accepts
+            with torch.no_grad():
+                metrics.reset()
+                self.common(fn, (x, y, mode))
+                # TODO: support vectorization for int div; until then expect no vec kernel.
+                assert metrics.generated_cpp_vec_kernel_count == 0
+
 
 if __name__ == "__main__":
     from torch._dynamo.test_case import run_tests
diff --git a/torch/_inductor/codegen/cpp.py b/torch/_inductor/codegen/cpp.py
index 31e7ee396851a..f8d9cdf568eeb 100644
--- a/torch/_inductor/codegen/cpp.py
+++ b/torch/_inductor/codegen/cpp.py
@@ -455,6 +455,10 @@ def mul(a, b):
     def div(a, b):
         return f"{a} / {b}"
 
+    @staticmethod
+    def truediv(a, b):
+        return f"{a} / {b}"  # emits the same C++ expression string as div above
+
     @staticmethod
     def abs(x):
         return f"{x}.abs()"