Skip to content

Commit

Permalink
[nnc] Test cases for uneven split + reorder (pytorch#53091)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: pytorch#53091

Split-with-tail followed by reorder causes a segfault in NNC. Split-with-mask
followed by reorder generates invalid code that writes out of
bounds.
ghstack-source-id: 122870733

Test Plan: LoopNest.ColReduceSplit*

Reviewed By: navahgar

Differential Revision: D26746254

fbshipit-source-id: f8a0de18531b34d2bf06ccaa35d9c98b81b5c600
  • Loading branch information
bertmaher authored and Sacha Refshauge committed Mar 31, 2021
1 parent 160465c commit e153def
Showing 1 changed file with 92 additions and 0 deletions.
92 changes: 92 additions & 0 deletions test/cpp/tensorexpr/test_loopnest.cpp
Expand Up @@ -3829,5 +3829,97 @@ TEST(LoopNest, InlineFromLoad) {
oss.str());
}

static std::pair<std::unique_ptr<Placeholder>, Tensor*> colReduce(
int M,
int N) {
auto a =
std::make_unique<Placeholder>("a", kFloat, std::vector<ExprHandle>{M, N});
Tensor* t = Reduce(
"b",
{{N, "n"}},
Sum(),
[&](const VarHandle& n, const VarHandle& m) { return a->load(m, n); },
{{M, "m"}});
return {std::move(a), t};
}

// Split the output loop of `b` by the vector width using split-with-tail,
// then reorder the two innermost loops, and return the lowered statement.
static Stmt* splitTailReorder(Tensor* b) {
  constexpr int kVectorWidth = 8;
  LoopNest nest({b});
  For* outer = nullptr;
  For* inner = nullptr;
  For* tail = nullptr;
  nest.splitWithTail(
      nest.getLoopStmtsFor(b)[0], kVectorWidth, &outer, &inner, &tail);
  // Re-query the loops: the split invalidated the previous handles.
  auto loops = nest.getLoopStmtsFor(b);
  nest.reorderAxis(loops[1], loops[2]);
  nest.prepareForCodegen();
  return nest.root_stmt();
}

// Split the output loop of `b` by the vector width using split-with-mask,
// then reorder the two innermost loops, and return the lowered statement.
static Stmt* splitMaskReorder(Tensor* b) {
  constexpr int kVectorWidth = 8;
  For *outer, *inner;
  LoopNest nest({b});
  auto loops = nest.getLoopStmtsFor(b);
  nest.splitWithMask(loops[0], kVectorWidth, &outer, &inner);
  // Re-query the loops: the split invalidated the previous handles.
  loops = nest.getLoopStmtsFor(b);
  nest.reorderAxis(loops[1], loops[2]);
  // NOTE: removed a stray `std::clog << *nest.root_stmt()` debug print that
  // was left in here; test helpers should not write to the log on every run.
  nest.prepareForCodegen();
  return nest.root_stmt();
}

// Run statement `s` (a lowered column reduction of placeholder `p` into
// tensor `t`) on an all-ones M x N input and verify each of the N outputs.
// Since every input element is 1.0f, reducing M rows must yield exactly M.
static void checkColReduce(Stmt* s, Placeholder& p, Tensor* t) {
  int M = immediateAs<int>(p.dim(0));
  int N = immediateAs<int>(p.dim(1));
  PaddedBuffer<float> a(M, N);
  PaddedBuffer<float> b(N);
  PaddedBuffer<float> ref(N);
  for (int i = 0; i < M; i++) {
    for (int j = 0; j < N; j++) {
      a(i, j) = 1.0f;
    }
  }
  for (int i = 0; i < N; i++) {
    b(i) = 0.0f;
  }
  for (int i = 0; i < N; i++) {
    // Previously hard-coded as 76.0f, which silently depended on every
    // caller using M == 76; derive it from the placeholder instead.
    ref(i) = static_cast<float>(M);
  }
  SimpleIREvaluator(s, {p, t}).call({a, b});
  ExpectAllNear(b, ref, 1e-5);
}

// Split-with-tail + reorder where the loop extent (128) divides evenly
// by the vector width, so no tail loop work is actually needed.
TEST(LoopNest, ColReduceSplitTailEvenReorder) {
  KernelScope kernel_scope;
  constexpr int kRows = 76;
  constexpr int kCols = 128;
  auto p = colReduce(kRows, kCols);
  checkColReduce(splitTailReorder(p.second), *p.first, p.second);
}

// Split-with-tail + reorder with an uneven extent (100), exercising the
// tail loop. Disabled: per the commit description this currently segfaults.
TEST(LoopNest, DISABLED_ColReduceSplitTailUnevenReorder) {
  KernelScope kernel_scope;
  constexpr int kRows = 76;
  constexpr int kCols = 100;
  auto p = colReduce(kRows, kCols);
  checkColReduce(splitTailReorder(p.second), *p.first, p.second);
}

// Split-with-mask + reorder where the loop extent (128) divides evenly
// by the vector width, so the mask is never taken.
TEST(LoopNest, ColReduceSplitMaskEvenReorder) {
  KernelScope kernel_scope;
  constexpr int kRows = 76;
  constexpr int kCols = 128;
  auto p = colReduce(kRows, kCols);
  checkColReduce(splitMaskReorder(p.second), *p.first, p.second);
}

// Split-with-mask + reorder with an uneven extent (100), exercising the
// mask. Disabled: per the commit description this currently generates
// invalid code that writes out of bounds.
TEST(LoopNest, DISABLED_ColReduceSplitMaskUnevenReorder) {
  KernelScope kernel_scope;
  constexpr int kRows = 76;
  constexpr int kCols = 100;
  auto p = colReduce(kRows, kCols);
  checkColReduce(splitMaskReorder(p.second), *p.first, p.second);
}

} // namespace jit
} // namespace torch

0 comments on commit e153def

Please sign in to comment.