-
Notifications
You must be signed in to change notification settings - Fork 13.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[flang] Code generation for fir.pack/unpack_array. #132080
Conversation
@llvm/pr-subscribers-flang-codegen @llvm/pr-subscribers-flang-driver Author: Slava Zakharin (vzakhari) ChangesThe code generation relies on Patch is 125.10 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/132080.diff 10 Files Affected:
diff --git a/flang/include/flang/Optimizer/CodeGen/CGPasses.td b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
index 2e097faec5403..df0ecf5540776 100644
--- a/flang/include/flang/Optimizer/CodeGen/CGPasses.td
+++ b/flang/include/flang/Optimizer/CodeGen/CGPasses.td
@@ -99,4 +99,15 @@ def BoxedProcedurePass : Pass<"boxed-procedure", "mlir::ModuleOp"> {
];
}
+def LowerRepackArraysPass : Pass<"lower-repack-arrays", "mlir::ModuleOp"> {
+ let summary = "Convert fir.pack/unpack_array to other FIR operations";
+ let description = [{
+ Convert fir.pack/unpack_array operations to other FIR operations
+ and Fortran runtime calls that implement the semantics
+ of packing/unpacking.
+ }];
+ let dependentDialects = ["fir::FIROpsDialect", "mlir::arith::ArithDialect",
+ "mlir::func::FuncDialect"];
+}
+
#endif // FORTRAN_OPTIMIZER_CODEGEN_FIR_PASSES
diff --git a/flang/include/flang/Optimizer/CodeGen/CodeGen.h b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
index 255b1950c8425..0398d0f248e08 100644
--- a/flang/include/flang/Optimizer/CodeGen/CodeGen.h
+++ b/flang/include/flang/Optimizer/CodeGen/CodeGen.h
@@ -26,6 +26,7 @@ struct NameUniquer;
#define GEN_PASS_DECL_CODEGENREWRITE
#define GEN_PASS_DECL_TARGETREWRITEPASS
#define GEN_PASS_DECL_BOXEDPROCEDUREPASS
+#define GEN_PASS_DECL_LOWERREPACKARRAYSPASS
#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
/// FIR to LLVM translation pass options.
diff --git a/flang/lib/Optimizer/CodeGen/CMakeLists.txt b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
index 553c20bb85d38..f730c7fd03948 100644
--- a/flang/lib/Optimizer/CodeGen/CMakeLists.txt
+++ b/flang/lib/Optimizer/CodeGen/CMakeLists.txt
@@ -4,6 +4,7 @@ add_flang_library(FIRCodeGen
CodeGen.cpp
CodeGenOpenMP.cpp
FIROpPatterns.cpp
+ LowerRepackArrays.cpp
PreCGRewrite.cpp
TBAABuilder.cpp
Target.cpp
diff --git a/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
new file mode 100644
index 0000000000000..c109dc4732ca5
--- /dev/null
+++ b/flang/lib/Optimizer/CodeGen/LowerRepackArrays.cpp
@@ -0,0 +1,403 @@
+//===-- LowerRepackArrays.cpp
+//------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "flang/Optimizer/CodeGen/CodeGen.h"
+
+#include "flang/Optimizer/Builder/BoxValue.h"
+#include "flang/Optimizer/Builder/Character.h"
+#include "flang/Optimizer/Builder/FIRBuilder.h"
+#include "flang/Optimizer/Builder/MutableBox.h"
+#include "flang/Optimizer/Builder/Runtime/Allocatable.h"
+#include "flang/Optimizer/Builder/Runtime/Transformational.h"
+#include "flang/Optimizer/Builder/Todo.h"
+#include "flang/Optimizer/Dialect/FIRDialect.h"
+#include "flang/Optimizer/Dialect/FIROps.h"
+#include "flang/Optimizer/Dialect/FIRType.h"
+#include "flang/Optimizer/Support/DataLayout.h"
+#include "mlir/Pass/Pass.h"
+#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
+
+namespace fir {
+#define GEN_PASS_DEF_LOWERREPACKARRAYSPASS
+#include "flang/Optimizer/CodeGen/CGPasses.h.inc"
+} // namespace fir
+
+#define DEBUG_TYPE "lower-repack-arrays"
+
+namespace {
+class RepackArrayConversion {
+public:
+ RepackArrayConversion(std::optional<mlir::DataLayout> dataLayout)
+ : dataLayout(dataLayout) {}
+
+protected:
+ std::optional<mlir::DataLayout> dataLayout;
+
+ static bool canAllocateTempOnStack(mlir::Value box);
+};
+
+class PackArrayConversion : public mlir::OpRewritePattern<fir::PackArrayOp>,
+ RepackArrayConversion {
+public:
+ using OpRewritePattern::OpRewritePattern;
+
+ PackArrayConversion(mlir::MLIRContext *context,
+ std::optional<mlir::DataLayout> dataLayout)
+ : OpRewritePattern(context), RepackArrayConversion(dataLayout) {}
+
+ mlir::LogicalResult
+ matchAndRewrite(fir::PackArrayOp op,
+ mlir::PatternRewriter &rewriter) const override;
+
+private:
+ static constexpr llvm::StringRef bufferName = ".repacked";
+
+ static mlir::Value allocateTempBuffer(fir::FirOpBuilder &builder,
+ mlir::Location loc, bool useStack,
+ mlir::Value origBox,
+ llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> typeParams);
+};
+
+class UnpackArrayConversion : public mlir::OpRewritePattern<fir::UnpackArrayOp>,
+ RepackArrayConversion {
+public:
+ using OpRewritePattern::OpRewritePattern;
+
+ UnpackArrayConversion(mlir::MLIRContext *context,
+ std::optional<mlir::DataLayout> dataLayout)
+ : OpRewritePattern(context), RepackArrayConversion(dataLayout) {}
+
+ mlir::LogicalResult
+ matchAndRewrite(fir::UnpackArrayOp op,
+ mlir::PatternRewriter &rewriter) const override;
+};
+} // anonymous namespace
+
+bool RepackArrayConversion::canAllocateTempOnStack(mlir::Value box) {
+ return !fir::isPolymorphicType(box.getType());
+}
+
+mlir::LogicalResult
+PackArrayConversion::matchAndRewrite(fir::PackArrayOp op,
+ mlir::PatternRewriter &rewriter) const {
+ mlir::Location loc = op.getLoc();
+ fir::FirOpBuilder builder(rewriter, op.getOperation());
+ if (op.getMaxSize() || op.getMaxElementSize() || op.getMinStride())
+ TODO(loc, "fir.pack_array with constraints");
+ if (op.getHeuristics() != fir::PackArrayHeuristics::None)
+ TODO(loc, "fir.pack_array with heuristics");
+
+ mlir::Value box = op.getArray();
+ llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(),
+ op.getTypeparams().end());
+ // TODO: set non-default lower bounds on fir.pack_array,
+ // so that we can preserve lower bounds in the temporary box.
+ fir::BoxValue boxValue(box, /*lbounds=*/{}, typeParams);
+ mlir::Type boxType = boxValue.getBoxTy();
+ unsigned rank = boxValue.rank();
+ mlir::Type indexType = builder.getIndexType();
+ mlir::Value zero = fir::factory::createZeroValue(builder, loc, indexType);
+
+ // Fetch the extents from the box, and see if the array
+ // is not empty.
+ // If the type params are not explicitly provided, then we must also
+ // fetch the type parameters from the box.
+ //
+ // bool isNotEmpty;
+ // vector<int64_t> extents;
+ // if (IsPresent(box) && !IsContiguous[UpTo](box[, 1])) {
+ // isNotEmpty = box->base_addr != null;
+ // extents = SHAPE(box);
+ // } else {
+ // isNotEmpty = false;
+ // extents = vector<int64_t>(rank, 0);
+ // }
+
+ unsigned numTypeParams = 0;
+ if (typeParams.size() == 0) {
+ if (auto recordType = mlir::dyn_cast<fir::RecordType>(boxValue.getEleTy()))
+ if (recordType.getNumLenParams() != 0)
+ TODO(loc,
+ "allocating temporary for a parameterized derived type array");
+
+ if (auto charType = mlir::dyn_cast<fir::CharacterType>(boxValue.getEleTy()))
+ if (charType.hasDynamicLen())
+ numTypeParams = 1;
+ }
+
+ // For now we have to always check if the box is present.
+ mlir::Type predicateType = builder.getI1Type();
+ auto isPresent =
+ builder.create<fir::IsPresentOp>(loc, predicateType, boxValue.getAddr());
+
+ // The results of the IfOp are:
+ // (extent1, ..., extentN, typeParam1, ..., typeParamM, isNotEmpty)
+ // The number of results is rank + numTypeParams + 1.
+ llvm::SmallVector<mlir::Type> ifTypes(rank + numTypeParams, indexType);
+ ifTypes.push_back(predicateType);
+ llvm::SmallVector<mlir::Value> negativeResult(rank + numTypeParams, zero);
+ negativeResult.push_back(
+ fir::factory::createZeroValue(builder, loc, predicateType));
+ bool failedTypeParams = false;
+ llvm::SmallVector<mlir::Value> extentsAndPredicate =
+ builder
+ .genIfOp(loc, ifTypes, isPresent,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ // The box is present.
+ auto isContiguous = builder.create<fir::IsContiguousBoxOp>(
+ loc, box, op.getInnermost());
+ llvm::SmallVector<mlir::Value> extentsAndPredicate =
+ builder
+ .genIfOp(loc, ifTypes, isContiguous,
+ /*withElseRegion=*/true)
+ .genThen([&]() {
+ // Box is contiguous, return zero.
+ builder.create<fir::ResultOp>(loc, negativeResult);
+ })
+ .genElse([&]() {
+ // Get the extents.
+ llvm::SmallVector<mlir::Value> results =
+ fir::factory::readExtents(builder, loc, boxValue);
+
+ // Get the type parameters from the box, if needed.
+ llvm::SmallVector<mlir::Value> assumedTypeParams;
+ if (numTypeParams != 0) {
+ if (auto charType = mlir::dyn_cast<fir::CharacterType>(
+ boxValue.getEleTy()))
+ if (charType.hasDynamicLen()) {
+ fir::factory::CharacterExprHelper charHelper(
+ builder, loc);
+ mlir::Value len = charHelper.readLengthFromBox(
+ boxValue.getAddr(), charType);
+ assumedTypeParams.push_back(
+ builder.createConvert(loc, indexType, len));
+ }
+
+ if (numTypeParams != assumedTypeParams.size()) {
+ failedTypeParams = true;
+ assumedTypeParams.append(
+ numTypeParams - assumedTypeParams.size(), zero);
+ }
+ }
+ results.append(assumedTypeParams);
+
+ auto dataAddr = builder.create<fir::BoxAddrOp>(
+ loc, boxValue.getMemTy(), boxValue.getAddr());
+ auto isNotEmpty = builder.create<fir::IsPresentOp>(
+ loc, predicateType, dataAddr);
+ results.push_back(isNotEmpty);
+ builder.create<fir::ResultOp>(loc, results);
+ })
+ .getResults();
+
+ builder.create<fir::ResultOp>(loc, extentsAndPredicate);
+ })
+ .genElse([&]() {
+ // Box is absent, nothing to do.
+ builder.create<fir::ResultOp>(loc, negativeResult);
+ })
+ .getResults();
+
+ if (failedTypeParams)
+ return emitError(loc) << "failed to compute the type parameters for "
+ << op.getOperation() << '\n';
+
+ // The last result is the isNotEmpty predicate value.
+ mlir::Value isNotEmpty = extentsAndPredicate.pop_back_val();
+ // If fir.pack_array does not specify type parameters, but they are needed
+ // for the type, then use the parameters fetched from the box.
+ if (typeParams.size() == 0 && numTypeParams != 0) {
+ assert(extentsAndPredicate.size() > numTypeParams);
+ typeParams.append(extentsAndPredicate.end() - numTypeParams,
+ extentsAndPredicate.end());
+ extentsAndPredicate.pop_back_n(numTypeParams);
+ }
+ // The remaining resulst are the extents.
+ llvm::SmallVector<mlir::Value> extents = std::move(extentsAndPredicate);
+ assert(extents.size() == rank);
+
+ mlir::Value tempBox;
+ // Allocate memory for the temporary, if allocating on stack.
+ // We can do it unconditionally, even if size is zero.
+ if (op.getStack() && canAllocateTempOnStack(boxValue.getAddr())) {
+ tempBox = allocateTempBuffer(builder, loc, /*useStack=*/true,
+ boxValue.getAddr(), extents, typeParams);
+ if (!tempBox)
+ return rewriter.notifyMatchFailure(op,
+ "failed to produce stack allocation");
+ }
+
+ mlir::Value newResult =
+ builder.genIfOp(loc, {boxType}, isNotEmpty, /*withElseRegion=*/true)
+ .genThen([&]() {
+ // Do the heap allocation conditionally.
+ if (!tempBox)
+ tempBox =
+ allocateTempBuffer(builder, loc, /*useStack=*/false,
+ boxValue.getAddr(), extents, typeParams);
+
+ // Do the copy, if needed, and return the new box (shaped same way
+ // as the original one).
+ if (!op.getNoCopy())
+ fir::runtime::genShallowCopy(builder, loc, tempBox,
+ boxValue.getAddr(),
+ /*resultIsAllocated=*/true);
+
+ // Set the lower bounds after the original box.
+ mlir::Value shape;
+ if (!boxValue.getLBounds().empty()) {
+ shape = builder.genShape(loc, boxValue.getLBounds(), extents);
+ }
+
+ // Rebox the temporary box to make its type the same as
+ // the original box's.
+ tempBox = builder.create<fir::ReboxOp>(loc, boxType, tempBox, shape,
+ /*slice=*/nullptr);
+ builder.create<fir::ResultOp>(loc, tempBox);
+ })
+ .genElse([&]() {
+ // Return original box.
+ builder.create<fir::ResultOp>(loc, boxValue.getAddr());
+ })
+ .getResults()[0];
+
+ rewriter.replaceOp(op, newResult);
+ return mlir::success();
+}
+
+mlir::Value PackArrayConversion::allocateTempBuffer(
+ fir::FirOpBuilder &builder, mlir::Location loc, bool useStack,
+ mlir::Value origBox, llvm::ArrayRef<mlir::Value> extents,
+ llvm::ArrayRef<mlir::Value> typeParams) {
+ auto tempType = mlir::cast<fir::SequenceType>(
+ fir::extractSequenceType(origBox.getType()));
+ assert(tempType.getDimension() == extents.size() &&
+ "number of extents does not match the rank");
+
+ if (fir::isPolymorphicType(origBox.getType())) {
+ // Use runtime to allocate polymorphic temporary vector using the dynamic
+ // type of the original box and the provided numElements.
+ // TODO: try to generalize it with BufferizeHLFIR.cpp:createArrayTemp().
+
+ // We cannot allocate polymorphic entity on stack.
+ // Return null, and allow the caller to reissue the call.
+ if (useStack)
+ return nullptr;
+
+ mlir::Type indexType = builder.getIndexType();
+ mlir::Type boxHeapType = fir::HeapType::get(tempType);
+ mlir::Value boxAlloc = fir::factory::genNullBoxStorage(
+ builder, loc, fir::ClassType::get(boxHeapType));
+ fir::runtime::genAllocatableApplyMold(builder, loc, boxAlloc, origBox,
+ tempType.getDimension());
+ mlir::Value one = builder.createIntegerConstant(loc, indexType, 1);
+ unsigned dim = 0;
+ for (mlir::Value extent : extents) {
+ mlir::Value dimIndex =
+ builder.createIntegerConstant(loc, indexType, dim++);
+ fir::runtime::genAllocatableSetBounds(builder, loc, boxAlloc, dimIndex,
+ one, extent);
+ }
+
+ if (!typeParams.empty()) {
+ // We should call AllocatableSetDerivedLength() here.
+ TODO(loc,
+ "polymorphic type with length parameters in PackArrayConversion");
+ }
+
+ fir::runtime::genAllocatableAllocate(builder, loc, boxAlloc);
+ return builder.create<fir::LoadOp>(loc, boxAlloc);
+ }
+
+ // Allocate non-polymorphic temporary on stack or in heap.
+ mlir::Value newBuffer;
+ if (useStack)
+ newBuffer =
+ builder.createTemporary(loc, tempType, bufferName, extents, typeParams);
+ else
+ newBuffer = builder.createHeapTemporary(loc, tempType, bufferName, extents,
+ typeParams);
+
+ mlir::Type ptrType = newBuffer.getType();
+ mlir::Type tempBoxType = fir::BoxType::get(mlir::isa<fir::HeapType>(ptrType)
+ ? ptrType
+ : fir::unwrapRefType(ptrType));
+ mlir::Value shape = builder.genShape(loc, extents);
+ mlir::Value newBox =
+ builder.createBox(loc, tempBoxType, newBuffer, shape, /*slice=*/nullptr,
+ typeParams, /*tdesc=*/nullptr);
+ return newBox;
+}
+
+mlir::LogicalResult
+UnpackArrayConversion::matchAndRewrite(fir::UnpackArrayOp op,
+ mlir::PatternRewriter &rewriter) const {
+ mlir::Location loc = op.getLoc();
+ fir::FirOpBuilder builder(rewriter, op.getOperation());
+ mlir::Type predicateType = builder.getI1Type();
+ mlir::Type indexType = builder.getIndexType();
+ mlir::Value tempBox = op.getTemp();
+ mlir::Value originalBox = op.getOriginal();
+
+ // For now we have to always check if the box is present.
+ auto isPresent =
+ builder.create<fir::IsPresentOp>(loc, predicateType, originalBox);
+
+ builder.genIfThen(loc, isPresent).genThen([&]() {
+ mlir::Type addrType =
+ fir::HeapType::get(fir::extractSequenceType(tempBox.getType()));
+ mlir::Value tempAddr =
+ builder.create<fir::BoxAddrOp>(loc, addrType, tempBox);
+ mlir::Value tempAddrAsIndex =
+ builder.createConvert(loc, indexType, tempAddr);
+ mlir::Value originalAddr =
+ builder.create<fir::BoxAddrOp>(loc, addrType, originalBox);
+ originalAddr = builder.createConvert(loc, indexType, originalAddr);
+
+ auto isNotSame = builder.create<mlir::arith::CmpIOp>(
+ loc, mlir::arith::CmpIPredicate::ne, tempAddrAsIndex, originalAddr);
+ builder.genIfThen(loc, isNotSame).genThen([&]() {});
+ // Copy from temporary to the original.
+ if (!op.getNoCopy())
+ fir::runtime::genShallowCopy(builder, loc, originalBox, tempBox,
+ /*resultIsAllocated=*/true);
+
+ // Deallocate, if it was allocated in heap.
+ if (!op.getStack())
+ builder.create<fir::FreeMemOp>(loc, tempAddr);
+ });
+ rewriter.eraseOp(op);
+ return mlir::success();
+}
+
+namespace {
+class LowerRepackArraysPass
+ : public fir::impl::LowerRepackArraysPassBase<LowerRepackArraysPass> {
+public:
+ using LowerRepackArraysPassBase<
+ LowerRepackArraysPass>::LowerRepackArraysPassBase;
+
+ void runOnOperation() override final {
+ auto *context = &getContext();
+ mlir::ModuleOp module = getOperation();
+ std::optional<mlir::DataLayout> dl = fir::support::getOrSetMLIRDataLayout(
+ module, /*allowDefaultLayout=*/false);
+ mlir::RewritePatternSet patterns(context);
+ patterns.insert<PackArrayConversion>(context, dl);
+ patterns.insert<UnpackArrayConversion>(context, dl);
+ mlir::GreedyRewriteConfig config;
+ config.enableRegionSimplification =
+ mlir::GreedySimplifyRegionLevel::Disabled;
+ (void)applyPatternsGreedily(module, std::move(patterns), config);
+ }
+};
+
+} // anonymous namespace
diff --git a/flang/lib/Optimizer/Passes/Pipelines.cpp b/flang/lib/Optimizer/Passes/Pipelines.cpp
index 3aea021e596f6..6ec19556625bc 100644
--- a/flang/lib/Optimizer/Passes/Pipelines.cpp
+++ b/flang/lib/Optimizer/Passes/Pipelines.cpp
@@ -198,6 +198,7 @@ void createDefaultFIROptimizerPassPipeline(mlir::PassManager &pm,
pm.addPass(fir::createPolymorphicOpConversion());
pm.addPass(fir::createAssumedRankOpConversion());
+ pm.addPass(fir::createLowerRepackArraysPass());
// Expand FIR operations that may use SCF dialect for their
// implementation. This is a mandatory pass.
pm.addPass(fir::createSimplifyFIROperations(
diff --git a/flang/test/Driver/bbc-mlir-pass-pipeline.f90 b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
index 276ef818622a1..137c19608c38f 100644
--- a/flang/test/Driver/bbc-mlir-pass-pipeline.f90
+++ b/flang/test/Driver/bbc-mlir-pass-pipeline.f90
@@ -47,6 +47,7 @@
! CHECK-NEXT: PolymorphicOpConversion
! CHECK-NEXT: AssumedRankOpConversion
+! CHECK-NEXT: LowerRepackArraysPass
! CHECK-NEXT: SimplifyFIROperations
! CHECK-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
diff --git a/flang/test/Driver/mlir-debug-pass-...
[truncated]
|
mlir::Value box = op.getArray(); | ||
llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(), | ||
op.getTypeparams().end()); | ||
// TODO: set non-default lower bounds on fir.pack_array, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please do not miss this TODO :)
I have to add a shape operand for fir.pack_array
so that I can specify the non-default lower bounds as in the dummy argument declaration. I believe there is no other way to get them before hlfir.declare
. I will only allow !fir.shift
type for this operand. Let me know if that sounds wrong to you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the best would be for fir.pack_array to propagate the bounds from its input fir.box.
I guess the issue is that there is no easy tool/helper like readExtents
to extract that for you in FIR, maybe you could move and expose genLboundsAndExtentsFromBox
to FIRBuilder.cpp.
The issue I see with the fir.shift is that it is extra complexity to the operation a bit outside of the operation scope. Plus, if there is no repacking, then returning the original fir.box would be treacherous because its bounds would not be guaranteed to be the one of the fir.shift.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks! I agree that the shape operand is not necessary, and we should just inherit the lower bounds from the input box.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks great, just one question
mlir::Value box = op.getArray(); | ||
llvm::SmallVector<mlir::Value> typeParams(op.getTypeparams().begin(), | ||
op.getTypeparams().end()); | ||
// TODO: set non-default lower bounds on fir.pack_array, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think the best would be for fir.pack_array to propagate the bounds from its input fir.box.
I guess the issue is that there is no easy tool/helper like readExtents
to extract that for you in FIR, maybe you could move and expose genLboundsAndExtentsFromBox
to FIRBuilder.cpp.
The issue I see with the fir.shift is that it is extra complexity to the operation a bit outside of the operation scope. Plus, if there is no repacking, then returning the original fir.box would be treacherous because its bounds would not be guaranteed to be the one of the fir.shift.
// if (IsPresent(box) && !IsContiguous[UpTo](box[, 1])) { | ||
// isNotEmpty = box->base_addr != null; | ||
// extents = SHAPE(box); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not placing the whole repacking under if (isPresent)
and just have the fir.if return the fir.box?
This would produce clearer/simpler IR and code in my opinion (the whole non optional repacking code generation can be in its own member function).
Note that it is OK to do an alloca inside a fir.if an use its address outside of it because fir.if is is not an alloca "owner" (it does not have the AutomaticAllocationScope
trait). We are already relying on this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good point, Jean! Though I am a little bit nervous about placing the alloca under an IF :)
I've been running my testing with the heap allocations so far, and it worked pretty well. I am going to do what you suggest and test it with using the stack allocations. We'll see what pops up.
if (fir::isPolymorphicType(origBox.getType())) { | ||
// Use runtime to allocate polymorphic temporary vector using the dynamic | ||
// type of the original box and the provided numElements. | ||
// TODO: try to generalize it with BufferizeHLFIR.cpp:createArrayTemp(). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
+1
originalAddr = builder.createConvert(loc, indexType, originalAddr); | ||
|
||
auto isNotSame = builder.create<mlir::arith::CmpIOp>( | ||
loc, mlir::arith::CmpIPredicate::ne, tempAddrAsIndex, originalAddr); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A small builder helper builder.genPtrCompare(loc, mlir::arith::CmpIPredicate::ne, tempAddrAs, originalAddr);
dealing with the conversion and compare would be useful and reduce the noise in the different place this has to be done.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with Jean's comments resolved. Thanks!
Thank you for the reviews, Jean and Tom! I do not feel well the last two days. I will update it next week and ping you again. |
The code generation relies on `ShallowCopyDirect` runtime to copy data between the original and the temporary arrays (both directions). The allocations are done by the compiler generated code. The heap allocations could have been passed to `ShallowCopy` runtime, but I decided to expose the allocations so that the temporary descriptor passed to `ShallowCopyDirect` has `nocapture` - maybe this will be better for LLVM optimizations.
fcfe42c
to
24845f1
Compare
Hi Tom. Jean will be on vacation for quite a while, so we are on our own :) This PR is ready for re-review. Please take a look. |
// Return true iff for the given original boxed array we can | ||
// allocate temporary memory in stack memory. | ||
// This function is used to synchronize allocation/deallocation | ||
// implied by fir.pack_array and fir.unpack_array, because | ||
// the presence of the stack attribute does not automatically | ||
// mean that the allocation is actually done in stack memory. | ||
// For example, we always do the heap allocation for polymorphic | ||
// types using Fortran runtime. | ||
// Adding the polymorpic mold to fir.alloca and then using | ||
// Fortran runtime to compute the allocation size could probably | ||
// resolve this limitation. | ||
static bool canAllocateTempOnStack(mlir::Value box) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Doesn't need to be in this patch but I think it might come in useful to have a method on the fir.pack_array operation to query whether the allocation will really be lowered to a stack allocation (and then use that method here).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, I think putting it into the operation class is more appropriate. I can make it a static member, because I need to use it for both pack
and unpack
coversions.
Thank you for the review, Tom! I will merge this on Monday. I will be out for a couple of days and I do not want to break something and leave.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/198/builds/3297 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/17/builds/6907 Here is the relevant piece of the build log for the reference
|
The code generation relies on
ShallowCopyDirect
runtimeto copy data between the original and the temporary arrays
(both directions). The allocations are done by the compiler
generated code. The heap allocations could have been passed
to
ShallowCopy
runtime, but I decided to expose the allocationsso that the temporary descriptor passed to
ShallowCopyDirect
has
nocapture
- maybe this will be better for LLVM optimizations.