Skip to content

C++: Make asExpr return AggregateLiterals #227

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1357,6 +1357,8 @@ predicate nodeIsHidden(Node n) {
n instanceof InitialGlobalValue
or
n instanceof SsaSynthNode
or
n instanceof AggregateNode
}

predicate neverSkipInPathGraph(Node n) {
35 changes: 35 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll
Original file line number Diff line number Diff line change
@@ -73,6 +73,7 @@ private newtype TIRDataFlowNode =
indirectionIndex = [0 .. Ssa::getMaxIndirectionsForType(p.getUnspecifiedType()) - 1] and
not any(InitializeParameterInstruction init).getParameter() = p
} or
TAggregateNode(Ssa::AggregateUse use) or
TFlowSummaryNode(FlowSummaryImpl::Private::SummaryNode sn)

/**
@@ -868,6 +869,40 @@ class BodyLessParameterNodeImpl extends Node, TBodyLessParameterNodeImpl {
}
}

/**
* INTERNAL: do not use.
*
* A node representing a use of an `AggregateLiteral` after it has been
* initialized. This node exists to ensure that
* ```
* node.asExpr() instanceof AggregateLiteral
* ```
* has a result.
*/
class AggregateNode extends Node, TAggregateNode {
Ssa::AggregateUse use;

AggregateNode() { this = TAggregateNode(use) }

override DataFlowCallable getEnclosingCallable() {
result.asSourceCallable() = this.getFunction()
}

override Declaration getFunction() { result = use.getBlock().getEnclosingFunction() }

override DataFlowType getType() { result = use.getSourceVariable().getType() }

final override Location getLocationImpl() { result = use.getLocation() }

final override string toStringImpl() { result = use.toString() }

/** Gets the `AggregateUse` corresponding to this node. */
Ssa::AggregateUse getUse() { result = use }

/** Gets the `AggregateLiteral` that has just been initialized. */
AggregateLiteral getAggregateLiteral() { result = use.getAggregateLiteral() }
}

/**
* A data-flow node used to model flow summaries. That is, a dataflow node
* that is synthesized to represent a parameter, return value, or other part
14 changes: 13 additions & 1 deletion cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/ExprNodes.qll
Original file line number Diff line number Diff line change
@@ -264,6 +264,11 @@ private module Cached {
e = getConvertedResultExpression(node.asInstruction(), n)
}

private predicate exprNodeShouldBeAggregateNode(AggregateNode node, Expr e, int n) {
node.getAggregateLiteral() = e and
n = 0
}

/** Holds if `node` should be an `IndirectInstruction` that maps `node.asIndirectExpr()` to `e`. */
private predicate indirectExprNodeShouldBeIndirectInstruction(
IndirectInstruction node, Expr e, int n, int indirectionIndex
@@ -294,7 +299,8 @@ private module Cached {
exprNodeShouldBeInstruction(_, e, n) or
exprNodeShouldBeOperand(_, e, n) or
exprNodeShouldBeIndirectOutNode(_, e, n) or
exprNodeShouldBeIndirectOperand(_, e, n)
exprNodeShouldBeIndirectOperand(_, e, n) or
exprNodeShouldBeAggregateNode(_, e, n)
}

private class InstructionExprNode extends ExprNodeBase, InstructionNode {
@@ -442,6 +448,12 @@ private module Cached {
final override Expr getConvertedExpr(int n) { exprNodeShouldBeIndirectOperand(this, result, n) }
}

private class AggregateExprNode extends ExprNodeBase instanceof AggregateNode {
AggregateExprNode() { exprNodeShouldBeAggregateNode(this, _, _) }

final override Expr getConvertedExpr(int n) { exprNodeShouldBeAggregateNode(this, result, n) }
}

/**
* An expression, viewed as a node in a data flow graph.
*/
108 changes: 108 additions & 0 deletions cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll
Original file line number Diff line number Diff line change
@@ -115,6 +115,42 @@ private newtype TDefImpl =
isGlobalDefImpl(v, f, _, indirectionIndex)
}

/**
* Holds if `chi` is the `ChiInstruction` generated from the initialization
* of the `elementIndex`'th field/element of `aggr`. Furthermore, the
* initialization has position `position`.
*/
private predicate aggregateLiteralInitialization(
ChiInstruction chi, Cpp::AggregateLiteral aggr, int elementIndex, int position
) {
exists(Cpp::Field f |
chi.getPartial().getAst() = aggr.(Cpp::ClassAggregateLiteral).getFieldExpr(f, position) and
f.getInitializationOrder() = elementIndex
)
or
chi.getPartial().getAst() =
aggr.(Cpp::ArrayAggregateLiteral).getElementExpr(elementIndex, position)
}

/**
* Gets the initialization order of `chi` when initializing `aggr`.
*
* That is, if `chi` is the `ChiInstruction` generated from initializing the
* `i`'th field then `i = getAggregateLiteralInitializationRank(chi, aggr)`
* holds.
*
* Note that initialization orders may be surprising when a field or element
* is initialized repeatedly.
*/
private int getAggregateLiteralInitializationRank(ChiInstruction chi, Cpp::AggregateLiteral aggr) {
chi =
rank[result + 1](ChiInstruction cand, int elementIndex, int position |
aggregateLiteralInitialization(cand, aggr, elementIndex, position)
|
cand order by elementIndex, position
)
}

cached
private newtype TUseImpl =
TDirectUseImpl(Operand operand, int indirectionIndex) {
@@ -126,6 +162,16 @@ private newtype TUseImpl =
// the assignment to a global variable isn't ruled out as dead.
isGlobalUse(v, f, _, indirectionIndex)
} or
TAggregateUse(Cpp::AggregateLiteral aggr) {
// There is no single instruction that represents a fully initialized
// aggregate. So we work around this by creating a dataflow ndoe to
// represent this, and we do that by adding a single use of the associated
// variable after the last initialization.
exists(int i |
i = getAggregateLiteralInitializationRank(_, aggr) and
not i + 1 = getAggregateLiteralInitializationRank(_, aggr)
)
} or
TFinalParameterUse(Parameter p, int indirectionIndex) {
underlyingTypeIsModifiableAt(p.getUnderlyingType(), indirectionIndex) and
// Only create an SSA read for the final use of a parameter if there's
@@ -619,6 +665,68 @@ class GlobalUse extends UseImpl, TGlobalUse {
override predicate isCertain() { any() }
}

/**
* A use that works around an IR issue where no instruction computes the
* expression that is an `AggregateLiteral`.
*
* That is there is no instruction `i` for which
* ```
* i.getUnconvertedResultExpression() instanceof AggregateLiteral
* ```
* holds. Instead, the IR represents the initialization of an aggregate literal
* as a sequence of `StoreInstruction`s.
*
* To work around this we add a use after the final `ChiInstruction` in the
* initialization sequence and implement `getNode()` to return a node for which
* we map `node.asExpr()` to be the aggregate literal.
*/
class AggregateUse extends UseImpl, TAggregateUse {
Cpp::AggregateLiteral aggr;

AggregateUse() { this = TAggregateUse(aggr) and indirectionIndex = 1 }

final override AggregateNode getNode() { result.getUse() = this }

/** Gets the aggregate literal that has just been initialized. */
final Cpp::AggregateLiteral getAggregateLiteral() { result = aggr }

final override string toString() { result = aggr.toString() }

private ChiInstruction getChiInstruction() {
exists(int i |
i = getAggregateLiteralInitializationRank(result, aggr) and
not i + 1 = getAggregateLiteralInitializationRank(_, aggr)
)
}

final override predicate hasIndexInBlock(IRBlock block, int index) {
block.getInstruction(index) = this.getChiInstruction()
}

final override Cpp::Location getLocation() { result = aggr.getLocation() }

final override int getIndirection() { result = 1 }

final override BaseIRVariable getBaseSourceVariable() {
exists(Instruction dest, IRVariable v |
dest = this.getChiInstruction().getPartial().(StoreInstruction).getDestinationAddress() and
v = result.getIRVariable()
|
// A class aggregate literal
v =
dest.(FieldAddressInstruction)
.getObjectAddress()
.(VariableAddressInstruction)
.getIRVariable()
or
// An array aggregate literal
v = dest.(PointerAddInstruction).getLeft().(VariableAddressInstruction).getIRVariable()
)
}

final override predicate isCertain() { any() }
}

/**
* A definition that models a synthetic "initial definition" of a global
* variable just after the function entry point.
17 changes: 17 additions & 0 deletions cpp/ql/test/library-tests/dataflow/asExpr/test.cpp
Original file line number Diff line number Diff line change
@@ -21,3 +21,20 @@ A& get_ref();
void test2() {
take_ref(get_ref()); // $ asExpr="call to get_ref" asIndirectExpr="call to get_ref"
}

struct S {
int a;
int b;
};

void test_aggregate_literal() {
S s1 = {1, 2}; // $ asExpr=1 asExpr=2 asExpr={...}
const S s2 = {3, 4}; // $ asExpr=3 asExpr=4 asExpr={...}
S s3 = (S){5, 6}; // $ asExpr=5 asExpr=6 asExpr={...}
const S s4 = (S){7, 8}; // $ asExpr=7 asExpr=8 asExpr={...}

S s5 = {.a = 1, .b = 2}; // $ asExpr=1 asExpr=2 asExpr={...}

int xs[] = {1, 2, 3}; // $ asExpr=1 asExpr=2 asExpr=3 asExpr={...}
const int ys[] = {[0] = 4, [1] = 5, [0] = 6}; // $ asExpr=4 asExpr=5 asExpr=6 asExpr={...}
}
Original file line number Diff line number Diff line change
@@ -15,9 +15,9 @@
| example.c:17:11:17:16 | *definition of coords | example.c:17:11:17:16 | *definition of coords |
| example.c:17:11:17:16 | *definition of coords | example.c:17:11:17:16 | *definition of coords |
| example.c:17:11:17:16 | *definition of coords | example.c:17:11:17:16 | *definition of coords |
| example.c:17:11:17:16 | *definition of coords | example.c:17:11:17:16 | *definition of coords |
| example.c:17:11:17:16 | *definition of coords | example.c:17:19:17:22 | {...} |
| example.c:17:11:17:16 | *definition of coords | example.c:24:13:24:18 | *coords |
| example.c:17:11:17:16 | *definition of coords [post update] | example.c:17:11:17:16 | *definition of coords |
| example.c:17:11:17:16 | *definition of coords [post update] | example.c:17:19:17:22 | {...} |
| example.c:17:11:17:16 | *definition of coords [post update] | example.c:24:13:24:18 | *coords |
| example.c:17:11:17:16 | definition of coords | example.c:17:11:17:16 | *definition of coords |
| example.c:17:11:17:16 | definition of coords | example.c:17:11:17:16 | definition of coords |
@@ -27,6 +27,7 @@
| example.c:17:11:17:16 | definition of coords | example.c:24:13:24:18 | coords |
| example.c:17:11:17:16 | definition of coords [post update] | example.c:17:11:17:16 | definition of coords |
| example.c:17:11:17:16 | definition of coords [post update] | example.c:24:13:24:18 | coords |
| example.c:17:19:17:22 | {...} | example.c:17:11:17:16 | *definition of coords |
| example.c:17:19:17:22 | {...} | example.c:17:19:17:22 | {...} |
| example.c:17:21:17:21 | 0 | example.c:17:21:17:21 | 0 |
| example.c:19:6:19:6 | *b | example.c:15:37:15:37 | *b |
Original file line number Diff line number Diff line change
@@ -36,6 +36,7 @@ irTypeBugs
| ../../../include/iterator.h:31:16:31:25 | ../../../include/iterator.h:31:16:31:25 | ../../../include/iterator.h:31:16:31:25 | [summary] read: Argument[this].Element[*] in operator-> |
| ../../../include/iterator.h:31:16:31:25 | ../../../include/iterator.h:31:16:31:25 | ../../../include/iterator.h:31:16:31:25 | [summary] to write: ReturnValue[**] in operator-> |
| ../../../include/iterator.h:31:16:31:25 | ../../../include/iterator.h:31:16:31:25 | ../../../include/iterator.h:31:16:31:25 | [summary] to write: ReturnValue[*] in operator-> |
| clang.cpp:40:30:40:51 | clang.cpp:40:30:40:51 | clang.cpp:40:30:40:51 | {...} |
incorrectBaseType
| clang.cpp:22:8:22:20 | *& ... | Expected 'Node.getType()' to be int, but it was int * |
| clang.cpp:23:17:23:29 | *& ... | Expected 'Node.getType()' to be int, but it was int * |