Skip to content

C++: Improve alias analysis for indirections #1736

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions config/identical-files.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/PrintIR.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/PrintIR.qll"
],
"IR SSASanity": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/SSASanity.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/aliased_ssa/internal/SSASanity.qll"
],
"C++ IR InstructionImports": [
"cpp/ql/src/semmle/code/cpp/ir/implementation/raw/internal/InstructionImports.qll",
"cpp/ql/src/semmle/code/cpp/ir/implementation/unaliased_ssa/internal/InstructionImports.qll",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,27 +16,35 @@ IRUserVariable getIRUserVariable(Language::Function func, Language::Variable var
* be a user-declared variable (`IRUserVariable`) or a temporary variable
* generated by the AST-to-IR translation (`IRTempVariable`).
*/
abstract class IRVariable extends TIRVariable {
class IRVariable extends TIRVariable {
Language::Function func;

abstract string toString();
string toString() {
none()
}

/**
* Gets the type of the variable.
*/
abstract Language::Type getType();
Language::Type getType() {
none()
}

/**
* Gets the AST node that declared this variable, or that introduced this
* variable as part of the AST-to-IR translation.
*/
abstract Language::AST getAST();
Language::AST getAST() {
none()
}

/**
* Gets an identifier string for the variable. This identifier is unique
* within the function.
*/
abstract string getUniqueId();
string getUniqueId() {
none()
}

/**
* Gets the source location of this variable.
Expand Down Expand Up @@ -100,10 +108,14 @@ class IRUserVariable extends IRVariable, TIRUserVariable {
* stack. This includes all parameters, non-static local variables, and
* temporary variables.
*/
abstract class IRAutomaticVariable extends IRVariable {
class IRAutomaticVariable extends IRVariable {
// Membership is defined by enumeration rather than by subclassing: an
// `IRVariable` belongs to this class exactly when it is an
// `IRAutomaticUserVariable` or an `IRTempVariable`.
IRAutomaticVariable() {
this instanceof IRAutomaticUserVariable or
this instanceof IRTempVariable
}
}

class IRAutomaticUserVariable extends IRUserVariable, IRAutomaticVariable {
class IRAutomaticUserVariable extends IRUserVariable {
override Language::AutomaticVariable var;

IRAutomaticUserVariable() {
Expand Down Expand Up @@ -132,7 +144,7 @@ IRTempVariable getIRTempVariable(Language::AST ast, TempVariableTag tag) {
result.getTag() = tag
}

class IRTempVariable extends IRVariable, IRAutomaticVariable, TIRTempVariable {
class IRTempVariable extends IRVariable, TIRTempVariable {
Language::AST ast;
TempVariableTag tag;
Language::Type type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,20 @@ module InstructionSanity {
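/**
* Holds if instruction `instr` has multiple operands with the same operand tag.
* `message` describes the duplication, `func` is the enclosing `IRFunction` of
* `instr`, and `funcText` is the identity string of that function.
*/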
query predicate duplicateOperand(Instruction instr, OperandTag tag) {
strictcount(NonPhiOperand operand |
operand = instr.getAnOperand() and
operand.getOperandTag() = tag
) > 1 and
not tag instanceof UnmodeledUseOperandTag
query predicate duplicateOperand(Instruction instr, string message, IRFunction func,
string funcText) {
exists(OperandTag tag, int operandCount |
// Count the non-phi operands of `instr` that carry `tag`.
operandCount = strictcount(NonPhiOperand operand |
operand = instr.getAnOperand() and
operand.getOperandTag() = tag
) and
operandCount > 1 and
// Repeated operands with the unmodeled-use tag are not reported.
not tag instanceof UnmodeledUseOperandTag and
// NOTE(review): `$@` is presumably a placeholder that the result consumer
// fills in from `func`/`funcText` - confirm against the sanity query runner.
message = "Instruction has " + operandCount + " operands with tag '" + tag.toString() + "'" +
" in function '$@'." and
func = instr.getEnclosingIRFunction() and
funcText = Language::getIdentityString(func.getFunction())
)
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,34 +2,10 @@ private import AliasAnalysisInternal
private import cpp
private import InputIR
private import semmle.code.cpp.ir.internal.IntegerConstant as Ints

private import semmle.code.cpp.models.interfaces.Alias

private class IntValue = Ints::IntValue;

/**
* Converts the bit count in `bits` to a byte count and a bit count in the form
* bytes:bits.
*/
bindingset[bits]
string bitsToBytesAndBits(int bits) {
result = (bits / 8).toString() + ":" + (bits % 8).toString()
}

/**
* Gets a printable string for a bit offset with possibly unknown value.
*/
bindingset[bitOffset]
string getBitOffsetString(IntValue bitOffset) {
if Ints::hasValue(bitOffset) then
if bitOffset >= 0 then
result = "+" + bitsToBytesAndBits(bitOffset)
else
result = "-" + bitsToBytesAndBits(Ints::neg(bitOffset))
else
result = "+?"
}

/**
* Gets the offset of field `field` in bits.
*/
Expand Down Expand Up @@ -142,7 +118,11 @@ private predicate operandIsPropagated(Operand operand, IntValue bitOffset) {
) or
// Adding an integer to or subtracting an integer from a pointer propagates
// the address with an offset.
bitOffset = getPointerBitOffset(instr.(PointerOffsetInstruction)) or
exists(PointerOffsetInstruction ptrOffset |
ptrOffset = instr and
operand = ptrOffset.getLeftOperand() and
bitOffset = getPointerBitOffset(ptrOffset)
) or
// Computing a field address from a pointer propagates the address plus the
// offset of the field.
bitOffset = getFieldBitOffset(instr.(FieldAddressInstruction).getField()) or
Expand Down Expand Up @@ -278,52 +258,70 @@ private predicate resultEscapesNonReturn(Instruction instr) {
}

/**
* Holds if the address of the specified local variable or parameter escapes the
* domain of the analysis.
* Holds if the address of `allocation` escapes outside the domain of the analysis. This can occur
* either because the allocation's address is taken within the function and escapes, or because the
* allocation is marked as always escaping via `alwaysEscapes()`.
*/
predicate allocationEscapes(Configuration::Allocation allocation) {
// Either the allocation is declared as always escaping by the configuration...
allocation.alwaysEscapes() or
// ...or `resultEscapesNonReturn` holds for one of its base instructions.
resultEscapesNonReturn(allocation.getABaseInstruction())
}

/**
* Equivalent to `operandIsPropagated()`, but includes interprocedural propagation.
*/
private predicate automaticVariableAddressEscapes(IRAutomaticVariable var) {
// The variable's address escapes if the result of any
// VariableAddressInstruction that computes the variable's address escapes.
exists(VariableAddressInstruction instr |
instr.getVariable() = var and
resultEscapesNonReturn(instr)
private predicate operandIsPropagatedIncludingByCall(Operand operand, IntValue bitOffset) {
// Intraprocedural case: defer directly to `operandIsPropagated`.
operandIsPropagated(operand, bitOffset) or
// Call case: `isArgumentForParameter` relates `operand` to `call` and `init`,
// and `resultReturned` holds for `init` with offset `bitOffset`.
// NOTE(review): presumably `init` is the callee's initialization of the
// matching parameter - confirm against `isArgumentForParameter`.
exists(CallInstruction call, Instruction init |
isArgumentForParameter(call, operand, init) and
resultReturned(init, bitOffset)
)
}

/**
* Holds if the address of the specified variable escapes the domain of the
* analysis.
* Holds if `addrOperand` is at offset `bitOffset` from the value of instruction `base`. The offset
* may be `unknown()`.
*/
predicate variableAddressEscapes(IRVariable var) {
automaticVariableAddressEscapes(var.(IRAutomaticVariable)) or
// All variables with static storage duration have their address escape.
not var instanceof IRAutomaticVariable
private predicate hasBaseAndOffset(AddressOperand addrOperand, Instruction base,
IntValue bitOffset) {
// The definition of the address operand is trivially a base at offset zero.
base = addrOperand.getDef() and bitOffset = 0 or // Base case
// Recursive case: step one propagation edge further back from an
// already-known base `middle`.
exists(Instruction middle, int previousBitOffset, Operand middleOperand,
IntValue additionalBitOffset |
// We already have an offset from `middle`.
hasBaseAndOffset(addrOperand, middle, previousBitOffset) and
// `middle` is propagated from `base`.
middleOperand = middle.getAnOperand() and
operandIsPropagatedIncludingByCall(middleOperand, additionalBitOffset) and
base = middleOperand.getDef() and
// The total offset is the offset already accumulated plus the offset
// contributed by this propagation step, combined with `Ints::add`.
bitOffset = Ints::add(previousBitOffset, additionalBitOffset)
)
}

/**
* Holds if the result of instruction `instr` points within variable `var`, at
* bit offset `bitOffset` within the variable. If the result points within
* `var`, but at an unknown or non-constant offset, then `bitOffset` is unknown.
* Holds if `addrOperand` is at constant offset `bitOffset` from the value of instruction `base`.
* Only holds for the `base` with the longest chain of propagation to `addrOperand`.
*/
predicate resultPointsTo(Instruction instr, IRVariable var, IntValue bitOffset) {
(
// The address of a variable points to that variable, at offset 0.
instr.(VariableAddressInstruction).getVariable() = var and
bitOffset = 0
) or
exists(Operand operand, IntValue originalBitOffset, IntValue propagatedBitOffset |
operand = instr.getAnOperand() and
// If an operand is propagated, then the result points to the same variable,
// offset by the bit offset from the propagation.
resultPointsTo(operand.getAnyDef(), var, originalBitOffset) and
(
operandIsPropagated(operand, propagatedBitOffset)
or
exists(CallInstruction ci, Instruction init |
isArgumentForParameter(ci, operand, init) and
resultReturned(init, propagatedBitOffset)
)
) and
bitOffset = Ints::add(originalBitOffset, propagatedBitOffset)
predicate addressOperandBaseAndConstantOffset(AddressOperand addrOperand, Instruction base,
int bitOffset) {
hasBaseAndOffset(addrOperand, base, bitOffset) and
// Only offsets for which `Ints::hasValue` holds are accepted.
Ints::hasValue(bitOffset) and
// Reject `base` when the definition of one of its own operands is also a
// base of `addrOperand` with a known offset, i.e. the chain can be extended
// one step further back.
not exists(Instruction previousBase, int previousBitOffset |
hasBaseAndOffset(addrOperand, previousBase, previousBitOffset) and
previousBase = base.getAnOperand().getDef() and
Ints::hasValue(previousBitOffset)
)
}

/**
* Gets the allocation into which `addrOperand` points, if known.
*/
Configuration::Allocation getAddressOperandAllocation(AddressOperand addrOperand) {
exists(Instruction base |
// The result is an allocation that has `base` as one of its base instructions.
result.getABaseInstruction() = base and
// `base` must be reachable from `addrOperand` at any offset (the offset is ignored here).
hasBaseAndOffset(addrOperand, base, _) and
// Only use a `base` that cannot be extended further back: no definition of
// one of its operands is itself a base of `addrOperand`.
not exists(Instruction previousBase |
hasBaseAndOffset(addrOperand, previousBase, _) and
previousBase = base.getAnOperand().getDef()
)
)
}
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
import semmle.code.cpp.ir.implementation.unaliased_ssa.IR as InputIR
import AliasConfiguration as Configuration
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
private import cpp
private import semmle.code.cpp.ir.implementation.unaliased_ssa.IR
private import semmle.code.cpp.ir.implementation.unaliased_ssa.gvn.ValueNumbering
private import AliasAnalysis

/**
* A memory allocation that can be tracked by the AliasedSSA alias analysis.
* For now, we track all variables accessed within the function, including both local variables
* and global variables. In the future, we will track indirect parameters as well.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about modeled allocators or default new?

*/
class Allocation extends ValueNumber {
// The variable whose address is computed by an instruction of this value
// number; bound in the characteristic predicate below.
IRVariable var;

Allocation() {
// For now, we only track variables.
var = this.getAnInstruction().(VariableAddressInstruction).getVariable()
}

/**
 * Gets a display string for this allocation: the `toString()` of the
 * underlying variable, with a `*` suffix appended when the allocation is not
 * unaliased (see `isUnaliased()`).
 */
final string getAllocationString() {
exists(string suffix |
result = var.toString() + suffix and
if isUnaliased() then
suffix = ""
else
suffix = "*"
)
}

/**
 * Gets the type of the underlying variable.
 */
final Type getType() {
result = var.getType()
}

/**
 * Gets the size of this allocation in bits, computed as the size of its type
 * multiplied by 8.
 */
final int getBitSize() {
// NOTE(review): assumes `getSize()` reports bytes - confirm.
result = getType().getSize() * 8
}

/**
 * Holds if this allocation is treated as escaping regardless of how its
 * address is used, which is the case for every variable that is not an
 * `IRAutomaticVariable`.
 */
final predicate alwaysEscapes() {
// An automatic variable only escapes if its address is taken and escapes, but we assume that
// any other kind of variable always escapes.
not var instanceof IRAutomaticVariable
}

/**
 * Holds if `allocationEscapes` does not hold for this allocation.
 */
final predicate isUnaliased() {
not allocationEscapes(this)
}

/**
 * Gets an instruction that serves as a base address for this allocation.
 */
final Instruction getABaseInstruction() {
// Any instruction with this value number serves as a base address for this allocation.
result = getAnInstruction()
}
}
Loading