Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Implement package preprocessor2
  • Loading branch information
MichaelRFairhurst committed May 14, 2025
commit a653a58dc994b7d7ab40129e3ea1e556f7655d5a
43 changes: 43 additions & 0 deletions cpp/common/src/codingstandards/cpp/Macro.qll
Original file line number Diff line number Diff line change
@@ -19,6 +19,49 @@ class FunctionLikeMacro extends Macro {
exists(this.getBody().regexpFind("\\#?\\b" + parameter + "\\b", _, result))
)
}

/**
 * Holds if the parameter at position `index` is used in the macro body in a way that may make it
 * vulnerable to precedence issues.
 *
 * Typically, parameters are wrapped in parentheses to protect them from precedence issues, but
 * that is not always possible.
 *
 * An occurrence of the parameter is considered unprotected when it is missing a parenthesis on
 * at least one side and is not the operand of a leading `#` or `##` operator.
 */
predicate parameterPrecedenceUnprotected(int index) {
  exists(string parameter | parameter = getParameter(index) |
    // Finds any occurrence of the parameter that is missing a parenthesis before it or after it,
    // and that is not preceded by the '#' (or '##') operator.
    //
    // Note the following cases:
    //  - "(x + 1)" is preceded by a parenthesis, but not followed by one, so SHOULD be matched.
    //  - "x # 1" is followed by "#" (though not preceded by #) and SHOULD be matched.
    //  - "(1 + x)" is followed by a parenthesis, but not preceded by one, so SHOULD be matched.
    //  - "1 # x" is preceded by "#" (though not followed by #) and SHOULD NOT be matched.
    //
    // So the regex is structured as follows:
    //  - paramMatch: Matches the parameter at a word boundary, with optional whitespace.
    //  - notHashed: Finds parameters not used with a leading # operator.
    //  - The final regex finds cases of `notHashed` that are not preceded by a parenthesis,
    //    and cases of `notHashed` that are not followed by a parenthesis.
    //
    // Therefore, a parameter with parentheses on both sides is not matched, and a parameter with
    // a parenthesis missing on one or both sides is only matched if there is no leading `#` or
    // `##` operator. A trailing `#` or `##` does not prevent a match.
    exists(string noBeforeParen, string noAfterParen, string paramMatch, string notHashed |
      // Not preceded by a parenthesis (optionally separated by whitespace).
      noBeforeParen = "(?<!\\(\\s*)" and
      // Not followed by a parenthesis (optionally separated by whitespace).
      noAfterParen = "(?!\\s*\\))" and
      // Parameter at word boundary in optional whitespace.
      paramMatch = "\\s*\\b" + parameter + "\\b\\s*" and
      // Not preceded by the # operator. The lookbehind must skip whitespace itself: `paramMatch`
      // starts with `\s*`, which may match the empty string, so a bare `(?<!#)` would be checked
      // directly before the parameter name and would see the space in "# x" instead of the '#'.
      notHashed = "(?<!#\\s*)" + paramMatch and
      // Parameter is used without a leading or without a trailing parenthesis, and without a
      // leading #.
      getBody()
          .regexpMatch(".*(" + noBeforeParen + notHashed + "|" + notHashed + noAfterParen + ").*")
    )
  )
}
}

newtype TMacroOperator =
2 changes: 1 addition & 1 deletion cpp/common/src/codingstandards/cpp/MatchingParenthesis.qll
Original file line number Diff line number Diff line change
@@ -61,7 +61,7 @@ module MatchingParenthesis<InputString Input> {
occurrence = prevOccurrence + 1
) else (
token = TNotParen() and
exists(inputStr.regexpFind("\\(|\\)", prevOccurrence + 1, endPos)) and
exists(inputStr.regexpFind("\\(|\\)|$", prevOccurrence + 1, endPos)) and
Copy link
Preview

Copilot AI May 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Including '$' in the regex "\\(|\\)|$" allows matching an empty string at end‐of‐input, which can lead to zero‐length matches and infinite loops. Consider matching only parentheses here and handling end‐of‐input separately.

Suggested change
exists(inputStr.regexpFind("\\(|\\)|$", prevOccurrence + 1, endPos)) and
exists(inputStr.regexpFind("\\(|\\)", prevOccurrence + 1, endPos)) and
(endPos < inputStr.length() or endPos = inputStr.length()) and

Copilot uses AI. Check for mistakes.

occurrence = prevOccurrence
)
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
//** THIS FILE IS AUTOGENERATED, DO NOT MODIFY DIRECTLY. **/
import cpp
import RuleMetadata
import codingstandards.cpp.exclusions.RuleMetadata

/**
 * The queries of the `Preprocessor2` package: one branch per query, each wrapped into a `Query`
 * value by the corresponding predicate in `Preprocessor2Package`.
 */
newtype Preprocessor2Query =
TInvalidIncludeDirectiveQuery() or
TUnparenthesizedMacroArgumentQuery() or
TDisallowedUseOfPragmaQuery()

/**
 * Holds if `query` is one of the `Preprocessor2` package queries, where `queryId` is its `@id`,
 * `ruleId` is the rule it is associated with, and `category` is the category recorded for it.
 */
predicate isPreprocessor2QueryMetadata(Query query, string queryId, string ruleId, string category) {
  // Metadata for the `invalidIncludeDirective` query.
  ruleId = "RULE-19-2-2" and
  category = "required" and
  queryId = "cpp/misra/invalid-include-directive" and
  query = Preprocessor2Package::invalidIncludeDirectiveQuery()
  or
  // Metadata for the `unparenthesizedMacroArgument` query.
  ruleId = "RULE-19-3-4" and
  category = "required" and
  queryId = "cpp/misra/unparenthesized-macro-argument" and
  query = Preprocessor2Package::unparenthesizedMacroArgumentQuery()
  or
  // Metadata for the `disallowedUseOfPragma` query.
  ruleId = "RULE-19-6-1" and
  category = "advisory" and
  queryId = "cpp/misra/disallowed-use-of-pragma" and
  query = Preprocessor2Package::disallowedUseOfPragmaQuery()
}

/** Accessors producing the `Query` value for each query of the `Preprocessor2` package. */
module Preprocessor2Package {
  /** Gets the `Query` value for the `invalidIncludeDirective` query. */
  Query invalidIncludeDirectiveQuery() {
    result = TQueryCPP(TPreprocessor2PackageQuery(TInvalidIncludeDirectiveQuery()))
  }

  /** Gets the `Query` value for the `unparenthesizedMacroArgument` query. */
  Query unparenthesizedMacroArgumentQuery() {
    result = TQueryCPP(TPreprocessor2PackageQuery(TUnparenthesizedMacroArgumentQuery()))
  }

  /** Gets the `Query` value for the `disallowedUseOfPragma` query. */
  Query disallowedUseOfPragmaQuery() {
    result = TQueryCPP(TPreprocessor2PackageQuery(TDisallowedUseOfPragmaQuery()))
  }
}
Original file line number Diff line number Diff line change
@@ -41,6 +41,7 @@ import OrderOfEvaluation
import OutOfBounds
import Pointers
import Preprocessor
import Preprocessor2
import Representation
import Scope
import SideEffects1
@@ -96,6 +97,7 @@ newtype TCPPQuery =
TOutOfBoundsPackageQuery(OutOfBoundsQuery q) or
TPointersPackageQuery(PointersQuery q) or
TPreprocessorPackageQuery(PreprocessorQuery q) or
TPreprocessor2PackageQuery(Preprocessor2Query q) or
TRepresentationPackageQuery(RepresentationQuery q) or
TScopePackageQuery(ScopeQuery q) or
TSideEffects1PackageQuery(SideEffects1Query q) or
@@ -151,6 +153,7 @@ predicate isQueryMetadata(Query query, string queryId, string ruleId, string cat
isOutOfBoundsQueryMetadata(query, queryId, ruleId, category) or
isPointersQueryMetadata(query, queryId, ruleId, category) or
isPreprocessorQueryMetadata(query, queryId, ruleId, category) or
isPreprocessor2QueryMetadata(query, queryId, ruleId, category) or
isRepresentationQueryMetadata(query, queryId, ruleId, category) or
isScopeQueryMetadata(query, queryId, ruleId, category) or
isSideEffects1QueryMetadata(query, queryId, ruleId, category) or
24 changes: 24 additions & 0 deletions cpp/misra/src/rules/RULE-19-2-2/InvalidIncludeDirective.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
* @id cpp/misra/invalid-include-directive
* @name RULE-19-2-2: The #include directive shall be followed by either a <filename> or "filename" sequence
* @description Include directives shall only use the <filename> or "filename" forms.
* @kind problem
* @precision very-high
* @problem.severity error
* @tags external/misra/id/rule-19-2-2
* scope/single-translation-unit
* maintainability
* correctness
* external/misra/enforcement/decidable
* external/misra/obligation/required
*/

import cpp
import codingstandards.cpp.misra

/**
 * Holds if the trimmed include text of `directive` is entirely of the form `<...>` or `"..."`,
 * with at least one character between the delimiters.
 */
private predicate hasFilenameForm(Include directive) {
  // `<`, one or more non-`>` characters, `>`; or `"`, one or more non-`"` characters, `"`.
  directive.getIncludeText().trim().regexpMatch("^(<[^>]+>|\"[^\"]+\")$")
}

from Include directive
where
  not hasFilenameForm(directive) and
  not isExcluded(directive, Preprocessor2Package::invalidIncludeDirectiveQuery())
select directive, "Non-compliant #include directive text '" + directive.getHead() + "'."
187 changes: 187 additions & 0 deletions cpp/misra/src/rules/RULE-19-3-4/UnparenthesizedMacroArgument.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,187 @@
/**
* @id cpp/misra/unparenthesized-macro-argument
* @name RULE-19-3-4: Parentheses shall be used to ensure macro arguments are expanded appropriately
* @description Expanded macro arguments shall be enclosed in parentheses to ensure the resulting
* expressions have the expected precedence and order of operations.
* @kind problem
* @precision very-high
* @problem.severity error
* @tags external/misra/id/rule-19-3-4
* scope/single-translation-unit
* correctness
* maintainability
* external/misra/enforcement/decidable
* external/misra/obligation/required
*/

import cpp
import codingstandards.cpp.misra
import codingstandards.cpp.Macro
import codingstandards.cpp.MatchingParenthesis
import codeql.util.Boolean

/**
* This regex is used to find macro arguments that appear to have critical operators in them, before
* we do the expensive process of parsing them to look for parentheses.
*/
pragma[noinline]
string criticalOperatorRegex() {
result =
".*(" +
concat(string op |
op in [
Copy link
Preview

Copilot AI May 14, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The regex built in criticalOperatorRegex concatenates a trailing '|' inside the capture group, introducing an empty alternative that matches any input and causing hasCriticalOperator to always return true. Remove the trailing '|' or build the pattern without leading/trailing separators.

Copilot uses AI. Check for mistakes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Pretty sure this is incorrect, as concat(... | v, "|") should function like join() and not add a trailing |.

"\\*=?", "/=?", "%=?", "\\+=?", "-=?", "<<?=?", ">>?=?", "==?", "!=", "&&?=?", "\\^/?",
"\\|\\|?=?", "\\?"
]
|
op, "|"
) + ").*"
}

/**
 * Holds if `input` matches the pattern produced by `criticalOperatorRegex()`, that is, if the
 * string appears to contain a critical operator.
 */
bindingset[input]
predicate hasCriticalOperator(string input) {
  exists(string pattern | pattern = criticalOperatorRegex() | input.regexpMatch(pattern))
}

/**
 * An expression whose operator is "critical". A critical operator is an operator with "level"
 * between 13 and 2, according to the MISRA C++ standard, i.e. from the "multiplicative" level
 * (13) to the "conditional" level (2).
 *
 * This class covers binary operations, assignments, and conditional expressions; for the latter
 * the operator is reported as "?".
 */
class CriticalOperatorExpr extends Expr {
  string operator;

  CriticalOperatorExpr() {
    // The conditional operator has no `getOperator()` of its own here, so it is spelled out.
    this instanceof ConditionalExpr and operator = "?"
    or
    operator = [this.(BinaryOperation).getOperator(), this.(Assignment).getOperator()]
  }

  /** Gets the textual operator of this expression. */
  string getOperator() { result = operator }
}

/**
* An invocation of a macro that has a parameter that is not precedence-protected with parentheses,
* and that produces a critical operator expression.
*
* This class is used in two passes. Firstly, with `hasRiskyParameter`, to find the macro parameters
* that should be parsed for parentheses. Secondly, with `hasNonCompliantParameter`, to parse the
* risky parameters and attempt to match the produced AST to an unparenthesized occurrence of that
* operator in the argument text.
*
* For a given macro invocation to be considered risky:
* - The macro must have a parameter that is not precedence-protected with parentheses.
* - The invocation must produce a critical operator expression.
* - The invocation must produce only expressions, statements, or variable declarations with
*   initializers.
*
* For a risky macro invocation to be non-compliant, the predicate `hasNonCompliantParameter` must
* hold for some values.
*/
class RiskyMacroInvocation extends MacroInvocation {
FunctionLikeMacro macro;
string riskyParamName;
int riskyParamIdx;

RiskyMacroInvocation() {
macro = getMacro() and
// The parameter is not precedence-protected with parentheses in the macro body.
macro.parameterPrecedenceUnprotected(riskyParamIdx) and
riskyParamName = macro.getParameter(riskyParamIdx) and
// This macro invocation produces a critical operator expression.
getAGeneratedElement() instanceof CriticalOperatorExpr and
// Every generated element (and there is at least one) is an expression, a statement, or a
// variable / variable declaration entry whose initializer expression is also generated.
forex(Element e | e = getAGeneratedElement() |
e instanceof Expr
or
e instanceof Stmt
or
e.(Variable).getInitializer().getExpr() = getAGeneratedElement()
or
e.(VariableDeclarationEntry).getDeclaration().getInitializer().getExpr() =
getAGeneratedElement()
)
}

/**
* A stage 1 pass used to find macro parameters that are not precedence-protected, and have a
* critical operator in them, and therefore need to be parsed to check for parentheses at the
* macro call-site, which is expensive.
*
* Holds if `name` and `index` identify the unprotected parameter of this invocation, and `value`
* is the expanded argument at that index, which matches `hasCriticalOperator`.
*/
predicate hasRiskyParameter(string name, int index, string value) {
name = riskyParamName and
index = riskyParamIdx and
value = getExpandedArgument(riskyParamIdx) and
hasCriticalOperator(value)
}

/**
* A stage 2 pass that occurs after risky parameters have been parsed, to check for parentheses at
* the macro call-site.
*
* For a given macro argument to be flagged, it must:
* - be risky as determined by the characteristic predicate (produce a critical operator and only
* expressions, statements, etc).
* - be flagged by stage 1 as a risky parameter (i.e. it must have a critical operator in it and
* correspond to a macro parameter that is not precedence-protected with parentheses)
* - there must be a top-level text node that contains the operator in the argument string
* - the operator cannot be the first character in the string (i.e. it should not look like a
* unary - or +)
* - the operator cannot occur inside the value of a generated literal
* - the occurrence of the operator should not be as a substring of "->", "++", or "--"
* operators.
*
* The results of this predicate should be flagged by the query.
*/
predicate hasNonCompliantParameter(string name, int index, string value, string operator) {
hasRiskyParameter(name, index, value) and
exists(
ParsedRoot parsedRoot, ParsedText topLevelText, string text, CriticalOperatorExpr opExpr,
int opIndex
|
parsedRoot.getInputString() = value and
// Only text outside any parentheses: a direct child of the root, or the root itself.
(topLevelText.getParent() = parsedRoot or topLevelText = parsedRoot) and
text = topLevelText.getText().trim() and
// The operator must belong to a critical operator expression generated by this invocation.
opExpr = getAGeneratedElement() and
operator = opExpr.getOperator() and
opIndex = text.indexOf(operator) and
// Ignore "->", "++", and "--" operators.
not [text.substring(opIndex - 1, opIndex + 1), text.substring(opIndex, opIndex + 2)] =
["--", "++", "->"] and
// Ignore operators inside string literals.
not exists(Literal l |
l = getAGeneratedElement() and
exists(l.getValue().indexOf(operator))
) and
// A leading operator is probably unary and not a problem.
(opIndex > 0 or topLevelText.getChildIdx() > 0)
)
}
}

/**
 * A macro argument string selected for parsing: the expanded value of a risky parameter of some
 * `RiskyMacroInvocation`.
 *
 * Parsing is expensive, so this set of strings should stay reasonably small.
 */
class RiskyMacroArgString extends string {
  RiskyMacroArgString() {
    exists(RiskyMacroInvocation invocation | invocation.hasRiskyParameter(_, _, this))
  }
}

// Import `ParsedRoot` etc for the parsed macro arguments.
import MatchingParenthesis<RiskyMacroArgString>

from
  RiskyMacroInvocation invocation, FunctionLikeMacro macro, string parameterName,
  string operatorText, int parameterIndex, string expandedArgument
where
  macro = invocation.getMacro() and
  invocation
      .hasNonCompliantParameter(parameterName, parameterIndex, expandedArgument, operatorText) and
  // Neither the macro definition nor the invocation may be excluded.
  not isExcluded(macro.(Element), Preprocessor2Package::unparenthesizedMacroArgumentQuery()) and
  not isExcluded(invocation.(Element), Preprocessor2Package::unparenthesizedMacroArgumentQuery())
select invocation,
  "Macro argument " + parameterIndex + " (with expanded value '" + expandedArgument +
    "') contains a" + " critical operator '" + operatorText +
    "' that is not parenthesized, but macro $@ argument '" + parameterName +
    "' is not precedence-protected with parenthesis.", macro, macro.getName()
32 changes: 32 additions & 0 deletions cpp/misra/src/rules/RULE-19-6-1/DisallowedUseOfPragma.ql
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
/**
* @id cpp/misra/disallowed-use-of-pragma
* @name RULE-19-6-1: The #pragma directive and the _Pragma operator should not be used
* @description Preprocessor pragma directives are implementation-defined, and should not be used to
* maintain code portability.
* @kind problem
* @precision very-high
* @problem.severity error
* @tags external/misra/id/rule-19-6-1
* scope/single-translation-unit
* maintainability
* external/misra/enforcement/decidable
* external/misra/obligation/advisory
*/

import cpp
import codingstandards.cpp.misra

from PreprocessorDirective directive, string kind
where
  not isExcluded(directive, Preprocessor2Package::disallowedUseOfPragmaQuery()) and
  (
    // A `_Pragma(...)` operator appearing in the directive head, or in the body when the
    // directive is a macro definition.
    exists(string searchedText, string operatorUse |
      (searchedText = directive.getHead() or searchedText = directive.(Macro).getBody()) and
      operatorUse = searchedText.regexpCapture(".*\\b(_Pragma\\b\\s*\\([^\\)]+\\)).*", 1) and
      kind = "_Pragma operator used: '" + operatorUse + "'"
    )
    or
    // A plain `#pragma` directive.
    directive instanceof PreprocessorPragma and
    kind = "#pragma directive '" + directive.getHead() + "'"
  )
select directive, "Non-compliant " + kind + "."
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
| test.cpp:6:1:6:20 | #include STRING_PATH | Non-compliant #include directive text 'STRING_PATH'. |
| test.cpp:10:1:10:16 | #include QSTRING | Non-compliant #include directive text 'QSTRING'. |
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
rules/RULE-19-2-2/InvalidIncludeDirective.ql
Loading
Oops, something went wrong.