Skip to content

Actions: Improve Bash parsing performance on command and string interpolations #19701

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 98 additions & 44 deletions actions/ql/lib/codeql/actions/Bash.qll
Original file line number Diff line number Diff line change
Expand Up @@ -8,35 +8,64 @@ class BashShellScript extends ShellScript {
)
}

private string lineProducer(int i) {
result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", i)
}

private predicate cmdSubstitutionReplacement(string cmdSubs, string id, int k) {
exists(string line | line = this.lineProducer(k) |
exists(int i, int j |
cmdSubs =
// $() cmd substitution
line.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", i, j)
.regexpReplaceAll("^\\$\\(", "")
.regexpReplaceAll("\\)$", "") and
id = "cmdsubs:" + k + ":" + i + ":" + j
)
or
exists(int i, int j |
// `...` cmd substitution
cmdSubs =
line.regexpFind("\\`[^\\`]+\\`", i, j)
.regexpReplaceAll("^\\`", "")
.regexpReplaceAll("\\`$", "") and
id = "cmd:" + k + ":" + i + ":" + j
)
/**
* Gets the line at 0-based index `lineIndex` within this shell script,
* assuming newlines as separators.
*/
private string lineProducer(int lineIndex) {
result = this.getRawScript().regexpReplaceAll("\\\\\\s*\n", "").splitAt("\n", lineIndex)
}

private predicate cmdSubstitutionReplacement(string command, string id, int lineIndex) {
this.commandInSubstitution(lineIndex, command, id)
or
this.commandInBackticks(lineIndex, command, id)
}

/**
* Holds if there is a command substitution `$(command)` in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this command.
*/
private predicate commandInSubstitution(int lineIndex, string command, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
command =
// Look for the command inside a $(...) command substitution
this.lineProducer(lineIndex)
.regexpFind("\\$\\((?:[^()]+|\\((?:[^()]+|\\([^()]*\\))*\\))*\\)", occurrenceIndex,
occurrenceOffset)
// trim starting $( - TODO do this in first regex
.regexpReplaceAll("^\\$\\(", "")
// trim ending ) - TODO do this in first regex
.regexpReplaceAll("\\)$", "") and
id = "cmdsubs:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
)
}

private predicate rankedCmdSubstitutionReplacements(int i, string old, string new) {
old = rank[i](string old2 | this.cmdSubstitutionReplacement(old2, _, _) | old2) and
this.cmdSubstitutionReplacement(old, new, _)
/**
* Holds if `command` is a command in backticks `` `...` `` in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this command.
*/
private predicate commandInBackticks(int lineIndex, string command, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
command =
this.lineProducer(lineIndex)
.regexpFind("\\`[^\\`]+\\`", occurrenceIndex, occurrenceOffset)
// trim leading backtick - TODO do this in first regex
.regexpReplaceAll("^\\`", "")
// trim trailing backtick - TODO do this in first regex
.regexpReplaceAll("\\`$", "") and
id = "cmd:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset
)
}

private predicate rankedCmdSubstitutionReplacements(int i, string command, string commandId) {
// rank commands by their unique IDs
commandId = rank[i](string c, string id | this.cmdSubstitutionReplacement(c, id, _) | id) and
// since we cannot output (command, ID) tuples from the rank operation,
// we need to work out the specific command associated with the resulting ID
this.cmdSubstitutionReplacement(command, commandId, _)
}

private predicate doReplaceCmdSubstitutions(int line, int round, string old, string new) {
Expand Down Expand Up @@ -64,31 +93,56 @@ class BashShellScript extends ShellScript {
this.cmdSubstitutionReplacement(result, _, i)
}

/**
* Holds if `quotedStr` is a string in double quotes in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this quoted string.
*/
private predicate doubleQuotedString(int lineIndex, string quotedStr, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
// double quoted string
quotedStr =
this.cmdSubstitutedLineProducer(lineIndex)
.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", occurrenceIndex, occurrenceOffset) and
id =
"qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
}

/**
* Holds if `quotedStr` is a string in single quotes in
* the line at `lineIndex` in the shell script,
* and `id` is a unique identifier for this quoted string.
*/
private predicate singleQuotedString(int lineIndex, string quotedStr, string id) {
exists(int occurrenceIndex, int occurrenceOffset |
// single quoted string
quotedStr =
this.cmdSubstitutedLineProducer(lineIndex)
.regexpFind("'((?:\\\\.|[^'\\\\])*)'", occurrenceIndex, occurrenceOffset) and
id =
"qstr:" + lineIndex + ":" + occurrenceIndex + ":" + occurrenceOffset + ":" +
quotedStr.length() + ":" + quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
}

private predicate quotedStringReplacement(string quotedStr, string id) {
exists(string line, int k | line = this.cmdSubstitutedLineProducer(k) |
exists(int i, int j |
// double quoted string
quotedStr = line.regexpFind("\"((?:[^\"\\\\]|\\\\.)*)\"", i, j) and
id =
"qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" +
quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
exists(int lineIndex |
this.doubleQuotedString(lineIndex, quotedStr, id)
or
exists(int i, int j |
// single quoted string
quotedStr = line.regexpFind("'((?:\\\\.|[^'\\\\])*)'", i, j) and
id =
"qstr:" + k + ":" + i + ":" + j + ":" + quotedStr.length() + ":" +
quotedStr.regexpReplaceAll("[^a-zA-Z0-9]", "")
)
this.singleQuotedString(lineIndex, quotedStr, id)
) and
// Only do this for strings that might otherwise disrupt subsequent parsing
quotedStr.regexpMatch("[\"'].*[$\n\r'\"" + Bash::separator() + "].*[\"']")
}

private predicate rankedQuotedStringReplacements(int i, string old, string new) {
old = rank[i](string old2 | this.quotedStringReplacement(old2, _) | old2) and
this.quotedStringReplacement(old, new)
private predicate rankedQuotedStringReplacements(int i, string quotedString, string quotedStringId) {
// rank quoted strings by their nearly-unique IDs
quotedStringId = rank[i](string s, string id | this.quotedStringReplacement(s, id) | id) and
// since we cannot output (string, ID) tuples from the rank operation,
// we need to work out the specific string associated with the resulting ID
this.quotedStringReplacement(quotedString, quotedStringId)
}

private predicate doReplaceQuotedStrings(int line, int round, string old, string new) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
name: Workflow with complex interpolation
on:
workflow_dispatch:
inputs:
choice-a:
required: true
type: choice
description: choice-a
default: a1
options:
- a1
- a2
- a3
string-b:
required: false
type: string
description: string-b
string-c:
required: false
type: string
description: string-c
list-d:
required: true
type: string
default: d1 d2
description: list-d whitespace separated
list-e:
required: false
type: string
description: list-e whitespace separated
choice-f:
required: true
type: choice
description: choice-f
options:
- false
- true

env:
DRY_TEST: false
B: ${{ github.event.inputs.string-b }}

jobs:
job:
runs-on: ubuntu-latest
steps:
- name: Produce values
id: produce-values
run: |
echo "region=region" >> $GITHUB_OUTPUT
echo "zone=zone" >> $GITHUB_OUTPUT

- name: Step with complex interpolation
id: complex
env:
CHOICE_A: ${{ github.event.inputs.choice-a }}
STRING_B: ${{ github.event.inputs.string-b }}
STRING_C: ${{ github.event.inputs.string-c }}
LIST_D: ${{ github.event.inputs.list-d }}
LIST_E: ${{ github.event.inputs.list-e }}
CHOICE_F: ${{ github.event.inputs.choice-f }}
REGION: ${{ steps.produce-values.outputs.region }}
ZONE: ${{ steps.produce-values.outputs.zone }}
DRY_TEST_JSON: ${{ fromJSON(env.DRY_TEST) }}
FUNCTION_NAME: my-function
USER_EMAIL: 'example@example.com'
TYPE: type
RANGE: '0-100'

run: |
comma_separated_list_d=$(echo "${LIST_D}" | sed "s/ /\",\"/g")
comma_separated_list_e=$(echo "${LIST_E}" | sed "s/ /\",\"/g")
c1=$(echo "${STRING_C}" | cut -d "-" -f 1)
c2=$(echo "${STRING_C}" | cut -d "-" -f 2)
# Similar commands that use JSON payloads with string interpolation.
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":["'"${comma_separated_list_d}"'"],"listE":"","dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
response=$(aws lambda invoke --invocation-type RequestResponse --function-name "${FUNCTION_NAME}" --region "${REGION}" --cli-read-timeout 0 --cli-binary-format raw-in-base64-out --payload '{"appName":"my-app","chA":"'"${CHOICE_A}"'","c1":"'"${c1}"'","c2":"'"${c2}"'","a":"${CHOICE_A}","bValue":"${B}","zone":"${ZONE}","userEmail":"'"${USER_EMAIL}"'","region":"${REGION}","range":"${RANGE}","type":"${TYPE}","b":"${STRING_B}","listD":"","listE":["'"${comma_separated_list_e}"'"],"dryTest":'"${DRY_TEST_JSON}"',"f":"${CHOICE_F}"}' ./config.json --log-type Tail)
shell: bash
Loading