Skip to content

[8.18] (backport #8396) [tests] split up serverless and resource leaks integration tests #8456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Jun 12, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions .buildkite/bk.integration.pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,10 +39,10 @@ steps:
depends_on:
- packaging-windows
env:
TEST_LONG_RUNNING: "true"
TEST_PACKAGE: "github.com/elastic/elastic-agent/testing/integration/leak"
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-windows'
.buildkite/scripts/integration-tests.ps1 fleet true TestLongRunningAgentForLeaks
.buildkite/scripts/steps/integration_tests_tf.ps1 fleet true
artifact_paths:
- build/**
- build/diagnostics/**
Expand All @@ -57,10 +57,10 @@ steps:
depends_on:
- packaging-windows
env:
TEST_LONG_RUNNING: "true"
TEST_PACKAGE: "github.com/elastic/elastic-agent/testing/integration/leak"
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-windows'
.buildkite/scripts/integration-tests.ps1 fleet true TestLongRunningAgentForLeaks
.buildkite/scripts/steps/integration_tests_tf.ps1 fleet true
artifact_paths:
- build/**
- build/diagnostics/**
Expand All @@ -74,10 +74,10 @@ steps:
- label: "Ubuntu:2404:amd64:sudo"
depends_on: packaging-ubuntu-x86-64
env:
TEST_LONG_RUNNING: "true"
TEST_PACKAGE: "github.com/elastic/elastic-agent/testing/integration/leak"
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-ubuntu-x86-64'
.buildkite/scripts/steps/integration_tests_tf.sh fleet true TestLongRunningAgentForLeaks
.buildkite/scripts/steps/integration_tests_tf.sh fleet true
artifact_paths:
- build/**
- build/diagnostics/**
Expand All @@ -99,7 +99,7 @@ steps:
- packaging-windows
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-windows'
.buildkite/scripts/integration-tests.ps1 {{matrix}} true
.buildkite/scripts/steps/integration_tests_tf.ps1 {{matrix}} true
artifact_paths:
- build/**
- build/diagnostics/**
Expand All @@ -124,7 +124,7 @@ steps:
- packaging-windows
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-windows'
.buildkite/scripts/integration-tests.ps1 {{matrix}} false
.buildkite/scripts/steps/integration_tests_tf.ps1 {{matrix}} false
artifact_paths:
- build/**
- build/diagnostics/**
Expand All @@ -143,7 +143,7 @@ steps:
- packaging-windows
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-windows'
.buildkite/scripts/integration-tests.ps1 {{matrix}} true
.buildkite/scripts/steps/integration_tests_tf.ps1 {{matrix}} true
artifact_paths:
- build/**
- build/diagnostics/**
Expand All @@ -169,7 +169,7 @@ steps:
- packaging-windows
command: |
buildkite-agent artifact download build/distributions/** . --step 'packaging-windows'
.buildkite/scripts/integration-tests.ps1 {{matrix}} false
.buildkite/scripts/steps/integration_tests_tf.ps1 {{matrix}} false
artifact_paths:
- build/**
- build/diagnostics/**
Expand Down
2 changes: 1 addition & 1 deletion .buildkite/integration.pipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ steps:
TEST_INTEG_AUTH_GCP_DATACENTER: "us-central1-a"
command: |
buildkite-agent artifact download "build/distributions/**" . $BUILDKITE_BUILD_ID
.buildkite/scripts/steps/integration_tests.sh serverless integration:single TestLogIngestionFleetManaged #right now, run a single test in serverless mode as a sort of smoke test, instead of re-running the entire suite
.buildkite/scripts/steps/integration_tests.sh serverless integration:testServerless
artifact_paths:
- "build/TEST-**"
- "build/diagnostics/*"
Expand Down
80 changes: 80 additions & 0 deletions .buildkite/scripts/buildkite-integration-tests.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
# Runs one integration-test group through gotestsum on a Windows agent and
# renders the JUnit result as an HTML report.
#
# Positional arguments:
#   GROUP_NAME - forwarded to the test binary as -integration.groups
#   TEST_SUDO  - "true" or "false"; forwarded as -integration.sudo
# Environment:
#   TEST_PACKAGE - Go package to test; a default is applied when it is empty
# Exit code: 1 on missing arguments, otherwise the gotestsum exit code.
param(
    [string]$GROUP_NAME,
    [string]$TEST_SUDO
)

# Guard clauses: both positional arguments are mandatory.
if ([string]::IsNullOrEmpty($GROUP_NAME)) {
    Write-Error "Error: Specify the group name: buildkite-integration-tests.ps1 <group_name> <true|false>"
    exit 1
}
if ([string]::IsNullOrEmpty($TEST_SUDO)) {
    Write-Error "Error: Specify the test sudo: buildkite-integration-tests.ps1 <group_name> <true|false>"
    exit 1
}

# Fall back to the main integration package when the caller did not pick one.
if ([string]::IsNullOrEmpty($env:TEST_PACKAGE)) {
    $env:TEST_PACKAGE = "github.com/elastic/elastic-agent/testing/integration"
}

# TODO: switch to `make install-gotestsum` once make is available on Windows.
go install gotest.tools/gotestsum
gotestsum --version

$env:TEST_BINARY_NAME = "elastic-agent"

# The agent version is scraped from version.go.
# To be simplified by https://github.com/elastic/ingest-dev/issues/4925
$baseVersion = (
    Get-Content version/version.go |
        Select-String -Pattern 'const defaultBeatVersion =' |
        ForEach-Object { $_ -replace '.*?"(.*?)".*', '$1' }
)
$env:AGENT_VERSION = $baseVersion + "-SNAPSHOT"

Write-Output "~~~ Agent version: $env:AGENT_VERSION"
$env:SNAPSHOT = $true

Write-Host "~~~ Running integration tests as $env:USERNAME"

# Output file names are "<group>[_sudo]_<OS caption and architecture>", with
# every space in the OS part turned into an underscore.
$os = Get-CimInstance Win32_OperatingSystem
$osInfo = ($os.Caption + " " + $os.OSArchitecture) -replace " ", "_"
$root_suffix = if ($TEST_SUDO -eq "true") { "_sudo" } else { "" }
$fully_qualified_group_name = "${GROUP_NAME}${root_suffix}_${osInfo}"
$outputXML = "build/${fully_qualified_group_name}.integration.xml"
$outputJSON = "build/${fully_qualified_group_name}.integration.out.json"

$TestsExitCode = 0

try {
    Write-Output "~~~ Integration tests: $GROUP_NAME as user: $env:USERNAME"

    # Some tests normally need up to 45 minutes, so -test.timeout is 2 hours:
    # enough headroom for longer future tests while still capping the total
    # run time. See: https://pkg.go.dev/cmd/go#hdr-Testing_flags
    $goTestFlags = @(
        "-tags=integration",
        "-test.shuffle=on",
        "-test.timeout=2h0m0s",
        "$env:TEST_PACKAGE",
        "-v",
        "-args",
        "-integration.groups=$GROUP_NAME",
        "-integration.sudo=$TEST_SUDO"
    )
    & gotestsum --no-color -f standard-quiet --junitfile-hide-skipped-tests --junitfile "${outputXML}" --jsonfile "${outputJSON}" -- @goTestFlags
    $TestsExitCode = $LASTEXITCODE

    if ($TestsExitCode -ne 0) {
        Write-Output "^^^ +++"
        Write-Output "Integration tests failed"
    }
}
finally {
    # The HTML report is attempted on every exit path out of the try block.
    if (-not (Test-Path $outputXML)) {
        Write-Output "Cannot generate HTML test report: $outputXML not found"
    }
    else {
        # Make sure junit2html is installed before converting the JUnit file.
        go install github.com/alexec/junit2html@latest
        Get-Content $outputXML | junit2html > "build/TEST-report.html"
    }
}

exit $TestsExitCode
20 changes: 13 additions & 7 deletions .buildkite/scripts/buildkite-integration-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@

GROUP_NAME=$1
TEST_SUDO=$2
TEST_NAME_PATTERN=${3:-""}

# Set default TEST_PACKAGE if not already defined in env or argument
: "${TEST_PACKAGE:="github.com/elastic/elastic-agent/testing/integration"}"

if [ -z "$GROUP_NAME" ]; then
echo "Error: Specify the group name: sudo-integration-tests.sh [group_name]" >&2
Expand Down Expand Up @@ -46,18 +48,22 @@ outputXML="build/${fully_qualified_group_name}.integration.xml"
outputJSON="build/${fully_qualified_group_name}.integration.out.json"

echo "~~~ Integration tests: ${GROUP_NAME}"
GOTEST_ARGS=(-tags integration -test.shuffle on -test.timeout 2h0m0s)
if [ -n "$TEST_NAME_PATTERN" ]; then
GOTEST_ARGS+=(-run="${TEST_NAME_PATTERN}")
fi
GOTEST_ARGS+=("github.com/elastic/elastic-agent/testing/integration" -v -args "-integration.groups=${GROUP_NAME}" "-integration.sudo=${TEST_SUDO}")

# -test.timeout=2h0m0s is set because some tests normally take up to 45 minutes.
# This 2-hour timeout provides enough room for future, potentially longer tests,
# while still enforcing a reasonable upper limit on total execution time.
# See: https://pkg.go.dev/cmd/go#hdr-Testing_flags
GOTEST_ARGS=(-tags integration -test.shuffle on -test.timeout 2h0m0s "${TEST_PACKAGE}" -v -args "-integration.groups=${GROUP_NAME}" "-integration.sudo=${TEST_SUDO}")
set +e
TEST_BINARY_NAME="elastic-agent" AGENT_VERSION="${AGENT_VERSION}" SNAPSHOT=true \
gotestsum --no-color -f standard-quiet --junitfile-hide-skipped-tests --junitfile "${outputXML}" --jsonfile "${outputJSON}" -- "${GOTEST_ARGS[@]}"
TESTS_EXIT_STATUS=$?
set -e

if [[ $TESTS_EXIT_STATUS -ne 0 ]]; then
echo "^^^ +++"
echo "Integration tests failed"
fi

if [ -f "$outputXML" ]; then
go install github.com/alexec/junit2html@latest
junit2html < "$outputXML" > build/TEST-report.html
Expand Down
43 changes: 43 additions & 0 deletions .buildkite/scripts/steps/integration_tests_tf.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Step wrapper for one integration-test group on Windows.
#
# Flow: dot-source ess.ps1, build the test binaries with mage, call
# Get-Ess-Stack, run buildkite-integration-tests.ps1 for the group, and always
# call ess_down afterwards. The script exits with 1 when the mage build fails,
# otherwise with the exit code of buildkite-integration-tests.ps1.
#
# Positional arguments (passed through unchanged to buildkite-integration-tests.ps1):
#   GROUP_NAME - integration test group to run
#   TEST_SUDO  - "true" or "false"
#
# NOTE(review): Get-Ess-Stack and ess_down are presumably defined in the
# dot-sourced ess.ps1; that file is not visible here - confirm.
param (
[string]$GROUP_NAME,
[string]$TEST_SUDO
)

# Lines starting with "~~~" / "^^^ +++" are Buildkite log-group markers.
Write-Output "~~~ Preparing environment"

# Emit the PowerShell version into the step output.
$PSVersionTable.PSVersion

# Dot-source so the helpers are defined in this script's scope.
. "$PWD\.buildkite\scripts\steps\ess.ps1"

# Read package version from .package-version file
# A missing file is tolerated (SilentlyContinue); in that case the variable
# stays empty and no "-SNAPSHOT" suffix is appended.
$PACKAGE_VERSION = Get-Content .package-version -ErrorAction SilentlyContinue
if ($PACKAGE_VERSION) {
$PACKAGE_VERSION = "${PACKAGE_VERSION}-SNAPSHOT"
}

Write-Output "~~~ Building test binaries"
& mage build:testBinaries
# Stop before touching the stack or the tests if the build failed.
if ($LASTEXITCODE -ne 0) {
Write-Output "^^^ +++"
Write-Output "Failed to build test binaries"
exit 1
}

# Stays 0 unless the test script reports a non-zero exit code.
$TestsExitCode = 0
try {
Write-Output "~~~ Running integration tests"
# Get-Ess-Stack will start the ESS stack if it is a BK retry, otherwise it will retrieve ESS stack metadata
Get-Ess-Stack -StackVersion $PACKAGE_VERSION
& "$PWD\.buildkite\scripts\buildkite-integration-tests.ps1" $GROUP_NAME $TEST_SUDO
# Capture immediately: any later native command would overwrite $LASTEXITCODE.
$TestsExitCode = $LASTEXITCODE
if ($TestsExitCode -ne 0)
{
Write-Output "^^^ +++"
Write-Output "Integration tests failed"
}
} finally {
# ess_down will destroy the ESS stack if tf state file is found, aka if this is a BK retry
ess_down
}

# Propagate the test result as this step's exit code.
exit $TestsExitCode
6 changes: 2 additions & 4 deletions .buildkite/scripts/steps/integration_tests_tf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@ asdf install

GROUP_NAME=$1
TEST_SUDO=$2
# NOTE: This argument is not used in this script, but is declared to show that it can be set
# and passed down to downstream scripts where it may be used.
TEST_NAME_PATTERN=${3:-""}

if [ -z "$GROUP_NAME" ]; then
echo "Error: Specify the group name: integration_tests_tf.sh [group_name]" >&2
exit 1
Expand All @@ -37,7 +35,7 @@ mage build:testBinaries
if [[ "${BUILDKITE_RETRY_COUNT}" -gt 0 ]]; then
echo "~~~ The steps is retried, starting the ESS stack again"
trap 'ess_down' EXIT
ess_up $OVERRIDE_STACK_VERSION || echo "Failed to start ESS stack" >&2
ess_up $OVERRIDE_STACK_VERSION || (echo -e "^^^ +++\nFailed to start ESS stack")
preinstall_fleet_packages
else
# For the first run, we start the stack in the start_ess.sh step and it sets the meta-data
Expand Down
5 changes: 4 additions & 1 deletion docs/test-framework-dev-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,10 +130,13 @@ We pass a `-test.count` flag along with the name match
We pass a `-test.run` flag along with the names of the tests we want to run in OR
`GOTEST_FLAGS="-test.run ^(TestStandaloneUpgrade|TestFleetManagedUpgrade)$" mage integration:test`

##### Run Serverless tests
The test framework includes a smoke test suite to check elastic-agent in a serverless environment. The suite can be run via the `integration:TestServerless` mage target.

##### Run Extended Runtime Leak Test
The test framework includes a "long running" test to check for resource leaks and stability.
The runtime of the test can be set via the `LONG_TEST_RUNTIME` environment variable.
The test itself can be run via the `integration:TestLongRunningAgentForLeaks` mage target.
The test itself can be run via the `integration:TestForResourceLeaks` mage target.

##### Limitations
Due to the way the parameters are passed to `devtools.GoTest` the value of the environment variable
Expand Down
Loading