Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 81 additions & 29 deletions .github/workflows/qa-android-critical-flow-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,16 @@ name: QA Android Critical Flow Tests
# - If androidDeviceId is set, we lock per-device so only one run can use that specific phone at a time (other devices can run in parallel).
# - If androidDeviceId is empty ("auto"), we lock the shared device pool so only one auto-run uses the farm at a time (other auto-runs queue).
concurrency:
group: qa-android-ui-tests-office-${{ inputs.androidDeviceId || 'auto' }}
group: qa-android-ui-tests-office-${{ github.event_name == 'workflow_dispatch' && inputs.androidDeviceId || 'auto' }}
cancel-in-progress: false

on:
# Uncomment merge_group when the team is ready to enable automatic merge queue runs.
# merge_group:
# types: [checks_requested]
# GitHub schedules use UTC. 04:00 UTC is 06:00 in Berlin during daylight saving time (CEST) and 05:00 during standard time (CET).
schedule:
- cron: "0 4 * * *"
workflow_dispatch:
inputs:
appBuildNumber:
Expand Down Expand Up @@ -91,46 +97,91 @@ jobs:
runs-on: ubuntu-latest

outputs:
resolvedAppBuildNumber: ${{ steps.resolve_run_inputs.outputs.appBuildNumber }}
resolvedIsUpgrade: ${{ steps.resolve_run_inputs.outputs.isUpgrade }}
resolvedOldBuildNumber: ${{ steps.resolve_run_inputs.outputs.oldBuildNumber }}
resolvedEnforceAppInstall: ${{ steps.resolve_run_inputs.outputs.enforceAppInstall }}
resolvedFlavor: ${{ steps.resolve_run_inputs.outputs.flavor }}
resolvedTags: ${{ steps.resolve_run_inputs.outputs.TAGS }}
resolvedTestinyRunName: ${{ steps.resolve_run_inputs.outputs.testinyRunName }}
resolvedAndroidDeviceId: ${{ steps.resolve_run_inputs.outputs.androidDeviceId }}
resolvedRerunFailedEnabled: ${{ steps.resolve_run_inputs.outputs.rerunFailedEnabled }}
resolvedRerunFailedCount: ${{ steps.resolve_run_inputs.outputs.rerunFailedCount }}
resolvedTestCaseId: ${{ steps.resolve_selector.outputs.testCaseId }}
resolvedCategory: ${{ steps.resolve_selector.outputs.category }}

steps:
- name: Checkout repository
uses: actions/checkout@v6

# Scheduled runs (and merge-queue runs, once that trigger is enabled) do not
# have UI inputs, so resolve one shared set of values here and let later jobs
# consume the same outputs.
- name: Resolve run inputs
  id: resolve_run_inputs
  shell: bash
  # Dispatch inputs are handed to the script through the environment rather
  # than being expanded inline in the script text. Inline expansion would let
  # a free-text input (for example TAGS or testinyRunName) containing quotes
  # or $(...) be executed as shell code.
  env:
    INPUT_APP_BUILD_NUMBER: ${{ inputs.appBuildNumber }}
    INPUT_IS_UPGRADE: ${{ inputs.isUpgrade }}
    INPUT_OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
    INPUT_ENFORCE_APP_INSTALL: ${{ inputs.enforceAppInstall }}
    INPUT_FLAVOR: ${{ inputs.flavor }}
    INPUT_TAGS: ${{ inputs.TAGS }}
    INPUT_TESTINY_RUN_NAME: ${{ inputs.testinyRunName }}
    INPUT_ANDROID_DEVICE_ID: ${{ inputs.androidDeviceId }}
    INPUT_RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
    INPUT_RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
  run: |
    # Manual runs: forward the values chosen in the dispatch form unchanged.
    if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
      {
        echo "appBuildNumber=${INPUT_APP_BUILD_NUMBER}"
        echo "isUpgrade=${INPUT_IS_UPGRADE}"
        echo "oldBuildNumber=${INPUT_OLD_BUILD_NUMBER}"
        echo "enforceAppInstall=${INPUT_ENFORCE_APP_INSTALL}"
        echo "flavor=${INPUT_FLAVOR}"
        echo "TAGS=${INPUT_TAGS}"
        echo "testinyRunName=${INPUT_TESTINY_RUN_NAME}"
        echo "androidDeviceId=${INPUT_ANDROID_DEVICE_ID}"
        echo "rerunFailedEnabled=${INPUT_RERUN_FAILED_ENABLED}"
        echo "rerunFailedCount=${INPUT_RERUN_FAILED_COUNT}"
      } >> "${GITHUB_OUTPUT}"
      exit 0
    fi

    # Every other trigger: fall back to the built-in defaults
    # (latest build, no upgrade, auto device selection, one rerun).
    {
      echo "appBuildNumber=latest"
      echo "isUpgrade=false"
      echo "oldBuildNumber="
      echo "enforceAppInstall=false"
      echo "flavor=internal release candidate"
      echo "TAGS=@criticalFlow"
      echo "testinyRunName="
      echo "androidDeviceId="
      echo "rerunFailedEnabled=true"
      echo "rerunFailedCount=1"
    } >> "${GITHUB_OUTPUT}"

# Validate upgrade inputs before runner work starts.
- name: Validate upgrade inputs
shell: bash
env:
IS_UPGRADE: ${{ inputs.isUpgrade }}
OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
IS_UPGRADE: ${{ steps.resolve_run_inputs.outputs.isUpgrade }}
OLD_BUILD_NUMBER: ${{ steps.resolve_run_inputs.outputs.oldBuildNumber }}
run: bash scripts/qa_android_ui_tests/validation.sh validate-upgrade-inputs

# Validate retry toggle/count before any runner work starts.
- name: Validate rerun inputs
shell: bash
env:
RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
RERUN_FAILED_ENABLED: ${{ steps.resolve_run_inputs.outputs.rerunFailedEnabled }}
RERUN_FAILED_COUNT: ${{ steps.resolve_run_inputs.outputs.rerunFailedCount }}
run: bash scripts/qa_android_ui_tests/validation.sh validate-rerun-inputs

# Resolve TAGS into CI selectors and expose them as job outputs.
- name: Resolve selector from TAGS
id: resolve_selector
shell: bash
env:
TAGS_RAW: ${{ inputs.TAGS }}
TAGS_RAW: ${{ steps.resolve_run_inputs.outputs.TAGS }}
run: bash scripts/qa_android_ui_tests/validation.sh resolve-selector-from-tags

# Print resolved values for traceability in workflow logs.
- name: Print resolved values
shell: bash
env:
WORKFLOW_REF: ${{ github.ref_name }}
FLAVOR_INPUT: ${{ inputs.flavor }}
FLAVOR_INPUT: ${{ steps.resolve_run_inputs.outputs.flavor }}
RESOLVED_TESTCASE_ID: ${{ steps.resolve_selector.outputs.testCaseId }}
RESOLVED_CATEGORY: ${{ steps.resolve_selector.outputs.category }}
TESTINY_RUN_NAME: ${{ inputs.testinyRunName }}
TESTINY_RUN_NAME: ${{ steps.resolve_run_inputs.outputs.testinyRunName }}
run: bash scripts/qa_android_ui_tests/validation.sh print-resolved-values

run-android-ui-tests:
Expand Down Expand Up @@ -181,7 +232,7 @@ jobs:
- name: Resolve flavor (runner config)
id: resolve_flavor
env:
FLAVOR_INPUT: ${{ inputs.flavor }}
FLAVOR_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedFlavor }}
run: bash scripts/qa_android_ui_tests/execution_setup.sh resolve-flavor

- name: Configure AWS credentials (for S3)
Expand All @@ -195,15 +246,15 @@ jobs:
- name: Download APK(s) from S3
id: download_apks
env:
APP_BUILD_NUMBER: ${{ inputs.appBuildNumber }}
IS_UPGRADE: ${{ inputs.isUpgrade }}
OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
APP_BUILD_NUMBER: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAppBuildNumber }}
IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
OLD_BUILD_NUMBER: ${{ needs.validate-and-resolve-inputs.outputs.resolvedOldBuildNumber }}
run: S3_BUCKET="${{ secrets.AWS_S3_BUCKET }}" bash scripts/qa_android_ui_tests/execution_setup.sh download-apks

# Select device(s): use input device when provided, otherwise auto-pick.
- name: Detect target device(s)
env:
TARGET_DEVICE_ID: ${{ inputs.androidDeviceId }}
TARGET_DEVICE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAndroidDeviceId }}
RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
run: bash scripts/qa_android_ui_tests/execution_setup.sh detect-target-devices

Expand All @@ -214,8 +265,8 @@ jobs:
# Install app/test prerequisites on each selected device.
- name: Install APK(s) on device(s)
env:
ENFORCE_APP_INSTALL: ${{ inputs.enforceAppInstall }}
IS_UPGRADE: ${{ inputs.isUpgrade }}
ENFORCE_APP_INSTALL: ${{ needs.validate-and-resolve-inputs.outputs.resolvedEnforceAppInstall }}
IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
run: bash scripts/qa_android_ui_tests/execution_setup.sh install-apks-on-devices

- name: Install 1Password CLI
Expand All @@ -242,9 +293,9 @@ jobs:
env:
RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
RESOLVED_CATEGORY: ${{ needs.validate-and-resolve-inputs.outputs.resolvedCategory }}
IS_UPGRADE: ${{ inputs.isUpgrade }}
RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
RERUN_FAILED_ENABLED: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedEnabled }}
RERUN_FAILED_COUNT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedCount }}
ALLURE_RESULTS_ROOT: ${{ runner.temp }}/allure-results
run: bash scripts/qa_android_ui_tests/run_ui_tests.sh

Expand All @@ -263,17 +314,18 @@ jobs:
SOURCE_REF: ${{ github.ref }}
SOURCE_REF_NAME: ${{ github.ref_name }}
SOURCE_SHA: ${{ github.sha }}
FLAVOR_INPUT: ${{ inputs.flavor }}
TAGS_INPUT: ${{ inputs.TAGS }}
FLAVOR_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedFlavor }}
TAGS_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTags }}
RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
RESOLVED_CATEGORY: ${{ needs.validate-and-resolve-inputs.outputs.resolvedCategory }}
APP_BUILD_NUMBER_INPUT: ${{ inputs.appBuildNumber }}
IS_UPGRADE: ${{ inputs.isUpgrade }}
ENFORCE_APP_INSTALL: ${{ inputs.enforceAppInstall }}
TESTINY_RUN_NAME: ${{ inputs.testinyRunName }}
ANDROID_DEVICE_ID: ${{ inputs.androidDeviceId }}
RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
APP_BUILD_NUMBER_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAppBuildNumber }}
IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
OLD_BUILD_NUMBER: ${{ needs.validate-and-resolve-inputs.outputs.resolvedOldBuildNumber }}
ENFORCE_APP_INSTALL: ${{ needs.validate-and-resolve-inputs.outputs.resolvedEnforceAppInstall }}
TESTINY_RUN_NAME: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestinyRunName }}
ANDROID_DEVICE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAndroidDeviceId }}
RERUN_FAILED_ENABLED: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedEnabled }}
RERUN_FAILED_COUNT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedCount }}
run: bash scripts/qa_android_ui_tests/reporting.sh prepare-deflake-bundle

- name: Upload deflake input artifact
Expand Down Expand Up @@ -304,7 +356,7 @@ jobs:
MERGED_DIR: ${{ runner.temp }}/allure-results-merged
REAL_BUILD_NUMBER: ${{ env.REAL_BUILD_NUMBER }}
NEW_APK_NAME: ${{ env.NEW_APK_NAME }}
INPUT_TAGS: ${{ inputs.TAGS }}
INPUT_TAGS: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTags }}
run: bash scripts/qa_android_ui_tests/reporting.sh merge-allure-results

# Generate static Allure HTML from merged results.
Expand Down Expand Up @@ -332,7 +384,7 @@ jobs:
PAGES_DIR: gh-pages/docs/qa-ui-tests
PAGES_TITLE: QA Android Critical Flow Tests
KEEP_DAYS: "90"
INPUT_TAGS: ${{ inputs.TAGS }}
INPUT_TAGS: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTags }}
APK_VERSION: ${{ env.REAL_BUILD_NUMBER }}
APK_NAME: ${{ env.NEW_APK_NAME }}
run: bash scripts/qa_android_ui_tests/reporting.sh publish-allure-report
Expand Down
100 changes: 87 additions & 13 deletions scripts/qa_android_ui_tests/README.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,98 @@
# QA Android UI Tests Scripts
# QA Android UI Test CI Scripts

These scripts back the workflow:
These scripts back the Android UI test workflows:

- `.github/workflows/qa-android-critical-flow-tests.yml`
- `.github/workflows/qa-android-ui-test-manual-deflake.yml`

The workflow now calls a small set of phase-oriented scripts instead of many tiny one-off files.
The workflows call a small set of phase-oriented scripts instead of many tiny one-off files.

## Workflow Summary

### `qa-android-critical-flow-tests.yml`

Main critical-flow workflow.

- Supports `workflow_dispatch`.
- Runs nightly on `schedule` at `04:00 UTC`.
- Uses built-in defaults for non-manual runs:
- latest APK
- `internal release candidate`
- `@criticalFlow`
- auto device selection
- failed-test rerun enabled with count `1`
- Exports one standard deflake artifact for later manual reruns.

### `qa-android-ui-test-manual-deflake.yml`

Manual workflow for rerunning only the leftover failed tests from an earlier run.

- Triggered with a `sourceRunId`.
- Accepts a previous critical-flow run or a previous manual deflake run.
- Downloads the standard deflake artifact from that selected run.
- Runs only the failed tests listed in the artifact.
- Publishes a fresh Allure report for the manual deflake run only.
- Exports a fresh deflake artifact so that a manual deflake run can itself be deflaked later.

## Manual Deflake ID

Every workflow run that exports the standard deflake bundle writes a copy-friendly summary entry:

- `manual deflake id: <github.run_id>`

This is the GitHub Actions run ID to paste into the manual deflake workflow input.

## Standard Deflake Artifact

Artifact name:

- `android-ui-test-deflake-input`

Bundle contents:

- `metadata.json`
- `failed-tests.txt`
- `failed-tests-first-attempt.txt`

`metadata.json` carries the run context needed by later manual deflake runs, including:

- workflow name and file
- run ID, run number, run attempt, branch, and commit
- flavor and selector
- build inputs and resolved build info
- upgrade flags
- device selection
- rerun configuration
- Testiny run name

## Flavor Resolution Source

Flavor resolution is runner-driven, not hardcoded in the repo.

- Source of truth: `/etc/android-qa/flavors.json` (on the self-hosted runner)
- Source of truth: `/etc/android-qa/flavors.json` on the self-hosted runner
- Executed via: `bash scripts/qa_android_ui_tests/execution_setup.sh resolve-flavor`
- Exports for later workflow steps: `S3_FOLDER`, `APP_ID`, `PACKAGES_TO_UNINSTALL`
- Exports for later workflow steps:
- `S3_FOLDER`
- `APP_ID`
- `PACKAGES_TO_UNINSTALL`

## Primary Scripts

- `validation.sh`: input validation, TAG selector parsing, and resolved value logging.
- `validation.sh`: input validation, selector parsing, and resolved value logging.
- `execution_setup.sh`: runner prep, flavor/APK resolution, device prep, secrets fetch, and test artifact setup.
- `run_ui_tests.sh`: instrumentation execution/sharding plus failed-test auto-reruns (explicit per-device retry lists with even count balancing).
- `reporting.sh`: Allure pull/merge/generate/publish plus cleanup subcommands.
- `run_ui_tests.sh`: instrumentation execution, sharding, failed-test auto-reruns, and manual-deflake failed-list execution.
- `reporting.sh`: Allure pull/merge/generate/publish, deflake bundle preparation, and cleanup.

## Retry Flow

The rerun feature is controlled by workflow inputs:

- `rerunFailedEnabled`: turn failed-test reruns on or off for this workflow run.
- `rerunFailedCount`: maximum number of rerun attempts after attempt `0` completes. Default is `2`.
- `rerunFailedEnabled`: turn failed-test reruns on or off for this workflow run
- `rerunFailedCount`: maximum number of rerun attempts after attempt `0` completes; default is `1`

Execution flow:

1. Run attempt `0` on the selected device set using the normal CI selector (`testCaseId` or `category`).
1. Run attempt `0` on the selected device set using the resolved selector.
2. Pull Allure results immediately after that attempt finishes.
3. Extract only the failed test IDs in `Class#method` format.
4. Evenly assign those failed tests across the retry devices.
Expand All @@ -46,10 +108,22 @@ Reporting behavior:
- `passed_on_rerun`
- `failed_after_retries`

## Manual Deflake Flow

1. A critical-flow run or an earlier manual deflake run uploads `android-ui-test-deflake-input`.
2. A user copies the `manual deflake id` from the workflow summary.
3. The manual deflake workflow downloads the artifact for that selected run.
4. The workflow validates `metadata.json` and `failed-tests.txt`.
5. Only the leftover failed tests are executed.
6. A fresh Allure report is published for that manual deflake run.
7. A fresh deflake artifact is uploaded again for the next round if needed.

## Python Helpers

- `resolve_flavor.py`: parse `flavors.json` and export flavor-derived env vars.
- `select_apks.py`: resolve NEW/OLD APK keys based on input/build selection rules.
- `select_apks.py`: resolve new and old APK keys based on input and build-selection rules.
- `fetch_secrets_json.py`: build runtime `secrets.json` from 1Password vault items.
- `merge_allure_results.py`: merge per-device Allure outputs and attach metadata.
- `merge_allure_results.py`: merge per-device Allure outputs and attach retry metadata.
- `extract_failed_tests.py`: extract failed test IDs (`Class#method`) from one attempt's Allure result files.
- `prepare_deflake_bundle.py`: build the standard deflake artifact and append the manual deflake ID summary.
- `inspect_deflake_bundle.py`: validate a downloaded deflake artifact and expose its resolved values to later workflow steps.
19 changes: 19 additions & 0 deletions scripts/qa_android_ui_tests/prepare_deflake_bundle.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ def env_int(name: str) -> int:
return 0


def append_summary(lines: list[str]) -> None:
    """Append ``lines`` to the GitHub Actions step summary file, if one is set.

    The target path is whatever ``env("GITHUB_STEP_SUMMARY")`` returns; when
    that value is empty the call is a no-op. The lines are joined with
    newlines and written in append mode, followed by one trailing newline.
    """
    target = env("GITHUB_STEP_SUMMARY")
    if target:
        with open(target, "a", encoding="utf-8") as handle:
            handle.write("\n".join(lines) + "\n")


def copy_test_list(src_value: str, dest: Path) -> None:
src = Path(src_value) if src_value else None
if src and src.is_file():
Expand Down Expand Up @@ -58,6 +67,7 @@ def copy_test_list(src_value: str, dest: Path) -> None:

metadata = {
"schema_version": 1,
"manual_deflake_id": env("SOURCE_RUN_ID"),
"source_workflow_name": env("SOURCE_WORKFLOW_NAME"),
"source_workflow_file": env("SOURCE_WORKFLOW_FILE"),
"source_repository": env("SOURCE_REPOSITORY"),
Expand Down Expand Up @@ -97,3 +107,12 @@ def copy_test_list(src_value: str, dest: Path) -> None:

copy_test_list(env("FINAL_FAILED_TESTS_FILE"), bundle_dir / "failed-tests.txt")
copy_test_list(env("FIRST_FAILED_TESTS_FILE"), bundle_dir / "failed-tests-first-attempt.txt")

# Show the GitHub Actions run id in the summary so it can be copied directly
# into a later manual deflake run.
append_summary(
[
"### Manual Deflake",
f"- manual deflake id: {metadata['manual_deflake_id']}",
]
)
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ class ApplockTest : BaseUiTest() {
pages.registrationPage.apply {
waitUntilLoginFlowIsCompleted()
clickAllowNotificationButton()
// clickAgreeShareDataAlert()
clickAgreeShareDataAlert()
assertConversationPageVisible()
}
}
Expand Down
Loading
Loading