wireapp · emmaoke-w · May 5, 2026 · May 4, 2026 · May 5, 2026 · May 5, 2026
@@ -4,10 +4,16 @@ name: QA Android Critical Flow Tests
 # - If androidDeviceId is set, we lock per-device so only one run can use that specific phone at a time (other devices can run in parallel).
 # - If androidDeviceId is empty ("auto"), we lock the shared device pool so only one auto-run uses the farm at a time (other auto-runs queue).
 concurrency:
-  group: qa-android-ui-tests-office-${{ inputs.androidDeviceId || 'auto' }}
+  group: qa-android-ui-tests-office-${{ github.event_name == 'workflow_dispatch' && inputs.androidDeviceId || 'auto' }}
   cancel-in-progress: false
 
 on:
+  # Uncomment merge_group when the team is ready to enable automatic merge queue runs.
+  # merge_group:
+  #   types: [checks_requested]
+  # GitHub schedules use UTC. 04:00 UTC is 06:00 in Berlin during daylight saving time (CEST) and 05:00 in winter (CET).
+  schedule:
+    - cron: "0 4 * * *"
   workflow_dispatch:
     inputs:
       appBuildNumber:
@@ -91,46 +97,91 @@ jobs:
     runs-on: ubuntu-latest
 
     outputs:
+      resolvedAppBuildNumber: ${{ steps.resolve_run_inputs.outputs.appBuildNumber }}
+      resolvedIsUpgrade: ${{ steps.resolve_run_inputs.outputs.isUpgrade }}
+      resolvedOldBuildNumber: ${{ steps.resolve_run_inputs.outputs.oldBuildNumber }}
+      resolvedEnforceAppInstall: ${{ steps.resolve_run_inputs.outputs.enforceAppInstall }}
+      resolvedFlavor: ${{ steps.resolve_run_inputs.outputs.flavor }}
+      resolvedTags: ${{ steps.resolve_run_inputs.outputs.TAGS }}
+      resolvedTestinyRunName: ${{ steps.resolve_run_inputs.outputs.testinyRunName }}
+      resolvedAndroidDeviceId: ${{ steps.resolve_run_inputs.outputs.androidDeviceId }}
+      resolvedRerunFailedEnabled: ${{ steps.resolve_run_inputs.outputs.rerunFailedEnabled }}
+      resolvedRerunFailedCount: ${{ steps.resolve_run_inputs.outputs.rerunFailedCount }}
       resolvedTestCaseId: ${{ steps.resolve_selector.outputs.testCaseId }}
       resolvedCategory: ${{ steps.resolve_selector.outputs.category }}
 
     steps:
       - name: Checkout repository
         uses: actions/checkout@v6
 
+      # Non-manual runs (scheduled, and merge-queue runs once enabled) do not
+      # have UI inputs, so resolve one shared set of values here and let later
+      # jobs consume the same outputs.
+      - name: Resolve run inputs
+        id: resolve_run_inputs
+        shell: bash
+        # Inputs are handed to the script through env instead of being expanded
+        # into the script text. A free-text input (for example testinyRunName)
+        # containing quotes, `$`, or backticks would otherwise be parsed as
+        # shell code and could break or inject into this step.
+        env:
+          INPUT_APP_BUILD_NUMBER: ${{ inputs.appBuildNumber }}
+          INPUT_IS_UPGRADE: ${{ inputs.isUpgrade }}
+          INPUT_OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
+          INPUT_ENFORCE_APP_INSTALL: ${{ inputs.enforceAppInstall }}
+          INPUT_FLAVOR: ${{ inputs.flavor }}
+          INPUT_TAGS: ${{ inputs.TAGS }}
+          INPUT_TESTINY_RUN_NAME: ${{ inputs.testinyRunName }}
+          INPUT_ANDROID_DEVICE_ID: ${{ inputs.androidDeviceId }}
+          INPUT_RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
+          INPUT_RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
+        run: |
+          # Manual runs: forward the UI inputs unchanged.
+          if [[ "${GITHUB_EVENT_NAME}" == "workflow_dispatch" ]]; then
+            {
+              echo "appBuildNumber=${INPUT_APP_BUILD_NUMBER}"
+              echo "isUpgrade=${INPUT_IS_UPGRADE}"
+              echo "oldBuildNumber=${INPUT_OLD_BUILD_NUMBER}"
+              echo "enforceAppInstall=${INPUT_ENFORCE_APP_INSTALL}"
+              echo "flavor=${INPUT_FLAVOR}"
+              echo "TAGS=${INPUT_TAGS}"
+              echo "testinyRunName=${INPUT_TESTINY_RUN_NAME}"
+              echo "androidDeviceId=${INPUT_ANDROID_DEVICE_ID}"
+              echo "rerunFailedEnabled=${INPUT_RERUN_FAILED_ENABLED}"
+              echo "rerunFailedCount=${INPUT_RERUN_FAILED_COUNT}"
+            } >> "${GITHUB_OUTPUT}"
+            exit 0
+          fi
+
+          # Every other event has no UI inputs: emit the built-in defaults
+          # (latest build, no upgrade, auto device selection, one rerun).
+          {
+            echo "appBuildNumber=latest"
+            echo "isUpgrade=false"
+            echo "oldBuildNumber="
+            echo "enforceAppInstall=false"
+            echo "flavor=internal release candidate"
+            echo "TAGS=@criticalFlow"
+            echo "testinyRunName="
+            echo "androidDeviceId="
+            echo "rerunFailedEnabled=true"
+            echo "rerunFailedCount=1"
+          } >> "${GITHUB_OUTPUT}"
+
       # Validate upgrade inputs before runner work starts.
       - name: Validate upgrade inputs
         shell: bash
         env:
-          IS_UPGRADE: ${{ inputs.isUpgrade }}
-          OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
+          IS_UPGRADE: ${{ steps.resolve_run_inputs.outputs.isUpgrade }}
+          OLD_BUILD_NUMBER: ${{ steps.resolve_run_inputs.outputs.oldBuildNumber }}
         run: bash scripts/qa_android_ui_tests/validation.sh validate-upgrade-inputs
 
       # Validate retry toggle/count before any runner work starts.
       - name: Validate rerun inputs
         shell: bash
         env:
-          RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
-          RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
+          RERUN_FAILED_ENABLED: ${{ steps.resolve_run_inputs.outputs.rerunFailedEnabled }}
+          RERUN_FAILED_COUNT: ${{ steps.resolve_run_inputs.outputs.rerunFailedCount }}
         run: bash scripts/qa_android_ui_tests/validation.sh validate-rerun-inputs
 
       # Resolve TAGS into CI selectors and expose them as job outputs.
       - name: Resolve selector from TAGS
         id: resolve_selector
         shell: bash
         env:
-          TAGS_RAW: ${{ inputs.TAGS }}
+          TAGS_RAW: ${{ steps.resolve_run_inputs.outputs.TAGS }}
         run: bash scripts/qa_android_ui_tests/validation.sh resolve-selector-from-tags
 
       # Print resolved values for traceability in workflow logs.
       - name: Print resolved values
         shell: bash
         env:
           WORKFLOW_REF: ${{ github.ref_name }}
-          FLAVOR_INPUT: ${{ inputs.flavor }}
+          FLAVOR_INPUT: ${{ steps.resolve_run_inputs.outputs.flavor }}
           RESOLVED_TESTCASE_ID: ${{ steps.resolve_selector.outputs.testCaseId }}
           RESOLVED_CATEGORY: ${{ steps.resolve_selector.outputs.category }}
-          TESTINY_RUN_NAME: ${{ inputs.testinyRunName }}
+          TESTINY_RUN_NAME: ${{ steps.resolve_run_inputs.outputs.testinyRunName }}
         run: bash scripts/qa_android_ui_tests/validation.sh print-resolved-values
 
   run-android-ui-tests:
@@ -181,7 +232,7 @@ jobs:
       - name: Resolve flavor (runner config)
         id: resolve_flavor
         env:
-          FLAVOR_INPUT: ${{ inputs.flavor }}
+          FLAVOR_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedFlavor }}
         run: bash scripts/qa_android_ui_tests/execution_setup.sh resolve-flavor
 
       - name: Configure AWS credentials (for S3)
@@ -195,15 +246,15 @@ jobs:
       - name: Download APK(s) from S3
         id: download_apks
         env:
-          APP_BUILD_NUMBER: ${{ inputs.appBuildNumber }}
-          IS_UPGRADE: ${{ inputs.isUpgrade }}
-          OLD_BUILD_NUMBER: ${{ inputs.oldBuildNumber }}
+          APP_BUILD_NUMBER: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAppBuildNumber }}
+          IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
+          OLD_BUILD_NUMBER: ${{ needs.validate-and-resolve-inputs.outputs.resolvedOldBuildNumber }}
         run: S3_BUCKET="${{ secrets.AWS_S3_BUCKET }}" bash scripts/qa_android_ui_tests/execution_setup.sh download-apks
 
       # Select device(s): use input device when provided, otherwise auto-pick.
       - name: Detect target device(s)
         env:
-          TARGET_DEVICE_ID: ${{ inputs.androidDeviceId }}
+          TARGET_DEVICE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAndroidDeviceId }}
           RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
         run: bash scripts/qa_android_ui_tests/execution_setup.sh detect-target-devices
 
@@ -214,8 +265,8 @@ jobs:
       # Install app/test prerequisites on each selected device.
       - name: Install APK(s) on device(s)
         env:
-          ENFORCE_APP_INSTALL: ${{ inputs.enforceAppInstall }}
-          IS_UPGRADE: ${{ inputs.isUpgrade }}
+          ENFORCE_APP_INSTALL: ${{ needs.validate-and-resolve-inputs.outputs.resolvedEnforceAppInstall }}
+          IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
         run: bash scripts/qa_android_ui_tests/execution_setup.sh install-apks-on-devices
 
       - name: Install 1Password CLI
@@ -242,9 +293,9 @@ jobs:
         env:
           RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
           RESOLVED_CATEGORY: ${{ needs.validate-and-resolve-inputs.outputs.resolvedCategory }}
-          IS_UPGRADE: ${{ inputs.isUpgrade }}
-          RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
-          RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
+          IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
+          RERUN_FAILED_ENABLED: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedEnabled }}
+          RERUN_FAILED_COUNT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedCount }}
           ALLURE_RESULTS_ROOT: ${{ runner.temp }}/allure-results
         run: bash scripts/qa_android_ui_tests/run_ui_tests.sh
 
@@ -263,17 +314,18 @@ jobs:
           SOURCE_REF: ${{ github.ref }}
           SOURCE_REF_NAME: ${{ github.ref_name }}
           SOURCE_SHA: ${{ github.sha }}
-          FLAVOR_INPUT: ${{ inputs.flavor }}
-          TAGS_INPUT: ${{ inputs.TAGS }}
+          FLAVOR_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedFlavor }}
+          TAGS_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTags }}
           RESOLVED_TESTCASE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestCaseId }}
           RESOLVED_CATEGORY: ${{ needs.validate-and-resolve-inputs.outputs.resolvedCategory }}
-          APP_BUILD_NUMBER_INPUT: ${{ inputs.appBuildNumber }}
-          IS_UPGRADE: ${{ inputs.isUpgrade }}
-          ENFORCE_APP_INSTALL: ${{ inputs.enforceAppInstall }}
-          TESTINY_RUN_NAME: ${{ inputs.testinyRunName }}
-          ANDROID_DEVICE_ID: ${{ inputs.androidDeviceId }}
-          RERUN_FAILED_ENABLED: ${{ inputs.rerunFailedEnabled }}
-          RERUN_FAILED_COUNT: ${{ inputs.rerunFailedCount }}
+          APP_BUILD_NUMBER_INPUT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAppBuildNumber }}
+          IS_UPGRADE: ${{ needs.validate-and-resolve-inputs.outputs.resolvedIsUpgrade }}
+          OLD_BUILD_NUMBER: ${{ needs.validate-and-resolve-inputs.outputs.resolvedOldBuildNumber }}
+          ENFORCE_APP_INSTALL: ${{ needs.validate-and-resolve-inputs.outputs.resolvedEnforceAppInstall }}
+          TESTINY_RUN_NAME: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTestinyRunName }}
+          ANDROID_DEVICE_ID: ${{ needs.validate-and-resolve-inputs.outputs.resolvedAndroidDeviceId }}
+          RERUN_FAILED_ENABLED: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedEnabled }}
+          RERUN_FAILED_COUNT: ${{ needs.validate-and-resolve-inputs.outputs.resolvedRerunFailedCount }}
         run: bash scripts/qa_android_ui_tests/reporting.sh prepare-deflake-bundle
 
       - name: Upload deflake input artifact
@@ -304,7 +356,7 @@ jobs:
           MERGED_DIR: ${{ runner.temp }}/allure-results-merged
           REAL_BUILD_NUMBER: ${{ env.REAL_BUILD_NUMBER }}
           NEW_APK_NAME: ${{ env.NEW_APK_NAME }}
-          INPUT_TAGS: ${{ inputs.TAGS }}
+          INPUT_TAGS: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTags }}
         run: bash scripts/qa_android_ui_tests/reporting.sh merge-allure-results
 
       # Generate static Allure HTML from merged results.
@@ -332,7 +384,7 @@ jobs:
           PAGES_DIR: gh-pages/docs/qa-ui-tests
           PAGES_TITLE: QA Android Critical Flow Tests
           KEEP_DAYS: "90"
-          INPUT_TAGS: ${{ inputs.TAGS }}
+          INPUT_TAGS: ${{ needs.validate-and-resolve-inputs.outputs.resolvedTags }}
           APK_VERSION: ${{ env.REAL_BUILD_NUMBER }}
           APK_NAME: ${{ env.NEW_APK_NAME }}
         run: bash scripts/qa_android_ui_tests/reporting.sh publish-allure-report

diff --git a/scripts/qa_android_ui_tests/README.md b/scripts/qa_android_ui_tests/README.md
@@ -1,36 +1,98 @@
-# QA Android UI Tests Scripts
+# QA Android UI Test CI Scripts
 
-These scripts back the workflow:
+These scripts back the Android UI test workflows:
 
 - `.github/workflows/qa-android-critical-flow-tests.yml`
+- `.github/workflows/qa-android-ui-test-manual-deflake.yml`
 
-The workflow now calls a small set of phase-oriented scripts instead of many tiny one-off files.
+The workflows call a small set of phase-oriented scripts instead of many tiny one-off files.
+
+## Workflow Summary
+
+### `qa-android-critical-flow-tests.yml`
+
+Main critical-flow workflow.
+
+- Supports `workflow_dispatch`.
+- Runs nightly on `schedule` at `04:00 UTC`.
+- Uses built-in defaults for non-manual runs:
+  - latest APK
+  - `internal release candidate`
+  - `@criticalFlow`
+  - auto device selection
+  - failed-test rerun enabled with count `1`
+- Exports one standard deflake artifact for later manual reruns.
+
+### `qa-android-ui-test-manual-deflake.yml`
+
+Manual workflow for rerunning only the leftover failed tests from an earlier run.
+
+- Triggered with a `sourceRunId`.
+- Accepts a previous critical-flow run or a previous manual deflake run.
+- Downloads the standard deflake artifact from that selected run.
+- Runs only the failed tests listed in the artifact.
+- Publishes a fresh Allure report for the manual deflake run only.
+- Exports a fresh deflake artifact so that a manual deflake run can itself be deflaked later.
+
+## Manual Deflake ID
+
+Every workflow run that exports the standard deflake bundle writes a copy-friendly summary entry:
+
+- `manual deflake id: <github.run_id>`
+
+This is the GitHub Actions run ID to paste into the manual deflake workflow input.
+
+## Standard Deflake Artifact
+
+Artifact name:
+
+- `android-ui-test-deflake-input`
+
+Bundle contents:
+
+- `metadata.json`
+- `failed-tests.txt`
+- `failed-tests-first-attempt.txt`
+
+`metadata.json` carries the run context needed by later manual deflake runs, including:
+
+- workflow name and file
+- run ID, run number, run attempt, branch, and commit
+- flavor and selector
+- build inputs and resolved build info
+- upgrade flags
+- device selection
+- rerun configuration
+- Testiny run name
 
 ## Flavor Resolution Source
 
 Flavor resolution is runner-driven, not hardcoded in the repo.
 
-- Source of truth: `/etc/android-qa/flavors.json` (on the self-hosted runner)
+- Source of truth: `/etc/android-qa/flavors.json` on the self-hosted runner
 - Executed via: `bash scripts/qa_android_ui_tests/execution_setup.sh resolve-flavor`
-- Exports for later workflow steps: `S3_FOLDER`, `APP_ID`, `PACKAGES_TO_UNINSTALL`
+- Exports for later workflow steps:
+  - `S3_FOLDER`
+  - `APP_ID`
+  - `PACKAGES_TO_UNINSTALL`
 
 ## Primary Scripts
 
-- `validation.sh`: input validation, TAG selector parsing, and resolved value logging.
+- `validation.sh`: input validation, selector parsing, and resolved value logging.
 - `execution_setup.sh`: runner prep, flavor/APK resolution, device prep, secrets fetch, and test artifact setup.
-- `run_ui_tests.sh`: instrumentation execution/sharding plus failed-test auto-reruns (explicit per-device retry lists with even count balancing).
-- `reporting.sh`: Allure pull/merge/generate/publish plus cleanup subcommands.
+- `run_ui_tests.sh`: instrumentation execution, sharding, failed-test auto-reruns, and manual-deflake failed-list execution.
+- `reporting.sh`: Allure pull/merge/generate/publish, deflake bundle preparation, and cleanup.
 
 ## Retry Flow
 
 The rerun feature is controlled by workflow inputs:
 
-- `rerunFailedEnabled`: turn failed-test reruns on or off for this workflow run.
-- `rerunFailedCount`: maximum number of rerun attempts after attempt `0` completes. Default is `2`.
+- `rerunFailedEnabled`: turn failed-test reruns on or off for this workflow run
+- `rerunFailedCount`: maximum number of rerun attempts after attempt `0` completes; default is `1`
 
 Execution flow:
 
-1. Run attempt `0` on the selected device set using the normal CI selector (`testCaseId` or `category`).
+1. Run attempt `0` on the selected device set using the resolved selector.
 2. Pull Allure results immediately after that attempt finishes.
 3. Extract only the failed test IDs in `Class#method` format.
 4. Evenly assign those failed tests across the retry devices.
@@ -46,10 +108,22 @@ Reporting behavior:
   - `passed_on_rerun`
   - `failed_after_retries`
 
+## Manual Deflake Flow
+
+1. Critical flow or an earlier manual deflake run uploads `android-ui-test-deflake-input`.
+2. A user copies the `manual deflake id` from the workflow summary.
+3. The manual deflake workflow downloads the artifact for that selected run.
+4. The workflow validates `metadata.json` and `failed-tests.txt`.
+5. Only the leftover failed tests are executed.
+6. A fresh Allure report is published for that manual deflake run.
+7. A fresh deflake artifact is uploaded again for the next round if needed.
+
 ## Python Helpers
 
 - `resolve_flavor.py`: parse `flavors.json` and export flavor-derived env vars.
-- `select_apks.py`: resolve NEW/OLD APK keys based on input/build selection rules.
+- `select_apks.py`: resolve new and old APK keys based on input and build-selection rules.
 - `fetch_secrets_json.py`: build runtime `secrets.json` from 1Password vault items.
-- `merge_allure_results.py`: merge per-device Allure outputs and attach metadata.
+- `merge_allure_results.py`: merge per-device Allure outputs and attach retry metadata.
 - `extract_failed_tests.py`: extract failed test IDs (`Class#method`) from one attempt's Allure result files.
+- `prepare_deflake_bundle.py`: build the standard deflake artifact and append the manual deflake ID summary.
+- `inspect_deflake_bundle.py`: validate a downloaded deflake artifact and expose its resolved values to later workflow steps.
diff --git a/scripts/qa_android_ui_tests/prepare_deflake_bundle.py b/scripts/qa_android_ui_tests/prepare_deflake_bundle.py
@@ -26,6 +26,15 @@ def env_int(name: str) -> int:
         return 0
 
 
+def append_summary(lines: list[str]) -> None:
+    # Write helper output into the GitHub Actions step summary when available.
+    summary_path = env("GITHUB_STEP_SUMMARY")
+    if not summary_path:
+        return
+    with open(summary_path, "a", encoding="utf-8") as summary_file:
+        summary_file.write("\n".join(lines) + "\n")
+
+
 def copy_test_list(src_value: str, dest: Path) -> None:
     src = Path(src_value) if src_value else None
     if src and src.is_file():
@@ -58,6 +67,7 @@ def copy_test_list(src_value: str, dest: Path) -> None:
 
 metadata = {
     "schema_version": 1,
+    "manual_deflake_id": env("SOURCE_RUN_ID"),
     "source_workflow_name": env("SOURCE_WORKFLOW_NAME"),
     "source_workflow_file": env("SOURCE_WORKFLOW_FILE"),
     "source_repository": env("SOURCE_REPOSITORY"),
@@ -97,3 +107,12 @@ def copy_test_list(src_value: str, dest: Path) -> None:
 
 copy_test_list(env("FINAL_FAILED_TESTS_FILE"), bundle_dir / "failed-tests.txt")
 copy_test_list(env("FIRST_FAILED_TESTS_FILE"), bundle_dir / "failed-tests-first-attempt.txt")
+
+# Show the GitHub Actions run id in the summary so it can be copied directly
+# into a later manual deflake run.
+append_summary(
+    [
+        "### Manual Deflake",
+        f"- manual deflake id: {metadata['manual_deflake_id']}",
+    ]
+)
diff --git a/tests/testsCore/src/androidTest/kotlin/com/wire/android/tests/core/tests/ApplockTest.kt b/tests/testsCore/src/androidTest/kotlin/com/wire/android/tests/core/tests/ApplockTest.kt
@@ -107,7 +107,7 @@ class ApplockTest : BaseUiTest() {
             pages.registrationPage.apply {
                 waitUntilLoginFlowIsCompleted()
                 clickAllowNotificationButton()
-                // clickAgreeShareDataAlert()
+                clickAgreeShareDataAlert()
                 assertConversationPageVisible()
             }
         }