From f41fda9bd9373b0ff4a6a617d40621f924c9407b Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 13 Nov 2025 13:05:26 -0800
Subject: [PATCH 01/10] reset profile: only add previous origins if actually
 using previous browser, and not resetting

---
 backend/btrixcloud/profiles.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py
index 72c5203040..fa64f8110c 100644
--- a/backend/btrixcloud/profiles.py
+++ b/backend/btrixcloud/profiles.py
@@ -270,7 +270,9 @@ async def do_commit_to_profile(
             existing_profile.resource.size if existing_profile.resource else 0
         )
 
-        origins = existing_profile.origins
+        # only set origins from existing profile if browser actually launched with that profile
+        if metadata.baseprofile == profileid:
+            origins = existing_profile.origins
 
     else:
         profileid = metadata.profileid

From 764a0ed602d7852cb32242a36fd9bb2d14a52849 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 13 Nov 2025 14:33:05 -0800
Subject: [PATCH 02/10] tests: add test for origins kept for existing profile,
 but not overridden profile

---
 backend/btrixcloud/profiles.py |  3 ++-
 backend/test/conftest.py       | 22 ++++++++++++++---
 backend/test/test_profiles.py  | 45 ++++++++++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 5 deletions(-)

diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py
index fa64f8110c..1a0d7e0935 100644
--- a/backend/btrixcloud/profiles.py
+++ b/backend/btrixcloud/profiles.py
@@ -270,7 +270,8 @@ async def do_commit_to_profile(
             existing_profile.resource.size if existing_profile.resource else 0
         )
 
-        # only set origins from existing profile if browser actually launched with that profile
+        # only set origins from existing profile if browser
+        # actually launched with that profile (eg. not a reset)
         if metadata.baseprofile == profileid:
             origins = existing_profile.origins
 
diff --git a/backend/test/conftest.py b/backend/test/conftest.py
index 72cff12467..8fc4674437 100644
--- a/backend/test/conftest.py
+++ b/backend/test/conftest.py
@@ -650,8 +650,10 @@ def profile_browser_2_id(admin_auth_headers, default_org_id):
 
 
 @pytest.fixture(scope="session")
-def profile_browser_3_id(admin_auth_headers, default_org_id):
-    return _create_profile_browser(admin_auth_headers, default_org_id)
+def profile_browser_3_id(admin_auth_headers, default_org_id, profile_id):
+    return _create_profile_browser(
+        admin_auth_headers, default_org_id, baseprofile=profile_id
+    )
 
 
 @pytest.fixture(scope="session")
@@ -659,13 +661,25 @@ def profile_browser_4_id(admin_auth_headers, default_org_id):
     return _create_profile_browser(admin_auth_headers, default_org_id)
 
 
+@pytest.fixture(scope="session")
+def profile_browser_5_id(admin_auth_headers, default_org_id):
+    return _create_profile_browser(admin_auth_headers, default_org_id)
+
+
 def _create_profile_browser(
-    headers: Dict[str, str], oid: UUID, url: str = "https://old.webrecorder.net"
+    headers: Dict[str, str],
+    oid: UUID,
+    url: str = "https://old.webrecorder.net",
+    baseprofile="",
 ):
+    data = {"url": url}
+    if baseprofile:
+        data["profileId"] = baseprofile
+
     r = requests.post(
         f"{API_PREFIX}/orgs/{oid}/profiles/browser",
         headers=headers,
-        json={"url": url},
+        json=data,
     )
     assert r.status_code == 200
     browser_id = r.json()["browserid"]

diff --git a/backend/test/test_profiles.py b/backend/test/test_profiles.py
index 9dcff944f4..9ad4bb21e2 100644
--- a/backend/test/test_profiles.py
+++ b/backend/test/test_profiles.py
@@ -315,6 +315,51 @@ def test_commit_browser_to_existing_profile(
     ]
 
 
+def test_commit_reset_browser_to_existing_profile(
+    admin_auth_headers, default_org_id, profile_browser_5_id, profile_id
+):
+    prepare_browser_for_profile_commit(
+        profile_browser_5_id,
+        admin_auth_headers,
+        default_org_id,
+        url="https://example-com.webrecorder.net",
+    )
+
+    time.sleep(10)
+
+    # Commit new browser to existing profile
+    while True:
+        r = requests.patch(
+            f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
+            headers=admin_auth_headers,
+            json={
+                "browserid": profile_browser_5_id,
+                "name": PROFILE_NAME_UPDATED,
+                "description": PROFILE_DESC_UPDATED,
+                "tags": PROFILE_TAGS_UPDATED,
+            },
+        )
+        assert r.status_code == 200
+        if r.json().get("detail") == "waiting_for_browser":
+            time.sleep(5)
+            continue
+
+        break
+
+    assert r.json()["updated"]
+
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+
+    assert data.get("origins") == [
+        "https://example-com.webrecorder.net",
+    ]
+
+
 @pytest.mark.parametrize(
     "sort_by,sort_direction,profile_1_index,profile_2_index",
     [

From 9dfa6f9a5c102fcd98ee13a8d68b03b4642b6bd4 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 13 Nov 2025 15:18:34 -0800
Subject: [PATCH 03/10] cleanup, attempt fix

---
 backend/btrixcloud/profiles.py | 2 +-
 backend/test/conftest.py       | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py
index 1a0d7e0935..62adb5a196 100644
--- a/backend/btrixcloud/profiles.py
+++ b/backend/btrixcloud/profiles.py
@@ -272,7 +272,7 @@ async def do_commit_to_profile(
 
         # only set origins from existing profile if browser
         # actually launched with that profile (eg. not a reset)
-        if metadata.baseprofile == profileid:
+        if profileid and metadata.baseprofile == profileid:
             origins = existing_profile.origins
 
     else:

diff --git a/backend/test/conftest.py b/backend/test/conftest.py
index 8fc4674437..0244506f52 100644
--- a/backend/test/conftest.py
+++ b/backend/test/conftest.py
@@ -669,7 +669,7 @@ def profile_browser_5_id(admin_auth_headers, default_org_id):
 def _create_profile_browser(
     headers: Dict[str, str],
     oid: UUID,
-    url: str = "https://old.webrecorder.net",
+    url="https://old.webrecorder.net",
     baseprofile="",
 ):

From ec70bd357ce0448420fac83840910672c280b8bd Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 13 Nov 2025 16:01:32 -0800
Subject: [PATCH 04/10] tests: remove some fixtures, call
 create_profile_browser() directly

---
 backend/test/conftest.py      | 23 +++--------------------
 backend/test/test_profiles.py | 33 ++++++++++++++++++++-------------
 2 files changed, 23 insertions(+), 33 deletions(-)

diff --git a/backend/test/conftest.py b/backend/test/conftest.py
index 0244506f52..6f8c08aa84 100644
--- a/backend/test/conftest.py
+++ b/backend/test/conftest.py
@@ -639,34 +639,17 @@ def url_list_config_id(crawler_auth_headers, default_org_id):
 
 @pytest.fixture(scope="session")
 def profile_browser_id(admin_auth_headers, default_org_id):
-    return _create_profile_browser(admin_auth_headers, default_org_id)
+    return create_profile_browser(admin_auth_headers, default_org_id)
 
 
 @pytest.fixture(scope="session")
 def profile_browser_2_id(admin_auth_headers, default_org_id):
-    return _create_profile_browser(
+    return create_profile_browser(
         admin_auth_headers, default_org_id, "https://specs.webrecorder.net"
     )
 
 
-@pytest.fixture(scope="session")
-def profile_browser_3_id(admin_auth_headers, default_org_id, profile_id):
-    return _create_profile_browser(
-        admin_auth_headers, default_org_id, baseprofile=profile_id
-    )
-
-
-@pytest.fixture(scope="session")
-def profile_browser_4_id(admin_auth_headers, default_org_id):
-    return _create_profile_browser(admin_auth_headers, default_org_id)
-
-
-@pytest.fixture(scope="session")
-def profile_browser_5_id(admin_auth_headers, default_org_id):
-    return _create_profile_browser(admin_auth_headers, default_org_id)
-
-
-def _create_profile_browser(
+def create_profile_browser(
     headers: Dict[str, str],
     oid: UUID,
     url="https://old.webrecorder.net",
     baseprofile="",

diff --git a/backend/test/test_profiles.py b/backend/test/test_profiles.py
index 9ad4bb21e2..862c1c25ec 100644
--- a/backend/test/test_profiles.py
+++ b/backend/test/test_profiles.py
@@ -17,6 +17,7 @@
     PROFILE_2_TAGS,
     PROFILE_TAGS_UPDATED,
     prepare_browser_for_profile_commit,
+    create_profile_browser,
 )
 
 
@@ -253,7 +254,7 @@ def test_update_profile_metadata(crawler_auth_headers, default_org_id, profile_i
 
 
 def test_commit_browser_to_existing_profile(
-    admin_auth_headers, default_org_id, profile_browser_3_id, profile_id
+    admin_auth_headers, default_org_id, profile_id
 ):
     # Get original modified time
     r = requests.get(
@@ -265,8 +266,13 @@ def test_commit_browser_to_existing_profile(
     original_created = data["created"]
     original_modified = data["modified"]
 
+    # create browser with existing profile
+    browser_id = create_profile_browser(
+        admin_auth_headers, default_org_id, baseprofile=profile_id
+    )
+
     prepare_browser_for_profile_commit(
-        profile_browser_3_id,
+        browser_id,
         admin_auth_headers,
         default_org_id,
         url="https://example-com.webrecorder.net",
@@ -280,7 +286,7 @@ def test_commit_browser_to_existing_profile(
             f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
             headers=admin_auth_headers,
             json={
-                "browserid": profile_browser_3_id,
+                "browserid": browser_id,
                 "name": PROFILE_NAME_UPDATED,
                 "description": PROFILE_DESC_UPDATED,
                 "tags": PROFILE_TAGS_UPDATED,
@@ -316,10 +322,13 @@ def test_commit_browser_to_existing_profile(
 
 
 def test_commit_reset_browser_to_existing_profile(
-    admin_auth_headers, default_org_id, profile_browser_5_id, profile_id
+    admin_auth_headers, default_org_id, profile_id
 ):
+    # create new browser w/o existing profile to reset
+    browser_id = create_profile_browser(admin_auth_headers, default_org_id)
+
     prepare_browser_for_profile_commit(
-        profile_browser_5_id,
+        browser_id,
         admin_auth_headers,
         default_org_id,
         url="https://example-com.webrecorder.net",
@@ -333,7 +342,7 @@ def test_commit_reset_browser_to_existing_profile(
             f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
             headers=admin_auth_headers,
             json={
-                "browserid": profile_browser_5_id,
+                "browserid": browser_id,
                 "name": PROFILE_NAME_UPDATED,
                 "description": PROFILE_DESC_UPDATED,
                 "tags": PROFILE_TAGS_UPDATED,
@@ -464,9 +473,7 @@ def test_delete_profile(admin_auth_headers, default_org_id, profile_2_id):
     assert r.json()["detail"] == "profile_not_found"
 
 
-def test_create_profile_read_only_org(
-    admin_auth_headers, default_org_id, profile_browser_4_id
-):
+def test_create_profile_read_only_org(admin_auth_headers, default_org_id):
     # Set org to read-only
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/read-only",
@@ -475,9 +482,9 @@ def test_create_profile_read_only_org(
     )
     assert r.json()["updated"]
 
-    prepare_browser_for_profile_commit(
-        profile_browser_4_id, admin_auth_headers, default_org_id
-    )
+    browser_id = create_profile_browser(admin_auth_headers, default_org_id)
+
+    prepare_browser_for_profile_commit(browser_id, admin_auth_headers, default_org_id)
 
     # Try to create profile, verify we get 403 forbidden
     start_time = time.monotonic()
@@ -488,7 +495,7 @@ def test_create_profile_read_only_org(
         f"{API_PREFIX}/orgs/{default_org_id}/profiles",
         headers=admin_auth_headers,
         json={
-            "browserid": profile_browser_4_id,
+            "browserid": browser_id,
             "name": "uncreatable",
             "description": "because org is read-only",
         },

From 3dbc5360c632b2e4674cd76d09ef2a9797d5a5f8 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 13 Nov 2025 18:22:03 -0800
Subject: [PATCH 05/10] tests: fix tests to not navigate to extra url if not
 needed

---
 backend/btrixcloud/profiles.py |  2 +-
 backend/test/conftest.py       | 32 ++++++++++++++++++--------------
 backend/test/test_profiles.py  | 17 ++++++++++-------
 3 files changed, 29 insertions(+), 22 deletions(-)

diff --git a/backend/btrixcloud/profiles.py b/backend/btrixcloud/profiles.py
index 62adb5a196..1a0d7e0935 100644
--- a/backend/btrixcloud/profiles.py
+++ b/backend/btrixcloud/profiles.py
@@ -272,7 +272,7 @@ async def do_commit_to_profile(
 
         # only set origins from existing profile if browser
        # actually launched with that profile (eg. not a reset)
-        if profileid and metadata.baseprofile == profileid:
+        if metadata.baseprofile == profileid:
             origins = existing_profile.origins
 
     else:

diff --git a/backend/test/conftest.py b/backend/test/conftest.py
index 6f8c08aa84..369afd4ed6 100644
--- a/backend/test/conftest.py
+++ b/backend/test/conftest.py
@@ -708,10 +708,7 @@ def echo_server():
 
 
 def prepare_browser_for_profile_commit(
-    browser_id: str,
-    headers: Dict[str, str],
-    oid: UUID,
-    url="https://old.webrecorder.net/tools",
+    browser_id: str, headers: Dict[str, str], oid: UUID, url=None
 ) -> None:
     # Ping to make sure it doesn't expire
     r = requests.post(
@@ -737,14 +734,15 @@ def prepare_browser_for_profile_commit(
     assert data["scale"]
     assert data["oid"] == oid
 
-    # Navigate to new URL
-    r = requests.post(
-        f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/navigate",
-        headers=headers,
-        json={"url": url},
-    )
-    assert r.status_code == 200
-    assert r.json()["success"]
+    # Navigate to new URL, if provided
+    if url:
+        r = requests.post(
+            f"{API_PREFIX}/orgs/{oid}/profiles/browser/{browser_id}/navigate",
+            headers=headers,
+            json={"url": url},
+        )
+        assert r.status_code == 200
+        assert r.json()["success"]
 
     # Ping browser until ready
     max_attempts = 20
@@ -767,7 +765,10 @@ def prepare_browser_for_profile_commit(
 @pytest.fixture(scope="session")
 def profile_id(admin_auth_headers, default_org_id, profile_browser_id):
     prepare_browser_for_profile_commit(
-        profile_browser_id, admin_auth_headers, default_org_id
+        profile_browser_id,
+        admin_auth_headers,
+        default_org_id,
+        url="https://old.webrecorder.net/tools",
     )
 
     # Create profile
@@ -859,7 +860,10 @@ def profile_config_id(admin_auth_headers, default_org_id, profile_id):
 @pytest.fixture(scope="session")
 def profile_2_id(admin_auth_headers, default_org_id, profile_browser_2_id):
     prepare_browser_for_profile_commit(
-        profile_browser_2_id, admin_auth_headers, default_org_id
+        profile_browser_2_id,
+        admin_auth_headers,
+        default_org_id,
+        url="https://old.webrecorder.net/tools",
     )
 
     # Create profile

diff --git a/backend/test/test_profiles.py b/backend/test/test_profiles.py
index 862c1c25ec..f82771cf96 100644
--- a/backend/test/test_profiles.py
+++ b/backend/test/test_profiles.py
@@ -266,16 +266,20 @@ def test_commit_browser_to_existing_profile(
     original_created = data["created"]
     original_modified = data["modified"]
 
+    url = "https://example-com.webrecorder.net/"
+
     # create browser with existing profile
     browser_id = create_profile_browser(
-        admin_auth_headers, default_org_id, baseprofile=profile_id
+        admin_auth_headers,
+        default_org_id,
+        url=url,
+        baseprofile=profile_id,
     )
 
     prepare_browser_for_profile_commit(
         browser_id,
         admin_auth_headers,
         default_org_id,
-        url="https://example-com.webrecorder.net",
     )
 
     time.sleep(10)
@@ -328,14 +332,13 @@ def test_commit_browser_to_existing_profile(
 def test_commit_reset_browser_to_existing_profile(
     admin_auth_headers, default_org_id, profile_id
 ):
+    url = "https://example-com.webrecorder.net/"
+
     # create new browser w/o existing profile to reset
-    browser_id = create_profile_browser(admin_auth_headers, default_org_id)
+    browser_id = create_profile_browser(admin_auth_headers, default_org_id, url=url)
 
     prepare_browser_for_profile_commit(
-        browser_id,
-        admin_auth_headers,
-        default_org_id,
-        url="https://example-com.webrecorder.net",
+        browser_id, admin_auth_headers, default_org_id, url=url
     )
 
     time.sleep(10)

From 0d546caef627464995f8280ed88a224b57f1512a Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Thu, 13 Nov 2025 19:47:11 -0800
Subject: [PATCH 06/10] tests: add tests for modifiedCrawlCid, modifiedCrawlId,
 modifiedCrawlDate for profiles after crawl finishes

---
 backend/test/test_run_crawl.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index f7807be143..84e44a1bd4 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -26,6 +26,7 @@
 # newly started crawl for this test suite
 # (not using the fixture to be able to test running crawl)
 admin_crawl_id = None
+admin_config_id = None
 
 seed_file_crawl_id = None
 
@@ -89,6 +90,9 @@ def test_start_crawl(admin_auth_headers, default_org_id, profile_id):
     global admin_crawl_id
     admin_crawl_id = data["run_now_job"]
 
+    global admin_config_id
+    admin_config_id = data["id"]
+
 
 def test_wait_for_running(admin_auth_headers, default_org_id):
     while True:
@@ -363,6 +367,25 @@ def test_crawls_include_file_error_page_counts(admin_auth_headers, default_org_i
     assert data["errorPageCount"] >= 0
 
 
+def test_profile_updated_by_crawl(admin_auth_headers, default_org_id, profile_id):
+    r = requests.get(
+        f"{API_PREFIX}/orgs/{default_org_id}/profiles/{profile_id}",
+        headers=admin_auth_headers,
+    )
+    assert r.status_code == 200
+    data = r.json()
+    assert data["id"] == profile_id
+    assert data["oid"] == default_org_id
+
+    assert data["modifiedCrawlId"] == admin_crawl_id
+    assert data["modifiedCrawlCid"] == admin_config_id
+
+    assert data["modifiedCrawlDate"] >= data["modified"]
+
+    assert data["createdByName"] == "admin"
+    assert data["modifiedByName"] == "admin"
+
+
 def test_download_wacz():
     r = requests.get(HOST_PREFIX + wacz_path)
     assert r.status_code == 200

From 6f25e2c9eee9b74d5034c88e6c7b8b143a47c913 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 17 Nov 2025 12:12:16 -0800
Subject: [PATCH 07/10] test: rename globals to avoid any conflicts with
 existing fixtures

---
 backend/test/test_run_crawl.py | 102 ++++++++++++++++-----------------
 1 file changed, 51 insertions(+), 51 deletions(-)

diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 84e44a1bd4..5275cbe4fc 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -25,8 +25,8 @@
 
 # newly started crawl for this test suite
 # (not using the fixture to be able to test running crawl)
-admin_crawl_id = None
-admin_config_id = None
+curr_admin_crawl_id = None
+curr_admin_config_id = None
 
 seed_file_crawl_id = None
 
@@ -87,17 +87,17 @@ def test_start_crawl(admin_auth_headers, default_org_id, profile_id):
     )
     data = r.json()
 
-    global admin_crawl_id
-    admin_crawl_id = data["run_now_job"]
+    global curr_admin_crawl_id
+    curr_admin_crawl_id = data["run_now_job"]
 
-    global admin_config_id
-    admin_config_id = data["id"]
+    global curr_admin_config_id
+    curr_admin_config_id = data["id"]
 
 
 def test_wait_for_running(admin_auth_headers, default_org_id):
     while True:
         r = requests.get(
-            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/replay.json",
+            f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}/replay.json",
             headers=admin_auth_headers,
         )
         data = r.json()
@@ -109,14 +109,14 @@ def test_crawl_queue(admin_auth_headers, default_org_id):
     # 422 - requires offset and count
     r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}/queue",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}/queue",
         headers=admin_auth_headers,
     )
     assert r.status_code == 422
 
     while True:
         r = requests.get(
-    assert crawls[-1]["id"] == admin_crawl_id
+    assert crawls[-1]["id"] == curr_admin_crawl_id
     assert crawls[-1]["reviewStatus"] == 5
 
     # Try to update to invalid reviewStatus
     r = requests.patch(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}",
         headers=admin_auth_headers,
         json={
             "reviewStatus": "invalid",
         },
@@ -646,7 +646,7 @@ def test_update_crawl(
     assert r.status_code == 422
 
     r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}",
         headers=admin_auth_headers,
     )
     assert r.status_code == 200
@@ -654,14 +654,14 @@ def test_update_crawl(
 
     # Verify deleting works as well
     r = requests.patch(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}",
         headers=admin_auth_headers,
         json={"tags": [], "description": None},
     )
     assert r.status_code == 200
 
     r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}",
         headers=admin_auth_headers,
     )
     assert r.status_code == 200
@@ -1292,7 +1292,7 @@ def test_delete_crawls_crawler(crawler_auth_headers, default_org_id, crawler_cra
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/delete",
         headers=crawler_auth_headers,
-        json={"crawl_ids": [admin_crawl_id]},
+        json={"crawl_ids": [curr_admin_crawl_id]},
     )
     assert r.status_code == 403
     data = r.json()
@@ -1356,7 +1356,7 @@ def test_delete_crawls_org_owner(
     admin_auth_headers,
     crawler_auth_headers,
     default_org_id,
-    admin_crawl_id,
+    curr_admin_crawl_id,
     crawler_crawl_id,
     wr_specs_crawl_id,
 ):
@@ -1364,7 +1364,7 @@ def test_delete_crawls_org_owner(
     r = requests.post(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/delete",
         headers=admin_auth_headers,
-        json={"crawl_ids": [admin_crawl_id]},
+        json={"crawl_ids": [curr_admin_crawl_id]},
     )
     assert r.status_code == 200
     data = r.json()
@@ -1372,7 +1372,7 @@ def test_delete_crawls_org_owner(
     assert data["storageQuotaReached"] is False
 
     r = requests.get(
-        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{admin_crawl_id}",
+        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}",
         headers=admin_auth_headers,
     )
     assert r.status_code == 404

From 561ab3554d0cc373a838ab4ed2ee39b94abcb5f2 Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 17 Nov 2025 13:30:02 -0800
Subject: [PATCH 08/10] don't try to use 'curr_admin_crawl_id' as a fixture

---
 backend/test/test_run_crawl.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 5275cbe4fc..ea27f35da0 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -430,7 +430,7 @@ def test_verify_wacz():
     ],
 )
 def test_download_wacz_crawls(
-    admin_auth_headers, default_org_id, curr_admin_crawl_id, type_path
+    admin_auth_headers, default_org_id, type_path
 ):
     with TemporaryFile() as fh:
         with requests.get(
@@ -471,7 +471,7 @@ def test_download_wacz_crawls(
     ],
 )
 def test_download_wacz_crawls_as_single_wacz(
-    admin_auth_headers, default_org_id, curr_admin_crawl_id, type_path
+    admin_auth_headers, default_org_id, type_path
 ):
     with TemporaryFile() as fh:
         with requests.get(

From d73c0e4bbda8578eccac688b0c253d63ae82d34a Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 17 Nov 2025 13:58:43 -0800
Subject: [PATCH 09/10] fix remaining tests

---
 backend/test/test_run_crawl.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index ea27f35da0..1533702a5f 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -527,7 +527,6 @@ def test_update_crawl(
     admin_auth_headers,
     default_org_id,
-    curr_admin_crawl_id,
 ):
     r = requests.get(
         f"{API_PREFIX}/orgs/{default_org_id}/crawls/{curr_admin_crawl_id}",
         headers=admin_auth_headers,
@@ -1356,7 +1355,6 @@ def test_delete_crawls_org_owner(
     admin_auth_headers,
     crawler_auth_headers,
     default_org_id,
-    curr_admin_crawl_id,
     crawler_crawl_id,
     wr_specs_crawl_id,
 ):

From b1bdd9bdf0322198a42063dbc1e9ad87980200bb Mon Sep 17 00:00:00 2001
From: Ilya Kreymer
Date: Mon, 17 Nov 2025 14:32:08 -0800
Subject: [PATCH 10/10] test: 0 collections to start on new crawl

---
 backend/test/test_run_crawl.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/backend/test/test_run_crawl.py b/backend/test/test_run_crawl.py
index 1533702a5f..8668873cb0 100644
--- a/backend/test/test_run_crawl.py
+++ b/backend/test/test_run_crawl.py
@@ -535,7 +535,7 @@ def test_update_crawl(
     assert r.status_code == 200
     data = r.json()
     assert sorted(data["tags"]) == ["wr-test-1", "wr-test-2"]
-    assert len(data["collectionIds"]) == 1
+    assert len(data["collectionIds"]) == 0
 
     # Make new collection
     r = requests.post(
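
The behavior this series converges on can be summarized in a small standalone sketch. The helper name and signature below are hypothetical (only `baseprofile`, `profileid`, and the origins list come from the diffs above): an existing profile's visited origins survive a commit only when the committing browser was actually launched from that same profile, so a reset browser commits with a fresh origin list.

```python
# Illustrative sketch only -- not the btrixcloud backend code.
# Mirrors the final condition from PATCH 01/05: metadata.baseprofile == profileid.
from typing import List, Optional


def origins_for_commit(
    baseprofile: Optional[str],  # profile the browser was launched with, if any
    profileid: Optional[str],    # existing profile being committed to
    existing_origins: List[str],
) -> List[str]:
    """Keep prior origins only if the browser actually used this profile."""
    # A reset browser (no baseprofile, or a different one) starts fresh,
    # so previously recorded origins are dropped rather than carried over.
    if baseprofile == profileid:
        return list(existing_origins)
    return []


# e.g. a reset browser committing over profile "p1" drops the old origins,
# while a browser launched from "p1" keeps them:
assert origins_for_commit(None, "p1", ["https://old.webrecorder.net"]) == []
assert origins_for_commit("p1", "p1", ["https://old.webrecorder.net"]) == [
    "https://old.webrecorder.net"
]
```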