22 changes: 22 additions & 0 deletions backend/test_nightly/conftest.py
@@ -203,3 +203,25 @@ def large_crawl_finished(admin_auth_headers, default_org_id, large_crawl_id):
            time.sleep(30)
            break
        time.sleep(5)


@pytest.fixture(scope="session")
def timeout_crawl(admin_auth_headers, default_org_id):
    # Create a crawl config with a 30-second crawl timeout and run it now
    crawl_data = {
        "runNow": True,
        "name": "Crawl with crawl timeout",
        "crawlTimeout": 30,
        "config": {
            "seeds": [{"url": "https://webrecorder.net/"}],
            "scopeType": "domain",
            "limit": 100,
        },
    }
    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=crawl_data,
    )
    data = r.json()
    # Return the id of the crawl started via runNow
    return data["run_now_job"]
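Note that the fixture returns without checking that the config creation succeeded, so a failed POST only surfaces later as a confusing error in the test body. A minimal fail-fast variant is sketched below; the assertion and its message are illustrative, not part of this PR, and assume the endpoint returns 200 on success:

    r = requests.post(
        f"{API_PREFIX}/orgs/{default_org_id}/crawlconfigs/",
        headers=admin_auth_headers,
        json=crawl_data,
    )
    # Fail fast if the crawlconfig could not be created (assumed 200 on success)
    assert r.status_code == 200, f"crawlconfig creation failed: {r.text}"
    data = r.json()
    return data["run_now_job"]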
27 changes: 27 additions & 0 deletions backend/test_nightly/test_crawl_timeout.py
@@ -0,0 +1,27 @@
import requests
import time

from .conftest import API_PREFIX


def test_crawl_timeout(admin_auth_headers, default_org_id, timeout_crawl):
    # Verify that crawl has started
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] in ("starting", "running")

    # Wait some time to let crawl start, hit timeout, and gracefully stop
    time.sleep(180)

    # Verify crawl was stopped
    r = requests.get(
        f"{API_PREFIX}/orgs/{default_org_id}/crawls/{timeout_crawl}/replay.json",
        headers=admin_auth_headers,
    )
    assert r.status_code == 200
    data = r.json()
    assert data["state"] == "partial_complete"
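The fixed time.sleep(180) makes the test both slow and brittle: too short and the crawl has not yet timed out, too long and it wastes nightly CI time. A polling helper is a common alternative; the sketch below is illustrative only, with the helper name, parameters, and 300-second ceiling all assumptions rather than part of this PR:

    def wait_for_crawl_state(crawl_id, org_id, headers, states, timeout=300, interval=10):
        # Poll replay.json until the crawl reaches one of the expected states
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            r = requests.get(
                f"{API_PREFIX}/orgs/{org_id}/crawls/{crawl_id}/replay.json",
                headers=headers,
            )
            if r.status_code == 200 and r.json()["state"] in states:
                return r.json()
            time.sleep(interval)
        raise TimeoutError(f"crawl {crawl_id} never reached {states}")

    # Usage in the test, replacing the fixed sleep:
    # data = wait_for_crawl_state(
    #     timeout_crawl, default_org_id, admin_auth_headers, ("partial_complete",)
    # )
    # assert data["state"] == "partial_complete"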