From f01418a104ae96168487612935653cc5111f18d0 Mon Sep 17 00:00:00 2001 From: Arpan Mukherjee Date: Tue, 26 May 2026 15:17:25 +0530 Subject: [PATCH] fix: chunk bulk delete tasks to prevent SQLite variable limits --- backend/secuscan/models.py | 11 +- backend/secuscan/routes.py | 62 +++-- .../unit/test_bulk_delete_sqlite_limit.py | 252 ++++++++++++++++++ 3 files changed, 305 insertions(+), 20 deletions(-) create mode 100644 testing/backend/unit/test_bulk_delete_sqlite_limit.py diff --git a/backend/secuscan/models.py b/backend/secuscan/models.py index 264363e5..845ee2e1 100644 --- a/backend/secuscan/models.py +++ b/backend/secuscan/models.py @@ -2,12 +2,14 @@ Pydantic models for API requests and responses """ -from typing import Optional, Dict, Any, List +from typing import Optional, Dict, Any, List, Annotated from datetime import datetime -from pydantic import BaseModel, Field +from pydantic import BaseModel, Field, RootModel from enum import Enum +MAX_BULK_DELETE = 500 + class SafetyLevel(str, Enum): """Plugin safety level classification""" SAFE = "safe" @@ -161,3 +163,8 @@ class ErrorResponse(BaseModel): message: str field: Optional[str] = None details: Optional[Dict[str, Any]] = None + + +class BulkDeleteRequest(RootModel[Annotated[List[str], Field(max_length=MAX_BULK_DELETE)]]): + """Accepts a JSON array of task IDs directly. Max 500 per request.""" + pass \ No newline at end of file diff --git a/backend/secuscan/routes.py b/backend/secuscan/routes.py index f1d53063..db1580f4 100644 --- a/backend/secuscan/routes.py +++ b/backend/secuscan/routes.py @@ -65,7 +65,7 @@ def build_report_filename(task: Dict[str, Any], extension: str) -> str: from .cache import get_cache from .models import ( TaskCreateRequest, TaskResponse, TaskResult, - PluginListResponse, ErrorResponse + PluginListResponse, ErrorResponse, BulkDeleteRequest ) from .config import settings from .database import get_db @@ -744,28 +744,47 @@ def build_page_url(page_num): "per_page": per_page, "total_pages": total_pages, "total_items": total, - "next": build_page_url(next_page), # ← NEW - "previous": build_page_url(prev_page) # ← NEW + "next": build_page_url(next_page), + "previous": build_page_url(prev_page) } } +SQLITE_CHUNK_SIZE = 500 # safely under SQLITE_LIMIT_VARIABLE_NUMBER = 999 + async def delete_task_records(task_ids: List[str]): - """Helper to delete database records and files for multiple tasks.""" + """Helper to delete database records and files for multiple tasks. + + Processes IDs in chunks of SQLITE_CHUNK_SIZE to stay under + SQLite's SQLITE_LIMIT_VARIABLE_NUMBER = 999 limit. + """ + if not task_ids: + return + db = await get_db() - # Get raw output paths for file cleanup - placeholders = ",".join(["?"] * len(task_ids)) - task_rows = await db.fetchall(f"SELECT raw_output_path FROM tasks WHERE id IN ({placeholders})", tuple(task_ids)) + # Collect all raw_output_paths across chunks for file cleanup + all_task_rows = [] + for i in range(0, len(task_ids), SQLITE_CHUNK_SIZE): + chunk = task_ids[i : i + SQLITE_CHUNK_SIZE] + placeholders = ",".join(["?"] * len(chunk)) + rows = await db.fetchall( + f"SELECT raw_output_path FROM tasks WHERE id IN ({placeholders})", + tuple(chunk) + ) + all_task_rows.extend(rows) - # Delete associated data - await db.execute(f"DELETE FROM findings WHERE task_id IN ({placeholders})", tuple(task_ids)) - await db.execute(f"DELETE FROM reports WHERE task_id IN ({placeholders})", tuple(task_ids)) - await db.execute(f"DELETE FROM audit_log WHERE task_id IN ({placeholders})", tuple(task_ids)) - await db.execute(f"DELETE FROM tasks WHERE id IN ({placeholders})", tuple(task_ids)) + # Delete associated records in chunks + for i in range(0, len(task_ids), SQLITE_CHUNK_SIZE): + chunk = task_ids[i : i + SQLITE_CHUNK_SIZE] + placeholders = ",".join(["?"] * len(chunk)) + await db.execute(f"DELETE FROM findings WHERE task_id IN ({placeholders})", tuple(chunk)) + await db.execute(f"DELETE FROM reports WHERE task_id IN ({placeholders})", tuple(chunk)) + await db.execute(f"DELETE FROM audit_log WHERE task_id IN ({placeholders})", tuple(chunk)) + await db.execute(f"DELETE FROM tasks WHERE id IN ({placeholders})", tuple(chunk)) # Cleanup files on disk - for row in task_rows: + for row in all_task_rows: if row and row["raw_output_path"]: try: path = Path(row["raw_output_path"]) @@ -794,13 +813,21 @@ async def delete_task(task_id: str): @router.delete("/tasks/bulk") -async def bulk_delete_tasks(task_ids: List[str]): - """Delete multiple tasks at once""" +async def bulk_delete_tasks(request: BulkDeleteRequest): + """Delete multiple tasks at once (max 500 IDs per request)""" + task_ids = request.root # RootModel exposes data via .root db = await get_db() - # Check if any tasks are running + # Empty list — return early cleanly (test requires 200, not 422) + if not task_ids: + return {"deleted_count": 0, "success": True} + + # Check running tasks — safe: len(task_ids) <= 500 guaranteed by Pydantic placeholders = ",".join(["?"] * len(task_ids)) - running_tasks = await db.fetchone(f"SELECT id FROM tasks WHERE id IN ({placeholders}) AND status = 'running' LIMIT 1", tuple(task_ids)) + running_tasks = await db.fetchone( + f"SELECT id FROM tasks WHERE id IN ({placeholders}) AND status = 'running' LIMIT 1", + tuple(task_ids) + ) if running_tasks: raise HTTPException(status_code=400, detail="Cannot delete running tasks. Abort them first.") @@ -812,7 +839,6 @@ async def bulk_delete_tasks(task_ids: List[str]): "success": True } - @router.delete("/tasks/clear") async def clear_all_tasks(): """Wipe all scan history and associated data (findings, reports, assets, attack surface)""" diff --git a/testing/backend/unit/test_bulk_delete_sqlite_limit.py b/testing/backend/unit/test_bulk_delete_sqlite_limit.py new file mode 100644 index 00000000..f563aaf5 --- /dev/null +++ b/testing/backend/unit/test_bulk_delete_sqlite_limit.py @@ -0,0 +1,252 @@ +""" +testing/backend/unit/test_bulk_delete_sqlite_limit.py + +Regression tests for Issue #313 — DELETE /tasks/bulk SQLite variable-limit DoS. + +Covers the four cases requested in the PR review: + 1. Empty list → 200, deleted_count=0 + 2. 500 IDs → 200, accepted (at the limit) + 3. 501 IDs → 422, rejected (over the limit) + 4. delete_task_records() chunks correctly when given more than one chunk + (i.e. > SQLITE_CHUNK_SIZE IDs) — verifies the helper never builds a + placeholder string longer than SQLITE_LIMIT_VARIABLE_NUMBER = 999. +""" + +import uuid +from unittest.mock import AsyncMock, MagicMock, call, patch + +import pytest +import pytest_asyncio +from httpx import ASGITransport, AsyncClient +from pydantic import ValidationError + +from backend.secuscan.models import BulkDeleteRequest, MAX_BULK_DELETE +from backend.secuscan.routes import SQLITE_CHUNK_SIZE + +ENDPOINT = "/api/v1/tasks/bulk" + + +# Fixtures (mirrors test_task_cleanup.py) + +@pytest_asyncio.fixture +async def db_path(tmp_path): + return str(tmp_path / "test_secuscan.db") + + +@pytest_asyncio.fixture +async def app_client(db_path): + mock_executor = MagicMock() + mock_executor.cancel_task = AsyncMock(return_value=True) + mock_executor.get_task_status = AsyncMock(return_value={"status": "queued"}) + + with patch("backend.secuscan.routes.executor", mock_executor): + from backend.secuscan.main import app + from backend.secuscan import database as db_module + from backend.secuscan import cache as cache_module + + await cache_module.init_cache() + test_db = await db_module.init_db(db_path) + + async with AsyncClient( + transport=ASGITransport(app=app), base_url="http://test" + ) as client: + client._mock_executor = mock_executor + client._db = test_db + client._db_path = db_path + yield client + + await test_db.disconnect() + db_module.db = None + await cache_module.cache.disconnect() + cache_module.cache = None + + +async def insert_task(db, status: str = "completed") -> str: + task_id = str(uuid.uuid4()) + await db.execute( + "INSERT INTO tasks " + "(id, plugin_id, tool_name, target, status, inputs_json, consent_granted) " + "VALUES (?, 'nmap', 'nmap', '127.0.0.1', ?, '{}', 1)", + (task_id, status), + ) + return task_id + + +# 1. Unit tests — BulkDeleteRequest Pydantic model validation + +class TestBulkDeleteRequestModel: + """ + Validates that the Pydantic model enforces size limits before any + database code runs. No HTTP server needed. + """ + + def test_empty_list_is_valid(self): + """[] must parse successfully — endpoint handles it as a no-op.""" + req = BulkDeleteRequest([]) + assert req.root == [] + + def test_single_id_is_valid(self): + req = BulkDeleteRequest([str(uuid.uuid4())]) + assert len(req.root) == 1 + + def test_exactly_500_ids_is_valid(self): + """500 IDs is the documented limit — must be accepted.""" + ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE)] + req = BulkDeleteRequest(ids) + assert len(req.root) == MAX_BULK_DELETE + + def test_501_ids_raises_validation_error(self): + """501 IDs must be rejected by Pydantic before reaching any SQL.""" + ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE + 1)] + with pytest.raises(ValidationError) as exc_info: + BulkDeleteRequest(ids) + errors = exc_info.value.errors() + assert any(e["type"] in ("too_long", "value_error") for e in errors), ( + f"Expected a list-length validation error, got: {errors}" + ) + + def test_1000_ids_raises_validation_error(self): + """1000 IDs (the original crash threshold) must also be rejected.""" + ids = [str(uuid.uuid4()) for _ in range(1000)] + with pytest.raises(ValidationError): + BulkDeleteRequest(ids) + + +# 2. Integration — HTTP endpoint boundary tests + +class TestBulkDeleteEndpointLimits: + + @pytest.mark.asyncio + async def test_empty_list_returns_200(self, app_client): + """[] must return 200 with deleted_count=0 — not 422.""" + resp = await app_client.request("DELETE", ENDPOINT, json=[]) + assert resp.status_code == 200, resp.text + body = resp.json() + assert body["success"] is True + assert body["deleted_count"] == 0 + + @pytest.mark.asyncio + async def test_500_ids_accepted(self, app_client): + """500 IDs (at the limit) must be accepted with 200.""" + # Use random UUIDs — they won't exist in DB so deleted_count=0, + # but the endpoint must NOT reject the request with 422. + ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE)] + resp = await app_client.request("DELETE", ENDPOINT, json=ids) + assert resp.status_code == 200, ( + f"Expected 200 for {MAX_BULK_DELETE} IDs, got {resp.status_code}: {resp.text}" + ) + + @pytest.mark.asyncio + async def test_501_ids_rejected_with_422(self, app_client): + """501 IDs (one over the limit) must be rejected with 422.""" + ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE + 1)] + resp = await app_client.request("DELETE", ENDPOINT, json=ids) + assert resp.status_code == 422, ( + f"Expected 422 for {MAX_BULK_DELETE + 1} IDs, got {resp.status_code}: {resp.text}" + ) + + @pytest.mark.asyncio + async def test_response_shape_on_success(self, app_client): + """Success response must always include both 'success' and 'deleted_count'.""" + resp = await app_client.request("DELETE", ENDPOINT, json=[]) + body = resp.json() + assert "success" in body + assert "deleted_count" in body + + +# 3. Unit test — delete_task_records() chunking + +class TestDeleteTaskRecordsChunking: + """ + Verifies that delete_task_records() never passes more than + SQLITE_CHUNK_SIZE IDs in a single SQL statement. + + We mock db.execute and db.fetchall to capture the actual placeholder + strings produced, then assert that no call ever exceeds the chunk limit. + This is the core regression guard for the SQLite variable-number crash. + """ + + @pytest.mark.asyncio + async def test_single_chunk_does_not_exceed_sqlite_limit(self): + """ + For a list of exactly SQLITE_CHUNK_SIZE IDs, one chunk is produced + and the placeholder count stays within the SQLite variable limit. + """ + from backend.secuscan.routes import delete_task_records + + ids = [str(uuid.uuid4()) for _ in range(SQLITE_CHUNK_SIZE)] + captured_sql: list[str] = [] + + mock_db = AsyncMock() + mock_db.fetchall = AsyncMock(return_value=[]) + async def capture_execute(sql, params=()): + captured_sql.append(sql) + mock_db.execute = capture_execute + + with patch("backend.secuscan.routes.get_db", return_value=mock_db): + await delete_task_records(ids) + + for sql in captured_sql: + placeholder_count = sql.count("?") + assert placeholder_count <= SQLITE_CHUNK_SIZE, ( + f"SQL exceeded SQLITE_CHUNK_SIZE={SQLITE_CHUNK_SIZE}: " + f"{placeholder_count} placeholders in:\n{sql}" + ) + + @pytest.mark.asyncio + async def test_multi_chunk_splits_correctly(self): + """ + For SQLITE_CHUNK_SIZE + 1 IDs, the helper must issue at least two + batches — no single SQL call may hold all IDs at once. + + This directly reproduces the pre-fix crash path: + OperationalError: too many SQL variables + """ + from backend.secuscan.routes import delete_task_records + + total = SQLITE_CHUNK_SIZE + 1 # forces exactly 2 chunks + ids = [str(uuid.uuid4()) for _ in range(total)] + captured_sql: list[str] = [] + captured_params: list[tuple] = [] + + mock_db = AsyncMock() + mock_db.fetchall = AsyncMock(return_value=[]) + async def capture_execute(sql, params=()): + captured_sql.append(sql) + captured_params.append(params) + mock_db.execute = capture_execute + + with patch("backend.secuscan.routes.get_db", return_value=mock_db): + await delete_task_records(ids) + + # Every single execute call must respect the chunk size + for sql, params in zip(captured_sql, captured_params): + placeholder_count = sql.count("?") + assert placeholder_count <= SQLITE_CHUNK_SIZE, ( + f"A single SQL call had {placeholder_count} placeholders " + f"(limit={SQLITE_CHUNK_SIZE}):\n{sql}" + ) + assert len(params) <= SQLITE_CHUNK_SIZE, ( + f"A single SQL call had {len(params)} bound params " + f"(limit={SQLITE_CHUNK_SIZE})" + ) + + # Sanity: the helper must have issued more than one DELETE per table + delete_tasks_calls = [s for s in captured_sql if "DELETE FROM tasks" in s] + assert len(delete_tasks_calls) >= 2, ( + f"Expected ≥2 DELETE FROM tasks batches for {total} IDs, " + f"got {len(delete_tasks_calls)}" + ) + + @pytest.mark.asyncio + async def test_empty_list_returns_immediately(self): + """delete_task_records([]) must return without touching the database.""" + from backend.secuscan.routes import delete_task_records + + mock_db = AsyncMock() + + with patch("backend.secuscan.routes.get_db", return_value=mock_db): + await delete_task_records([]) + + mock_db.execute.assert_not_called() + mock_db.fetchall.assert_not_called()