From f01418a104ae96168487612935653cc5111f18d0 Mon Sep 17 00:00:00 2001
From: Arpan Mukherjee <mukherjeearpan381@gmail.com>
Date: Tue, 26 May 2026 15:17:25 +0530
Subject: [PATCH] fix: chunk bulk delete tasks to prevent SQLite variable
 limits

---
 backend/secuscan/models.py                    |  11 +-
 backend/secuscan/routes.py                    |  62 +++--
 .../unit/test_bulk_delete_sqlite_limit.py     | 252 ++++++++++++++++++
 3 files changed, 305 insertions(+), 20 deletions(-)
 create mode 100644 testing/backend/unit/test_bulk_delete_sqlite_limit.py

diff --git a/backend/secuscan/models.py b/backend/secuscan/models.py
index 264363e5..845ee2e1 100644
--- a/backend/secuscan/models.py
+++ b/backend/secuscan/models.py
@@ -2,12 +2,14 @@
 Pydantic models for API requests and responses
 """
 
-from typing import Optional, Dict, Any, List
+from typing import Optional, Dict, Any, List, Annotated
 from datetime import datetime
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, RootModel
 from enum import Enum
 
 
+MAX_BULK_DELETE = 500
+
 class SafetyLevel(str, Enum):
     """Plugin safety level classification"""
     SAFE = "safe"
@@ -161,3 +163,8 @@ class ErrorResponse(BaseModel):
     message: str
     field: Optional[str] = None
     details: Optional[Dict[str, Any]] = None
+
+
+class BulkDeleteRequest(RootModel[Annotated[List[str], Field(max_length=MAX_BULK_DELETE)]]):
+    """Accepts a JSON array of task IDs directly. Max 500 per request."""
+    pass
\ No newline at end of file
diff --git a/backend/secuscan/routes.py b/backend/secuscan/routes.py
index f1d53063..db1580f4 100644
--- a/backend/secuscan/routes.py
+++ b/backend/secuscan/routes.py
@@ -65,7 +65,7 @@ def build_report_filename(task: Dict[str, Any], extension: str) -> str:
 from .cache import get_cache
 from .models import (
     TaskCreateRequest, TaskResponse, TaskResult,
-    PluginListResponse, ErrorResponse
+    PluginListResponse, ErrorResponse, BulkDeleteRequest
 )
 from .config import settings
 from .database import get_db
@@ -744,28 +744,47 @@ def build_page_url(page_num):
             "per_page": per_page,
             "total_pages": total_pages,
             "total_items": total,
-            "next": build_page_url(next_page),      # ← NEW
-            "previous": build_page_url(prev_page)    # ← NEW
+            "next": build_page_url(next_page),
+            "previous": build_page_url(prev_page)
         }
     }
 
 
+SQLITE_CHUNK_SIZE = 500  # safely under SQLITE_LIMIT_VARIABLE_NUMBER = 999
+
 async def delete_task_records(task_ids: List[str]):
-    """Helper to delete database records and files for multiple tasks."""
+    """Helper to delete database records and files for multiple tasks.
+
+    Processes IDs in chunks of SQLITE_CHUNK_SIZE to stay under
+    SQLite's SQLITE_LIMIT_VARIABLE_NUMBER = 999 limit.
+    """
+    if not task_ids:
+        return
+
     db = await get_db()
 
-    # Get raw output paths for file cleanup
-    placeholders = ",".join(["?"] * len(task_ids))
-    task_rows = await db.fetchall(f"SELECT raw_output_path FROM tasks WHERE id IN ({placeholders})", tuple(task_ids))
+    # Collect all raw_output_paths across chunks for file cleanup
+    all_task_rows = []
+    for i in range(0, len(task_ids), SQLITE_CHUNK_SIZE):
+        chunk = task_ids[i : i + SQLITE_CHUNK_SIZE]
+        placeholders = ",".join(["?"] * len(chunk))
+        rows = await db.fetchall(
+            f"SELECT raw_output_path FROM tasks WHERE id IN ({placeholders})",
+            tuple(chunk)
+        )
+        all_task_rows.extend(rows)
 
-    # Delete associated data
-    await db.execute(f"DELETE FROM findings WHERE task_id IN ({placeholders})", tuple(task_ids))
-    await db.execute(f"DELETE FROM reports WHERE task_id IN ({placeholders})", tuple(task_ids))
-    await db.execute(f"DELETE FROM audit_log WHERE task_id IN ({placeholders})", tuple(task_ids))
-    await db.execute(f"DELETE FROM tasks WHERE id IN ({placeholders})", tuple(task_ids))
+    # Delete associated records in chunks
+    for i in range(0, len(task_ids), SQLITE_CHUNK_SIZE):
+        chunk = task_ids[i : i + SQLITE_CHUNK_SIZE]
+        placeholders = ",".join(["?"] * len(chunk))
+        await db.execute(f"DELETE FROM findings   WHERE task_id IN ({placeholders})", tuple(chunk))
+        await db.execute(f"DELETE FROM reports    WHERE task_id IN ({placeholders})", tuple(chunk))
+        await db.execute(f"DELETE FROM audit_log  WHERE task_id IN ({placeholders})", tuple(chunk))
+        await db.execute(f"DELETE FROM tasks      WHERE id       IN ({placeholders})", tuple(chunk))
 
     # Cleanup files on disk
-    for row in task_rows:
+    for row in all_task_rows:
         if row and row["raw_output_path"]:
             try:
                 path = Path(row["raw_output_path"])
@@ -794,13 +813,21 @@ async def delete_task(task_id: str):
 
 
 @router.delete("/tasks/bulk")
-async def bulk_delete_tasks(task_ids: List[str]):
-    """Delete multiple tasks at once"""
+async def bulk_delete_tasks(request: BulkDeleteRequest):
+    """Delete multiple tasks at once (max 500 IDs per request)"""
+    task_ids = request.root  # RootModel exposes data via .root
     db = await get_db()
 
-    # Check if any tasks are running
+    # Empty list — return early cleanly (test requires 200, not 422)
+    if not task_ids:
+        return {"deleted_count": 0, "success": True}
+
+    # Check running tasks — safe: len(task_ids) <= 500 guaranteed by Pydantic
     placeholders = ",".join(["?"] * len(task_ids))
-    running_tasks = await db.fetchone(f"SELECT id FROM tasks WHERE id IN ({placeholders}) AND status = 'running' LIMIT 1", tuple(task_ids))
+    running_tasks = await db.fetchone(
+        f"SELECT id FROM tasks WHERE id IN ({placeholders}) AND status = 'running' LIMIT 1",
+        tuple(task_ids)
+    )
     if running_tasks:
         raise HTTPException(status_code=400, detail="Cannot delete running tasks. Abort them first.")
 
@@ -812,7 +839,6 @@ async def bulk_delete_tasks(task_ids: List[str]):
         "success": True
     }
 
-
 @router.delete("/tasks/clear")
 async def clear_all_tasks():
     """Wipe all scan history and associated data (findings, reports, assets, attack surface)"""
diff --git a/testing/backend/unit/test_bulk_delete_sqlite_limit.py b/testing/backend/unit/test_bulk_delete_sqlite_limit.py
new file mode 100644
index 00000000..f563aaf5
--- /dev/null
+++ b/testing/backend/unit/test_bulk_delete_sqlite_limit.py
@@ -0,0 +1,252 @@
+"""
+testing/backend/unit/test_bulk_delete_sqlite_limit.py
+
+Regression tests for Issue #313 — DELETE /tasks/bulk SQLite variable-limit DoS.
+
+Covers the four cases requested in the PR review:
+  1. Empty list  → 200, deleted_count=0
+  2. 500 IDs     → 200, accepted (at the limit)
+  3. 501 IDs     → 422, rejected  (over the limit)
+  4. delete_task_records() chunks correctly when given more than one chunk
+     (i.e. > SQLITE_CHUNK_SIZE IDs) — verifies the helper never builds a
+     placeholder string longer than SQLITE_LIMIT_VARIABLE_NUMBER = 999.
+"""
+
+import uuid
+from unittest.mock import AsyncMock, MagicMock, call, patch
+
+import pytest
+import pytest_asyncio
+from httpx import ASGITransport, AsyncClient
+from pydantic import ValidationError
+
+from backend.secuscan.models import BulkDeleteRequest, MAX_BULK_DELETE
+from backend.secuscan.routes import SQLITE_CHUNK_SIZE
+
+ENDPOINT = "/api/v1/tasks/bulk"
+
+
+# Fixtures  (mirrors test_task_cleanup.py)
+
+@pytest_asyncio.fixture
+async def db_path(tmp_path):
+    return str(tmp_path / "test_secuscan.db")
+
+
+@pytest_asyncio.fixture
+async def app_client(db_path):
+    mock_executor = MagicMock()
+    mock_executor.cancel_task = AsyncMock(return_value=True)
+    mock_executor.get_task_status = AsyncMock(return_value={"status": "queued"})
+
+    with patch("backend.secuscan.routes.executor", mock_executor):
+        from backend.secuscan.main import app
+        from backend.secuscan import database as db_module
+        from backend.secuscan import cache as cache_module
+
+        await cache_module.init_cache()
+        test_db = await db_module.init_db(db_path)
+
+        async with AsyncClient(
+            transport=ASGITransport(app=app), base_url="http://test"
+        ) as client:
+            client._mock_executor = mock_executor
+            client._db = test_db
+            client._db_path = db_path
+            yield client
+
+        await test_db.disconnect()
+        db_module.db = None
+        await cache_module.cache.disconnect()
+        cache_module.cache = None
+
+
+async def insert_task(db, status: str = "completed") -> str:
+    task_id = str(uuid.uuid4())
+    await db.execute(
+        "INSERT INTO tasks "
+        "(id, plugin_id, tool_name, target, status, inputs_json, consent_granted) "
+        "VALUES (?, 'nmap', 'nmap', '127.0.0.1', ?, '{}', 1)",
+        (task_id, status),
+    )
+    return task_id
+
+
+# 1. Unit tests — BulkDeleteRequest Pydantic model validation
+
+class TestBulkDeleteRequestModel:
+    """
+    Validates that the Pydantic model enforces size limits before any
+    database code runs.  No HTTP server needed.
+    """
+
+    def test_empty_list_is_valid(self):
+        """[] must parse successfully — endpoint handles it as a no-op."""
+        req = BulkDeleteRequest([])
+        assert req.root == []
+
+    def test_single_id_is_valid(self):
+        req = BulkDeleteRequest([str(uuid.uuid4())])
+        assert len(req.root) == 1
+
+    def test_exactly_500_ids_is_valid(self):
+        """500 IDs is the documented limit — must be accepted."""
+        ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE)]
+        req = BulkDeleteRequest(ids)
+        assert len(req.root) == MAX_BULK_DELETE
+
+    def test_501_ids_raises_validation_error(self):
+        """501 IDs must be rejected by Pydantic before reaching any SQL."""
+        ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE + 1)]
+        with pytest.raises(ValidationError) as exc_info:
+            BulkDeleteRequest(ids)
+        errors = exc_info.value.errors()
+        assert any(e["type"] in ("too_long", "value_error") for e in errors), (
+            f"Expected a list-length validation error, got: {errors}"
+        )
+
+    def test_1000_ids_raises_validation_error(self):
+        """1000 IDs (the original crash threshold) must also be rejected."""
+        ids = [str(uuid.uuid4()) for _ in range(1000)]
+        with pytest.raises(ValidationError):
+            BulkDeleteRequest(ids)
+
+
+# 2. Integration — HTTP endpoint boundary tests
+
+class TestBulkDeleteEndpointLimits:
+
+    @pytest.mark.asyncio
+    async def test_empty_list_returns_200(self, app_client):
+        """[] must return 200 with deleted_count=0 — not 422."""
+        resp = await app_client.request("DELETE", ENDPOINT, json=[])
+        assert resp.status_code == 200, resp.text
+        body = resp.json()
+        assert body["success"] is True
+        assert body["deleted_count"] == 0
+
+    @pytest.mark.asyncio
+    async def test_500_ids_accepted(self, app_client):
+        """500 IDs (at the limit) must be accepted with 200."""
+        # Use random UUIDs — they won't exist in DB so deleted_count=0,
+        # but the endpoint must NOT reject the request with 422.
+        ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE)]
+        resp = await app_client.request("DELETE", ENDPOINT, json=ids)
+        assert resp.status_code == 200, (
+            f"Expected 200 for {MAX_BULK_DELETE} IDs, got {resp.status_code}: {resp.text}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_501_ids_rejected_with_422(self, app_client):
+        """501 IDs (one over the limit) must be rejected with 422."""
+        ids = [str(uuid.uuid4()) for _ in range(MAX_BULK_DELETE + 1)]
+        resp = await app_client.request("DELETE", ENDPOINT, json=ids)
+        assert resp.status_code == 422, (
+            f"Expected 422 for {MAX_BULK_DELETE + 1} IDs, got {resp.status_code}: {resp.text}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_response_shape_on_success(self, app_client):
+        """Success response must always include both 'success' and 'deleted_count'."""
+        resp = await app_client.request("DELETE", ENDPOINT, json=[])
+        body = resp.json()
+        assert "success" in body
+        assert "deleted_count" in body
+
+
+# 3. Unit test — delete_task_records() chunking
+
+class TestDeleteTaskRecordsChunking:
+    """
+    Verifies that delete_task_records() never passes more than
+    SQLITE_CHUNK_SIZE IDs in a single SQL statement.
+
+    We mock db.execute and db.fetchall to capture the actual placeholder
+    strings produced, then assert that no call ever exceeds the chunk limit.
+    This is the core regression guard for the SQLite variable-number crash.
+    """
+
+    @pytest.mark.asyncio
+    async def test_single_chunk_does_not_exceed_sqlite_limit(self):
+        """
+        For a list of exactly SQLITE_CHUNK_SIZE IDs, one chunk is produced
+        and the placeholder count stays within the SQLite variable limit.
+        """
+        from backend.secuscan.routes import delete_task_records
+
+        ids = [str(uuid.uuid4()) for _ in range(SQLITE_CHUNK_SIZE)]
+        captured_sql: list[str] = []
+
+        mock_db = AsyncMock()
+        mock_db.fetchall = AsyncMock(return_value=[])
+        async def capture_execute(sql, params=()):
+            captured_sql.append(sql)
+        mock_db.execute = capture_execute
+
+        with patch("backend.secuscan.routes.get_db", return_value=mock_db):
+            await delete_task_records(ids)
+
+        for sql in captured_sql:
+            placeholder_count = sql.count("?")
+            assert placeholder_count <= SQLITE_CHUNK_SIZE, (
+                f"SQL exceeded SQLITE_CHUNK_SIZE={SQLITE_CHUNK_SIZE}: "
+                f"{placeholder_count} placeholders in:\n{sql}"
+            )
+
+    @pytest.mark.asyncio
+    async def test_multi_chunk_splits_correctly(self):
+        """
+        For SQLITE_CHUNK_SIZE + 1 IDs, the helper must issue at least two
+        batches — no single SQL call may hold all IDs at once.
+
+        This directly reproduces the pre-fix crash path:
+          OperationalError: too many SQL variables
+        """
+        from backend.secuscan.routes import delete_task_records
+
+        total = SQLITE_CHUNK_SIZE + 1  # forces exactly 2 chunks
+        ids = [str(uuid.uuid4()) for _ in range(total)]
+        captured_sql: list[str] = []
+        captured_params: list[tuple] = []
+
+        mock_db = AsyncMock()
+        mock_db.fetchall = AsyncMock(return_value=[])
+        async def capture_execute(sql, params=()):
+            captured_sql.append(sql)
+            captured_params.append(params)
+        mock_db.execute = capture_execute
+
+        with patch("backend.secuscan.routes.get_db", return_value=mock_db):
+            await delete_task_records(ids)
+
+        # Every single execute call must respect the chunk size
+        for sql, params in zip(captured_sql, captured_params):
+            placeholder_count = sql.count("?")
+            assert placeholder_count <= SQLITE_CHUNK_SIZE, (
+                f"A single SQL call had {placeholder_count} placeholders "
+                f"(limit={SQLITE_CHUNK_SIZE}):\n{sql}"
+            )
+            assert len(params) <= SQLITE_CHUNK_SIZE, (
+                f"A single SQL call had {len(params)} bound params "
+                f"(limit={SQLITE_CHUNK_SIZE})"
+            )
+
+        # Sanity: the helper must have issued more than one DELETE per table
+        delete_tasks_calls = [s for s in captured_sql if "DELETE FROM tasks" in s]
+        assert len(delete_tasks_calls) >= 2, (
+            f"Expected ≥2 DELETE FROM tasks batches for {total} IDs, "
+            f"got {len(delete_tasks_calls)}"
+        )
+
+    @pytest.mark.asyncio
+    async def test_empty_list_returns_immediately(self):
+        """delete_task_records([]) must return without touching the database."""
+        from backend.secuscan.routes import delete_task_records
+
+        mock_db = AsyncMock()
+
+        with patch("backend.secuscan.routes.get_db", return_value=mock_db):
+            await delete_task_records([])
+
+        mock_db.execute.assert_not_called()
+        mock_db.fetchall.assert_not_called()