utksh1 · KaparthyReddy · May 23, 2026 · May 23, 2026 · May 23, 2026
diff --git a/backend/secuscan/database.py b/backend/secuscan/database.py
@@ -171,10 +171,34 @@ async def _create_schema(self):
                 last_run_at TIMESTAMP
             );
 
+            -- Tasks indexes (existing)
             CREATE INDEX IF NOT EXISTS idx_tasks_created ON tasks(created_at);
             CREATE INDEX IF NOT EXISTS idx_tasks_target ON tasks(target);
             CREATE INDEX IF NOT EXISTS idx_tasks_status ON tasks(status);
             CREATE INDEX IF NOT EXISTS idx_tasks_plugin ON tasks(plugin_id);
+            -- Composite index for dashboard running tasks query
+            CREATE INDEX IF NOT EXISTS idx_tasks_status_created ON tasks(status, created_at DESC);
+
+            -- Findings indexes (new)
+            CREATE INDEX IF NOT EXISTS idx_findings_severity ON findings(severity);
+            CREATE INDEX IF NOT EXISTS idx_findings_task_id ON findings(task_id);
+            CREATE INDEX IF NOT EXISTS idx_findings_discovered_at ON findings(discovered_at DESC);
+            CREATE INDEX IF NOT EXISTS idx_findings_plugin_id ON findings(plugin_id);
+            CREATE INDEX IF NOT EXISTS idx_findings_target ON findings(target);
+            -- Composite index for severity counting by task
+            CREATE INDEX IF NOT EXISTS idx_findings_task_severity ON findings(task_id, severity);
+
+            -- Reports indexes (new)
+            CREATE INDEX IF NOT EXISTS idx_reports_task_id ON reports(task_id);
+            CREATE INDEX IF NOT EXISTS idx_reports_generated_at ON reports(generated_at DESC);
+            CREATE INDEX IF NOT EXISTS idx_reports_status ON reports(status);
+
+            -- Audit log indexes (new)
+            CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_log(timestamp DESC);
+            CREATE INDEX IF NOT EXISTS idx_audit_event_type ON audit_log(event_type);
+            CREATE INDEX IF NOT EXISTS idx_audit_task_id ON audit_log(task_id);
+
+            -- Workflows index (existing)
             CREATE INDEX IF NOT EXISTS idx_workflows_enabled ON workflows(enabled);
             """
         )

diff --git a/backend/secuscan/migrations/001_add_performance_indexes.sql b/backend/secuscan/migrations/001_add_performance_indexes.sql
@@ -0,0 +1,31 @@
+-- Migration: 001_add_performance_indexes
+-- Adds missing indexes to findings, reports, and audit_log tables
+-- and a composite index on tasks for dashboard query performance.
+-- Every statement is CREATE INDEX IF NOT EXISTS, so re-applying this file is harmless; _create_schema in database.py creates the same indexes.
+-- Query plans improved:
+--   - Dashboard severity counts: full table scan → indexed GROUP BY on findings.severity
+--   - Dashboard running tasks: status filter + separate sort → idx_tasks_status_created (filter and ORDER BY created_at DESC from one index)
+--   - Findings list: unindexed ORDER BY → idx_findings_discovered_at
+--   - Reports list: unindexed ORDER BY → idx_reports_generated_at
+--   - Audit log lookups: unindexed → idx_audit_timestamp, idx_audit_event_type
+
+-- Tasks
+CREATE INDEX IF NOT EXISTS idx_tasks_status_created ON tasks(status, created_at DESC); -- NOTE(review): existing idx_tasks_status is a leftmost prefix of this index; confirm both are needed
+
+-- Findings
+CREATE INDEX IF NOT EXISTS idx_findings_severity ON findings(severity);
+CREATE INDEX IF NOT EXISTS idx_findings_task_id ON findings(task_id); -- NOTE(review): leftmost prefix of idx_findings_task_severity below; may be redundant
+CREATE INDEX IF NOT EXISTS idx_findings_discovered_at ON findings(discovered_at DESC);
+CREATE INDEX IF NOT EXISTS idx_findings_plugin_id ON findings(plugin_id);
+CREATE INDEX IF NOT EXISTS idx_findings_target ON findings(target);
+CREATE INDEX IF NOT EXISTS idx_findings_task_severity ON findings(task_id, severity); -- composite: per-task severity counts
+
+-- Reports
+CREATE INDEX IF NOT EXISTS idx_reports_task_id ON reports(task_id);
+CREATE INDEX IF NOT EXISTS idx_reports_generated_at ON reports(generated_at DESC);
+CREATE INDEX IF NOT EXISTS idx_reports_status ON reports(status);
+
+-- Audit log
+CREATE INDEX IF NOT EXISTS idx_audit_timestamp ON audit_log(timestamp DESC);
+CREATE INDEX IF NOT EXISTS idx_audit_event_type ON audit_log(event_type);
+CREATE INDEX IF NOT EXISTS idx_audit_task_id ON audit_log(task_id);
diff --git a/backend/secuscan/routes.py b/backend/secuscan/routes.py
@@ -592,8 +592,15 @@ async def build():
         db = await get_db()
 
         # Get data
-        raw_findings = await db.fetchall("SELECT * FROM findings ORDER BY discovered_at DESC")
-        findings = parse_json_fields(raw_findings, ["metadata_json"])
+        # Push severity aggregation into the DB so we no longer load every findings row into Python
+        severity_rows = await db.fetchall(
+            """
+            SELECT severity, COUNT(*) AS cnt
+            FROM findings
+            GROUP BY severity
+            """
+        )
+        severity_counts = {row["severity"]: row["cnt"] for row in severity_rows}
 
         task_stats = await db.fetchone(
             """
@@ -605,27 +612,35 @@ async def build():
             """
         )
 
-        critical_findings: int = sum(bool(item.get("severity") == "critical")
-                                 for item in findings)
-        high_findings: int = sum(bool(item.get("severity") == "high")
-                             for item in findings)
-        medium_findings: int = sum(bool(item.get("severity") == "medium")
-                               for item in findings)
-        low_findings: int = sum(bool(item.get("severity") == "low")
-                            for item in findings)
-        info_findings: int = sum(bool(item.get("severity") == "info")
-                             for item in findings)
+        total_findings_row = await db.fetchone("SELECT COUNT(*) AS total FROM findings")
+        total_findings = total_findings_row["total"] if total_findings_row else 0
+
+        critical_findings: int = severity_counts.get("critical", 0)
+        high_findings: int = severity_counts.get("high", 0)
+        medium_findings: int = severity_counts.get("medium", 0)
+        low_findings: int = severity_counts.get("low", 0)
+        info_findings: int = severity_counts.get("info", 0)
 
-        recent_findings: List[Dict] = findings[:5]
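+        # Fetch only the 5 most recent findings, not the entire table. NOTE(review): unlike the old SELECT *, this column list omits task_id and plugin_id; confirm no consumer of recent_findings needs them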
+        recent_rows = await db.fetchall(
+            """
+            SELECT id, title, category, severity, target, description,
+                remediation, proof, cvss, cve, discovered_at, metadata_json
+            FROM findings
+            ORDER BY discovered_at DESC
+            LIMIT 5
+            """
+        )
+        recent_findings: List[Dict] = parse_json_fields(recent_rows, ["metadata_json"])
 
         return {
-            "total_findings": len(findings),
+            "total_findings": total_findings,
             "critical_findings": critical_findings,
             "high_findings": high_findings,
             "medium_findings": medium_findings,
             "low_findings": low_findings,
             "info_findings": info_findings,
-            "last_scan_time": findings[0].get("discovered_at") if findings else None,
+            "last_scan_time": recent_findings[0].get("discovered_at") if recent_findings else None,
             "recent_findings": recent_findings,
             "scan_activity": {
                 "total": int(task_stats["total"]) if task_stats and task_stats.get("total") is not None else 0,

diff --git a/scripts/benchmark_db.py b/scripts/benchmark_db.py
@@ -0,0 +1,178 @@
+#!/usr/bin/env python3
+"""
+Benchmark: dashboard hot-path query performance against the indexed schema.
+
+Usage:
+    python scripts/benchmark_db.py
+
+Seeds a temporary SQLite database with 10,000 findings and 1,000 tasks,
+then measures query execution time for the dashboard hot paths.
+
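+Output is the EXPLAIN QUERY PLAN of each hot-path query, then its avg/min/max latency; no un-indexed baseline is run, so compare against a checkout without the new indexes to see the improvement.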
+"""
+
+import asyncio
+import json
+import sqlite3
+import sys
+import tempfile
+import time
+import uuid
+from datetime import datetime, timedelta
+from pathlib import Path
+
+# Add repo root to path so `backend.secuscan` can be imported when this file is run as a script
+repo_root = Path(__file__).resolve().parents[1]  # parent of the directory containing this script
+sys.path.insert(0, str(repo_root))
+
+
+SEVERITIES = ["critical", "high", "medium", "low", "info"]  # seed_database cycles through these by row index
+STATUSES = ["queued", "running", "completed", "failed"]  # seed_database cycles through these by row index
+
+
+def seed_database(db_path: str, findings_count: int = 10_000, tasks_count: int = 1_000):
+    """Insert tasks_count rows into tasks and findings_count rows into findings in the SQLite file at db_path, then commit; both tables must already exist."""
+    print(f"Seeding {findings_count} findings and {tasks_count} tasks...")
+    conn = sqlite3.connect(db_path)  # NOTE(review): not closed if one of the INSERTs below raises
+
+    # Seed tasks: target and status cycle with i; created_at moves one second further back per row
+    for i in range(tasks_count):
+        conn.execute(
+            """
+            INSERT INTO tasks
+                (id, plugin_id, tool_name, target, status, created_at, inputs_json)
+            VALUES (?, ?, ?, ?, ?, ?, ?)
+            """,
+            (
+                str(uuid.uuid4()),  # id
+                "http_inspector",  # plugin_id
+                "http_inspector",  # tool_name
+                f"192.168.1.{i % 255}",  # target: last octet cycles 0-254
+                STATUSES[i % len(STATUSES)],  # status: even spread across the four values
+                (datetime.utcnow() - timedelta(seconds=i)).isoformat(),  # created_at: naive UTC, ISO-8601 text
+                json.dumps({"target": f"192.168.1.{i % 255}"}),  # inputs_json
+            ),
+        )
+
+    # Seed findings: severity and target cycle with i; discovered_at moves one second further back per row
+    for i in range(findings_count):
+        conn.execute(
+            """
+            INSERT INTO findings
+                (id, task_id, plugin_id, title, category, severity,
+                 target, description, remediation, discovered_at, metadata_json)
+            VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            """,
+            (
+                str(uuid.uuid4()),  # id
+                str(uuid.uuid4()),  # task_id: freshly generated, so it matches none of the task rows seeded above
+                "http_inspector",  # plugin_id
+                f"Finding {i}",  # title
+                "web",  # category
+                SEVERITIES[i % len(SEVERITIES)],  # severity: even spread across the five values
+                f"192.168.1.{i % 255}",  # target
+                f"Description {i}",  # description
+                "Apply patch",  # remediation
+                (datetime.utcnow() - timedelta(seconds=i)).isoformat(),  # discovered_at: naive UTC, ISO-8601 text
+                json.dumps({}),  # metadata_json: empty object
+            ),
+        )
+
+    conn.commit()  # one commit after all inserts
+    conn.close()
+    print("Seeding complete.\n")
+
+
+def benchmark_query(label: str, db_path: str, query: str, params: tuple = (), runs: int = 10):
+    """Execute query (bound with params) `runs` times on one new connection to db_path, print avg/min/max wall time in ms under label, and return the average in ms."""
+    conn = sqlite3.connect(db_path)
+    times = []  # elapsed seconds, one entry per run
+    for _ in range(runs):
+        start = time.perf_counter()
+        conn.execute(query, params).fetchall()  # fetchall() keeps row retrieval inside the timed region
+        times.append(time.perf_counter() - start)
+    conn.close()
+    avg_ms = (sum(times) / len(times)) * 1000  # NOTE(review): raises ZeroDivisionError when runs == 0
+    min_ms = min(times) * 1000
+    max_ms = max(times) * 1000
+    print(f"  {label}")
+    print(f"    avg={avg_ms:.2f}ms  min={min_ms:.2f}ms  max={max_ms:.2f}ms")
+    return avg_ms
+
+
+def explain_query(label: str, db_path: str, query: str):
+    """Print, under a heading that includes label, each row SQLite returns for EXPLAIN QUERY PLAN on query against db_path."""
+    conn = sqlite3.connect(db_path)
+    plan = conn.execute(f"EXPLAIN QUERY PLAN {query}").fetchall()  # query is interpolated verbatim; main() only passes fixed literals
+    conn.close()
+    print(f"\n  EXPLAIN QUERY PLAN — {label}")
+    for row in plan:
+        print(f"    {row}")  # printed as the raw result tuple
+
+
+def main():  # Create a throwaway DB through the project's Database class, seed it, then print query plans and timings
+    with tempfile.TemporaryDirectory() as tmp:  # the database file is removed with the directory on exit
+        db_path = f"{tmp}/benchmark.db"
+
+        # Initialize schema (with indexes); presumably connect() creates the tables, TODO confirm. NOTE(review): nothing here closes what connect() opened
+        from backend.secuscan.database import Database
+        asyncio.run(Database(db_path).connect())
+
+        seed_database(db_path, findings_count=10_000, tasks_count=1_000)
+
+        print("=" * 60)
+        print("QUERY PLAN ANALYSIS (SQLite EXPLAIN QUERY PLAN)")
+        print("=" * 60)
+
+        explain_query(
+            "Severity GROUP BY (optimized dashboard)",
+            db_path,
+            "SELECT severity, COUNT(*) AS cnt FROM findings GROUP BY severity",
+        )
+        explain_query(
+            "Recent findings LIMIT 5",
+            db_path,
+            "SELECT id, title, severity, discovered_at FROM findings ORDER BY discovered_at DESC LIMIT 5",
+        )
+        explain_query(
+            "Running tasks (composite index)",
+            db_path,
+            "SELECT id, tool_name, target FROM tasks WHERE status = 'running' ORDER BY created_at DESC LIMIT 5",
+        )
+
+        print("\n")
+        print("=" * 60)
+        print("BENCHMARK RESULTS (10,000 findings, 1,000 tasks, 10 runs)")  # figures are hard-coded; keep in sync with the seed_database call above and benchmark_query's runs default
+        print("=" * 60)
+
+        benchmark_query(
+            "Severity GROUP BY (optimized — DB aggregation)",
+            db_path,
+            "SELECT severity, COUNT(*) AS cnt FROM findings GROUP BY severity",
+        )
+        benchmark_query(
+            "Recent findings LIMIT 5",
+            db_path,
+            "SELECT id, title, severity, discovered_at FROM findings ORDER BY discovered_at DESC LIMIT 5",
+        )
+        benchmark_query(
+            "Running tasks with composite index",
+            db_path,
+            "SELECT id, tool_name, target, status, created_at FROM tasks WHERE status = 'running' ORDER BY created_at DESC LIMIT 5",  # NOTE(review): selects more columns than the running-tasks query explained above
+        )
+        benchmark_query(
+            "Total findings COUNT(*)",
+            db_path,
+            "SELECT COUNT(*) FROM findings",
+        )
+        benchmark_query(
+            "Task stats GROUP BY status",
+            db_path,
+            "SELECT status, COUNT(*) FROM tasks GROUP BY status",
+        )
+
+        print("\nBenchmark complete.")
+
+
+if __name__ == "__main__":
+    main()