From 1bc86d88e5654464a0ece92d51fc6a8e44106ad3 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 22 Apr 2026 16:48:54 -0400 Subject: [PATCH 1/5] feat(evidence-export): include task attachments and stream large ZIPs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "Export All Evidence" button only bundled automation runs — user-uploaded files were missing. This blocked a customer acquisition deal (CS-279) where the prospect required a full evidence bundle with attachments. Extend /v1/tasks/:id/evidence/export and /v1/evidence-export/all to also include task attachments, and swap adm-zip for archiver so bundles stream to the response instead of buffering the whole ZIP in RAM. - Task-entity attachments streamed from S3 into 01-attachments/ per task, placed before automation PDFs (human evidence first, system proof second). - Missing S3 objects produce _MISSING_.txt placeholders so the bundle stays auditable instead of 500ing mid-stream. - Filename collisions disambiguated with numeric suffixes. - Org-wide export now includes tasks that have only attachments (previously required at least one automation run). - Summary PDF shows attachment count. Scope confirmed with Dustin: tasks YES; vendor/risk/comment attachments, findings, risk register, KB docs — NO (fast follow via optional toggles). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../evidence-attachment-streamer.ts | 138 +++++ .../evidence-export.controller.spec.ts | 210 +++++++ .../evidence-export.controller.ts | 49 +- .../evidence-export.service.spec.ts | 355 ++++++++++++ .../evidence-export.service.ts | 544 +++++++++++------- .../evidence-export/evidence-export.types.ts | 15 +- .../evidence-export/evidence-pdf-generator.ts | 13 +- 7 files changed, 1108 insertions(+), 216 deletions(-) create mode 100644 apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts create mode 100644 apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts create mode 100644 apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts diff --git a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts new file mode 100644 index 0000000000..9ee5e31808 --- /dev/null +++ b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts @@ -0,0 +1,138 @@ +/** + * Evidence Attachment Streamer + * Fetches task attachments and streams them from S3 directly into a ZIP archive. + */ + +import { GetObjectCommand } from '@aws-sdk/client-s3'; +import { Logger } from '@nestjs/common'; +import type { Archiver } from 'archiver'; +import { Readable } from 'node:stream'; +import { db } from '@db'; +import { AttachmentEntityType, type Attachment } from '@db'; +import { BUCKET_NAME, s3Client } from '../../app/s3'; + +const logger = new Logger('EvidenceAttachmentStreamer'); + +export type TaskAttachment = Pick< + Attachment, + 'id' | 'name' | 'url' | 'type' | 'createdAt' +>; + +/** + * Fetch attachments uploaded directly to a task. + * Task-items hold vendor/risk attachments (per `TaskItemEntityType`), so they're + * intentionally excluded here — this is the task-evidence scope only. + */ +export async function getTaskAttachments( + organizationId: string, + taskId: string, +): Promise { + return db.attachment.findMany({ + where: { + organizationId, + entityType: AttachmentEntityType.task, + entityId: taskId, + }, + select: { + id: true, + name: true, + url: true, + type: true, + createdAt: true, + }, + orderBy: { createdAt: 'asc' }, + }); +} + +/** + * Create a case-insensitive filename tracker that disambiguates collisions by + * inserting a numeric suffix before the extension. Scoped per directory. + */ +export function createFilenameTracker(): (rawName: string) => string { + const used = new Set(); + return (rawName: string) => { + const sanitized = (rawName || 'file') + .replace(/[\\/]/g, '_') + .replace(/\s+/g, ' ') + .trim(); + const dot = sanitized.lastIndexOf('.'); + const base = dot > 0 ? sanitized.slice(0, dot) : sanitized; + const ext = dot > 0 ? sanitized.slice(dot) : ''; + let candidate = `${base}${ext}`; + let i = 1; + while (used.has(candidate.toLowerCase())) { + candidate = `${base} (${i})${ext}`; + i += 1; + } + used.add(candidate.toLowerCase()); + return candidate; + }; +} + +/** + * Append a single attachment to the archive by streaming its S3 body. + * If the object is missing (deleted from S3 but DB row still exists), writes a + * plaintext placeholder so the bundle remains auditable instead of failing. + */ +export async function appendAttachmentToArchive(params: { + archive: Archiver; + attachment: TaskAttachment; + folderPath: string; + uniqueName: (rawName: string) => string; +}): Promise { + const { archive, attachment, folderPath, uniqueName } = params; + + if (!s3Client || !BUCKET_NAME) { + logger.warn( + `S3 client unavailable — attachment ${attachment.id} skipped with placeholder`, + ); + archive.append( + buildMissingPlaceholder(attachment, 'S3 client not configured'), + { name: `${folderPath}/_MISSING_${uniqueName(attachment.name)}.txt` }, + ); + return; + } + + try { + const response = await s3Client.send( + new GetObjectCommand({ + Bucket: BUCKET_NAME, + Key: attachment.url, + }), + ); + + if (!response.Body) { + throw new Error('S3 returned no body'); + } + + const bodyStream = + response.Body instanceof Readable + ? response.Body + : Readable.from(response.Body as AsyncIterable); + + archive.append(bodyStream, { + name: `${folderPath}/${uniqueName(attachment.name)}`, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + logger.warn( + `Missing S3 object for attachment ${attachment.id} (key=${attachment.url}): ${message}`, + ); + archive.append(buildMissingPlaceholder(attachment, message), { + name: `${folderPath}/_MISSING_${uniqueName(attachment.name)}.txt`, + }); + } +} + +function buildMissingPlaceholder( + attachment: TaskAttachment, + reason: string, +): string { + return [ + `Attachment missing from storage.`, + `attachmentId: ${attachment.id}`, + `originalName: ${attachment.name}`, + `s3Key: ${attachment.url}`, + `reason: ${reason}`, + ].join('\n'); +} diff --git a/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts new file mode 100644 index 0000000000..bb37d54ea4 --- /dev/null +++ b/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts @@ -0,0 +1,210 @@ +// Mocks must be declared before any SUT import so guards' transitive deps +// (Prisma, better-auth) don't instantiate in Jest. +jest.mock('@db', () => ({ + ...jest.requireActual('@prisma/client'), + db: {}, + Prisma: { + PrismaClientKnownRequestError: class PrismaClientKnownRequestError extends Error { + code: string; + constructor(message: string, { code }: { code: string }) { + super(message); + this.code = code; + } + }, + }, +})); + +jest.mock('../../auth/auth.server', () => ({ + auth: { + api: { + getSession: jest.fn(), + }, + }, +})); + +jest.mock('@trycompai/auth', () => ({ + statement: {}, + BUILT_IN_ROLE_PERMISSIONS: {}, +})); + +import { Test } from '@nestjs/testing'; +import { EventEmitter } from 'node:events'; +import { HybridAuthGuard } from '../../auth/hybrid-auth.guard'; +import { PermissionGuard } from '../../auth/permission.guard'; +import { + AuditorEvidenceExportController, + EvidenceExportController, +} from './evidence-export.controller'; +import { EvidenceExportService } from './evidence-export.service'; +import { TasksService } from '../tasks.service'; + +function makeFakeArchive() { + const emitter = new EventEmitter(); + const archive = Object.assign(emitter, { + pipe: jest.fn(), + append: jest.fn(), + finalize: jest.fn(), + abort: jest.fn(), + }); + return archive; +} + +function makeFakeResponse() { + const res: { + setHeader: jest.Mock; + status: jest.Mock; + end: jest.Mock; + headersSent: boolean; + } = { + setHeader: jest.fn(), + status: jest.fn(() => res), + end: jest.fn(), + headersSent: false, + }; + return res; +} + +describe('EvidenceExportController', () => { + let controller: EvidenceExportController; + let service: jest.Mocked< + Pick< + EvidenceExportService, + 'streamTaskEvidenceZip' | 'exportAutomationPDF' | 'getTaskEvidenceSummary' + > + >; + let tasks: jest.Mocked>; + + beforeEach(async () => { + service = { + streamTaskEvidenceZip: jest.fn(), + exportAutomationPDF: jest.fn(), + getTaskEvidenceSummary: jest.fn(), + }; + tasks = { + verifyTaskAccess: jest.fn().mockResolvedValue(undefined), + }; + + const moduleRef = await Test.createTestingModule({ + controllers: [EvidenceExportController], + providers: [ + { provide: EvidenceExportService, useValue: service }, + { provide: TasksService, useValue: tasks }, + ], + }) + .overrideGuard(HybridAuthGuard) + .useValue({ canActivate: () => true }) + .overrideGuard(PermissionGuard) + .useValue({ canActivate: () => true }) + .compile(); + + controller = moduleRef.get(EvidenceExportController); + }); + + it('verifies task access, sets zip headers, and pipes archive to response', async () => { + const archive = makeFakeArchive(); + service.streamTaskEvidenceZip.mockResolvedValue({ + archive: archive as unknown as import('archiver').Archiver, + filename: 'acme_mytask_evidence_2026-04-22.zip', + }); + const res = makeFakeResponse(); + + await controller.exportTaskEvidenceZip( + 'org_1', + 'tsk_1', + 'true', + res as unknown as import('express').Response, + ); + + expect(tasks.verifyTaskAccess).toHaveBeenCalledWith('org_1', 'tsk_1'); + expect(service.streamTaskEvidenceZip).toHaveBeenCalledWith( + 'org_1', + 'tsk_1', + { includeRawJson: true }, + ); + expect(res.setHeader).toHaveBeenCalledWith( + 'Content-Type', + 'application/zip', + ); + expect(res.setHeader).toHaveBeenCalledWith( + 'Content-Disposition', + `attachment; filename="acme_mytask_evidence_2026-04-22.zip"`, + ); + expect(archive.pipe).toHaveBeenCalledWith(res); + }); + + it('treats missing includeJson query as false', async () => { + const archive = makeFakeArchive(); + service.streamTaskEvidenceZip.mockResolvedValue({ + archive: archive as unknown as import('archiver').Archiver, + filename: 'f.zip', + }); + const res = makeFakeResponse(); + + await controller.exportTaskEvidenceZip( + 'org_1', + 'tsk_1', + undefined as unknown as string, + res as unknown as import('express').Response, + ); + + expect(service.streamTaskEvidenceZip).toHaveBeenCalledWith( + 'org_1', + 'tsk_1', + { includeRawJson: false }, + ); + }); +}); + +describe('AuditorEvidenceExportController', () => { + let controller: AuditorEvidenceExportController; + let service: jest.Mocked< + Pick + >; + + beforeEach(async () => { + service = { + streamOrganizationEvidenceZip: jest.fn(), + }; + + const moduleRef = await Test.createTestingModule({ + controllers: [AuditorEvidenceExportController], + providers: [{ provide: EvidenceExportService, useValue: service }], + }) + .overrideGuard(HybridAuthGuard) + .useValue({ canActivate: () => true }) + .overrideGuard(PermissionGuard) + .useValue({ canActivate: () => true }) + .compile(); + + controller = moduleRef.get(AuditorEvidenceExportController); + }); + + it('pipes the org-wide archive to response with correct headers', async () => { + const archive = makeFakeArchive(); + service.streamOrganizationEvidenceZip.mockResolvedValue({ + archive: archive as unknown as import('archiver').Archiver, + filename: 'acme_all-evidence_2026-04-22.zip', + }); + const res = makeFakeResponse(); + + await controller.exportAllEvidence( + 'org_1', + 'true', + res as unknown as import('express').Response, + ); + + expect(service.streamOrganizationEvidenceZip).toHaveBeenCalledWith( + 'org_1', + { includeRawJson: true }, + ); + expect(res.setHeader).toHaveBeenCalledWith( + 'Content-Type', + 'application/zip', + ); + expect(res.setHeader).toHaveBeenCalledWith( + 'Content-Disposition', + `attachment; filename="acme_all-evidence_2026-04-22.zip"`, + ); + expect(archive.pipe).toHaveBeenCalledWith(res); + }); +}); diff --git a/apps/api/src/tasks/evidence-export/evidence-export.controller.ts b/apps/api/src/tasks/evidence-export/evidence-export.controller.ts index bb6e0c461d..6756260967 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.controller.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.controller.ts @@ -173,18 +173,29 @@ export class EvidenceExportController { }); await this.tasksService.verifyTaskAccess(organizationId, taskId); - const result = await this.evidenceExportService.exportTaskEvidenceZip( - organizationId, - taskId, - { includeRawJson: includeJson === 'true' }, - ); + const { archive, filename } = + await this.evidenceExportService.streamTaskEvidenceZip( + organizationId, + taskId, + { includeRawJson: includeJson === 'true' }, + ); - res.setHeader('Content-Type', result.mimeType); + res.setHeader('Content-Type', 'application/zip'); res.setHeader( 'Content-Disposition', - `attachment; filename="${result.filename}"`, + `attachment; filename="${filename}"`, ); - res.send(result.fileBuffer); + + archive.on('error', (err) => { + this.logger.error(`Archive stream error for task ${taskId}: ${err.message}`); + if (!res.headersSent) { + res.status(500).end(); + } else { + res.end(); + } + }); + + archive.pipe(res); } } @@ -238,17 +249,29 @@ export class AuditorEvidenceExportController { includeJson: includeJson === 'true', }); - const result = - await this.evidenceExportService.exportOrganizationEvidenceZip( + const { archive, filename } = + await this.evidenceExportService.streamOrganizationEvidenceZip( organizationId, { includeRawJson: includeJson === 'true' }, ); - res.setHeader('Content-Type', result.mimeType); + res.setHeader('Content-Type', 'application/zip'); res.setHeader( 'Content-Disposition', - `attachment; filename="${result.filename}"`, + `attachment; filename="${filename}"`, ); - res.send(result.fileBuffer); + + archive.on('error', (err) => { + this.logger.error( + `Archive stream error for org ${organizationId}: ${err.message}`, + ); + if (!res.headersSent) { + res.status(500).end(); + } else { + res.end(); + } + }); + + archive.pipe(res); } } diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts new file mode 100644 index 0000000000..95ba24b6f1 --- /dev/null +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts @@ -0,0 +1,355 @@ +import { NotFoundException } from '@nestjs/common'; + +// ------- Mock archiver ------- +// Collect append() calls on a simple harness. finalize() resolves a shared +// deferred so tests can await populate completion. +interface MockArchive { + appendCalls: Array<{ source: unknown; options: { name: string } }>; + append: jest.Mock; + finalize: jest.Mock; + abort: jest.Mock; + on: jest.Mock; + pipe: jest.Mock; + finalized: Promise; + aborted: boolean; +} + +const archiveInstances: MockArchive[] = []; + +function createMockArchive(): MockArchive { + let resolveFinalized!: () => void; + let rejectFinalized!: (err: Error) => void; + const finalized = new Promise((resolve, reject) => { + resolveFinalized = resolve; + rejectFinalized = reject; + }); + + const archive: MockArchive = { + appendCalls: [], + append: jest.fn((source, options) => { + archive.appendCalls.push({ source, options }); + return archive; + }), + finalize: jest.fn(async () => { + resolveFinalized(); + }), + abort: jest.fn(() => { + archive.aborted = true; + rejectFinalized(new Error('aborted')); + }), + on: jest.fn(() => archive), + pipe: jest.fn(() => archive), + finalized, + aborted: false, + }; + return archive; +} + +jest.mock('archiver', () => { + return jest.fn(() => { + const instance = createMockArchive(); + archiveInstances.push(instance); + return instance; + }); +}); + +// ------- Mock S3 ------- +jest.mock('../../app/s3', () => ({ + BUCKET_NAME: 'test-bucket', + s3Client: { + send: jest.fn(), + }, + getSignedUrl: jest.fn(), +})); + +// ------- Mock @db ------- +const mockDb = { + task: { findFirst: jest.fn(), findMany: jest.fn() }, + integrationCheckRun: { findMany: jest.fn() }, + evidenceAutomationRun: { findMany: jest.fn() }, + attachment: { findMany: jest.fn() }, + organization: { findUnique: jest.fn() }, +}; + +jest.mock('@db', () => ({ + db: mockDb, + AttachmentEntityType: { + task: 'task', + vendor: 'vendor', + risk: 'risk', + comment: 'comment', + trust_nda: 'trust_nda', + task_item: 'task_item', + }, +})); + +// ------- Mock PDF generator to avoid heavy jsPDF work ------- +jest.mock('./evidence-pdf-generator', () => ({ + generateTaskSummaryPDF: jest.fn(() => Buffer.from('SUMMARY-PDF')), + generateAutomationPDF: jest.fn(() => Buffer.from('AUTOMATION-PDF')), + sanitizeFilename: (name: string) => + name + .toLowerCase() + .replace(/[^a-z0-9]+/g, '-') + .replace(/^-+|-+$/g, '') + .slice(0, 50) || 'export', +})); + +import { GetObjectCommand } from '@aws-sdk/client-s3'; +import { s3Client } from '../../app/s3'; +import { EvidenceExportService } from './evidence-export.service'; +import { generateTaskSummaryPDF } from './evidence-pdf-generator'; + +describe('EvidenceExportService — streaming ZIPs', () => { + let service: EvidenceExportService; + + beforeEach(() => { + jest.clearAllMocks(); + archiveInstances.length = 0; + service = new EvidenceExportService(); + }); + + const taskRow = { + id: 'tsk_123', + title: 'SOC 2 — Access Review', + organization: { name: 'Acme Corp' }, + }; + + function primeTaskQueries({ + attachments = [] as Array<{ + id: string; + name: string; + url: string; + type: string; + createdAt: Date; + }>, + appRuns = [] as unknown[], + customRuns = [] as unknown[], + }) { + mockDb.task.findFirst.mockResolvedValue(taskRow); + mockDb.integrationCheckRun.findMany.mockResolvedValue(appRuns); + mockDb.evidenceAutomationRun.findMany.mockResolvedValue(customRuns); + mockDb.attachment.findMany.mockResolvedValue(attachments); + } + + describe('streamTaskEvidenceZip', () => { + it('throws NotFoundException when task does not exist', async () => { + mockDb.task.findFirst.mockResolvedValue(null); + + await expect( + service.streamTaskEvidenceZip('org_1', 'tsk_missing'), + ).rejects.toBeInstanceOf(NotFoundException); + }); + + it('returns a filename scoped to org + task + date', async () => { + primeTaskQueries({}); + + const { filename } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + + expect(filename).toMatch( + /^acme-corp_soc-2-access-review_evidence_\d{4}-\d{2}-\d{2}\.zip$/, + ); + }); + + it('appends summary PDF then attachments (ordered before automations)', async () => { + const attachments = [ + { + id: 'att_1', + name: 'contract.pdf', + url: 'org_1/attachments/task/tsk_123/123-abc-contract.pdf', + type: 'document', + createdAt: new Date('2024-01-01'), + }, + ]; + + // Mock S3 GetObject to return a small Buffer body + (s3Client!.send as jest.Mock).mockResolvedValue({ + Body: Buffer.from('FAKE-PDF-BYTES'), + }); + + primeTaskQueries({ attachments }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const paths = mock.appendCalls.map((c) => c.options.name); + expect(paths[0]).toBe( + 'acme-corp_soc-2-access-review_evidence/00-summary.pdf', + ); + expect(paths[1]).toBe( + 'acme-corp_soc-2-access-review_evidence/01-attachments/contract.pdf', + ); + + // S3 hit with attachment's S3 key + expect(s3Client!.send).toHaveBeenCalledWith( + expect.any(GetObjectCommand), + ); + + // Summary PDF rendered with attachment count + expect(generateTaskSummaryPDF).toHaveBeenCalledWith( + expect.any(Object), + { attachmentsCount: 1 }, + ); + }); + + it('writes a placeholder when S3 object is missing', async () => { + const attachments = [ + { + id: 'att_missing', + name: 'ghost.pdf', + url: 'org_1/attachments/task/tsk_123/missing-ghost.pdf', + type: 'document', + createdAt: new Date(), + }, + ]; + + (s3Client!.send as jest.Mock).mockRejectedValue( + new Error('NoSuchKey: The specified key does not exist.'), + ); + primeTaskQueries({ attachments }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const placeholder = mock.appendCalls.find((c) => + c.options.name.includes('_MISSING_'), + ); + expect(placeholder).toBeDefined(); + expect(placeholder!.options.name).toMatch(/_MISSING_ghost\.pdf\.txt$/); + expect(String(placeholder!.source)).toContain('att_missing'); + }); + + it('disambiguates duplicate filenames within attachments folder', async () => { + const attachments = [ + { + id: 'att_a', + name: 'report.pdf', + url: 'key-a', + type: 'document', + createdAt: new Date('2024-01-01'), + }, + { + id: 'att_b', + name: 'report.pdf', + url: 'key-b', + type: 'document', + createdAt: new Date('2024-01-02'), + }, + ]; + + (s3Client!.send as jest.Mock).mockResolvedValue({ + Body: Buffer.from('PDF'), + }); + primeTaskQueries({ attachments }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const attachmentPaths = mock.appendCalls + .map((c) => c.options.name) + .filter((p) => p.includes('/01-attachments/')); + + expect(attachmentPaths).toHaveLength(2); + expect(attachmentPaths[0]).toMatch(/\/report\.pdf$/); + expect(attachmentPaths[1]).toMatch(/\/report \(1\)\.pdf$/); + }); + + it('queries only task-entity attachments (not vendor/risk/comment)', async () => { + primeTaskQueries({}); + await service.streamTaskEvidenceZip('org_1', 'tsk_123'); + const mock = archiveInstances[0]; + await mock.finalized; + + expect(mockDb.attachment.findMany).toHaveBeenCalledWith( + expect.objectContaining({ + where: expect.objectContaining({ + organizationId: 'org_1', + entityType: 'task', + entityId: 'tsk_123', + }), + }), + ); + }); + }); + + describe('streamOrganizationEvidenceZip', () => { + it('throws NotFoundException when organization does not exist', async () => { + mockDb.organization.findUnique.mockResolvedValue(null); + + await expect( + service.streamOrganizationEvidenceZip('org_missing'), + ).rejects.toBeInstanceOf(NotFoundException); + }); + + it('aborts archive with NotFoundException when no tasks have content', async () => { + mockDb.organization.findUnique.mockResolvedValue({ name: 'Acme' }); + mockDb.task.findMany.mockResolvedValue([]); + mockDb.attachment.findMany.mockResolvedValue([]); + + const { archive } = await service.streamOrganizationEvidenceZip( + 'org_1', + ); + const mock = archive as unknown as MockArchive; + + await expect(mock.finalized).rejects.toThrow('aborted'); + expect(mock.abort).toHaveBeenCalled(); + }); + + it('includes a task that has attachments but no automations', async () => { + mockDb.organization.findUnique.mockResolvedValue({ name: 'Acme Corp' }); + // findTasksWithEvidence: one has runs, one is distinct attachments-only + mockDb.task.findMany.mockResolvedValue([]); + mockDb.attachment.findMany + // First call: findTasksWithEvidence's distinct entityIds + .mockResolvedValueOnce([{ entityId: 'tsk_att_only' }]) + // Next calls: getTaskAttachments per task + .mockResolvedValue([ + { + id: 'att_1', + name: 'audit.pdf', + url: 'key', + type: 'document', + createdAt: new Date(), + }, + ]); + mockDb.task.findFirst.mockResolvedValue({ + id: 'tsk_att_only', + title: 'Attachments Only Task', + organization: { name: 'Acme Corp' }, + }); + mockDb.integrationCheckRun.findMany.mockResolvedValue([]); + mockDb.evidenceAutomationRun.findMany.mockResolvedValue([]); + (s3Client!.send as jest.Mock).mockResolvedValue({ + Body: Buffer.from('PDF'), + }); + + const { archive, filename } = + await service.streamOrganizationEvidenceZip('org_1'); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + expect(filename).toMatch(/^acme-corp_all-evidence_\d{4}-\d{2}-\d{2}\.zip$/); + + const paths = mock.appendCalls.map((c) => c.options.name); + expect(paths.some((p) => p.endsWith('/manifest.json'))).toBe(true); + expect(paths.some((p) => p.includes('/01-attachments/audit.pdf'))).toBe( + true, + ); + }); + }); +}); diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.ts index 90f4ee5785..7b68e1951b 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.ts @@ -1,11 +1,14 @@ import { Injectable, Logger, NotFoundException } from '@nestjs/common'; import { db } from '@db'; -import AdmZip from 'adm-zip'; +import { AttachmentEntityType } from '@db'; +import archiver, { type Archiver } from 'archiver'; import { format } from 'date-fns'; import { configure as configureStringify } from 'safe-stable-stringify'; import type { TaskEvidenceSummary, EvidenceExportResult, + EvidenceZipStream, + NormalizedAutomation, } from './evidence-export.types'; import { buildTaskEvidenceSummary } from './evidence-normalizer'; import { @@ -14,6 +17,12 @@ import { sanitizeFilename, } from './evidence-pdf-generator'; import { redactSensitiveData } from './evidence-redaction'; +import { + appendAttachmentToArchive, + createFilenameTracker, + getTaskAttachments, + type TaskAttachment, +} from './evidence-attachment-streamer'; // Configure safe stringify to handle BigInt, circular refs, etc. const safeStringify = configureStringify({ @@ -37,7 +46,6 @@ export class EvidenceExportService { organizationId, taskId, }); - // Get task with organization const task = await db.task.findFirst({ where: { id: taskId, @@ -54,7 +62,6 @@ export class EvidenceExportService { throw new NotFoundException('Task not found'); } - // Get app automation runs (integration check runs) const appAutomationRuns = await db.integrationCheckRun.findMany({ where: { taskId }, include: { @@ -68,21 +75,15 @@ export class EvidenceExportService { orderBy: { createdAt: 'desc' }, }); - // Get custom automation runs (evidence automation runs) - // Only include published runs (version !== null), exclude draft runs + // Only include published custom automation runs (version !== null). const customAutomationRuns = await db.evidenceAutomationRun.findMany({ where: { - evidenceAutomation: { - taskId, - }, + evidenceAutomation: { taskId }, version: { not: null }, }, include: { evidenceAutomation: { - select: { - id: true, - name: true, - }, + select: { id: true, name: true }, }, }, orderBy: { createdAt: 'desc' }, @@ -160,7 +161,7 @@ export class EvidenceExportService { } /** - * Export a single automation's evidence as PDF + * Export a single automation's evidence as PDF (small, buffered — no stream). */ async exportAutomationPDF( organizationId: string, @@ -198,109 +199,216 @@ export class EvidenceExportService { } /** - * Export all evidence for a task as a ZIP file + * Stream all evidence for a task as a ZIP. + * Returns a live archiver — caller pipes it to an HTTP response. S3 bodies + * stream through so peak memory ≈ the largest single file. */ - async exportTaskEvidenceZip( + async streamTaskEvidenceZip( organizationId: string, taskId: string, options: { includeRawJson?: boolean } = {}, - ): Promise { - const summary = await this.getTaskEvidenceSummary(organizationId, taskId); + ): Promise { + const task = await db.task.findFirst({ + where: { id: taskId, organizationId }, + include: { organization: { select: { name: true } } }, + }); + + if (!task) { + throw new NotFoundException('Task not found'); + } + + const folderName = `${sanitizeFilename(task.organization.name)}_${sanitizeFilename(task.title)}_evidence`; + const filename = `${folderName}_${format(new Date(), 'yyyy-MM-dd')}.zip`; + + const archive = archiver('zip', { zlib: { level: 6 } }); + archive.on('warning', (err) => { + this.logger.warn(`Archive warning (task ${taskId}): ${err.message}`); + }); + archive.on('error', (err) => { + this.logger.error(`Archive error (task ${taskId}): ${err.message}`); + }); + + void this.populateTaskArchive({ + archive, + organizationId, + taskId, + folderName, + options, + }).catch((err) => { + this.logger.error( + `Failed to populate task ZIP for ${taskId}: ${ + err instanceof Error ? err.stack : String(err) + }`, + ); + archive.abort(); + }); - const zip = new AdmZip(); - const folderName = `${sanitizeFilename(summary.organizationName)}_${sanitizeFilename(summary.taskTitle)}_evidence`; + return { archive, filename }; + } - // Add summary PDF - const summaryPdf = generateTaskSummaryPDF(summary); - zip.addFile(`${folderName}/00-summary.pdf`, summaryPdf); + private async populateTaskArchive(params: { + archive: Archiver; + organizationId: string; + taskId: string; + folderName: string; + options: { includeRawJson?: boolean }; + }): Promise { + const { archive, organizationId, taskId, folderName, options } = params; + + const [summary, attachments] = await Promise.all([ + this.getTaskEvidenceSummary(organizationId, taskId), + getTaskAttachments(organizationId, taskId), + ]); + + await this.appendTaskContents({ + archive, + summary, + attachments, + folderName, + options, + perAutomationSubfolders: true, + }); + + await archive.finalize(); + + this.logger.log('Task evidence ZIP streamed', { + organizationId, + taskId, + automations: summary.automations.length, + attachments: attachments.length, + includeRawJson: !!options.includeRawJson, + }); + } + + /** + * Append a task's contents (summary PDF, attachments, automation PDFs/JSON) + * to the archive under `folderName`. Shared by per-task and org-wide exports. + */ + private async appendTaskContents(params: { + archive: Archiver; + summary: TaskEvidenceSummary; + attachments: TaskAttachment[]; + folderName: string; + options: { includeRawJson?: boolean }; + perAutomationSubfolders: boolean; + }): Promise { + const { + archive, + summary, + attachments, + folderName, + options, + perAutomationSubfolders, + } = params; + + const summaryPdf = generateTaskSummaryPDF(summary, { + attachmentsCount: attachments.length, + }); + archive.append(summaryPdf, { name: `${folderName}/00-summary.pdf` }); + + if (attachments.length > 0) { + const uniqueName = createFilenameTracker(); + for (const attachment of attachments) { + await appendAttachmentToArchive({ + archive, + attachment, + folderPath: `${folderName}/01-attachments`, + uniqueName, + }); + } + } - // Add individual automation PDFs for (const automation of summary.automations) { const typePrefix = automation.type === 'app_automation' ? 'app' : 'custom'; - // Include short ID suffix to prevent path collisions when names sanitize identically + const automationName = sanitizeFilename(automation.name); const idSuffix = automation.id.slice(-8); - const automationFolder = `${folderName}/${typePrefix}-${sanitizeFilename(automation.name)}-${idSuffix}`; - // Generate PDF for this automation const pdfBuffer = generateAutomationPDF(automation, { organizationName: summary.organizationName, taskTitle: summary.taskTitle, }); - zip.addFile(`${automationFolder}/evidence.pdf`, pdfBuffer); - - // Optionally include raw JSON - if (options.includeRawJson) { - const jsonData = safeStringify( - redactSensitiveData({ - automation: { - id: automation.id, - name: automation.name, - type: automation.type, - integrationName: automation.integrationName, - totalRuns: automation.totalRuns, - successfulRuns: automation.successfulRuns, - failedRuns: automation.failedRuns, - latestRunAt: automation.latestRunAt, + if (perAutomationSubfolders) { + const sub = `${folderName}/${typePrefix}-${automationName}-${idSuffix}`; + archive.append(pdfBuffer, { name: `${sub}/evidence.pdf` }); + if (options.includeRawJson) { + archive.append( + Buffer.from( + this.buildAutomationJson(summary, automation), + 'utf-8', + ), + { name: `${sub}/evidence.json` }, + ); + } + } else { + archive.append(pdfBuffer, { + name: `${folderName}/${typePrefix}-${automationName}-${idSuffix}.pdf`, + }); + if (options.includeRawJson) { + archive.append( + Buffer.from( + this.buildAutomationJson(summary, automation), + 'utf-8', + ), + { + name: `${folderName}/${typePrefix}-${automationName}-${idSuffix}.json`, }, - runs: automation.runs.map((run) => ({ - id: run.id, - status: run.status, - startedAt: run.startedAt, - completedAt: run.completedAt, - durationMs: run.durationMs, - totalChecked: run.totalChecked, - passedCount: run.passedCount, - failedCount: run.failedCount, - evaluationStatus: run.evaluationStatus, - evaluationReason: run.evaluationReason, - logs: run.logs, - output: run.output, - error: run.error, - results: run.results, - createdAt: run.createdAt, - })), - exportedAt: summary.exportedAt, - }), - null, - 2, - ); - - zip.addFile( - `${automationFolder}/evidence.json`, - Buffer.from(jsonData ?? '{}', 'utf-8'), - ); + ); + } } } + } - const filename = `${folderName}_${format(new Date(), 'yyyy-MM-dd')}.zip`; - - const zipBuffer = zip.toBuffer(); - - this.logger.log('Task evidence ZIP generated', { - organizationId, - taskId, - automations: summary.automations.length, - includeRawJson: !!options.includeRawJson, - zipBytes: zipBuffer.length, - }); - - return { - fileBuffer: zipBuffer, - mimeType: 'application/zip', - filename, - }; + private buildAutomationJson( + summary: TaskEvidenceSummary, + automation: NormalizedAutomation, + ): string { + return ( + safeStringify( + redactSensitiveData({ + automation: { + id: automation.id, + name: automation.name, + type: automation.type, + integrationName: automation.integrationName, + totalRuns: automation.totalRuns, + successfulRuns: automation.successfulRuns, + failedRuns: automation.failedRuns, + latestRunAt: automation.latestRunAt, + }, + runs: automation.runs.map((run) => ({ + id: run.id, + status: run.status, + startedAt: run.startedAt, + completedAt: run.completedAt, + durationMs: run.durationMs, + totalChecked: run.totalChecked, + passedCount: run.passedCount, + failedCount: run.failedCount, + evaluationStatus: run.evaluationStatus, + evaluationReason: run.evaluationReason, + logs: run.logs, + output: run.output, + error: run.error, + results: run.results, + createdAt: run.createdAt, + })), + exportedAt: summary.exportedAt, + }), + null, + 2, + ) ?? '{}' + ); } /** - * Export all evidence for an organization (auditor bulk export) + * Stream all evidence across an organization as a ZIP. */ - async exportOrganizationEvidenceZip( + async streamOrganizationEvidenceZip( organizationId: string, options: { includeRawJson?: boolean } = {}, - ): Promise { - // Get organization + ): Promise { const organization = await db.organization.findUnique({ where: { id: organizationId }, select: { name: true }, @@ -310,134 +418,172 @@ export class EvidenceExportService { throw new NotFoundException('Organization not found'); } - // Get all tasks with automation runs (only count published runs for custom automations) - const tasksWithRuns = await db.task.findMany({ - where: { - organizationId, - OR: [ - { - integrationCheckRuns: { - some: {}, - }, - }, - { - evidenceAutomations: { - some: { - runs: { - some: { version: { not: null } }, - }, - }, - }, - }, - ], - }, - select: { - id: true, - title: true, - }, - orderBy: { title: 'asc' }, + const orgFolder = sanitizeFilename(organization.name); + const exportDate = format(new Date(), 'yyyy-MM-dd'); + const filename = `${orgFolder}_all-evidence_${exportDate}.zip`; + + const archive = archiver('zip', { zlib: { level: 6 } }); + archive.on('warning', (err) => { + this.logger.warn( + `Archive warning (org ${organizationId}): ${err.message}`, + ); + }); + archive.on('error', (err) => { + this.logger.error( + `Archive error (org ${organizationId}): ${err.message}`, + ); }); - if (tasksWithRuns.length === 0) { - throw new NotFoundException('No tasks with evidence found'); + void this.populateOrganizationArchive({ + archive, + organizationId, + organizationName: organization.name, + orgFolder, + options, + }).catch((err) => { + this.logger.error( + `Failed to populate org ZIP for ${organizationId}: ${ + err instanceof Error ? err.stack : String(err) + }`, + ); + archive.abort(); + }); + + return { archive, filename }; + } + + private async populateOrganizationArchive(params: { + archive: Archiver; + organizationId: string; + organizationName: string; + orgFolder: string; + options: { includeRawJson?: boolean }; + }): Promise { + const { archive, organizationId, organizationName, orgFolder, options } = + params; + + const taskIds = await this.findTasksWithEvidence(organizationId); + + const tasksWithData: Array<{ + id: string; + title: string; + summary: TaskEvidenceSummary; + attachments: TaskAttachment[]; + }> = []; + + for (const taskId of taskIds) { + try { + const [summary, attachments] = await Promise.all([ + this.getTaskEvidenceSummary(organizationId, taskId), + getTaskAttachments(organizationId, taskId), + ]); + if (summary.automations.length === 0 && attachments.length === 0) { + continue; + } + tasksWithData.push({ + id: summary.taskId, + title: summary.taskTitle, + summary, + attachments, + }); + } catch (error) { + this.logger.warn( + `Failed to prepare task ${taskId} for export: ${ + error instanceof Error ? error.message : String(error) + }`, + ); + } } - const zip = new AdmZip(); - const orgFolder = sanitizeFilename(organization.name); - const exportDate = format(new Date(), 'yyyy-MM-dd'); + if (tasksWithData.length === 0) { + throw new NotFoundException( + 'No tasks with evidence or attachments found', + ); + } + + tasksWithData.sort((a, b) => a.title.localeCompare(b.title)); - // Add a manifest file const manifest = { - organization: organization.name, + organization: organizationName, organizationId, exportedAt: new Date().toISOString(), - tasksCount: tasksWithRuns.length, - tasks: tasksWithRuns.map((t) => ({ id: t.id, title: t.title })), + tasksCount: tasksWithData.length, + totalAttachments: tasksWithData.reduce( + (sum, t) => sum + t.attachments.length, + 0, + ), + tasks: tasksWithData.map((t) => ({ + id: t.id, + title: t.title, + automations: t.summary.automations.length, + attachments: t.attachments.length, + })), }; - zip.addFile( - `${orgFolder}/manifest.json`, + archive.append( Buffer.from(safeStringify(manifest, null, 2) ?? '{}', 'utf-8'), + { name: `${orgFolder}/manifest.json` }, ); - // Export each task - for (const task of tasksWithRuns) { - try { - const summary = await this.getTaskEvidenceSummary( - organizationId, - task.id, - ); - - if (summary.automations.length === 0) { - continue; - } - - // Include short task ID suffix to prevent path collisions - const taskIdSuffix = task.id.slice(-8); - const taskFolder = `${orgFolder}/${sanitizeFilename(task.title)}-${taskIdSuffix}`; - - // Add task summary PDF - const summaryPdf = generateTaskSummaryPDF(summary); - zip.addFile(`${taskFolder}/00-summary.pdf`, summaryPdf); - - // Add automation PDFs - for (const automation of summary.automations) { - const typePrefix = - automation.type === 'app_automation' ? 'app' : 'custom'; - const automationName = sanitizeFilename(automation.name); - // Include short automation ID suffix to prevent path collisions - const automationIdSuffix = automation.id.slice(-8); - - const pdfBuffer = generateAutomationPDF(automation, { - organizationName: summary.organizationName, - taskTitle: summary.taskTitle, - }); - - zip.addFile( - `${taskFolder}/${typePrefix}-${automationName}-${automationIdSuffix}.pdf`, - pdfBuffer, - ); - - // Optional JSON - if (options.includeRawJson) { - const jsonData = safeStringify( - redactSensitiveData({ - automation: { - id: automation.id, - name: automation.name, - type: automation.type, - }, - runs: automation.runs, - exportedAt: summary.exportedAt, - }), - null, - 2, - ); - zip.addFile( - `${taskFolder}/${typePrefix}-${automationName}-${automationIdSuffix}.json`, - Buffer.from(jsonData ?? '{}', 'utf-8'), - ); - } - } - } catch (error) { - this.logger.warn(`Failed to export task ${task.id}: ${error}`); - } + for (const task of tasksWithData) { + const taskIdSuffix = task.id.slice(-8); + const taskFolder = `${orgFolder}/${sanitizeFilename(task.title)}-${taskIdSuffix}`; + + await this.appendTaskContents({ + archive, + summary: task.summary, + attachments: task.attachments, + folderName: taskFolder, + options, + // Org-wide keeps automation files flat (existing convention). + perAutomationSubfolders: false, + }); } - const filename = `${orgFolder}_all-evidence_${exportDate}.zip`; + await archive.finalize(); - const zipBuffer = zip.toBuffer(); - - this.logger.log('Organization evidence ZIP generated', { + this.logger.log('Organization evidence ZIP streamed', { organizationId, - tasks: tasksWithRuns.length, + tasks: tasksWithData.length, + totalAttachments: manifest.totalAttachments, includeRawJson: !!options.includeRawJson, - zipBytes: zipBuffer.length, }); + } - return { - fileBuffer: zipBuffer, - mimeType: 'application/zip', - filename, - }; + /** + * Find task IDs that have either automation runs or task attachments. + * Union of two cheap queries — avoids scanning every task for the org. + */ + private async findTasksWithEvidence( + organizationId: string, + ): Promise { + const [tasksWithRuns, taskAttachments] = await Promise.all([ + db.task.findMany({ + where: { + organizationId, + OR: [ + { integrationCheckRuns: { some: {} } }, + { + evidenceAutomations: { + some: { runs: { some: { version: { not: null } } } }, + }, + }, + ], + }, + select: { id: true }, + }), + db.attachment.findMany({ + where: { + organizationId, + entityType: AttachmentEntityType.task, + }, + select: { entityId: true }, + distinct: ['entityId'], + }), + ]); + + const ids = new Set(); + for (const t of tasksWithRuns) ids.add(t.id); + for (const a of taskAttachments) ids.add(a.entityId); + return Array.from(ids); } } diff --git a/apps/api/src/tasks/evidence-export/evidence-export.types.ts b/apps/api/src/tasks/evidence-export/evidence-export.types.ts index 787f10b190..39bdd1ea58 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.types.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.types.ts @@ -88,10 +88,23 @@ export interface EvidenceExportOptions { } /** - * Export result + * Export result for a single PDF (buffered — small, fits in memory safely) */ export interface EvidenceExportResult { fileBuffer: Buffer; mimeType: string; filename: string; } + +/** + * Streaming export result for ZIP bundles. + * The archive is returned live — the caller pipes it to a response and the + * service populates it asynchronously. Archive memory stays ≈ one file at a + * time instead of buffering the whole ZIP. + */ +export interface EvidenceZipStream { + /** Active archiver — already being populated. Pipe to a response / S3 stream. */ + archive: import('archiver').Archiver; + /** Suggested Content-Disposition filename. */ + filename: string; +} diff --git a/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts b/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts index 17601bb69c..cde5bb2122 100644 --- a/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts +++ b/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts @@ -512,7 +512,10 @@ function getStatusColor( /** * Generate a summary PDF for a task with all automations */ -export function generateTaskSummaryPDF(summary: TaskEvidenceSummary): Buffer { +export function generateTaskSummaryPDF( + summary: TaskEvidenceSummary, + options?: { attachmentsCount?: number }, +): Buffer { const doc = new jsPDF(); const config: PDFConfig = { doc, @@ -539,8 +542,8 @@ export function generateTaskSummaryPDF(summary: TaskEvidenceSummary): Buffer { addSeparator(config); - // Automations overview - addSectionHeader(config, 'Automations Overview'); + // Contents overview + addSectionHeader(config, 'Contents'); const appAutomations = summary.automations.filter( (a) => a.type === 'app_automation', @@ -549,6 +552,10 @@ export function generateTaskSummaryPDF(summary: TaskEvidenceSummary): Buffer { (a) => a.type === 'custom_automation', ); + const attachmentsCount = options?.attachmentsCount ?? 0; + if (attachmentsCount > 0) { + addText(config, `Attachments: ${attachmentsCount}`); + } addText(config, `Total Automations: ${summary.automations.length}`); addText(config, `App Automations: ${appAutomations.length}`); addText(config, `Custom Automations: ${customAutomations.length}`); From 3814e33c31da2e40ef5fed43f2f463e599497a55 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 22 Apr 2026 17:05:14 -0400 Subject: [PATCH 2/5] fix(evidence-export): address review feedback on streaming export MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Four issues from the PR review, all legitimate: - Attachment streamer treated every S3 failure as a missing object. Now only NoSuchKey / HTTP 404 produce a `_MISSING_*.txt` placeholder. Network, permission, and throttling errors rethrow so the archive aborts and the user sees a real failure instead of a silently-incomplete bundle. - `streamOrganizationEvidenceZip` was throwing NotFoundException from the async populate task, which fired after headers were already sent — the client got a broken stream instead of a 404. Hoisted the empty-content check to pre-flight so it becomes a proper HTTP 404. - Controllers now listen for client disconnect (`req.on('close')`) and abort the archive so cancelled downloads stop consuming backend resources (S3 fetches, background populate task). - Org populate no longer buffers all task summaries into an array before writing. Each task is streamed into the archive as it's processed, and only a lightweight manifest (id / title / counts) is accumulated. Manifest is written last. Tests: 31 passing (was 29) — added AccessDenied rethrow, client-disconnect abort. Pre-flight 404 test now asserts the throw is synchronous and no archive is created. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../evidence-attachment-streamer.ts | 46 ++++++-- .../evidence-export.controller.spec.ts | 50 +++++++-- .../evidence-export.controller.ts | 77 +++++++++---- .../evidence-export.service.spec.ts | 61 +++++++++-- .../evidence-export.service.ts | 103 ++++++++++-------- 5 files changed, 242 insertions(+), 95 deletions(-) diff --git a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts index 9ee5e31808..8bb7bb884f 100644 --- a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts +++ b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts @@ -71,8 +71,12 @@ export function createFilenameTracker(): (rawName: string) => string { /** * Append a single attachment to the archive by streaming its S3 body. - * If the object is missing (deleted from S3 but DB row still exists), writes a - * plaintext placeholder so the bundle remains auditable instead of failing. + * + * - Genuine missing-object errors (`NoSuchKey` / HTTP 404) → write a + * `_MISSING_.txt` placeholder so the bundle stays auditable. + * - All other failures (network, permissions, throttling, empty body) → rethrow + * so the archive aborts and the user sees a real failure instead of silently + * receiving an incomplete export. */ export async function appendAttachmentToArchive(params: { archive: Archiver; @@ -83,14 +87,11 @@ export async function appendAttachmentToArchive(params: { const { archive, attachment, folderPath, uniqueName } = params; if (!s3Client || !BUCKET_NAME) { - logger.warn( - `S3 client unavailable — attachment ${attachment.id} skipped with placeholder`, - ); - archive.append( - buildMissingPlaceholder(attachment, 'S3 client not configured'), - { name: `${folderPath}/_MISSING_${uniqueName(attachment.name)}.txt` }, + // Misconfiguration at process level — fail the whole export, don't silently + // produce placeholders for every attachment. + throw new Error( + 'S3 client or bucket not configured; cannot stream attachments', ); - return; } try { @@ -114,6 +115,15 @@ export async function appendAttachmentToArchive(params: { name: `${folderPath}/${uniqueName(attachment.name)}`, }); } catch (error) { + if (!isS3MissingObjectError(error)) { + logger.error( + `Failed to fetch attachment ${attachment.id} (key=${attachment.url}): ${ + error instanceof Error ? error.message : String(error) + }`, + ); + throw error; + } + const message = error instanceof Error ? error.message : String(error); logger.warn( `Missing S3 object for attachment ${attachment.id} (key=${attachment.url}): ${message}`, @@ -124,6 +134,24 @@ export async function appendAttachmentToArchive(params: { } } +/** + * True only for "the object does not exist" — NoSuchKey or HTTP 404. + * Everything else (AccessDenied, SlowDown, NetworkError, timeouts) is treated + * as a real failure that should surface, not a silent skip. + */ +function isS3MissingObjectError(error: unknown): boolean { + if (!error || typeof error !== 'object') return false; + const err = error as { + name?: string; + Code?: string; + $metadata?: { httpStatusCode?: number }; + }; + if (err.name === 'NoSuchKey' || err.name === 'NotFound') return true; + if (err.Code === 'NoSuchKey' || err.Code === 'NotFound') return true; + if (err.$metadata?.httpStatusCode === 404) return true; + return false; +} + function buildMissingPlaceholder( attachment: TaskAttachment, reason: string, diff --git a/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts index bb37d54ea4..082d9a1b09 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts @@ -50,20 +50,23 @@ function makeFakeArchive() { } function makeFakeResponse() { - const res: { - setHeader: jest.Mock; - status: jest.Mock; - end: jest.Mock; - headersSent: boolean; - } = { + const emitter = new EventEmitter(); + const res = Object.assign(emitter, { setHeader: jest.fn(), - status: jest.fn(() => res), + status: jest.fn(function (this: unknown) { + return res; + }), end: jest.fn(), headersSent: false, - }; + writableEnded: false, + }); return res; } +function makeFakeRequest() { + return new EventEmitter(); +} + describe('EvidenceExportController', () => { let controller: EvidenceExportController; let service: jest.Mocked< @@ -106,12 +109,14 @@ describe('EvidenceExportController', () => { archive: archive as unknown as import('archiver').Archiver, filename: 'acme_mytask_evidence_2026-04-22.zip', }); + const req = makeFakeRequest(); const res = makeFakeResponse(); await controller.exportTaskEvidenceZip( 'org_1', 'tsk_1', 'true', + req as unknown as import('express').Request, res as unknown as import('express').Response, ); @@ -138,12 +143,14 @@ describe('EvidenceExportController', () => { archive: archive as unknown as import('archiver').Archiver, filename: 'f.zip', }); + const req = makeFakeRequest(); const res = makeFakeResponse(); await controller.exportTaskEvidenceZip( 'org_1', 'tsk_1', undefined as unknown as string, + req as unknown as import('express').Request, res as unknown as import('express').Response, ); @@ -153,6 +160,31 @@ describe('EvidenceExportController', () => { { includeRawJson: false }, ); }); + + it('aborts the archive when the client disconnects mid-stream', async () => { + const archive = makeFakeArchive(); + service.streamTaskEvidenceZip.mockResolvedValue({ + archive: archive as unknown as import('archiver').Archiver, + filename: 'f.zip', + }); + const req = makeFakeRequest(); + const res = makeFakeResponse(); + + await controller.exportTaskEvidenceZip( + 'org_1', + 'tsk_1', + 'false', + req as unknown as import('express').Request, + res as unknown as import('express').Response, + ); + + expect(archive.abort).not.toHaveBeenCalled(); + + // Simulate client closing the connection before the stream finished. + req.emit('close'); + + expect(archive.abort).toHaveBeenCalledTimes(1); + }); }); describe('AuditorEvidenceExportController', () => { @@ -185,11 +217,13 @@ describe('AuditorEvidenceExportController', () => { archive: archive as unknown as import('archiver').Archiver, filename: 'acme_all-evidence_2026-04-22.zip', }); + const req = makeFakeRequest(); const res = makeFakeResponse(); await controller.exportAllEvidence( 'org_1', 'true', + req as unknown as import('express').Request, res as unknown as import('express').Response, ); diff --git a/apps/api/src/tasks/evidence-export/evidence-export.controller.ts b/apps/api/src/tasks/evidence-export/evidence-export.controller.ts index 6756260967..8560daa8ee 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.controller.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.controller.ts @@ -3,6 +3,7 @@ import { Get, Param, Query, + Req, Res, UseGuards, Logger, @@ -15,7 +16,8 @@ import { ApiSecurity, ApiTags, } from '@nestjs/swagger'; -import type { Response } from 'express'; +import type { Request, Response } from 'express'; +import type { Archiver } from 'archiver'; import { AuditRead } from '../../audit/skip-audit-log.decorator'; import { OrganizationId } from '../../auth/auth-context.decorator'; import { HybridAuthGuard } from '../../auth/hybrid-auth.guard'; @@ -164,6 +166,7 @@ export class EvidenceExportController { @OrganizationId() organizationId: string, @Param('taskId') taskId: string, @Query('includeJson') includeJson: string, + @Req() req: Request, @Res() res: Response, ) { this.logger.log('Export task evidence ZIP', { @@ -186,16 +189,13 @@ export class EvidenceExportController { `attachment; filename="${filename}"`, ); - archive.on('error', (err) => { - this.logger.error(`Archive stream error for task ${taskId}: ${err.message}`); - if (!res.headersSent) { - res.status(500).end(); - } else { - res.end(); - } + pipeArchiveToResponse({ + archive, + req, + res, + logger: this.logger, + tag: `task ${taskId}`, }); - - archive.pipe(res); } } @@ -242,6 +242,7 @@ export class AuditorEvidenceExportController { async exportAllEvidence( @OrganizationId() organizationId: string, @Query('includeJson') includeJson: string, + @Req() req: Request, @Res() res: Response, ) { this.logger.log('Auditor exporting all evidence', { @@ -261,17 +262,51 @@ export class AuditorEvidenceExportController { `attachment; filename="${filename}"`, ); - archive.on('error', (err) => { - this.logger.error( - `Archive stream error for org ${organizationId}: ${err.message}`, - ); - if (!res.headersSent) { - res.status(500).end(); - } else { - res.end(); - } + pipeArchiveToResponse({ + archive, + req, + res, + logger: this.logger, + tag: `org ${organizationId}`, }); - - archive.pipe(res); } } + +/** + * Wire an archive to the HTTP response with two concerns: + * 1. Archive errors → log and end the response (500 if headers not yet sent). + * 2. Client disconnect → abort the archive so S3 fetches stop and the + * background populate task doesn't keep running for a closed socket. + */ +function pipeArchiveToResponse(params: { + archive: Archiver; + req: Request; + res: Response; + logger: Logger; + tag: string; +}): void { + const { archive, req, res, logger, tag } = params; + let aborted = false; + + const abortIfIncomplete = () => { + if (aborted) return; + if (res.writableEnded) return; + aborted = true; + logger.warn(`Client disconnected during export (${tag}); aborting archive`); + archive.abort(); + }; + + req.once('close', abortIfIncomplete); + res.once('close', abortIfIncomplete); + + archive.on('error', (err) => { + logger.error(`Archive stream error (${tag}): ${err.message}`); + if (!res.headersSent) { + res.status(500).end(); + } else if (!res.writableEnded) { + res.end(); + } + }); + + archive.pipe(res); +} diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts index 95ba24b6f1..01928c3ce6 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts @@ -199,7 +199,7 @@ describe('EvidenceExportService — streaming ZIPs', () => { ); }); - it('writes a placeholder when S3 object is missing', async () => { + it('writes a placeholder when S3 object is truly missing (NoSuchKey)', async () => { const attachments = [ { id: 'att_missing', @@ -210,9 +210,11 @@ describe('EvidenceExportService — streaming ZIPs', () => { }, ]; - (s3Client!.send as jest.Mock).mockRejectedValue( - new Error('NoSuchKey: The specified key does not exist.'), - ); + const noSuchKeyError = Object.assign(new Error('NoSuchKey'), { + name: 'NoSuchKey', + $metadata: { httpStatusCode: 404 }, + }); + (s3Client!.send as jest.Mock).mockRejectedValue(noSuchKeyError); primeTaskQueries({ attachments }); const { archive } = await service.streamTaskEvidenceZip( @@ -230,6 +232,41 @@ describe('EvidenceExportService — streaming ZIPs', () => { expect(String(placeholder!.source)).toContain('att_missing'); }); + it('aborts the archive on transient S3 failures (not a placeholder)', async () => { + const attachments = [ + { + id: 'att_err', + name: 'file.pdf', + url: 'org_1/attachments/task/tsk_123/file.pdf', + type: 'document', + createdAt: new Date(), + }, + ]; + + // AccessDenied — NOT a missing-object error; must surface as a failure. + const accessDeniedError = Object.assign(new Error('Access Denied'), { + name: 'AccessDenied', + $metadata: { httpStatusCode: 403 }, + }); + (s3Client!.send as jest.Mock).mockRejectedValue(accessDeniedError); + primeTaskQueries({ attachments }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + + await expect(mock.finalized).rejects.toThrow('aborted'); + expect(mock.abort).toHaveBeenCalled(); + + // No placeholder text file written for a non-missing error + const placeholder = mock.appendCalls.find((c) => + c.options.name.includes('_MISSING_'), + ); + expect(placeholder).toBeUndefined(); + }); + it('disambiguates duplicate filenames within attachments folder', async () => { const attachments = [ { @@ -296,18 +333,20 @@ describe('EvidenceExportService — streaming ZIPs', () => { ).rejects.toBeInstanceOf(NotFoundException); }); - it('aborts archive with NotFoundException when no tasks have content', async () => { + it('throws NotFoundException synchronously (pre-flight) when no tasks have content', async () => { + // Org exists but no tasks with automations and no attachments. mockDb.organization.findUnique.mockResolvedValue({ name: 'Acme' }); mockDb.task.findMany.mockResolvedValue([]); mockDb.attachment.findMany.mockResolvedValue([]); - const { archive } = await service.streamOrganizationEvidenceZip( - 'org_1', - ); - const mock = archive as unknown as MockArchive; + // Must reject synchronously — before an archive is returned — so the + // controller can produce a real HTTP 404 instead of a broken streamed ZIP. + await expect( + service.streamOrganizationEvidenceZip('org_1'), + ).rejects.toBeInstanceOf(NotFoundException); - await expect(mock.finalized).rejects.toThrow('aborted'); - expect(mock.abort).toHaveBeenCalled(); + // No archive should have been created at all. + expect(archiveInstances).toHaveLength(0); }); it('includes a task that has attachments but no automations', async () => { diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.ts index 7b68e1951b..2e98851eb1 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.ts @@ -404,6 +404,10 @@ export class EvidenceExportService { /** * Stream all evidence across an organization as a ZIP. + * + * Pre-flight checks (organization exists, has content) run synchronously + * before the archive is created so failures turn into proper HTTP errors + * instead of mid-stream aborts with headers already sent. */ async streamOrganizationEvidenceZip( organizationId: string, @@ -418,6 +422,14 @@ export class EvidenceExportService { throw new NotFoundException('Organization not found'); } + // Pre-flight: error synchronously if nothing to export. + const taskIds = await this.findTasksWithEvidence(organizationId); + if (taskIds.length === 0) { + throw new NotFoundException( + 'No tasks with evidence or attachments found', + ); + } + const orgFolder = sanitizeFilename(organization.name); const exportDate = format(new Date(), 'yyyy-MM-dd'); const filename = `${orgFolder}_all-evidence_${exportDate}.zip`; @@ -439,6 +451,7 @@ export class EvidenceExportService { organizationId, organizationName: organization.name, orgFolder, + taskIds, options, }).catch((err) => { this.logger.error( @@ -457,19 +470,27 @@ export class EvidenceExportService { organizationId: string; organizationName: string; orgFolder: string; + taskIds: string[]; options: { includeRawJson?: boolean }; }): Promise { - const { archive, organizationId, organizationName, orgFolder, options } = - params; - - const taskIds = await this.findTasksWithEvidence(organizationId); + const { + archive, + organizationId, + organizationName, + orgFolder, + taskIds, + options, + } = params; - const tasksWithData: Array<{ + // Stream tasks one at a time. Only keep lightweight manifest metadata in + // memory (task title + counts) — never hold all summaries simultaneously. + const manifestEntries: Array<{ id: string; title: string; - summary: TaskEvidenceSummary; - attachments: TaskAttachment[]; + automations: number; + attachments: number; }> = []; + let totalAttachments = 0; for (const taskId of taskIds) { try { @@ -477,74 +498,64 @@ export class EvidenceExportService { this.getTaskEvidenceSummary(organizationId, taskId), getTaskAttachments(organizationId, taskId), ]); + if (summary.automations.length === 0 && attachments.length === 0) { continue; } - tasksWithData.push({ - id: summary.taskId, - title: summary.taskTitle, + + const taskIdSuffix = summary.taskId.slice(-8); + const taskFolder = `${orgFolder}/${sanitizeFilename(summary.taskTitle)}-${taskIdSuffix}`; + + await this.appendTaskContents({ + archive, summary, attachments, + folderName: taskFolder, + options, + // Org-wide keeps automation files flat (existing convention). + perAutomationSubfolders: false, + }); + + manifestEntries.push({ + id: summary.taskId, + title: summary.taskTitle, + automations: summary.automations.length, + attachments: attachments.length, }); + totalAttachments += attachments.length; } catch (error) { this.logger.warn( - `Failed to prepare task ${taskId} for export: ${ + `Failed to export task ${taskId}: ${ error instanceof Error ? error.message : String(error) }`, ); } } - if (tasksWithData.length === 0) { - throw new NotFoundException( - 'No tasks with evidence or attachments found', - ); - } - - tasksWithData.sort((a, b) => a.title.localeCompare(b.title)); + manifestEntries.sort((a, b) => a.title.localeCompare(b.title)); + // Manifest written last — archiver doesn't care about append order for the + // final ZIP structure, and this lets us include final counts without a + // separate aggregation pass. const manifest = { organization: organizationName, organizationId, exportedAt: new Date().toISOString(), - tasksCount: tasksWithData.length, - totalAttachments: tasksWithData.reduce( - (sum, t) => sum + t.attachments.length, - 0, - ), - tasks: tasksWithData.map((t) => ({ - id: t.id, - title: t.title, - automations: t.summary.automations.length, - attachments: t.attachments.length, - })), + tasksCount: manifestEntries.length, + totalAttachments, + tasks: manifestEntries, }; archive.append( Buffer.from(safeStringify(manifest, null, 2) ?? '{}', 'utf-8'), { name: `${orgFolder}/manifest.json` }, ); - for (const task of tasksWithData) { - const taskIdSuffix = task.id.slice(-8); - const taskFolder = `${orgFolder}/${sanitizeFilename(task.title)}-${taskIdSuffix}`; - - await this.appendTaskContents({ - archive, - summary: task.summary, - attachments: task.attachments, - folderName: taskFolder, - options, - // Org-wide keeps automation files flat (existing convention). - perAutomationSubfolders: false, - }); - } - await archive.finalize(); this.logger.log('Organization evidence ZIP streamed', { organizationId, - tasks: tasksWithData.length, - totalAttachments: manifest.totalAttachments, + tasks: manifestEntries.length, + totalAttachments, includeRawJson: !!options.includeRawJson, }); } From 317d407b23c089235990c754e5afd1757467d2d1 Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 22 Apr 2026 17:12:45 -0400 Subject: [PATCH 3/5] fix(evidence-export): tighten S3 missing-object check to specific error codes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The blanket `httpStatusCode === 404` fallback in `isS3MissingObjectError` misclassified `NoSuchBucket` (bucket misconfigured) as a per-attachment missing object. That would have produced an export that completed "successfully" with nothing but `_MISSING_*.txt` placeholders — worse than failing outright, because the customer would have no idea their bundle contains none of the actual evidence. Now we only treat the specific error codes `NoSuchKey` and `NotFound` as missing; everything else (including other 404s like `NoSuchBucket`) rethrows and aborts the archive. Added a regression test asserting NoSuchBucket aborts the archive rather than producing a placeholder. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../evidence-attachment-streamer.ts | 21 +++++------ .../evidence-export.service.spec.ts | 36 +++++++++++++++++++ 2 files changed, 45 insertions(+), 12 deletions(-) diff --git a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts index 8bb7bb884f..36459970ae 100644 --- a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts +++ b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts @@ -135,21 +135,18 @@ export async function appendAttachmentToArchive(params: { } /** - * True only for "the object does not exist" — NoSuchKey or HTTP 404. - * Everything else (AccessDenied, SlowDown, NetworkError, timeouts) is treated - * as a real failure that should surface, not a silent skip. + * True only for "the object does not exist" — specifically `NoSuchKey` (or + * `NotFound` for HeadObject semantics). Anything else — including the other + * 404s like `NoSuchBucket`, or 403s like `AccessDenied` — is a real failure + * that must surface, not a silent per-attachment skip. A misconfigured bucket + * returning NoSuchBucket would otherwise produce an export full of placeholders + * that looks "successful" but contains none of the customer's evidence. */ function isS3MissingObjectError(error: unknown): boolean { if (!error || typeof error !== 'object') return false; - const err = error as { - name?: string; - Code?: string; - $metadata?: { httpStatusCode?: number }; - }; - if (err.name === 'NoSuchKey' || err.name === 'NotFound') return true; - if (err.Code === 'NoSuchKey' || err.Code === 'NotFound') return true; - if (err.$metadata?.httpStatusCode === 404) return true; - return false; + const err = error as { name?: string; Code?: string }; + const code = err.name ?? err.Code; + return code === 'NoSuchKey' || code === 'NotFound'; } function buildMissingPlaceholder( diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts index 01928c3ce6..380f392bc0 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts @@ -267,6 +267,42 @@ describe('EvidenceExportService — streaming ZIPs', () => { expect(placeholder).toBeUndefined(); }); + it('aborts on NoSuchBucket (a 404 that is NOT a missing object)', async () => { + const attachments = [ + { + id: 'att_bucket', + name: 'file.pdf', + url: 'org_1/attachments/task/tsk_123/file.pdf', + type: 'document', + createdAt: new Date(), + }, + ]; + + // Bucket misconfiguration — returns HTTP 404 but must NOT be + // treated as a single missing attachment. Otherwise the export looks + // "successful" while silently containing only placeholders. + const noSuchBucketError = Object.assign(new Error('NoSuchBucket'), { + name: 'NoSuchBucket', + $metadata: { httpStatusCode: 404 }, + }); + (s3Client!.send as jest.Mock).mockRejectedValue(noSuchBucketError); + primeTaskQueries({ attachments }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + + await expect(mock.finalized).rejects.toThrow('aborted'); + expect(mock.abort).toHaveBeenCalled(); + + const placeholder = mock.appendCalls.find((c) => + c.options.name.includes('_MISSING_'), + ); + expect(placeholder).toBeUndefined(); + }); + it('disambiguates duplicate filenames within attachments folder', async () => { const attachments = [ { From a4db5a2553b59feeec02d32bced0b846def878aa Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 22 Apr 2026 17:57:44 -0400 Subject: [PATCH 4/5] test(evidence-export): lock in automation-only and mixed-org scenarios Add explicit regression tests for the three scenarios that aren't covered by the attachment-focused happy path: - Per-task export with automations but no attachments: verifies the 01-attachments/ folder is NOT created, no S3 calls are made, and the summary PDF is rendered with attachmentsCount=0 (line omitted). - Per-task export with neither automations nor attachments: verifies the ZIP contains only the summary PDF. - Org-wide export where no task has attachments (classic pre-PR scenario): verifies manifest shows totalAttachments=0 and no 01-attachments/ folders appear anywhere. Matches exact shape of old behaviour. - Org-wide export mixing an automation-only task with an attachment-only task: verifies both appear in the same ZIP with correct contents. All 36 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../evidence-export.service.spec.ts | 230 ++++++++++++++++++ 1 file changed, 230 insertions(+) diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts index 380f392bc0..e6b65e39fe 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts @@ -358,6 +358,74 @@ describe('EvidenceExportService — streaming ZIPs', () => { }), ); }); + + it('produces a valid ZIP with automations only (no attachments) — no regression', async () => { + // Simulates the common case: task has automation runs but no files + // uploaded. Must NOT create an empty 01-attachments/ folder and must + // still emit the summary + automation PDFs like before this PR. + const appRun = { + id: 'icr_1', + checkId: 'mfa-check', + checkName: 'MFA Enabled', + status: 'success', + startedAt: new Date('2024-01-15T10:00:00Z'), + completedAt: new Date('2024-01-15T10:00:05Z'), + durationMs: 5000, + totalChecked: 1, + passedCount: 1, + failedCount: 0, + errorMessage: null, + logs: null, + createdAt: new Date('2024-01-15T10:00:00Z'), + connection: { provider: { slug: 'g', name: 'G' } }, + results: [], + }; + primeTaskQueries({ attachments: [], appRuns: [appRun], customRuns: [] }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const paths = mock.appendCalls.map((c) => c.options.name); + + // Summary PDF present + expect(paths).toContain( + 'acme-corp_soc-2-access-review_evidence/00-summary.pdf', + ); + // No 01-attachments/ folder at all + expect(paths.some((p) => p.includes('/01-attachments/'))).toBe(false); + // At least one automation PDF was written in a subfolder (per-task uses subfolders) + expect(paths.some((p) => /\/app-.+\/evidence\.pdf$/.test(p))).toBe(true); + + // S3 GetObject should NOT have been called — no attachments to fetch. + expect(s3Client!.send).not.toHaveBeenCalled(); + + // Summary PDF renders with attachmentsCount=0 (line omitted in PDF). + expect(generateTaskSummaryPDF).toHaveBeenCalledWith( + expect.any(Object), + { attachmentsCount: 0 }, + ); + }); + + it('produces a summary-only ZIP when task has neither automations nor attachments', async () => { + primeTaskQueries({ attachments: [], appRuns: [], customRuns: [] }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const paths = mock.appendCalls.map((c) => c.options.name); + expect(paths).toEqual([ + 'acme-corp_soc-2-access-review_evidence/00-summary.pdf', + ]); + expect(s3Client!.send).not.toHaveBeenCalled(); + }); }); describe('streamOrganizationEvidenceZip', () => { @@ -426,5 +494,167 @@ describe('EvidenceExportService — streaming ZIPs', () => { true, ); }); + + it('works for an org with automations but zero attachments anywhere — no regression', async () => { + // Core pre-existing behaviour: org has tasks with automation runs, no + // attachments uploaded. Export must succeed with the same structure as + // before this PR (manifest + per-task folders with automation PDFs, + // no 01-attachments/ folders, totalAttachments: 0). + mockDb.organization.findUnique.mockResolvedValue({ name: 'Acme Corp' }); + + // findTasksWithEvidence: one task has runs, attachments query returns empty + mockDb.task.findMany.mockResolvedValue([{ id: 'tsk_auto' }]); + mockDb.attachment.findMany.mockResolvedValueOnce([]); // no tasks with attachments + + // Per-task fetches inside populate loop + mockDb.task.findFirst.mockResolvedValue({ + id: 'tsk_auto', + title: 'Automated Task', + organization: { name: 'Acme Corp' }, + }); + mockDb.integrationCheckRun.findMany.mockResolvedValue([ + { + id: 'icr_1', + checkId: 'c1', + checkName: 'Check 1', + status: 'success', + startedAt: new Date('2024-01-01'), + completedAt: new Date('2024-01-01'), + durationMs: 100, + totalChecked: 1, + passedCount: 1, + failedCount: 0, + errorMessage: null, + logs: null, + createdAt: new Date('2024-01-01'), + connection: { provider: { slug: 'g', name: 'G' } }, + results: [], + }, + ]); + mockDb.evidenceAutomationRun.findMany.mockResolvedValue([]); + // Per-task attachment fetch inside loop + mockDb.attachment.findMany.mockResolvedValue([]); + + const { archive } = + await service.streamOrganizationEvidenceZip('org_1'); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const paths = mock.appendCalls.map((c) => c.options.name); + + // Manifest present, task folder with summary + automation PDF present, + // no 01-attachments/ folder anywhere. + expect(paths.some((p) => p.endsWith('/manifest.json'))).toBe(true); + expect(paths.some((p) => p.endsWith('/00-summary.pdf'))).toBe(true); + expect(paths.some((p) => /\/app-.+\.pdf$/.test(p))).toBe(true); + expect(paths.some((p) => p.includes('/01-attachments/'))).toBe(false); + + // No S3 fetches triggered (no attachments to stream) + expect(s3Client!.send).not.toHaveBeenCalled(); + + // Manifest should record zero attachments + const manifestCall = mock.appendCalls.find((c) => + c.options.name.endsWith('/manifest.json'), + ); + expect(manifestCall).toBeDefined(); + const manifestJson = JSON.parse(String(manifestCall!.source)); + expect(manifestJson.totalAttachments).toBe(0); + expect(manifestJson.tasksCount).toBe(1); + expect(manifestJson.tasks[0]).toMatchObject({ + title: 'Automated Task', + automations: 1, + attachments: 0, + }); + }); + + it('mixes attachment-only and automation-only tasks in the same export', async () => { + mockDb.organization.findUnique.mockResolvedValue({ name: 'Acme Corp' }); + + // findTasksWithEvidence: one task has runs, another has only attachments + mockDb.task.findMany.mockResolvedValue([{ id: 'tsk_auto' }]); + mockDb.attachment.findMany.mockResolvedValueOnce([ + { entityId: 'tsk_att' }, + ]); + + // Per-task dispatch — depends on which task findFirst is called for. + mockDb.task.findFirst.mockImplementation((args: { where: { id: string } }) => { + if (args.where.id === 'tsk_auto') { + return Promise.resolve({ + id: 'tsk_auto', + title: 'Automated', + organization: { name: 'Acme Corp' }, + }); + } + return Promise.resolve({ + id: 'tsk_att', + title: 'Attached', + organization: { name: 'Acme Corp' }, + }); + }); + mockDb.integrationCheckRun.findMany.mockImplementation( + (args: { where: { taskId: string } }) => + args.where.taskId === 'tsk_auto' + ? Promise.resolve([ + { + id: 'icr_1', + checkId: 'c', + checkName: 'Check', + status: 'success', + startedAt: new Date(), + completedAt: new Date(), + durationMs: 100, + totalChecked: 1, + passedCount: 1, + failedCount: 0, + errorMessage: null, + logs: null, + createdAt: new Date(), + connection: { provider: { slug: 'g', name: 'G' } }, + results: [], + }, + ]) + : Promise.resolve([]), + ); + mockDb.evidenceAutomationRun.findMany.mockResolvedValue([]); + + // Per-task attachment fetches — only tsk_att has any + mockDb.attachment.findMany.mockImplementation( + (args: { where: { entityId?: string } }) => + args.where.entityId === 'tsk_att' + ? Promise.resolve([ + { + id: 'att_1', + name: 'proof.pdf', + url: 'key', + type: 'document', + createdAt: new Date(), + }, + ]) + : Promise.resolve([]), + ); + + (s3Client!.send as jest.Mock).mockResolvedValue({ + Body: Buffer.from('PDF'), + }); + + const { archive } = + await service.streamOrganizationEvidenceZip('org_1'); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const paths = mock.appendCalls.map((c) => c.options.name); + expect(paths.some((p) => p.includes('/automated-'))).toBe(true); + expect(paths.some((p) => p.includes('/attached-'))).toBe(true); + expect( + paths.some((p) => p.includes('/01-attachments/proof.pdf')), + ).toBe(true); + + const manifestCall = mock.appendCalls.find((c) => + c.options.name.endsWith('/manifest.json'), + ); + const manifestJson = JSON.parse(String(manifestCall!.source)); + expect(manifestJson.totalAttachments).toBe(1); + expect(manifestJson.tasksCount).toBe(2); + }); }); }); From 752ed244dd4909dbf75188df36998e1bf1c46e2d Mon Sep 17 00:00:00 2001 From: Tofik Hasanov Date: Wed, 22 Apr 2026 18:10:25 -0400 Subject: [PATCH 5/5] fix(evidence-export): address two follow-up review findings Two issues cubic flagged on the production deploy PR (#2640): 1. Client-disconnect detection was listening on `req.once('close')`. In Node 16+ `req.close` fires on both disconnect AND normal request completion, which could falsely abort a successful export. Now we listen only on `res.close` and distinguish normal completion via `res.writableFinished` (true only after the full response is flushed, unlike `writableEnded` which only reflects that `.end()` was called). 2. Filename collisions were deduplicated against the raw attachment name, then wrapped into `_MISSING_.txt` for placeholders. A legitimate upload named `_MISSING_foo.txt` plus a missing upload named `foo` could therefore both land at the same final ZIP path. Now the tracker sees the full final name (including any `_MISSING_` wrapping) so the collision is resolved on what actually ends up in the ZIP. Added two regression tests: normal-completion does not abort, and success/placeholder names don't collide in the final ZIP path. 38 tests pass. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../evidence-attachment-streamer.ts | 12 +++- .../evidence-export.controller.spec.ts | 38 ++++++++++- .../evidence-export.controller.ts | 19 +++--- .../evidence-export.service.spec.ts | 63 +++++++++++++++++++ 4 files changed, 122 insertions(+), 10 deletions(-) diff --git a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts index 36459970ae..75407f5c69 100644 --- a/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts +++ b/apps/api/src/tasks/evidence-export/evidence-attachment-streamer.ts @@ -77,6 +77,11 @@ export function createFilenameTracker(): (rawName: string) => string { * - All other failures (network, permissions, throttling, empty body) → rethrow * so the archive aborts and the user sees a real failure instead of silently * receiving an incomplete export. + * + * Filename collisions are resolved on the *final* ZIP entry name (including + * any `_MISSING_…txt` wrapping), not the raw attachment name — otherwise a + * success-path file named `_MISSING_foo.txt` could collide with a failure-path + * placeholder for a file named `foo` once the wrapping is applied. */ export async function appendAttachmentToArchive(params: { archive: Archiver; @@ -128,8 +133,13 @@ export async function appendAttachmentToArchive(params: { logger.warn( `Missing S3 object for attachment ${attachment.id} (key=${attachment.url}): ${message}`, ); + // Feed the FULL final name (including `_MISSING_` prefix and `.txt` suffix) + // into the same collision tracker that success paths use, so a legitimate + // file uploaded as `_MISSING_foo.txt` can't silently collide with a + // placeholder for a different missing attachment named `foo`. + const placeholderName = uniqueName(`_MISSING_${attachment.name}.txt`); archive.append(buildMissingPlaceholder(attachment, message), { - name: `${folderPath}/_MISSING_${uniqueName(attachment.name)}.txt`, + name: `${folderPath}/${placeholderName}`, }); } } diff --git a/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts index 082d9a1b09..e9f29c13e1 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.controller.spec.ts @@ -59,6 +59,7 @@ function makeFakeResponse() { end: jest.fn(), headersSent: false, writableEnded: false, + writableFinished: false, }); return res; } @@ -180,11 +181,44 @@ describe('EvidenceExportController', () => { expect(archive.abort).not.toHaveBeenCalled(); - // Simulate client closing the connection before the stream finished. - req.emit('close'); + // Simulate a client-side disconnect: res emits 'close' before the response + // has fully flushed (writableFinished is still false). + res.writableFinished = false; + res.emit('close'); expect(archive.abort).toHaveBeenCalledTimes(1); }); + + it('does NOT abort the archive when the response completed normally', async () => { + // Regression guard for cubic P1: with req.once('close') this would falsely + // abort on Node 16+. Using res.close + writableFinished avoids that. + const archive = makeFakeArchive(); + service.streamTaskEvidenceZip.mockResolvedValue({ + archive: archive as unknown as import('archiver').Archiver, + filename: 'f.zip', + }); + const req = makeFakeRequest(); + const res = makeFakeResponse(); + + await controller.exportTaskEvidenceZip( + 'org_1', + 'tsk_1', + 'false', + req as unknown as import('express').Request, + res as unknown as import('express').Response, + ); + + // Successful flow: archiver finishes, writableFinished becomes true before + // 'close' fires on the response. + res.writableFinished = true; + res.emit('close'); + + // And even if req 'close' fires too (Node 16+ does this on normal + // completion), we ignore it — no listener attached there. + req.emit('close'); + + expect(archive.abort).not.toHaveBeenCalled(); + }); }); describe('AuditorEvidenceExportController', () => { diff --git a/apps/api/src/tasks/evidence-export/evidence-export.controller.ts b/apps/api/src/tasks/evidence-export/evidence-export.controller.ts index 8560daa8ee..3a5cd924d6 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.controller.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.controller.ts @@ -277,6 +277,12 @@ export class AuditorEvidenceExportController { * 1. Archive errors → log and end the response (500 if headers not yet sent). * 2. Client disconnect → abort the archive so S3 fetches stop and the * background populate task doesn't keep running for a closed socket. + * + * We listen on `res.close` only and distinguish normal-completion from + * disconnect via `res.writableFinished` (true only after the full response + * has been flushed). `req.close` is not used because it fires on normal + * request completion in modern Node, which can race with our response flush + * and cause false aborts of successful exports. */ function pipeArchiveToResponse(params: { archive: Archiver; @@ -285,19 +291,18 @@ function pipeArchiveToResponse(params: { logger: Logger; tag: string; }): void { - const { archive, req, res, logger, tag } = params; + const { archive, res, logger, tag } = params; let aborted = false; - const abortIfIncomplete = () => { + res.once('close', () => { if (aborted) return; - if (res.writableEnded) return; + // writableFinished becomes true only after the response is fully flushed + // and the 'finish' event fires; on a client disconnect this stays false. + if (res.writableFinished) return; aborted = true; logger.warn(`Client disconnected during export (${tag}); aborting archive`); archive.abort(); - }; - - req.once('close', abortIfIncomplete); - res.once('close', abortIfIncomplete); + }); archive.on('error', (err) => { logger.error(`Archive stream error (${tag}): ${err.message}`); diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts index e6b65e39fe..22bcf409d2 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts @@ -303,6 +303,69 @@ describe('EvidenceExportService — streaming ZIPs', () => { expect(placeholder).toBeUndefined(); }); + it('prevents placeholder vs success filename collision in final ZIP path', async () => { + // Regression guard for cubic P2: if a legitimate file is uploaded with + // the name `_MISSING_foo.txt` and succeeds from S3, AND another upload + // named `foo` fails (NoSuchKey), the wrapping of the failure into + // `_MISSING_foo.txt` must NOT collide with the successful file. + const attachments = [ + { + id: 'att_real', + name: '_MISSING_foo.txt', + url: 'key-real', + type: 'document', + createdAt: new Date('2024-01-01'), + }, + { + id: 'att_miss', + name: 'foo', + url: 'key-miss', + type: 'document', + createdAt: new Date('2024-01-02'), + }, + ]; + + (s3Client!.send as jest.Mock).mockImplementation( + (cmd: { input: { Key: string } }) => { + if (cmd.input.Key === 'key-real') { + return Promise.resolve({ Body: Buffer.from('REAL') }); + } + return Promise.reject( + Object.assign(new Error('NoSuchKey'), { + name: 'NoSuchKey', + $metadata: { httpStatusCode: 404 }, + }), + ); + }, + ); + primeTaskQueries({ attachments }); + + const { archive } = await service.streamTaskEvidenceZip( + 'org_1', + 'tsk_123', + ); + const mock = archive as unknown as MockArchive; + await mock.finalized; + + const attachmentPaths = mock.appendCalls + .map((c) => c.options.name) + .filter((p) => p.includes('/01-attachments/')); + + // Two entries, each with a distinct final path inside the ZIP. + expect(attachmentPaths).toHaveLength(2); + const uniquePaths = new Set(attachmentPaths); + expect(uniquePaths.size).toBe(2); + + // The real file keeps its name; the placeholder gets a numeric suffix + // because the tracker now dedupes on the final ZIP entry name. + expect(attachmentPaths).toContain( + 'acme-corp_soc-2-access-review_evidence/01-attachments/_MISSING_foo.txt', + ); + expect(attachmentPaths).toContain( + 'acme-corp_soc-2-access-review_evidence/01-attachments/_MISSING_foo (1).txt', + ); + }); + it('disambiguates duplicate filenames within attachments folder', async () => { const attachments = [ {