diff --git a/src/cli/output/TableRenderer.test.ts b/src/cli/output/TableRenderer.test.ts index 6da3d42..a35b4f1 100644 --- a/src/cli/output/TableRenderer.test.ts +++ b/src/cli/output/TableRenderer.test.ts @@ -1,7 +1,9 @@ import { describe, expect, it } from "bun:test"; -import { renderTable } from "./TableRenderer"; +import { renderTable, __testing } from "./TableRenderer"; import type { ColumnDef, RenderOptions } from "./TableRenderer"; +const { escapeCsvValue } = __testing; + const columns: ReadonlyArray = [ { key: "id", header: "ID", width: 6 }, { key: "name", header: "Name", width: 10 }, @@ -154,3 +156,79 @@ describe("renderTable", () => { }); }); }); + +describe("escapeCsvValue", () => { + // Per OWASP CSV Injection (https://owasp.org/www-community/attacks/CSV_Injection), + // cells beginning with =/+/-/@ (or TAB/CR after whitespace stripping) must be + // neutralized with a leading single-quote before being handed to a spreadsheet. + + it("prefixes a single-quote when value starts with '='", () => { + expect(escapeCsvValue("=cmd")).toBe("'=cmd"); + }); + + it("prefixes a single-quote when value starts with '+'", () => { + expect(escapeCsvValue("+cmd")).toBe("'+cmd"); + }); + + it("prefixes a single-quote when value starts with '-'", () => { + expect(escapeCsvValue("-cmd")).toBe("'-cmd"); + }); + + it("prefixes a single-quote when value starts with '@'", () => { + expect(escapeCsvValue("@cmd")).toBe("'@cmd"); + }); + + it("prefixes a single-quote when value starts with TAB", () => { + expect(escapeCsvValue("\tcmd")).toBe("'\tcmd"); + }); + + it("prefixes a single-quote when value starts with CR", () => { + // CR alone forces RFC 4180 quoting too because the cell contains a CR. + expect(escapeCsvValue("\rcmd")).toBe('"\'\rcmd"'); + }); + + it("defuses leading ASCII space before '=' (spreadsheets strip leading WS)", () => { + expect(escapeCsvValue(" =cmd")).toBe("' =cmd"); + }); + + it("defuses leading U+00A0 NBSP before '=' (spreadsheets strip NBSP too)", () => { + expect(escapeCsvValue(" =cmd")).toBe("' =cmd"); + }); + + it("leaves plain alphabetic values unchanged", () => { + expect(escapeCsvValue("abc")).toBe("abc"); + }); + + it("leaves plain numeric values unchanged", () => { + expect(escapeCsvValue("123")).toBe("123"); + }); + + it("defuses a dangerous line after LF inside a multi-line cell", () => { + // Excel/Sheets re-parse every physical line of a quoted multi-line cell, + // so the second line must also be defused. + expect(escapeCsvValue("safe\n=cmd")).toBe('"safe\n\'=cmd"'); + }); + + it("defuses a dangerous line after CRLF inside a multi-line cell", () => { + expect(escapeCsvValue("safe\r\n=cmd")).toBe('"safe\r\n\'=cmd"'); + }); + + it("defuses every dangerous follow-up line, leaving safe interleaved lines alone", () => { + const input = "safe\n=danger1\nstillsafe\n+danger2\n@danger3"; + const expected = + '"safe\n\'=danger1\nstillsafe\n\'+danger2\n\'@danger3"'; + expect(escapeCsvValue(input)).toBe(expected); + }); + + it("wraps cells that contain a comma in double quotes", () => { + expect(escapeCsvValue("a,b")).toBe('"a,b"'); + }); + + it("doubles embedded double-quotes inside a wrapped cell", () => { + expect(escapeCsvValue('he said "hi"')).toBe('"he said ""hi"""'); + }); + + it("returns an empty string unchanged", () => { + expect(escapeCsvValue("")).toBe(""); + }); +}); diff --git a/src/cli/output/TableRenderer.ts b/src/cli/output/TableRenderer.ts index e92426d..d5f3218 100644 --- a/src/cli/output/TableRenderer.ts +++ b/src/cli/output/TableRenderer.ts @@ -34,21 +34,60 @@ function pad(value: string, width: number): string { return truncate(value, width).padEnd(width); } +// Characters that trigger formula interpretation when found at the start of a +// spreadsheet cell. See OWASP CSV Injection +// (https://owasp.org/www-community/attacks/CSV_Injection). +const DANGEROUS_LEAD = /^[=+\-@\t\r]/; +// ASCII whitespace plus U+00A0 NBSP — Excel/Sheets/Numbers strip leading +// whitespace (including NBSP) before parsing a cell as a formula, so we must +// strip it too before checking for the dangerous leading char. +const LEADING_WS = /^[\s ]+/; + +function needsFormulaPrefix(line: string): boolean { + return DANGEROUS_LEAD.test(line.replace(LEADING_WS, "")); +} + +function defuseLine(line: string): string { + return needsFormulaPrefix(line) ? "'" + line : line; +} + +/** + * Defuse CSV/spreadsheet formula injection per OWASP CSV Injection guidance. + * + * When Excel/Sheets/Numbers open a CSV, a cell starting with =/+/-/@ (or TAB/CR + * which some loaders strip) is interpreted as a formula and can exfiltrate data + * via WEBSERVICE/HYPERLINK or run external commands. Prefixing a single quote + * neutralizes the formula — spreadsheets render the apostrophe as a literal and + * hide the prefix; plain CSV consumers see one extra leading apostrophe. + * + * The naive `^[=+\-@\t\r]` check has three bypasses we handle here: + * 1. Leading ASCII whitespace (" =cmd...") — spreadsheets strip it. + * 2. Leading U+00A0 NBSP (" =cmd...") — also stripped. + * 3. Dangerous char after an embedded newline in a quoted multi-line cell + * ("safe\n=cmd") — each physical line is re-parsed. + * + * Each line of the value is defused independently; the result is then RFC 4180 + * quoted if it contains a comma, quote, CR, or LF. + */ function escapeCsvValue(value: string): string { - // Defuse CSV/spreadsheet formula injection. When Excel/Sheets/Numbers - // open a CSV, a cell starting with =/+/-/@ (or with leading TAB/CR - // that some loaders strip) is interpreted as a formula, which can - // exfiltrate data via WEBSERVICE/HYPERLINK or run external commands. - // Prefix a single quote — spreadsheets render it as a literal and hide - // the prefix; plain CSV consumers see the original text with one - // leading apostrophe, which is a small price for not shipping a known - // attack vector. - const dangerous = value.length > 0 && /^[=+\-@\t\r]/.test(value); - let escaped = dangerous ? "'" + value : value; - if (escaped.includes(",") || escaped.includes('"') || escaped.includes("\n")) { - return '"' + escaped.replace(/"/g, '""') + '"'; + if (value.length === 0) { + return value; } - return escaped; + // Capturing-group split preserves the separators at odd indices so we can + // round-trip the exact line endings (LF vs CRLF) the caller used. + const parts = value.split(/(\r?\n)/); + const defused = parts + .map((part, i) => (i % 2 === 0 ? defuseLine(part) : part)) + .join(""); + if ( + defused.includes(",") || + defused.includes('"') || + defused.includes("\n") || + defused.includes("\r") + ) { + return '"' + defused.replace(/"/g, '""') + '"'; + } + return defused; } function cellValue(row: Record, key: string): string { @@ -128,3 +167,6 @@ export function renderTable( return renderTextTable(rows, columns, options); } } + +// Exported for unit tests; not part of the module's public API. +export const __testing = { escapeCsvValue };