webadderallorg · webadderall · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026 · Apr 17, 2026
diff --git a/electron/ipc/captions/generate.ts b/electron/ipc/captions/generate.ts
@@ -0,0 +1,248 @@
+import { constants as fsConstants } from "node:fs";
+import fs from "node:fs/promises";
+import path from "node:path";
+import { execFile, spawnSync } from "node:child_process";
+import { promisify } from "node:util";
+import { app } from "electron";
+import { getFfmpegBinaryPath } from "../ffmpeg/binary";
+import { getBundledWhisperExecutableCandidates } from "../paths/binaries";
+import { parseWhisperJsonCues, parseSrtCues, shouldRetryWhisperWithoutJson } from "./parser";
+import { normalizeVideoSourcePath } from "../utils";
+import { resolveRecordingSession } from "../project/session";
+
+const execFileAsync = promisify(execFile);
+
+export async function ensureReadableFile(filePath: string, options?: { executable?: boolean }) {
+	await fs.access(filePath, fsConstants.R_OK);
+	if (options?.executable) {
+		try {
+			await fs.access(filePath, fsConstants.X_OK);
+		} catch {
+			throw new Error("The selected Whisper executable is not marked as executable.");
+		}
+	}
+}
+
+export async function isExecutableFile(filePath: string) {
+	try {
+		await fs.access(filePath, fsConstants.R_OK | fsConstants.X_OK);
+		return true;
+	} catch {
+		return false;
+	}
+}
+
+export async function resolveWhisperExecutablePath(preferredPath?: string | null) {
+	const candidatePaths = [
+		preferredPath?.trim() || null,
+		...getBundledWhisperExecutableCandidates(),
+		process.env["WHISPER_CPP_PATH"]?.trim() || null,
+		process.platform === "darwin" ? "/opt/homebrew/bin/whisper-cli" : null,
+		process.platform === "darwin" ? "/usr/local/bin/whisper-cli" : null,
+		process.platform === "darwin" ? "/opt/homebrew/bin/whisper-cpp" : null,
+		process.platform === "darwin" ? "/usr/local/bin/whisper-cpp" : null,
+	].filter((value): value is string => Boolean(value));
+
+	for (const candidate of candidatePaths) {
+		const normalized = path.resolve(candidate);
+		if (await isExecutableFile(normalized)) {
+			return normalized;
+		}
+	}
+
+	const pathCommand = process.platform === "win32" ? "where" : "which";
+	const binaryNames =
+		process.platform === "win32"
+			? ["whisper-cli.exe", "whisper.exe", "main.exe"]
+			: ["whisper-cli", "whisper-cpp", "whisper", "main"];
+
+	for (const binaryName of binaryNames) {
+		const result = spawnSync(pathCommand, [binaryName], { encoding: "utf-8" });
+		if (result.status === 0) {
+			const resolvedPath = result.stdout
+				.split(/\r?\n/)
+				.map((line) => line.trim())
+				.find(Boolean);
+
+			if (resolvedPath && (await isExecutableFile(resolvedPath))) {
+				return resolvedPath;
+			}
+		}
+	}
+
+	throw new Error(
+		"No Whisper runtime was found. Recordly looked for a bundled binary first, then checked common system install locations.",
+	);
+}
+
+export async function resolveCaptionAudioCandidates(videoPath: string) {
+	const candidates: Array<{ path: string; label: string }> = [];
+	const seenPaths = new Set<string>();
+
+	const pushCandidate = (candidatePath: string | null | undefined, label: string) => {
+		const normalizedCandidatePath = normalizeVideoSourcePath(candidatePath);
+		if (!normalizedCandidatePath || seenPaths.has(normalizedCandidatePath)) {
+			return;
+		}
+
+		seenPaths.add(normalizedCandidatePath);
+		candidates.push({ path: normalizedCandidatePath, label });
+	};
+
+	pushCandidate(videoPath, "recording");
+
+	const requestedRecordingSession = await resolveRecordingSession(videoPath);
+	pushCandidate(requestedRecordingSession?.webcamPath, "linked webcam recording");
+
+	return candidates;
+}
+
+export async function extractCaptionAudioSource(options: {
+	videoPath: string;
+	ffmpegPath: string;
+	wavPath: string;
+}) {
+	const candidates = await resolveCaptionAudioCandidates(options.videoPath);
+	const attemptedCandidates: Array<{
+		path: string;
+		label: string;
+		readable: boolean;
+		extractedAudio: boolean;
+		error?: string;
+	}> = [];
+
+	for (const candidate of candidates) {
+		try {
+			await ensureReadableFile(candidate.path);
+			await execFileAsync(
+				options.ffmpegPath,
+				[
+					"-y",
+					"-i",
+					candidate.path,
+					"-map",
+					"0:a:0",
+					"-vn",
+					"-ac",
+					"1",
+					"-ar",
+					"16000",
+					"-c:a",
+					"pcm_s16le",
+					options.wavPath,
+				],
+				{ timeout: 5 * 60 * 1000, maxBuffer: 20 * 1024 * 1024 },
+			);
+			attemptedCandidates.push({ ...candidate, readable: true, extractedAudio: true });
+			return candidate;
+		} catch (error) {
+			attemptedCandidates.push({
+				...candidate,
+				readable: true,
+				extractedAudio: false,
+				error: error instanceof Error ? error.message : String(error),
+			});
+		}
+	}
+
+	console.warn(
+		"[auto-captions] No audio source candidate could be extracted:",
+		attemptedCandidates,
+	);
+
+	throw new Error(
+		"No audio was found to transcribe in the saved recording file. Captions need an audio track. If this recording should have contained sound, the recording was saved without an audio stream.",
+	);
+}
+
+export async function generateAutoCaptionsFromVideo(options: {
+	videoPath: string;
+	whisperExecutablePath?: string;
+	whisperModelPath: string;
+	language?: string;
+}) {
+	const ffmpegPath = getFfmpegBinaryPath();
+	const normalizedVideoPath = normalizeVideoSourcePath(options.videoPath);
+	if (!normalizedVideoPath) {
+		throw new Error("Missing source video path.");
+	}
+
+	const whisperExecutablePath = await resolveWhisperExecutablePath(options.whisperExecutablePath);
+	const whisperModelPath = path.resolve(options.whisperModelPath);
+	await ensureReadableFile(whisperExecutablePath, { executable: true });
+	await ensureReadableFile(whisperModelPath);
+
+	const tempBase = path.join(
+		app.getPath("temp"),
+		`recordly-captions-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`,
+	);
+	const wavPath = `${tempBase}.wav`;
+	const outputBase = `${tempBase}-whisper`;
+	const srtPath = `${outputBase}.srt`;
+	const jsonPath = `${outputBase}.json`;
+
+	try {
+		const audioSource = await extractCaptionAudioSource({
+			videoPath: normalizedVideoPath,
+			ffmpegPath,
+			wavPath,
+		});
+
+		const language =
+			options.language && options.language.trim() ? options.language.trim() : "auto";
+		const whisperBaseArgs = [
+			"-m",
+			whisperModelPath,
+			"-f",
+			wavPath,
+			"-osrt",
+			"-of",
+			outputBase,
+			"-l",
+			language,
+			"-np",
+		];
+
+		let jsonEnabled = true;
+		try {
+			await execFileAsync(whisperExecutablePath, [...whisperBaseArgs, "-ojf"], {
+				timeout: 30 * 60 * 1000,
+				maxBuffer: 20 * 1024 * 1024,
+			});
+		} catch (error) {
+			if (!shouldRetryWhisperWithoutJson(error)) {
+				throw error;
+			}
+
+			jsonEnabled = false;
+			console.warn(
+				"[auto-captions] Whisper runtime does not support JSON full output, retrying with SRT only:",
+				error,
+			);
+			await execFileAsync(whisperExecutablePath, whisperBaseArgs, {
+				timeout: 30 * 60 * 1000,
+				maxBuffer: 20 * 1024 * 1024,
+			});
+		}
+
+		const timedCues = jsonEnabled
+			? parseWhisperJsonCues(await fs.readFile(jsonPath, "utf-8"))
+			: [];
+		const cues =
+			timedCues.length > 0 ? timedCues : parseSrtCues(await fs.readFile(srtPath, "utf-8"));
+		if (cues.length === 0) {
+			throw new Error("Whisper completed, but no caption cues were produced.");
+		}
+
+		return {
+			cues,
+			audioSourceLabel: audioSource.label,
+		};
+	} finally {
+		await Promise.allSettled([
+			fs.rm(wavPath, { force: true }),
+			fs.rm(srtPath, { force: true }),
+			fs.rm(jsonPath, { force: true }),
+		]);
+	}
+}