From 28e561160315ba87ac19c1768c33ece80a1be366 Mon Sep 17 00:00:00 2001 From: an-lee Date: Mon, 19 Feb 2024 13:02:59 +0800 Subject: [PATCH 1/2] use ffmpeg static if valid --- enjoy/src/main.ts | 5 +- enjoy/src/main/ffmpeg.ts | 102 +++++++++++++++--- enjoy/src/main/whisper.ts | 4 +- enjoy/src/preload.ts | 3 + .../context/app-settings-provider.tsx | 21 +++- enjoy/src/renderer/hooks/use-transcribe.tsx | 28 +++-- enjoy/src/types/enjoy-app.d.ts | 5 + enjoy/src/types/index.d.ts | 10 -- 8 files changed, 137 insertions(+), 41 deletions(-) diff --git a/enjoy/src/main.ts b/enjoy/src/main.ts index 53ae17fdc..505070bb6 100644 --- a/enjoy/src/main.ts +++ b/enjoy/src/main.ts @@ -43,9 +43,12 @@ protocol.registerSchemesAsPrivileged([ app.on("ready", async () => { protocol.handle("enjoy", (request) => { let url = request.url.replace("enjoy://", ""); - if (url.startsWith("library")) { + if (url.match(/library\/(audios|videos|recordings)/g)) { url = url.replace("library/", ""); url = path.join(settings.userDataPath(), url); + } else if (url.startsWith("library")) { + url = url.replace("library/", ""); + url = path.join(settings.libraryPath(), url); } return net.fetch(`file:///${url}`); diff --git a/enjoy/src/main/ffmpeg.ts b/enjoy/src/main/ffmpeg.ts index 456cdd3a3..e78ef34ec 100644 --- a/enjoy/src/main/ffmpeg.ts +++ b/enjoy/src/main/ffmpeg.ts @@ -5,24 +5,18 @@ import Ffmpeg from "fluent-ffmpeg"; import log from "electron-log/main"; import path from "path"; import fs from "fs-extra"; +import settings from "./settings"; + +Ffmpeg.setFfmpegPath(ffmpegPath); +Ffmpeg.setFfprobePath(ffprobePath); const logger = log.scope("ffmpeg"); export default class FfmpegWrapper { - public ffmpeg: Ffmpeg.FfmpegCommand; - - constructor() { - const ff = Ffmpeg(); - logger.debug("Using ffmpeg path:", ffmpegPath); - logger.debug("Using ffprobe path:", ffprobePath); - ff.setFfmpegPath(ffmpegPath); - ff.setFfprobePath(ffprobePath); - this.ffmpeg = ff; - } - checkCommand(): Promise { + const ffmpeg = Ffmpeg(); const sampleFile = path.join(__dirname, "samples", "jfk.wav"); return new Promise((resolve, _reject) => { - this.ffmpeg.input(sampleFile).getAvailableFormats((err, _formats) => { + ffmpeg.input(sampleFile).getAvailableFormats((err, _formats) => { if (err) { logger.error("Command not valid:", err); resolve(false); @@ -35,8 +29,9 @@ export default class FfmpegWrapper { } generateMetadata(input: string): Promise { + const ffmpeg = Ffmpeg(); return new Promise((resolve, reject) => { - this.ffmpeg + ffmpeg .input(input) .on("start", (commandLine) => { logger.info("Spawned FFmpeg with command: " + commandLine); @@ -57,8 +52,9 @@ export default class FfmpegWrapper { } generateCover(input: string, output: string): Promise { + const ffmpeg = Ffmpeg(); return new Promise((resolve, reject) => { - this.ffmpeg + ffmpeg .input(input) .thumbnail({ count: 1, @@ -91,8 +87,9 @@ export default class FfmpegWrapper { fs.removeSync(output); } + const ffmpeg = Ffmpeg(); return new Promise((resolve, reject) => { - this.ffmpeg + ffmpeg .input(input) .outputOptions("-ar", `${sampleRate}`) .on("error", (err) => { @@ -112,8 +109,9 @@ export default class FfmpegWrapper { output: string, options: string[] = [] ): Promise { + const ffmpeg = Ffmpeg(); return new Promise((resolve, reject) => { - this.ffmpeg + ffmpeg .input(input) .outputOptions( "-ar", @@ -135,7 +133,7 @@ export default class FfmpegWrapper { } if (stderr) { - logger.error(stderr); + logger.info(stderr); } if (fs.existsSync(output)) { @@ -176,9 +174,79 @@ export default class FfmpegWrapper { return this.convertToWav(input, output); } + async transcode( + input: string, + output?: string, + options?: string[] + ): Promise { + if (input.match(/enjoy:\/\/library\/(audios|videos|recordings)/g)) { + input = path.join( + settings.userDataPath(), + input.replace("enjoy://library/", "") + ); + } else if (input.startsWith("enjoy://library/")) { + input = path.join( + settings.libraryPath(), + input.replace("enjoy://library/", "") + ); + } + + if (!output) { + output = path.join(settings.cachePath(), `${path.basename(input)}.wav`); + } + + if (output.startsWith("enjoy://library/")) { + output = path.join( + settings.libraryPath(), + output.replace("enjoy://library/", "") + ); + } + + options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"]; + + const ffmpeg = Ffmpeg(); + return new Promise((resolve, reject) => { + ffmpeg + .input(input) + .outputOptions(...options) + .on("start", (commandLine) => { + logger.debug(`Trying to convert ${input} to ${output}`); + logger.info("Spawned FFmpeg with command: " + commandLine); + fs.ensureDirSync(path.dirname(output)); + }) + .on("end", (stdout, stderr) => { + if (stdout) { + logger.debug(stdout); + } + + if (stderr) { + logger.info(stderr); + } + + if (fs.existsSync(output)) { + resolve(output); + } else { + reject(new Error("FFmpeg command failed")); + } + }) + .on("error", (err: Error) => { + logger.error(err); + reject(err); + }) + .save(output); + }); + } + registerIpcHandlers() { ipcMain.handle("ffmpeg-check-command", async (_event) => { return await this.checkCommand(); }); + + ipcMain.handle( + "ffmpeg-transcode", + async (_event, input, output, options) => { + return await this.transcode(input, output, options); + } + ); } } diff --git a/enjoy/src/main/whisper.ts b/enjoy/src/main/whisper.ts index 5281b9361..698c2f0fa 100644 --- a/enjoy/src/main/whisper.ts +++ b/enjoy/src/main/whisper.ts @@ -104,7 +104,7 @@ class Whipser { } if (stderr) { - logger.error("stderr", stderr); + logger.info("stderr", stderr); } if (stdout) { @@ -199,7 +199,7 @@ class Whipser { command.stderr.on("data", (data) => { const output = data.toString(); - logger.error(`stderr: ${output}`); + logger.info(`stderr: ${output}`); if (output.startsWith("whisper_print_progress_callback")) { const progress = parseInt(output.match(/\d+%/)?.[0] || "0"); if (typeof progress === "number") onProgress(progress); diff --git a/enjoy/src/preload.ts b/enjoy/src/preload.ts index 139e05403..71c8b2dbc 100644 --- a/enjoy/src/preload.ts +++ b/enjoy/src/preload.ts @@ -389,6 +389,9 @@ contextBridge.exposeInMainWorld("__ENJOY_APP__", { check: () => { return ipcRenderer.invoke("ffmpeg-check-command"); }, + transcode: (input: string, output: string, options: string[]) => { + return ipcRenderer.invoke("ffmpeg-transcode", input, output, options); + }, }, download: { onState: ( diff --git a/enjoy/src/renderer/context/app-settings-provider.tsx b/enjoy/src/renderer/context/app-settings-provider.tsx index 9d1a52b58..c4bc2bc39 100644 --- a/enjoy/src/renderer/context/app-settings-provider.tsx +++ b/enjoy/src/renderer/context/app-settings-provider.tsx @@ -16,7 +16,8 @@ type AppSettingsProviderState = { login?: (user: UserType) => void; logout?: () => void; setLibraryPath?: (path: string) => Promise; - ffmpeg?: FFmpeg; + ffmpegWasm?: FFmpeg; + ffmpegValid?: boolean; EnjoyApp?: EnjoyAppType; language?: "en" | "zh-CN"; switchLanguage?: (language: "en" | "zh-CN") => void; @@ -44,7 +45,8 @@ export const AppSettingsProvider = ({ const [webApi, setWebApi] = useState(null); const [user, setUser] = useState(null); const [libraryPath, setLibraryPath] = useState(""); - const [ffmpeg, setFfmpeg] = useState(null); + const [ffmpegWasm, setFfmpegWasm] = useState(null); + const [ffmpegValid, setFfmpegValid] = useState(false); const [language, setLanguage] = useState<"en" | "zh-CN">(); const [proxy, setProxy] = useState(); const EnjoyApp = window.__ENJOY_APP__; @@ -56,7 +58,7 @@ export const AppSettingsProvider = ({ fetchUser(); fetchLibraryPath(); fetchLanguage(); - loadFfmpegWASM(); + prepareFfmpeg(); fetchProxyConfig(); }, []); @@ -76,6 +78,14 @@ export const AppSettingsProvider = ({ ); }, [user, apiUrl, language]); + const prepareFfmpeg = async () => { + const valid = await EnjoyApp.ffmpeg.check(); + setFfmpegValid(valid); + if (!valid) { + loadFfmpegWASM(); + } + }; + const loadFfmpegWASM = async () => { const baseURL = "assets/libs"; ffmpegRef.current.on("log", ({ message }) => { @@ -101,7 +111,7 @@ export const AppSettingsProvider = ({ wasmURL, workerURL, }); - setFfmpeg(ffmpegRef.current); + setFfmpegWasm(ffmpegRef.current); } catch (err) { toast.error(err.message); } @@ -195,7 +205,8 @@ export const AppSettingsProvider = ({ logout, libraryPath, setLibraryPath: setLibraryPathHandler, - ffmpeg, + ffmpegValid, + ffmpegWasm, proxy, setProxy: setProxyConfigHandler, initialized, diff --git a/enjoy/src/renderer/hooks/use-transcribe.tsx b/enjoy/src/renderer/hooks/use-transcribe.tsx index be5396e78..4b35884e2 100644 --- a/enjoy/src/renderer/hooks/use-transcribe.tsx +++ b/enjoy/src/renderer/hooks/use-transcribe.tsx @@ -12,16 +12,32 @@ import * as sdk from "microsoft-cognitiveservices-speech-sdk"; import axios from "axios"; import take from "lodash/take"; import sortedUniqBy from "lodash/sortedUniqBy"; -import { groupTranscription, END_OF_WORD_REGEX, milisecondsToTimestamp } from "@/utils"; +import { + groupTranscription, + END_OF_WORD_REGEX, + milisecondsToTimestamp, +} from "@/utils"; export const useTranscribe = () => { - const { EnjoyApp, ffmpeg, user, webApi } = useContext( + const { EnjoyApp, ffmpegWasm, ffmpegValid, user, webApi } = useContext( AppSettingsProviderContext ); const { whisperConfig, openai } = useContext(AISettingsProviderContext); const transcode = async (src: string, options?: string[]) => { - if (!ffmpeg?.loaded) return; + if (ffmpegValid) { + const output = `enjoy://library/cache/${src.split("/").pop()}.wav`; + const res = await EnjoyApp.ffmpeg.transcode(src, output, options); + console.log(res); + const data = await fetchFile(output); + return new Blob([data], { type: "audio/wav" }); + } else { + return transcodeUsingWasm(src, options); + } + }; + + const transcodeUsingWasm = async (src: string, options?: string[]) => { + if (!ffmpegWasm?.loaded) return; options = options || ["-ar", "16000", "-ac", "1", "-c:a", "pcm_s16le"]; @@ -29,9 +45,9 @@ export const useTranscribe = () => { const uri = new URL(src); const input = uri.pathname.split("/").pop(); const output = input.replace(/\.[^/.]+$/, ".wav"); - await ffmpeg.writeFile(input, await fetchFile(src)); - await ffmpeg.exec(["-i", input, ...options, output]); - const data = await ffmpeg.readFile(output); + await ffmpegWasm.writeFile(input, await fetchFile(src)); + await ffmpegWasm.exec(["-i", input, ...options, output]); + const data = await ffmpegWasm.readFile(output); return new Blob([data], { type: "audio/wav" }); } catch (e) { toast.error(t("transcodeError")); diff --git a/enjoy/src/types/enjoy-app.d.ts b/enjoy/src/types/enjoy-app.d.ts index b0050dbe7..bceb874bd 100644 --- a/enjoy/src/types/enjoy-app.d.ts +++ b/enjoy/src/types/enjoy-app.d.ts @@ -228,6 +228,11 @@ type EnjoyAppType = { }; ffmpeg: { check: () => Promise; + transcode: ( + input: string, + output: string, + options?: string[] + ) => Promise; }; download: { onState: (callback: (event, state) => void) => void; diff --git a/enjoy/src/types/index.d.ts b/enjoy/src/types/index.d.ts index 6b9cdb264..2f007d08a 100644 --- a/enjoy/src/types/index.d.ts +++ b/enjoy/src/types/index.d.ts @@ -90,16 +90,6 @@ type TransactionStateType = { record?: AudioType | UserType | RecordingType; }; -type FfmpegConfigType = { - os: string; - arch: string; - commandExists: boolean; - ffmpegPath?: string; - ffprobePath?: string; - scanDirs: string[]; - ready: boolean; -}; - type LookupType = { id: string; word: string; From 0dc37d80752647060caa086ce0f3ac2c5352225c Mon Sep 17 00:00:00 2001 From: an-lee Date: Mon, 19 Feb 2024 14:05:04 +0800 Subject: [PATCH 2/2] transcribe after decoded --- enjoy/src/renderer/components/audios/audio-detail.tsx | 5 +++-- enjoy/src/renderer/components/videos/video-detail.tsx | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/enjoy/src/renderer/components/audios/audio-detail.tsx b/enjoy/src/renderer/components/audios/audio-detail.tsx index 2d5e144d6..11e3451b2 100644 --- a/enjoy/src/renderer/components/audios/audio-detail.tsx +++ b/enjoy/src/renderer/components/audios/audio-detail.tsx @@ -173,6 +173,7 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => { }, [audio]); useEffect(() => { + if (!initialized) return; if (!transcription) return; addDblistener(onTransactionUpdate); @@ -192,7 +193,7 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => { removeDbListener(onTransactionUpdate); EnjoyApp.whisper.removeProgressListeners(); }; - }, [md5, transcription]); + }, [md5, transcription, initialized]); if (!audio) { return ; @@ -324,7 +325,7 @@ export const AudioDetail = (props: { id?: string; md5?: string }) => { {!transcription ? (
- + {t("loadingTranscription")}
) : transcription.result ? ( diff --git a/enjoy/src/renderer/components/videos/video-detail.tsx b/enjoy/src/renderer/components/videos/video-detail.tsx index 72892fbb8..ee2b9ca24 100644 --- a/enjoy/src/renderer/components/videos/video-detail.tsx +++ b/enjoy/src/renderer/components/videos/video-detail.tsx @@ -179,6 +179,7 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => { }, [video]); useEffect(() => { + if (!initialized) return; if (!transcription) return; addDblistener(onTransactionUpdate); @@ -198,7 +199,7 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => { removeDbListener(onTransactionUpdate); EnjoyApp.whisper.removeProgressListeners(); }; - }, [md5, transcription]); + }, [md5, transcription, initialized]); if (!video) { return ; @@ -337,7 +338,7 @@ export const VideoDetail = (props: { id?: string; md5?: string }) => { {!transcription ? (
- + {t("loadingTranscription")}
) : transcription.result ? (