Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Transcription force alignment & more #416

Merged
merged 52 commits into from
Mar 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
af9e303
add wavesurfer-provider
an-lee Feb 27, 2024
d676c47
brand new layout for player
an-lee Mar 6, 2024
ebe24c8
refactor pitch contour
an-lee Mar 7, 2024
3157c9c
clean up
an-lee Mar 7, 2024
d1cf769
update style
an-lee Mar 7, 2024
de4f981
refactor
an-lee Mar 7, 2024
64a75d3
update layout
an-lee Mar 8, 2024
f36f581
use new layout for video
an-lee Mar 8, 2024
5fbb640
refactor
an-lee Mar 8, 2024
060f99a
may select word
an-lee Mar 8, 2024
7f1407f
may edit word timestamp
an-lee Mar 8, 2024
4c77283
may toggle multiselect words
an-lee Mar 8, 2024
8d6438e
clean code
an-lee Mar 8, 2024
5fdb476
improve word region update
an-lee Mar 8, 2024
984eb35
improve layout
an-lee Mar 9, 2024
a280d30
update layout
an-lee Mar 9, 2024
9da9d18
add echogarden
an-lee Mar 9, 2024
9a01910
fix test
an-lee Mar 9, 2024
5b51476
use aligned transcription
an-lee Mar 12, 2024
be3af76
fix ipa
an-lee Mar 12, 2024
80772ab
some refactor
an-lee Mar 13, 2024
a989b1b
improve code
an-lee Mar 13, 2024
0ce05f7
implement ipa & translate & lookup
an-lee Mar 13, 2024
dedff4d
recording play & share
an-lee Mar 13, 2024
93dbe82
fix
an-lee Mar 13, 2024
e54eb0f
fix post audio
an-lee Mar 13, 2024
a23e8d1
improve layout
an-lee Mar 13, 2024
f708f1b
may delete recording
an-lee Mar 13, 2024
bd99f68
may record
an-lee Mar 14, 2024
1a50f41
fix video player layout
an-lee Mar 14, 2024
a9499a4
fix player in conversation
an-lee Mar 14, 2024
93ad7e6
render recording along with original audio
an-lee Mar 14, 2024
0d2af62
may custom create region in recording
an-lee Mar 14, 2024
824719f
fix float issue when seekTo
an-lee Mar 14, 2024
fc4ae35
fix recording player
an-lee Mar 14, 2024
1316c5b
fix load more recordings
an-lee Mar 14, 2024
8b2327b
fix seekTo
an-lee Mar 14, 2024
e0e28f1
clean up
an-lee Mar 14, 2024
0492f1e
refactor pitch contour
an-lee Mar 14, 2024
04dd162
fix some warnings
an-lee Mar 14, 2024
fb6c8fe
upgrade deps
an-lee Mar 15, 2024
8232278
fix group transcription sentence
an-lee Mar 15, 2024
3b0bccb
zoom to fit when segment update
an-lee Mar 15, 2024
528d3ae
add more hotkeys
an-lee Mar 15, 2024
c976221
update player layout
an-lee Mar 15, 2024
7047b91
improve style
an-lee Mar 16, 2024
a4bfb6f
play recording overlap audio when comparing
an-lee Mar 16, 2024
91cbc52
update echogarden dep
an-lee Mar 16, 2024
e0df6b9
add recorded mark on transcription
an-lee Mar 16, 2024
8ed138b
fix recording pitch contour rendering
an-lee Mar 16, 2024
1206b3c
improve recording
an-lee Mar 16, 2024
c970072
adjust pitch finder params
an-lee Mar 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions 1000-hours/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
"markdown-it-mathjax3": "^4.3.2",
"markdown-it-sub": "^2.0.0",
"markdown-it-sup": "^2.0.0",
"mermaid": "^10.8.0",
"sass": "^1.71.1",
"vitepress": "^1.0.0-rc.42",
"mermaid": "^10.9.0",
"sass": "^1.72.0",
"vitepress": "^1.0.0-rc.45",
"vitepress-plugin-mermaid": "^2.0.16",
"vue": "^3.4.21"
},
Expand Down
10 changes: 10 additions & 0 deletions enjoy/e2e/main.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,16 @@ test("valid ffmpeg command", async () => {
expect(res).toBeTruthy();
});

test("validate echogarden align command", async () => {
const res = await page.evaluate(() => {
return window.__ENJOY_APP__.echogarden.check();
});
expect(res).toBeTruthy();

const settings = fs.readJsonSync(path.join(resultDir, "settings.json"));
expect(settings.whisper.service).toBe("local");
});

test("should setup default library path", async () => {
const settings = fs.readJsonSync(path.join(resultDir, "settings.json"));
expect(settings.library).not.toBeNull();
Expand Down
66 changes: 34 additions & 32 deletions enjoy/e2e/renderer.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,39 @@ test.describe("with login", async () => {
},
});
});
});

/*
* steps:
* 1. create a tts conversation
* 2. submit a message to the conversation
* 3. the speech should auto create
*/
test("tts conversation", async () => {
// navigate to the conversations page
await page.getByTestId("sidebar-conversations").click();

// trigger new conversation modal
await page.getByTestId("conversation-new-button").click();

// create a tts conversation
await page.click("[data-testid=conversation-preset-tts]");
await page.getByTestId("conversation-form").waitFor();
await page.click("[data-testid=conversation-form-submit]");

// wait for the conversation to be created
await page.getByTestId("conversation-page").waitFor();

// submit a message to the conversation
await page.getByTestId("conversation-page-input").fill("How are you?");
await page.getByTestId("conversation-page-submit").click();
await page.locator(".ai-message").waitFor();
const player = page
.locator(".ai-message")
.getByTestId("wavesurfer-container");
await player.waitFor();

expect(await player.isVisible()).toBeTruthy();
});

/*
Expand All @@ -136,6 +166,9 @@ test.describe("with login", async () => {
* 5. audio waveform player should be visible and transcription should be generated
*/
test("gpt conversation", async () => {
// navigate to the conversations page
await page.getByTestId("sidebar-conversations").click();

// trigger new conversation modal
await page.getByTestId("conversation-new-button").click();

Expand Down Expand Up @@ -166,43 +199,12 @@ test.describe("with login", async () => {

// add to library
await page.getByTestId("message-start-shadow").click();
await page.getByTestId("audio-detail").waitFor();
await page.getByTestId("audio-player").waitFor();
await page.getByTestId("media-player-container").waitFor();
await page.getByTestId("media-transcription").waitFor();
await page.getByTestId("media-transcription-result").waitFor();
expect(
await page.getByTestId("media-transcription-result").isVisible()
).toBeTruthy();
});

/*
* steps:
* 1. create a tts conversation
* 2. submit a message to the conversation
* 3. the speech should auto create
*/
test("tts conversation", async () => {
// trigger new conversation modal
await page.getByTestId("conversation-new-button").click();

// create a tts conversation
await page.click("[data-testid=conversation-preset-tts]");
await page.getByTestId("conversation-form").waitFor();
await page.click("[data-testid=conversation-form-submit]");

// wait for the conversation to be created
await page.getByTestId("conversation-page").waitFor();

// submit a message to the conversation
await page.getByTestId("conversation-page-input").fill("How are you?");
await page.getByTestId("conversation-page-submit").click();
await page.locator(".ai-message").waitFor();
const player = page
.locator(".ai-message")
.getByTestId("wavesurfer-container");
await player.waitFor();

expect(await player.isVisible()).toBeTruthy();
});
});
});
2 changes: 1 addition & 1 deletion enjoy/forge.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ const config = {
asar: {
// Binary files won't work in asar, so we need to unpack them
unpackDir:
"{.vite/build/lib,.vite/build/samples,node_modules/ffmpeg-static,node_modules/@andrkrn/ffprobe-static}",
"{.vite/build/lib,.vite/build/samples,node_modules/ffmpeg-static,node_modules/@andrkrn/ffprobe-static,node_modules/onnxruntime-node/bin}",
},
icon: "./assets/icon",
name: "Enjoy",
Expand Down
47 changes: 25 additions & 22 deletions enjoy/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -47,18 +47,18 @@
"@types/fluent-ffmpeg": "^2.1.24",
"@types/html-to-text": "^9.0.4",
"@types/intl-tel-input": "^18.1.4",
"@types/lodash": "^4.14.202",
"@types/lodash": "^4.17.0",
"@types/mark.js": "^8.11.12",
"@types/node": "^20.11.24",
"@types/react": "^18.2.62",
"@types/react-dom": "^18.2.19",
"@types/node": "^20.11.27",
"@types/react": "^18.2.66",
"@types/react-dom": "^18.2.22",
"@types/validator": "^13.11.9",
"@types/wavesurfer.js": "^6.0.12",
"@typescript-eslint/eslint-plugin": "^7.1.1",
"@typescript-eslint/parser": "^7.1.1",
"@typescript-eslint/eslint-plugin": "^7.2.0",
"@typescript-eslint/parser": "^7.2.0",
"@vitejs/plugin-react": "^4.2.1",
"autoprefixer": "^10.4.18",
"electron": "^29.1.0",
"electron": "^29.1.4",
"electron-playwright-helpers": "^1.7.1",
"eslint": "^8.57.0",
"eslint-import-resolver-typescript": "^3.6.1",
Expand All @@ -67,12 +67,13 @@
"octokit": "^3.1.2",
"progress": "^2.0.3",
"tailwind-merge": "^2.2.1",
"tailwind-scrollbar": "^3.1.0",
"tailwindcss": "^3.4.1",
"tailwindcss-animate": "^1.0.7",
"ts-node": "^10.9.2",
"tslib": "^2.6.2",
"typescript": "^5.3.3",
"vite": "^5.1.5",
"typescript": "^5.4.2",
"vite": "^5.1.6",
"vite-plugin-static-copy": "^1.0.1",
"zx": "^7.2.3"
},
Expand All @@ -81,7 +82,7 @@
"@ffmpeg/ffmpeg": "^0.12.10",
"@ffmpeg/util": "^0.12.1",
"@hookform/resolvers": "^3.3.4",
"@langchain/community": "^0.0.34",
"@langchain/community": "^0.0.39",
"@langchain/google-genai": "^0.0.10",
"@mozilla/readability": "^0.5.0",
"@radix-ui/react-accordion": "^1.1.2",
Expand Down Expand Up @@ -112,6 +113,7 @@
"axios": "^1.6.7",
"camelcase": "^8.0.0",
"camelcase-keys": "^9.1.3",
"chart.js": "^4.4.2",
"cheerio": "^1.0.0-rc.12",
"class-variance-authority": "^0.7.0",
"clsx": "^2.1.0",
Expand All @@ -122,35 +124,36 @@
"dayjs": "^1.11.10",
"decamelize": "^6.0.0",
"decamelize-keys": "^2.0.1",
"electron-log": "^5.1.1",
"echogarden": "https://github.com/an-lee/echogarden",
"electron-log": "^5.1.2",
"electron-settings": "^4.0.2",
"electron-squirrel-startup": "^1.0.0",
"ffmpeg-static": "^5.2.0",
"fluent-ffmpeg": "^2.1.2",
"fs-extra": "^11.2.0",
"html-to-text": "^9.0.5",
"https-proxy-agent": "^7.0.4",
"i18next": "^23.10.0",
"intl-tel-input": "^19.5.5",
"i18next": "^23.10.1",
"intl-tel-input": "^19.5.7",
"js-md5": "^0.8.3",
"langchain": "^0.1.25",
"langchain": "^0.1.28",
"lodash": "^4.17.21",
"lucide-react": "^0.344.0",
"lucide-react": "^0.358.0",
"mark.js": "^8.11.1",
"microsoft-cognitiveservices-speech-sdk": "^1.35.0",
"next-themes": "^0.2.1",
"openai": "^4.28.4",
"microsoft-cognitiveservices-speech-sdk": "^1.36.0",
"next-themes": "^0.3.0",
"openai": "^4.29.0",
"pitchfinder": "^2.3.2",
"postcss": "^8.4.35",
"proxy-agent": "^6.4.0",
"react": "^18.2.0",
"react-activity-calendar": "^2.2.7",
"react-activity-calendar": "^2.2.8",
"react-dom": "^18.2.0",
"react-hook-form": "^7.51.0",
"react-hotkeys-hook": "^4.5.0",
"react-i18next": "^14.0.5",
"react-i18next": "^14.1.0",
"react-markdown": "^9.0.1",
"react-router-dom": "^6.22.2",
"react-router-dom": "^6.22.3",
"react-tooltip": "^5.26.3",
"reflect-metadata": "^0.2.1",
"rimraf": "^5.0.5",
Expand All @@ -160,7 +163,7 @@
"sqlite3": "^5.1.7",
"tailwind-scrollbar-hide": "^1.1.7",
"umzug": "^3.7.0",
"wavesurfer.js": "^7.7.3",
"wavesurfer.js": "^7.7.5",
"zod": "^3.22.4"
}
}
2 changes: 1 addition & 1 deletion enjoy/playwright.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ export default defineConfig({
/* Retry on CI only */
retries: process.env.CI ? 2 : 0,
/* Opt out of parallel tests on CI. */
workers: process.env.CI ? 1 : undefined,
workers: 1,
/* Reporter to use. See https://playwright.dev/docs/test-reporters */
reporter: "html",
/* Shared settings for all the projects below. See https://playwright.dev/docs/api/class-testoptions. */
Expand Down
Loading
Loading