Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix download script #317

Merged
merged 3 commits into from
Feb 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
32 changes: 23 additions & 9 deletions enjoy/scripts/download-ffmpeg-wasm.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,7 @@ await Promise.all(
console.info(chalk.green(`✅ File ${file.name} valid`));
} else {
console.warn(
chalk.yellow(
`❌ File ${file.name} not valid, start to redownload`
)
chalk.yellow(`❌ File ${file.name} not valid, start to redownload`)
);
fs.removeSync(path.join(dir, file.name));
pendingFiles.push(file);
Expand Down Expand Up @@ -81,6 +79,8 @@ if (proxyUrl) {
}

const download = async (url, dest, md5) => {
console.info(chalk.blue(`=> Start to download ${url} to ${dest}`));

return spinner(async () => {
console.info(chalk.blue(`=> Start to download file ${url}`));
await axios
Expand All @@ -89,22 +89,27 @@ const download = async (url, dest, md5) => {
})
.then(async (response) => {
const data = Buffer.from(response.data, "binary");
console.info(chalk.green(`✅ ${dest} downloaded successfully`));

fs.writeFileSync(dest, data);
const hash = await hashFile(dest, { algo: "md5" });
if (hash === md5) {
console.info(chalk.green(`✅ ${dest} downloaded successfully`));
console.info(chalk.green(`✅ ${dest} valid`));
} else {
console.error(
chalk.red(
`❌ Error: ${dest} MD5 not match, ${hash} should be ${md5}`
`❌ Error: ${dest} not valid. \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
process.exit(1);
}
})
.catch((err) => {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
process.exit(1);
});
});
Expand All @@ -126,20 +131,29 @@ const cleanup = () => {
try {
fs.removeSync(path.join(dir, file.name));
} catch (err) {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
}
});
};

const baseURL = "https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/esm";
// const baseURL = "https://unpkg.com/@ffmpeg/core-mt@0.12.6/dist/esm";
const baseURL = "https://enjoy-storage.baizhiheizi.com";
try {
await Promise.all(
pendingFiles.map((file) =>
download(`${baseURL}/${file.name}`, path.join(dir, file.name), file.md5)
)
);
} catch (err) {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download(${err}). \nPlease try again using the command "yarn workspace enjoy download-ffmpeg-wasm"`
)
);
cleanup();
process.exit(1);
}
Expand Down
37 changes: 27 additions & 10 deletions enjoy/scripts/download-whisper-model.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ import axios from "axios";
import progress from "progress";
import { createHash } from "crypto";

const model = "ggml-base.en-q5_1.bin";
const md5 = "55309cc6613788f07ac7988985210734";
const model = "ggml-tiny.en.bin";
const sha = "c78c86eb1a8faa21b369bcd33207cc90d64ae9df";

const dir = path.join(process.cwd(), "lib/whisper.cpp/models");

Expand All @@ -15,8 +15,8 @@ fs.ensureDirSync(dir);
try {
if (fs.statSync(path.join(dir, model)).isFile()) {
console.info(chalk.green(`✅ Model ${model} already exists`));
const hash = await hashFile(path.join(dir, model), { algo: "md5" });
if (hash === md5) {
const hash = await hashFile(path.join(dir, model), { algo: "sha1" });
if (hash === sha) {
console.info(chalk.green(`✅ Model ${model} valid`));
process.exit(0);
} else {
Expand Down Expand Up @@ -50,11 +50,12 @@ if (proxyUrl) {
};
}

const modelUrlPrefix =
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
// const modelUrlPrefix =
// "https://huggingface.co/ggerganov/whisper.cpp/resolve/main";
const modelUrlPrefix = "https://enjoy-storage.baizhiheizi.com";

function hashFile(path, options) {
const algo = options.algo || "md5";
const algo = options.algo || "sha1";
return new Promise((resolve, reject) => {
const hash = createHash(algo);
const stream = fs.createReadStream(path);
Expand All @@ -65,6 +66,7 @@ function hashFile(path, options) {
}

const download = async (url, dest) => {
console.info(chalk.blue(`=> Start to download from ${url} to ${dest}`));
return axios
.get(url, { responseType: "stream" })
.then((response) => {
Expand All @@ -82,13 +84,28 @@ const download = async (url, dest) => {
progressBar.tick(chunk.length);
});

response.data.pipe(fs.createWriteStream(dest)).on("close", () => {
response.data.pipe(fs.createWriteStream(dest)).on("close", async () => {
console.info(chalk.green(`✅ Model ${model} downloaded successfully`));
process.exit(0);
const hash = await hashFile(path.join(dir, model), { algo: "sha1" });
if (hash === sha) {
console.info(chalk.green(`✅ Model ${model} valid`));
process.exit(0);
} else {
console.error(
chalk.red(
`❌ Model ${model} not valid, please try again using command \`yarn workspace enjoy download-whisper-model\``
)
);
process.exit(1);
}
});
})
.catch((err) => {
console.error(chalk.red(`❌ Error: ${err}`));
console.error(
chalk.red(
`❌ Failed to download ${url}: ${err}.\nPlease try again using command \`yarn workspace enjoy download-whisper-model\``
)
);
process.exit(1);
});
};
Expand Down
15 changes: 10 additions & 5 deletions enjoy/src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,36 @@ export const WHISPER_MODELS_OPTIONS = [
{
type: "tiny",
name: "ggml-tiny.en.bin",
size: "77.7 MB",
size: "75 MB",
sha: "c78c86eb1a8faa21b369bcd33207cc90d64ae9df",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-tiny.en.bin",
},
{
type: "base",
name: "ggml-base.en.bin",
size: "148 MB",
size: "142 MB",
sha: "137c40403d78fd54d454da0f9bd998f78703390c",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-base.en.bin",
},
{
type: "small",
name: "ggml-small.en.bin",
size: "488 MB",
size: "466 MB",
sha: "db8a495a91d927739e50b3fc1cc4c6b8f6c2d022",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-small.en.bin",
},
{
type: "medium",
name: "ggml-medium.en.bin",
size: "1.53 GB",
size: "1.5 GB",
sha: "8c30f0e44ce9560643ebd10bbe50cd20eafd3723",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-medium.en.bin",
},
{
type: "large",
name: "ggml-large-v3.bin",
size: "3.09 GB",
size: "2.9 GB",
sha: "ad82bf6a9043ceed055076d0fd39f5f186ff8062",
url: "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/ggml-large-v3.bin",
},
];
Expand Down
39 changes: 21 additions & 18 deletions enjoy/src/main/whisper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ const logger = log.scope("whisper");

class Whipser {
private binMain: string;
private defaultModel: string;
private bundledModelsDir: string;
public config: WhisperConfigType;

constructor(config?: WhisperConfigType) {
Expand All @@ -20,13 +20,7 @@ class Whipser {
"whisper",
"main"
);
this.defaultModel = path.join(
__dirname,
"lib",
"whisper",
"models",
"ggml-base.en-q5_1.bin"
);
this.bundledModelsDir = path.join(__dirname, "lib", "whisper", "models");
if (fs.existsSync(customWhisperPath)) {
this.binMain = customWhisperPath;
} else {
Expand All @@ -36,23 +30,32 @@ class Whipser {

currentModel() {
if (!this.config.availableModels) return;
if (!this.config.model) {
const model = this.config.availableModels[0];
settings.setSync("whisper.model", this.config.availableModels[0].name);
return model.savePath;

let model: WhisperConfigType["availableModels"][0];
if (this.config.model) {
model = (this.config.availableModels || []).find(
(m) => m.name === this.config.model
);
}
if (!model) {
model = this.config.availableModels[0];
}

return (this.config.availableModels || []).find(
(m) => m.name === this.config.model
)?.savePath;
settings.setSync("whisper.model", model.name);
return model.savePath;
}

async initialize() {
const bundleModels = fs.readdirSync(this.bundledModelsDir);

const dir = path.join(settings.libraryPath(), "whisper", "models");
fs.ensureDirSync(dir);
const files = fs.readdirSync(dir);

const availableModelFiles = bundleModels.concat(files);

const models = [];
for (const file of files) {
for (const file of availableModelFiles) {
const model = WHISPER_MODELS_OPTIONS.find((m) => m.name == file);
if (!model) continue;

Expand Down Expand Up @@ -102,7 +105,7 @@ class Whipser {
async check() {
await this.initialize();

const model = this.currentModel() || this.defaultModel;
const model = this.currentModel();

const sampleFile = path.join(__dirname, "samples", "jfk.wav");
const tmpDir = settings.cachePath();
Expand Down Expand Up @@ -169,7 +172,7 @@ class Whipser {
throw new Error("No file or blob provided");
}

const model = this.currentModel() || this.defaultModel;
const model = this.currentModel();

if (blob) {
const format = blob.type.split("/")[1];
Expand Down