Skip to content

Commit

Permalink
feat: update pretrained model url, raise error if there are no files to preprocess, shuffle files consistently (#715)
Browse files Browse the repository at this point in the history

* feat: update urls for pretrained models
* fix(preprocess_resample): raise if no files found
* fix(preprocess_flist_config): consistent random shuffling

BREAKING CHANGE: default preset changed
  • Loading branch information
34j committed May 29, 2023
1 parent 975bc12 commit c4c719c
Show file tree
Hide file tree
Showing 9 changed files with 61 additions and 14 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
<img src="https://img.shields.io/pypi/l/so-vits-svc-fork.svg?style=flat-square" alt="License">
</p>

A fork of [`so-vits-svc`](https://github.com/svc-develop-team/so-vits-svc) with **realtime support** and **greatly improved interface**. Based on branch `4.0` (v1) and the models are compatible.
A fork of [`so-vits-svc`](https://github.com/svc-develop-team/so-vits-svc) with **realtime support** and **greatly improved interface**. Based on branch `4.0` (v1) (or `4.1`) and the models are compatible.

## Features not available in the original repo

Expand Down
2 changes: 1 addition & 1 deletion src/so_vits_svc_fork/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,7 +546,7 @@ def pre_resample(
"-t",
"--config-type",
type=click.Choice([x.stem for x in CONFIG_TEMPLATE_DIR.rglob("*.json")]),
default="so-vits-svc-4.0v1-legacy",
default="so-vits-svc-4.0v1",
help="config type",
)
def pre_config(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,11 @@
"type_": "ms-istft",
"gen_istft_n_fft": 16,
"gen_istft_hop_size": 4,
"subbands": 4
"subbands": 4,
"pretrained": {
"D_0.pth": "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_D_320000.pth",
"G_0.pth": "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_G_320000.pth"
}
},
"spk": {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@
"use_spectral_norm": false,
"gin_channels": 256,
"ssl_dim": 256,
"n_speakers": 200
"n_speakers": 200,
"pretrained": {
"D_0.pth": "https://huggingface.co/therealvul/so-vits-svc-4.0-init/resolve/main/D_0.pth",
"G_0.pth": "https://huggingface.co/therealvul/so-vits-svc-4.0-init/resolve/main/G_0.pth"
}
},
"spk": {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@
"gin_channels": 256,
"ssl_dim": 768,
"n_speakers": 200,
"type_": "hifi-gan"
"type_": "hifi-gan",
"pretrained": {
"D_0.pth": "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_D_320000.pth",
"G_0.pth": "https://huggingface.co/datasets/ms903/sovits4.0-768vec-layer12/resolve/main/sovits_768l12_pre_large_320k/clean_G_320000.pth"
}
},
"spk": {}
}
5 changes: 3 additions & 2 deletions src/so_vits_svc_fork/preprocessing/preprocess_flist_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from copy import deepcopy
from logging import getLogger
from pathlib import Path
from random import shuffle

import numpy as np
from librosa import get_duration
from tqdm import tqdm

Expand All @@ -32,6 +32,7 @@ def preprocess_config(
test = []
spk_dict = {}
spk_id = 0
random = np.random.RandomState(1234)
for speaker in os.listdir(input_dir):
spk_dict[speaker] = spk_id
spk_id += 1
Expand All @@ -41,7 +42,7 @@ def preprocess_config(
LOG.warning(f"skip {path} because it is too short.")
continue
paths.append(path)
shuffle(paths)
random.shuffle(paths)
if len(paths) <= 4:
raise ValueError(
f"too few files in {input_dir / speaker} (expected at least 5)."
Expand Down
7 changes: 4 additions & 3 deletions src/so_vits_svc_fork/preprocessing/preprocess_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,11 @@ def preprocess_resample(
output_dir = Path(output_dir)
"""Preprocess audio files in input_dir and save them to output_dir."""

in_paths = []
out_paths = []
for in_path in input_dir.rglob("*.*"):
in_paths = list(input_dir.rglob("*.*"))
if not in_paths:
raise ValueError(f"No audio files found in {input_dir}")
for in_path in in_paths:
in_path_relative = in_path.relative_to(input_dir)
if not in_path.is_absolute() and is_relative_to(
in_path, Path("dataset_raw") / "44k"
Expand All @@ -125,7 +127,6 @@ def preprocess_resample(
out_path = output_dir / speaker_name / file_name
out_path = _get_unique_filename(out_path, out_paths)
out_path.parent.mkdir(parents=True, exist_ok=True)
in_paths.append(in_path)
out_paths.append(out_path)

in_and_out_paths = list(zip(in_paths, out_paths))
Expand Down
15 changes: 13 additions & 2 deletions src/so_vits_svc_fork/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import lightning.pytorch as pl
import torch
from lightning.pytorch.accelerators import MPSAccelerator, TPUAccelerator
from lightning.pytorch.callbacks import DeviceStatsMonitor
from lightning.pytorch.loggers import TensorBoardLogger
from lightning.pytorch.strategies.ddp import DDPStrategy
from lightning.pytorch.tuner import Tuner
Expand Down Expand Up @@ -73,7 +74,16 @@ def train(
model_path = Path(model_path)

hparams = utils.get_backup_hparams(config_path, model_path)
utils.ensure_pretrained_model(model_path, hparams.model.get("type_", "hifi-gan"))
utils.ensure_pretrained_model(
model_path,
hparams.model.get(
"pretrained",
{
"D_0.pth": "https://huggingface.co/therealvul/so-vits-svc-4.0-init/resolve/main/D_0.pth",
"G_0.pth": "https://huggingface.co/therealvul/so-vits-svc-4.0-init/resolve/main/G_0.pth",
},
),
)

datamodule = VCDataModule(hparams)
strategy = (
Expand All @@ -100,7 +110,8 @@ def train(
if hparams.train.get("bf16_run", False)
else 32,
strategy=strategy,
callbacks=[pl.callbacks.RichProgressBar()] if not is_notebook() else None,
callbacks=([pl.callbacks.RichProgressBar()] if not is_notebook() else [])
+ [DeviceStatsMonitor()],
benchmark=True,
enable_checkpointing=False,
)
Expand Down
26 changes: 24 additions & 2 deletions src/so_vits_svc_fork/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,31 @@ def download_file(


def ensure_pretrained_model(
folder_path: Path | str, type_: str, **tqdm_kwargs: Any
folder_path: Path | str, type_: str | dict[str, str], **tqdm_kwargs: Any
) -> tuple[Path, ...] | None:
folder_path = Path(folder_path)

# New path: `type_` is a mapping of filename -> URL; download every entry in parallel.
if not isinstance(type_, str):
try:
Parallel(n_jobs=len(type_))(
[
delayed(download_file)(
url,
folder_path / filename,
position=i,
skip_if_exists=True,
**tqdm_kwargs,
)
for i, (filename, url) in enumerate(type_.items())
]
)
return tuple(folder_path / filename for filename in type_.values())
except Exception as e:
LOG.error(f"Failed to download {type_}")
LOG.exception(e)

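# Legacy path: `type_` is a preset name looked up in PRETRAINED_MODEL_URLS.
# NOTE(review): this is also reached when the mapping download above fails, in which case
# `type_` is still a dict and cannot be used as a lookup key -- confirm the intended fallback.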
models_candidates = PRETRAINED_MODEL_URLS.get(type_, None)
if models_candidates is None:
LOG.warning(f"Unknown pretrained model type: {type_}")
Expand All @@ -133,8 +155,8 @@ def ensure_pretrained_model(
)
return tuple(paths)
except Exception as e:
LOG.error(f"Failed to download {model_urls}")
LOG.exception(e)
return


class HubertModelWithFinalProj(HubertModel):
Expand Down

0 comments on commit c4c719c

Please sign in to comment.