feat(preprocessing): add pre-classify command to manually classify fi…

…les (#527)
voicepaw · Apr 30, 2023 · 7a0319c · 7a0319c
1 parent e76f91f
commit 7a0319c
Show file tree

Hide file tree

Showing 3 changed files with 138 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -151,6 +151,7 @@ svc infer source.wav
 - If your dataset has BGM, please remove the BGM using software such as [Ultimate Vocal Remover](https://ultimatevocalremover.com/). `3_HP-Vocal-UVR.pth` or `UVR-MDX-NET Main` is recommended. [^1]
 - If your dataset is a long audio file with a single speaker, use `svc pre-split` to split the dataset into multiple files (using `librosa`).
 - If your dataset is a long audio file with multiple speakers, use `svc pre-sd` to split the dataset into multiple files (using `pyannote.audio`). Further manual classification may be necessary due to accuracy issues. If speakers speak with a variety of speech styles, set --min-speakers larger than the actual number of speakers. Due to unresolved dependencies, please install `pyannote.audio` manually: `pip install pyannote-audio`.
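+- To manually classify audio files, use `svc pre-classify`. Each file is played in a loop: press the up or down arrow key to change the playback speed, or press any other key to move the file into the folder whose name starts with that key (a new folder named after the key is created if none matches, unless `--no-create-new` is given).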
 
 [^1]: https://ytpmv.info/how-to-use-uvr/
 
@@ -209,7 +210,8 @@ Options:
 Commands:
   clean          Clean up files, only useful if you are using the default file structure
   infer          Inference
-  onnx           Export model to onnx
+  onnx           Export model to onnx (currently not working)
+  pre-classify   Classify multiple audio files into multiple files
   pre-config     Preprocessing part 2: config
   pre-hubert     Preprocessing part 3: hubert If the HuBERT model is not found, it will be...
   pre-resample   Preprocessing part 1: resample

diff --git a/src/so_vits_svc_fork/__main__.py b/src/so_vits_svc_fork/__main__.py
@@ -716,6 +716,45 @@ def pre_split(
     )
 
 
+@cli.command()
+@click.option(
+    "-i",
+    "--input-dir",
+    type=click.Path(exists=True),
+    required=True,
+    help="path to source dir",
+)
+@click.option(
+    "-o",
+    "--output-dir",
+    type=click.Path(),
+    default=None,
+    help="path to output dir",
+)
+@click.option(
+    "-c/-nc",
+    "--create-new/--no-create-new",
+    type=bool,
+    default=True,
+    help="create a new folder for the speaker if not exist",
+)
+def pre_classify(
+    input_dir: Path | str,
+    output_dir: Path | str | None,
+    create_new: bool,
+) -> None:
+    """Classify multiple audio files into multiple files"""
+    # NOTE: the docstring above is what click shows as the command's help text.
+    # Imported inside the command rather than at module top
+    # (presumably to keep CLI start-up light -- confirm).
+    from so_vits_svc_fork.preprocessing.preprocess_classify import preprocess_classify
+
+    # With no explicit output dir, the files are sorted into folders
+    # directly inside the input dir.
+    target_dir = input_dir if output_dir is None else output_dir
+    preprocess_classify(
+        input_dir=input_dir,
+        output_dir=target_dir,
+        create_new=create_new,
+    )
+
+
 @cli.command
 def clean():
     """Clean up files, only useful if you are using the default file structure"""
@@ -763,8 +802,8 @@ def clean():
 def onnx(
     input_path: Path, output_path: Path, config_path: Path, device: torch.device | str
 ) -> None:
+    """Export model to onnx (currently not working)"""
     raise NotImplementedError("ONNX export is not yet supported")
-    """Export model to onnx"""
     input_path = Path(input_path)
     if input_path.is_dir():
         input_path = list(input_path.glob("*.pth"))[0]

diff --git a/src/so_vits_svc_fork/preprocessing/preprocess_classify.py b/src/so_vits_svc_fork/preprocessing/preprocess_classify.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from logging import getLogger
+from pathlib import Path
+
+import keyboard
+import librosa
+import sounddevice as sd
+import soundfile as sf
+from rich.console import Console
+from tqdm.rich import tqdm
+
+LOG = getLogger(__name__)
+
+
+def preprocess_classify(
+    input_dir: Path | str, output_dir: Path | str, create_new: bool = True
+) -> None:
+    """Interactively sort audio files into folders, one key press per file.
+
+    Every file directly inside ``input_dir`` is played in a loop. Pressing
+    the up / down arrow key speeds up / slows down playback; any other key
+    moves the file into the first sub-folder of ``output_dir`` whose name
+    starts with that key's name.
+
+    Args:
+        input_dir: Directory containing the audio files to classify.
+        output_dir: Directory that holds (or will hold) the target folders.
+            Created, including missing parents, if it does not exist.
+        create_new: If True, a folder named after the pressed key is created
+            when no existing folder matches. If False, the file is left in
+            place and a message is printed.
+
+    Raises:
+        ValueError: If ``input_dir`` is not a directory.
+    """
+    # paths
+    input_dir_ = Path(input_dir)
+    output_dir_ = Path(output_dir)
+    speed = 1.0
+    if not input_dir_.is_dir():
+        raise ValueError(f"{input_dir} is not a directory.")
+    # parents=True so that a nested, not-yet-existing output path also works
+    output_dir_.mkdir(parents=True, exist_ok=True)
+
+    console = Console()
+    # get audio paths and folders
+    # Only regular files: "*.*" also matches directories with a dot in their
+    # name, which would make sf.read fail. Sorted for a reproducible order.
+    audio_paths = sorted(x for x in input_dir_.glob("*.*") if x.is_file())
+    last_folders = [x for x in output_dir_.glob("*") if x.is_dir()]
+    console.print("Press ↑ or ↓ to change speed. Press any other key to classify.")
+    console.print(f"Folders: {[x.name for x in last_folders]}")
+
+    # result of the most recent move, shown next to the current duration
+    last_result = ""
+
+    pbar = tqdm(audio_paths)
+    for audio_path in pbar:
+        # read file; a non-audio or corrupt file must not abort the session
+        try:
+            audio, sr = sf.read(audio_path)
+        except RuntimeError as e:
+            LOG.warning(f"Could not read {audio_path.name}, skipping: {e}")
+            continue
+
+        # soundfile returns multi-channel audio as (frames, channels) while
+        # librosa expects samples on the last axis; .T is a no-op for mono.
+        audio_t = audio.T
+
+        # update description
+        duration = librosa.get_duration(y=audio_t, sr=sr)
+        pbar.set_description(f"{duration:.1f} {last_result}")
+
+        while True:
+            # start playing (transpose back: sounddevice wants channels as columns)
+            stretched = librosa.effects.time_stretch(audio_t, rate=speed).T
+            sd.play(stretched, sr, loop=True)
+
+            # wait for key press; always stop the looped playback afterwards,
+            # including when a classify key was pressed or an error occurred
+            try:
+                key = str(keyboard.read_key())
+            finally:
+                sd.stop()
+
+            if key == "down":
+                speed /= 1.1
+            elif key == "up":
+                speed *= 1.1
+            else:
+                break
+            console.print(f"Speed: {speed:.2f}")
+
+        # print if folder changed
+        folders = [x for x in output_dir_.glob("*") if x.is_dir()]
+        if folders != last_folders:
+            console.print(f"Folders updated: {[x.name for x in folders]}")
+            last_folders = folders
+
+        # get folder
+        folder_candidates = [x for x in folders if x.name.startswith(key)]
+        if not folder_candidates:
+            if not create_new:
+                console.print(f"No folder starts with {key}.")
+                continue
+            folder = output_dir_ / key
+        else:
+            if len(folder_candidates) > 1:
+                LOG.warning(
+                    f"Multiple folders ({[x.name for x in folder_candidates]}) start with {key}. "
+                    f"Using first one ({folder_candidates[0].name})."
+                )
+            folder = folder_candidates[0]
+        folder.mkdir(exist_ok=True)
+
+        # move file; never overwrite an already classified file of the same
+        # name (rename would silently replace it on POSIX, raise on Windows)
+        new_path = folder / audio_path.name
+        if new_path.exists():
+            LOG.warning(f"{new_path} already exists. Skipping {audio_path.name}.")
+            continue
+        audio_path.rename(new_path)
+
+        # update description
+        last_result = f"Last: {audio_path.name} -> {folder.name}"