# Hugging Face RVC Training (Colab)

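This notebook prepares training audio, produces a voice-model artifact (currently a placeholder — see "Minimal Training Placeholder" below), and pushes it to the Hugging Face Hub. A GPU runtime is strongly recommended in Colab once you swap in real training.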

- You can either do zero-shot XTTS (no training) or train an RVC-style model.
- After you have a repo on the Hub, link it in the app via Edit Voice → Link Hugging Face Model.


In [None]:
# Section 1: Setup and Imports
import os, sys, json, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Core ML/audio
import librosa, soundfile as sf

# Optionally used downstream. Torch may be missing before the install cell runs,
# so a failed import is reported (with its cause) rather than aborting the notebook.
try:
    import torch, torchaudio
    TORCH_AVAILABLE = True
except Exception as e:
    TORCH_AVAILABLE = False
    print("Torch/Torchaudio not present yet. Will install in the next cell if needed.")
    print("Import error was:", repr(e))

# Seed every RNG that is available so re-runs are reproducible.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)
random.seed(RANDOM_STATE)
if TORCH_AVAILABLE:
    torch.manual_seed(RANDOM_STATE)

# Environment report: useful when comparing runs across Colab images.
print({
    'python': sys.version,
    'numpy': np.__version__,
    'pandas': pd.__version__,
    'torch': torch.__version__ if TORCH_AVAILABLE else None,
})

In [None]:
# Install dependencies (Colab only)
# If running locally, you can skip and use requirements-huggingface.txt
try:
    import google
    IN_COLAB = True
except Exception:
    IN_COLAB = False

if IN_COLAB:
    !pip -q install torch torchaudio --index-url https://download.pytorch.org/whl/cpu
    !pip -q install librosa soundfile huggingface_hub tqdm
else:
    print("Not in Colab: make sure to install deps locally if needed")

## Upload/Prepare Training Audio
- Provide 2–10 minutes of clean speech.
- You can upload a single WAV/MP3 file or a folder of audio clips.
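- We’ll convert each file to mono, resample it to 22050 Hz, peak-normalize it, and slice it into 5-second segments (a trailing piece shorter than half a segment is dropped).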


In [None]:
from glob import glob
from tqdm import tqdm

# Sample rate (Hz) that every processed segment is resampled to.
TARGET_SR = 22050

# Workspace root: Colab's /content when IN_COLAB is defined and truthy,
# otherwise the current directory. Raw uploads and processed segments
# live in sibling folders underneath it.
WORKDIR = Path("/content" if globals().get('IN_COLAB') else ".").resolve()
RAW_DIR, PROC_DIR = (WORKDIR / folder for folder in ("voice_raw", "voice_proc"))

PROC_DIR.mkdir(parents=True, exist_ok=True)

print("Working dir:", WORKDIR)
print("Upload or place your training audio into:", RAW_DIR)
RAW_DIR.mkdir(exist_ok=True)


def convert_and_slice(path, out_dir: Path, target_sr=TARGET_SR, segment_sec=5.0):
    """Load one audio file, normalize it, and write fixed-length WAV segments.

    The file is decoded to mono at its native rate, resampled to ``target_sr``
    when the rates differ, peak-normalized, and cut into consecutive pieces of
    ``segment_sec`` seconds. A trailing piece shorter than half a segment is
    dropped.

    Args:
        path: Audio file to read with ``librosa.load``.
        out_dir: Directory receiving ``seg_<stem>_<NNNN>.wav`` files; created
            if it does not exist yet.
        target_sr: Output sample rate in Hz.
        segment_sec: Segment length in seconds.

    Returns:
        The number of segments written.

    Raises:
        ValueError: If ``segment_sec * target_sr`` is shorter than one sample.
    """
    # Validate before decoding: a zero step would crash inside range() with an
    # opaque message, and a negative one would silently produce no output.
    seg_len = int(segment_sec * target_sr)
    if seg_len <= 0:
        raise ValueError(
            f"segment_sec={segment_sec!r} at target_sr={target_sr!r} yields no samples per segment"
        )

    y, sr = librosa.load(path, sr=None, mono=True)
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr

    # Peak-normalize; all-zero (or empty) audio is left untouched to avoid 0/0.
    peak = np.max(np.abs(y)) if len(y) else 0.0
    if peak > 0:
        y = y / peak

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # NOTE(review): names are derived from the file stem only, so two inputs
    # sharing a stem (e.g. a.wav and a.mp3) overwrite each other's segments.
    stem = Path(path).stem
    min_len = int(0.5 * seg_len)
    count = 0
    for start in range(0, len(y), seg_len):
        seg = y[start:start + seg_len]
        if len(seg) < min_len:
            continue
        sf.write(out_dir / f"seg_{stem}_{count:04d}.wav", seg, sr)
        count += 1
    return count

# Auto-process any existing files in RAW_DIR.
# The "*.*" pattern also matches directories whose names contain a dot, so keep
# regular files only; sorting makes the processing order reproducible.
files = sorted(
    p for p in glob(str(RAW_DIR / "**/*.*"), recursive=True) if Path(p).is_file()
)
print("Found", len(files), "raw files")

total_segments = 0
for f in tqdm(files):
    try:
        total_segments += convert_and_slice(f, PROC_DIR)
    except Exception as e:
        # Best-effort: a file that fails to process is reported and skipped
        # so one bad upload does not abort the whole batch.
        print("Skipping", f, e)

print("Wrote", total_segments, "segments")
print("Processed files in:", PROC_DIR)

## Minimal Training Placeholder
This notebook shows a minimal placeholder instead of a full RVC pipeline (which is lengthy and repo-specific).
We simulate a training artifact by hashing the samples of every processed segment (SHA-256) and saving the resulting 32-byte digest as a small weights file.

You can swap this cell for a community RVC training notebook later.


In [None]:
import hashlib

WEIGHTS_DIR = WORKDIR / "weights"
WEIGHTS_DIR.mkdir(parents=True, exist_ok=True)

# Collect the processed segments; sorted so the digest below is reproducible.
wav_files = sorted(glob(str(PROC_DIR / "*.wav")))
print("Training on", len(wav_files), "segments")

# Stop here when there is nothing to train on: otherwise the "weights" would be
# the digest of zero bytes and the next cell would upload that to the Hub.
if not wav_files:
    raise FileNotFoundError(
        f"No processed segments found in {PROC_DIR}. "
        f"Add audio to {RAW_DIR} and re-run the preparation cell."
    )

# Fold the raw sample bytes of every segment into one SHA-256 digest; this
# digest stands in for model weights.
hash_accum = hashlib.sha256()
for wf in wav_files:
    samples, _native_sr = librosa.load(wf, sr=None)
    hash_accum.update(samples.tobytes())

weights_path = WEIGHTS_DIR / "rvc_placeholder.pth"
with open(weights_path, "wb") as f:
    f.write(hash_accum.digest())

with open(WEIGHTS_DIR / "README.txt", "w", encoding="utf-8") as f:
    f.write("Placeholder RVC weights. Replace with real model weights when available.\n")

print("Saved placeholder weights:", weights_path)

## Push to Hugging Face Hub
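We will create/reuse a model repo and upload the weights folder. Set the `HF_REPO_ID` environment variable to `<your-username>/<repo-name>` before running; `HF_TOKEN` (a token with write access) and `HF_REVISION` (default `main`) are read from the environment as well, and you are prompted for the token if it is not set.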


In [None]:
from huggingface_hub import HfApi, create_repo, upload_folder
from getpass import getpass

# Read the token from the environment, otherwise prompt for it. Only getpass is
# used: an input() prompt would echo the token into the saved cell output.
HF_TOKEN = (os.environ.get('HF_TOKEN') or "").strip()
if not HF_TOKEN:
    HF_TOKEN = getpass("Enter your Hugging Face token (write access): ").strip()

if not HF_TOKEN:
    raise ValueError("HF token is required")

DEFAULT_REPO_ID = 'username/my-voice-rvc'
REPO_ID = os.environ.get('HF_REPO_ID', DEFAULT_REPO_ID)
REVISION = os.environ.get('HF_REVISION', 'main')

# The default is only a template; fail with a clear message instead of a
# permission error from the Hub.
if REPO_ID == DEFAULT_REPO_ID:
    raise ValueError(
        "Set HF_REPO_ID to '<your-username>/<repo-name>' before uploading "
        f"(still using the placeholder {DEFAULT_REPO_ID!r})."
    )

# One authenticated client for every Hub call below.
api = HfApi(token=HF_TOKEN)
api.create_repo(repo_id=REPO_ID, exist_ok=True, repo_type="model")

# Uploading to a branch that does not exist fails, so create it when the
# target is anything other than the default branch.
if REVISION != "main":
    api.create_branch(repo_id=REPO_ID, branch=REVISION, repo_type="model", exist_ok=True)

print(f"Uploading {WEIGHTS_DIR} to {REPO_ID}@{REVISION}...")
api.upload_folder(
    repo_id=REPO_ID,
    folder_path=str(WEIGHTS_DIR),
    path_in_repo="",  # repo root; the argument is a relative path, so no leading "/"
    repo_type="model",
    commit_message="Add placeholder RVC weights",
    revision=REVISION,
)

print("✅ Uploaded. Repo:", f"https://huggingface.co/{REPO_ID}")