# 02 — AudioCraft training (compression/debug)
Run Dora training on the prepared `fma_small_mini` dataset and capture logs.


In [1]:
import os
from pathlib import Path

# --- Filesystem locations ---------------------------------------------------
AUDIOCRAFT_REPO_DIR = Path("/workspace/audiocraft")
EXPERIMENTS_DIR = Path("/workspace/experiments/audiocraft")
TRAIN_JSONL = Path("/workspace/data/fma_small_mini/egs/train/data.jsonl")
VALID_JSONL = Path("/workspace/data/fma_small_mini/egs/valid/data.jsonl")
CONFIG_PATH = AUDIOCRAFT_REPO_DIR.joinpath("config", "dset", "audio", "fma_small_mini.yaml")

# --- Config selectors passed to `dora run` ----------------------------------
DSET = "audio/fma_small_mini"
SOLVER = "compression/debug"

# --- Training knobs ---------------------------------------------------------
SEGMENT_SECONDS = 10
BATCH_SIZE = 80  # Increased from 8 to improve GPU utilization
UPDATES_PER_EPOCH = 50
VALID_NUM_SAMPLES = 30
GENERATE_EVERY = 2
EVALUATE_EVERY = 2

# Dataloader workers: a quarter of the CPU count, clamped to the 8–16 range.
# When the CPU count cannot be determined, 32 is assumed.
_cpu_count = os.cpu_count() or 32
NUM_WORKERS = max(8, min(16, _cpu_count // 4))

print(NUM_WORKERS)

16


## 1) Sanity checks
Ensures dataset + config exist and CUDA is available.


In [2]:
import importlib

import torch

# Gather every absent artifact first so a single error reports all of them.
required_paths = (CONFIG_PATH, TRAIN_JSONL, VALID_JSONL)
missing = [path for path in required_paths if not path.exists()]
if len(missing) > 0:
    raise FileNotFoundError(f"Missing artifacts: {missing}. Run Notebook A first.")

# Importing the package fails loudly (ImportError) if audiocraft is not installed in this kernel.
importlib.import_module("audiocraft")

# The training run below is GPU-only; stop early when no CUDA device is visible.
cuda_ready = torch.cuda.is_available()
if not cuda_ready:
    raise SystemError("CUDA is required for this run. Check your RunPod GPU setup.")

print("All sanity checks passed.")


All sanity checks passed.


## 2) Run Dora training (compression/debug)
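Launches `dora run` through `subprocess.run`, passing the settings from the config cell as command-line overrides, so it can be re-run easily.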


In [3]:
import os
import subprocess
import sys

# Environment for the training subprocess: a copy of the kernel's environment
# plus the variables this notebook sets for AudioCraft/Dora.
env = os.environ.copy()
env['AUDIOCRAFT_TEAM'] = 'default'
env['AUDIOCRAFT_DORA_DIR'] = str(EXPERIMENTS_DIR)
env['USER'] = env.get('USER', 'root')  # keep an existing USER, otherwise fall back to root
env['PYTHONWARNINGS'] = 'ignore::FutureWarning,ignore::UserWarning'

print(f"Using config: dset={DSET}, solver={SOLVER}")
print(f"Training params: segment_duration={SEGMENT_SECONDS}, batch_size={BATCH_SIZE}, num_workers={NUM_WORKERS}")
print(f"Optimizer: updates_per_epoch={UPDATES_PER_EPOCH}")
print(f"Validation: num_samples={VALID_NUM_SAMPLES}, evaluate.every={EVALUATE_EVERY}, generate.every={GENERATE_EVERY}")

# Build the command as an argument list instead of a shell string:
#  - no shell quoting/injection pitfalls if a path or value ever contains spaces;
#  - sys.executable guarantees the subprocess uses the same interpreter as this
#    kernel (the one the sanity-check cell verified can import audiocraft),
#    rather than whatever `python` happens to be first on PATH.
cmd = [
    sys.executable, "-m", "dora", "run",
    f"solver={SOLVER}",
    f"dset={DSET}",
    f"dataset.segment_duration={SEGMENT_SECONDS}",
    f"dataset.batch_size={BATCH_SIZE}",
    f"dataset.num_workers={NUM_WORKERS}",
    f"optim.updates_per_epoch={UPDATES_PER_EPOCH}",
    f"dataset.valid.num_samples={VALID_NUM_SAMPLES}",
    f"generate.every={GENERATE_EVERY}",
    f"evaluate.every={EVALUATE_EVERY}",
]

# cwd replaces the former `cd ... &&`; check=True raises CalledProcessError on a non-zero exit.
subprocess.run(cmd, cwd=AUDIOCRAFT_REPO_DIR, check=True, env=env)

Using config: dset=audio/fma_small_mini, solver=compression/debug
Training params: segment_duration=10, batch_size=80, num_workers=16
Optimizer: updates_per_epoch=50
Validation: num_samples=30, evaluate.every=2, generate.every=2


Dora directory: /workspace/experiments/audiocraft
[[36m01-26 03:51:04[0m][[34mdora.distrib[0m][[32mINFO[0m] - world_size is 1, skipping init.[0m
[[36m01-26 03:51:04[0m][[34mflashy.solver[0m][[32mINFO[0m] - Instantiating solver CompressionSolver for XP 550e2fc2[0m
[[36m01-26 03:51:04[0m][[34mflashy.solver[0m][[32mINFO[0m] - All XP logs are stored in /workspace/experiments/audiocraft/xps/550e2fc2[0m
[[36m01-26 03:51:04[0m][[34maudiocraft.solvers.builders[0m][[32mINFO[0m] - Loading audio data split train: /workspace/data/fma_small_mini/egs/train[0m
[[36m01-26 03:51:04[0m][[34maudiocraft.solvers.builders[0m][[32mINFO[0m] - Loading audio data split valid: /workspace/data/fma_small_mini/egs/valid[0m
[[36m01-26 03:51:04[0m][[34maudiocraft.solvers.builders[0m][[32mINFO[0m] - Loading audio data split evaluate: /workspace/data/fma_small_mini/egs/valid[0m
[[36m01-26 03:51:04[0m][[34maudiocraft.solvers.builders[0m][[32mINFO[0m] - Loading audio data s

KeyboardInterrupt: 

## 3) Capture XP id and logs path
Finds the most recent Dora experiment under `/workspace/experiments/audiocraft/xps/`.


In [None]:
from pathlib import Path

# Experiment folders live under <EXPERIMENTS_DIR>/xps; create it so listing never fails.
xp_root = EXPERIMENTS_DIR / "xps"
xp_root.mkdir(parents=True, exist_ok=True)

# Order the experiment folders from oldest to newest by modification time.
xp_dirs = [entry for entry in xp_root.iterdir() if entry.is_dir()]
xp_dirs.sort(key=lambda entry: entry.stat().st_mtime)

if len(xp_dirs) == 0:
    raise FileNotFoundError("No Dora runs found yet. Re-run the training cell above.")

# The most recently modified folder is taken to be the run that just finished.
latest = xp_dirs[-1]
print("XP id:", latest.name)
print("Logs dir:", latest)

# Show at most five log files, searched recursively.
# NOTE(review): the solver may write rank-suffixed logs (e.g. `solver.log.0`),
# which `*.log` would not match — confirm against an actual XP folder.
sample_logs = list(latest.glob("**/*.log"))[:5]
print("Sample log files:")
for log_file in sample_logs:
    print(f" • {log_file.relative_to(latest)}")


## 4) Post-run validation
Checks for artifacts/logs inside the XP directory.


In [None]:
from pathlib import Path

# Re-discover the newest experiment folder so this cell works on its own.
xp_root = EXPERIMENTS_DIR / "xps"
# A missing xps/ folder is treated like an empty one so that both cases raise
# the same descriptive error instead of a bare IndexError / OSError.
xp_dirs = (
    sorted([p for p in xp_root.iterdir() if p.is_dir()], key=lambda p: p.stat().st_mtime)
    if xp_root.is_dir()
    else []
)
if not xp_dirs:
    raise FileNotFoundError("No Dora runs found yet. Re-run the training cell above.")
latest = xp_dirs[-1]  # most recently modified run

# Recursively collect checkpoint and log files of that run.
# NOTE(review): the solver may write rank-suffixed logs (e.g. `solver.log.0`),
# which `*.log` would not match — confirm against an actual XP folder.
checkpoints = list(latest.rglob("*.pt"))
logs = list(latest.rglob("*.log"))

print({
    "xp_id": latest.name,
    "num_checkpoints": len(checkpoints),
    "num_logs": len(logs),
    "log_sample": [p.relative_to(latest) for p in logs[:3]],
})
if logs:
    # Print the last 20 lines of the first log found (rglob order, not sorted).
    tail_path = logs[0]
    print("\nTail of", tail_path.name)
    print("================")
    print("".join(tail_path.read_text().splitlines(True)[-20:]))

## 5) Optional quick re-run
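Re-runs training with a different batch size without editing the training cell above: change `alt_batch_size` below (the worker count still comes from `NUM_WORKERS` in the config cell).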


In [None]:
import os
import subprocess
import sys

# Quick re-run with adjusted batch size.
# Environment for the training subprocess: a copy of the kernel's environment
# plus the variables this notebook sets for AudioCraft/Dora.
env = os.environ.copy()
env['AUDIOCRAFT_TEAM'] = 'default'
env['AUDIOCRAFT_DORA_DIR'] = str(EXPERIMENTS_DIR)
env['USER'] = env.get('USER', 'root')  # keep an existing USER, otherwise fall back to root
env['PYTHONWARNINGS'] = 'ignore::FutureWarning,ignore::UserWarning'

# Override batch size for this run (all other settings come from the config cell)
alt_batch_size = 4

print(f"Using config: dset={DSET}, solver={SOLVER}")
print(f"Training params: segment_duration={SEGMENT_SECONDS}, batch_size={alt_batch_size}, num_workers={NUM_WORKERS}")
print(f"Optimizer: updates_per_epoch={UPDATES_PER_EPOCH}")
print(f"Validation: num_samples={VALID_NUM_SAMPLES}, evaluate.every={EVALUATE_EVERY}, generate.every={GENERATE_EVERY}")

# Build the command as an argument list instead of a shell string:
#  - no shell quoting/injection pitfalls if a path or value ever contains spaces;
#  - sys.executable guarantees the subprocess uses the same interpreter as this
#    kernel, rather than whatever `python` happens to be first on PATH.
cmd = [
    sys.executable, "-m", "dora", "run",
    f"solver={SOLVER}",
    f"dset={DSET}",
    f"dataset.segment_duration={SEGMENT_SECONDS}",
    f"dataset.batch_size={alt_batch_size}",
    f"dataset.num_workers={NUM_WORKERS}",
    f"optim.updates_per_epoch={UPDATES_PER_EPOCH}",
    f"dataset.valid.num_samples={VALID_NUM_SAMPLES}",
    f"generate.every={GENERATE_EVERY}",
    f"evaluate.every={EVALUATE_EVERY}",
]

# cwd replaces the former `cd ... &&`; check=True raises CalledProcessError on a non-zero exit.
subprocess.run(cmd, cwd=AUDIOCRAFT_REPO_DIR, check=True, env=env)

## 6) Test model
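Test a completed model: set up a dedicated environment and kernel, load the trained compression model from its checkpoint, then encode and reconstruct an audio file with it.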


In [None]:
%%bash
# Create a virtualenv inside the AudioCraft checkout, install AudioCraft into it
# in editable mode, and register it as the "Python (audiocraft-ac)" Jupyter kernel.

# Stop at the first failing command. Without this, a failed `cd`, venv creation
# or `activate` would let the pip installs below run against the wrong
# directory or interpreter.
set -e

cd /workspace/audiocraft

# --system-site-packages lets the venv reuse packages already installed system-wide.
python -m venv .venv-ac --system-site-packages
source .venv-ac/bin/activate

python -m pip install -U pip wheel setuptools
# NOTE(review): versions are unpinned; `-U xformers` may pull a build that expects
# a different torch than the one installed — consider pinning.
python -m pip install -U xformers ipykernel
python -m pip install -e .
python -m ipykernel install --user --name audiocraft-ac --display-name "Python (audiocraft-ac)"

In [None]:
import os
import torch
from audiocraft.solvers import CompressionSolver

# IMPORTANT: this must be the same Dora directory that training used
# (the training logs show paths like /workspace/experiments/audiocraft/xps/<SIG>).
# NOTE(review): the variable is set after audiocraft is imported; if the library
# reads it at import time this assignment may come too late — confirm.
os.environ["AUDIOCRAFT_DORA_DIR"] = "/workspace/experiments/audiocraft"

sig = "550e2fc2"  # <-- replace with your XP signature from the logs

# Prefer the GPU, fall back to CPU when CUDA is unavailable.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# "//sig/<SIG>" refers to the checkpoint by experiment signature rather than by file path.
checkpoint_ref = f"//sig/{sig}"
model = CompressionSolver.model_from_checkpoint(checkpoint_ref, device=device)
print("Loaded:", type(model), "sr=", model.sample_rate, "channels=", model.channels)

In [None]:
import torch
import torchaudio

# Input clip to run through the codec (any WAV/MP3 torchaudio can read).
wav_path = "/workspace/data/test.wav"  # <-- your file
wav, sr = torchaudio.load(wav_path)   # wav: [C, T], sr: sample rate of the file

# Down-mix to a single channel by averaging when the file has several.
# NOTE(review): assumes the model is mono — compare with model.channels to confirm.
if wav.size(0) > 1:
    wav = wav.mean(dim=0, keepdim=True)

# Bring the audio to the sample rate the model reports.
target_sr = model.sample_rate
if sr != target_sr:
    wav = torchaudio.functional.resample(wav, sr, target_sr)

# Optional: keep at most the first 10 s (matches the training segment_duration=10).
max_len = int(10 * target_sr)
wav = wav[..., :max_len]

# Add the batch dimension and move to the model's device: [B, C, T].
wav = wav.unsqueeze(0).to(device)

# Inference only: encode to codes (plus scale), then decode back to a waveform.
with torch.no_grad():
    codes, scale = model.encode(wav)
    recon = model.decode(codes, scale=scale)

# Drop the batch dimension, move to CPU and write the reconstruction to disk.
out_path = "/workspace/recon.wav"
recon_cpu = recon.squeeze(0).cpu()
torchaudio.save(out_path, recon_cpu, target_sr)
print("Wrote:", out_path)