# 01 — FMA small mini setup
End-to-end, idempotent setup for AudioCraft + a small FMA-based mini dataset on RunPod (`/workspace`).


In [1]:
from pathlib import Path
import os

BASE_DIR = Path("/workspace")
DATA_DIR = BASE_DIR / "data" / "fma_small_mini"
RAW_DIR = BASE_DIR / "data" / "fma_raw"
AUDIOCRAFT_REPO_DIR = BASE_DIR / "audiocraft"
EXPERIMENTS_DIR = BASE_DIR / "experiments" / "audiocraft"

SEGMENT_SECONDS = 10
TARGET_SR = 32000
CHANNELS = 1
TRAIN_RATIO = 0.9
RANDOM_SEED = 42
NUM_SAMPLES_TOTAL = 100  # adjust to control how many source tracks to keep

FMA_ARCHIVE_URLS = [
    os.environ.get("FMA_SAMPLE_ARCHIVE_URL"),
    "https://os.unil.cloud.switch.ch/fma/fma_small.zip",
    "https://mirror.math.princeton.edu/pub/fma/fma_small.zip",
    "https://huggingface.co/datasets/echonest/fma_small/resolve/main/fma_small.zip",
]
FMA_ARCHIVE_URLS = [u for u in FMA_ARCHIVE_URLS if u]

WAV_DIR = DATA_DIR / "wav_32k_mono"
SEGMENTS_DIR = DATA_DIR / "segments_10s"
MANIFEST_DIR = DATA_DIR / "manifests"
EGS_TRAIN = DATA_DIR / "egs" / "train"
EGS_VALID = DATA_DIR / "egs" / "valid"

for p in (DATA_DIR, RAW_DIR, WAV_DIR, SEGMENTS_DIR, MANIFEST_DIR, EGS_TRAIN, EGS_VALID, EXPERIMENTS_DIR):
    p.mkdir(parents=True, exist_ok=True)

print("BASE_DIR:", BASE_DIR)
print("Using URLs (in order):", FMA_ARCHIVE_URLS)


BASE_DIR: /workspace
Using URLs (in order): ['https://os.unil.cloud.switch.ch/fma/fma_small.zip', 'https://mirror.math.princeton.edu/pub/fma/fma_small.zip', 'https://huggingface.co/datasets/echonest/fma_small/resolve/main/fma_small.zip']


## 1) System dependencies
Installs ffmpeg + related libs and shows versions.


In [None]:
# System deps: ffmpeg + unzip + sha1sum
!apt-get update -y
!apt-get install -y ffmpeg unzip coreutils

## 2) Python dependencies
Upgrades pip/build tools, installs Dora, PyTorch CUDA (cu121), Transformers.


In [None]:
%%bash
set -euo pipefail
python -m pip install -U pip setuptools wheel
python -m pip install -U dora-search
python -m pip install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
python -m pip install -U transformers


In [None]:
import torch
print("torch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())


## 3) Clone AudioCraft repo
Fresh clone into `/workspace/audiocraft`.


In [None]:
# System deps: ffmpeg libraries

!apt-get update -y
!apt-get install -y pkg-config libavformat-dev libavdevice-dev libavfilter-dev libavcodec-dev libavutil-dev libswscale-dev libswresample-dev

In [None]:
%%bash
set -euo pipefail
AUDIOCRAFT_REPO_DIR="/workspace/audiocraft"
if [ -d "$AUDIOCRAFT_REPO_DIR" ]; then
  rm -rf "$AUDIOCRAFT_REPO_DIR"
fi
git clone https://github.com/facebookresearch/audiocraft.git "$AUDIOCRAFT_REPO_DIR"
ls -la "$AUDIOCRAFT_REPO_DIR/config" | head


## 4) Install AudioCraft
Installs requirements and editable package.


In [None]:
%%bash
set -euo pipefail
cd /workspace/audiocraft
python -m pip install -r requirements.txt
python -m pip install -e .


In [None]:
%%bash
set -e
apt-get install -y aria2

In [None]:
%pip install -U "torch==2.5.1" "torchvision==0.20.1" "torchaudio==2.5.1" --index-url https://download.pytorch.org/whl/cu121
%pip install -U "transformers==4.46.3"
%pip install --force-reinstall "pydantic<2" "pydantic-core<2"

# IMPORTANT: Restart the kernel after running this cell to ensure proper loading of the updated packages

In [2]:
import sys, subprocess
print("Kernel python:", sys.executable)
subprocess.run([sys.executable, "-m", "pip", "install", "-e", "/workspace/audiocraft"], check=True)
subprocess.run([sys.executable, "-m", "pip", "show", "audiocraft"], check=True)

Kernel python: /usr/bin/python
Obtaining file:///workspace/audiocraft
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Checking if build backend supports build_editable: started
  Checking if build backend supports build_editable: finished with status 'done'
  Getting requirements to build editable: started
  Getting requirements to build editable: finished with status 'done'
  Preparing editable metadata (pyproject.toml): started
  Preparing editable metadata (pyproject.toml): finished with status 'done'
Collecting torch==2.1.0 (from audiocraft==1.4.0a2)
  Using cached torch-2.1.0-cp311-cp311-manylinux1_x86_64.whl.metadata (25 kB)
Collecting torchaudio<2.1.2,>=2.0.0 (from audiocraft==1.4.0a2)
  Using cached torchaudio-2.1.1-cp311-cp311-manylinux1_x86_64.whl.metadata (6.4 kB)
Collecting torchvision==0.16.0 (from audiocraft==1.4.0a2)
  Using cached torchvision-0.16.0-cp311-cp311-manylinux1_x86_64.whl.metadata (6.6 kB)
Collecting nvi

[0m

Name: audiocraft
Version: 1.4.0a2
Summary: Audio generation research library for PyTorch
Home-page: https://github.com/facebookresearch/audiocraft
Author: FAIR Speech & Audio
Author-email: defossez@meta.com, jadecopet@meta.com
License: MIT License
Location: /usr/local/lib/python3.11/dist-packages
Editable project location: /workspace/audiocraft
Requires: av, demucs, einops, encodec, flashy, gradio, huggingface_hub, hydra-core, hydra_colorlog, julius, librosa, num2words, numpy, pesq, protobuf, pystoi, sentencepiece, soundfile, spacy, torch, torchaudio, torchdiffeq, torchmetrics, torchtext, torchvision, tqdm, transformers, xformers
Required-by: 


CompletedProcess(args=['/usr/bin/python', '-m', 'pip', 'show', 'audiocraft'], returncode=0)

In [3]:
import sys
sys.path.insert(0, "/workspace/audiocraft")
import audiocraft
print("audiocraft import: OK", audiocraft.__file__)

audiocraft import: OK /workspace/audiocraft/audiocraft/__init__.py


## Data download & prep (moved)
Continue with `01b_fma_small_mini_downloader.ipynb` for steps 6–13 (download, segmentation, manifests, config).
