In [15]:
# Install dependencies
!pip install spleeter tensorflow numpy matplotlib



# To do. Need to figure out how exactly we use the .wav files and how the paths of them should be input to the train and validation csv files

In [17]:
# Install required Python packages
!pip install numpy pandas ffmpeg-python norbert typer httpx



In [1]:
# Import required libraries
import os
import zipfile
from google.colab import drive

In [2]:
# Mount Google Drive to access datasets
drive.mount('/content/drive')

# Unzip the MUSDB18 dataset
zip_path = '/content/drive/MyDrive/musdb18.zip'  # Path to your zip file
unzip_folder = '/content/musdb_unzipped'               # Where to unzip files

Mounted at /content/drive


In [3]:
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(unzip_folder)

print("Dataset unzipped to:", unzip_folder)

Dataset unzipped to: /content/musdb_unzipped


In [4]:
# Define paths for training and validation datasets
train_path = os.path.join(unzip_folder, "train")
validation_path = os.path.join(unzip_folder, "test")

# Limit dataset to a subset (quick training)
train_subset = os.listdir(train_path)[:2]  # Select 2 tracks for training
validation_subset = os.listdir(validation_path)[:1]  # Select 1 track for validation

# Print subset paths
print("Training subset:", train_subset)
print("Validation subset:", validation_subset)

Training subset: ['Night Panther - Fire.stem.mp4', 'Sweet Lights - You Let Me Down.stem.mp4']
Validation subset: ['Lyndsey Ollard - Catching Up.stem.mp4']


In [5]:
# Clone the Spleeter repository to modify for training
!git clone https://github.com/deezer/spleeter.git
%cd spleeter

Cloning into 'spleeter'...
remote: Enumerating objects: 2675, done.[K
remote: Counting objects: 100% (538/538), done.[K
remote: Compressing objects: 100% (110/110), done.[K
remote: Total 2675 (delta 463), reused 428 (delta 428), pack-reused 2137 (from 1)[K
Receiving objects: 100% (2675/2675), 9.38 MiB | 12.81 MiB/s, done.
Resolving deltas: 100% (1722/1722), done.
/content/spleeter


In [20]:
import json

In [19]:
!ls spleeter/model/functions

blstm.py  __init__.py  unet.py


In [25]:
# Path to unet.py file
unet_file = 'spleeter/model/functions/unet.py'

# Read the original file content
with open(unet_file, 'r') as file:
    content = file.read()

# Define sine activation function
sine_activation_code = """
import tensorflow as tf

# Define sine activation function
def sine_activation(x):
    return tf.sin(x)
"""

# Add sine activation definition at the top if not already present
if "def sine_activation" not in content:
    content = sine_activation_code + content

### Replace the get_conv_activation function manually. Otherwise try to get the
### below code to work

# # Replace the _get_conv_activation_layer function
# content = content.replace(
#     """
# def _get_conv_activation_layer(params: Dict) -> Any:
#     \"""
#     Parameters:
#         params (Dict):
#             Model parameters.

#     Returns:
#         Any:
#             Required Activation function.
#     \"""
#     conv_activation: str = str(params.get("conv_activation"))
#     if conv_activation == "ReLU":
#         return ReLU()
#     elif conv_activation == "ELU":
#         return ELU()
#     return LeakyReLU(0.2)
#     """,
#     """
# def _get_conv_activation_layer(params: Dict) -> Any:
#     # Always use sine activation
#     return sine_activation
#     """
# )

# # Replace the _get_deconv_activation_layer function
# content = content.replace(
#     """
# def _get_deconv_activation_layer(params: Dict) -> Any:
#     \"""
#     Parameters:
#         params (Dict):
#             Model parameters.

#     Returns:
#         Any:
#             Required Activation function.
#     \"""
#     deconv_activation: str = str(params.get("deconv_activation"))
#     if deconv_activation == "LeakyReLU":
#         return LeakyReLU(0.2)
#     elif deconv_activation == "ELU":
#         return ELU()
#     return ReLU()
#     """,
#     """
# def _get_deconv_activation_layer(params: Dict) -> Any:
#     # Always use sine activation
#     return sine_activation
#     """
# )

# # Write the updated content back to the file
# with open(unet_file, 'w') as file:
#     file.write(content)

# print("Successfully replaced all activation functions with sine activation in unet.py")



Successfully replaced all activation functions with sine activation in unet.py


In [109]:
import shutil

# Create directories for subset data
train_subset_path = os.path.join(unzip_folder, "train_subset")
validation_subset_path = os.path.join(unzip_folder, "validation_subset")
os.makedirs(train_subset_path, exist_ok=True)
os.makedirs(validation_subset_path, exist_ok=True)

# Copy the subset of files to the new directories
for track in os.listdir(train_path)[:5]:  # Limit to 5 training examples
    shutil.copy(os.path.join(train_path, track), os.path.join(train_subset_path, track))

for track in os.listdir(validation_path)[:3]:  # Limit to 3 validation examples
    shutil.copy(os.path.join(validation_path, track), os.path.join(validation_subset_path, track))

# Update the paths in the configuration
quick_config = {
    "train": {
        "path": train_subset_path,  # Updated to use subset
        "instrument_list": ["vocals", "accompaniment"]
    },
    "validation": {
        "path": validation_subset_path  # Updated to use subset
    },
    "batch_size": 1,  # Stochastic gradient descent with batch size = 1
    "epoch_count": 3,  # Only 3 epochs for quick training
    "optimizer": {
        "type": "sgd",  # Use SGD for optimization
        "learning_rate": 0.01
    },
    "stft": {
        "frame_length": 4096,
        "frame_step": 1024
    },
    "model": {
        "input_shape": [512, 2],
        "output_shape": [512, 2]
    }
}

# Save the updated configuration as a JSON file
config_path = "configs/quick_config.json"
os.makedirs(os.path.dirname(config_path), exist_ok=True)
with open(config_path, "w") as config_file:
    json.dump(quick_config, config_file)

print(f"Quick training configuration saved to {config_path}")



Quick training configuration saved to configs/quick_config.json


### Build the training CSV File: We need to build CSV files that align with the input of the model. This depends on the stems that we choose. This can be mix, vocals, and accompaniment, or mix, vocals, drums, bass, other.

### Create the rough draft of the training_subset.csv file

In [110]:
import pandas as pd
import os

# Paths to original CSV files
original_train_csv = "/content/spleeter/configs/musdb_train.csv"
original_validation_csv = "/content/spleeter/configs/musdb_validation.csv"

# Paths to subset directories
train_subset_path = "/content/musdb_unzipped/train_subset"
validation_subset_path = "/content/musdb_unzipped/validation_subset"

# Paths to subset CSV files
subset_train_csv = "/content/musdb_unzipped/train_subset/train_subset.csv"
subset_validation_csv = "/content/musdb_unzipped/validation_subset/validation_subset.csv"

# Ensure directories exist
os.makedirs(os.path.dirname(subset_train_csv), exist_ok=True)
os.makedirs(os.path.dirname(subset_validation_csv), exist_ok=True)

# Load original CSVs
train_df = pd.read_csv(original_train_csv)
validation_df = pd.read_csv(original_validation_csv)

# Add accompaniment_path if missing
def add_accompaniment_path(df):
    """Add accompaniment_path column if missing."""
    if "accompaniment_path" not in df.columns:
        df["accompaniment_path"] = df["mix_path"].apply(
            lambda x: x.replace("mix.wav", "accompaniment.wav")
        )
    return df

train_df = add_accompaniment_path(train_df)
validation_df = add_accompaniment_path(validation_df)

# Helper function to filter CSV by matching folder names in subset directory
def filter_csv_by_subset(original_df, subset_path, dataset_root, subset_name):
    """Filter rows in the CSV by matching folder names in the subset directory."""
    # Extract folder names from subset (e.g., "Alexander Ross - Velvet Curtain.stem.mp4")
    subset_folders = {
        os.path.splitext(f)[0].replace(".stem", "") for f in os.listdir(subset_path) if f.endswith(".mp4")
    }
    print(f"{subset_name} subset folders: {subset_folders}")

    # Extract folder name and ensure matching
    filtered_df = original_df[
        original_df["mix_path"].apply(
            lambda x: os.path.basename(os.path.dirname(x)) in subset_folders
        )
    ]
    print(f"{subset_name} subset matches found: {len(filtered_df)}")

    # Adjust paths to replace 'train' with 'train_subset'
    for col in ["mix_path", "vocals_path", "accompaniment_path"]:
        filtered_df[col] = filtered_df[col].apply(lambda x: x.replace("train/", "train_subset/"))

    return filtered_df

# Filter the train and validation CSVs based on subset directories
dataset_root = "/content/musdb_unzipped"  # Root directory for unzipped dataset
train_subset_df = filter_csv_by_subset(train_df, train_subset_path, dataset_root, "Training")
# validation_subset_df = filter_csv_by_subset(validation_df, validation_subset_path, dataset_root, "Validation")

# Save the filtered subset CSVs
train_subset_df.to_csv(subset_train_csv, index=False)
# validation_subset_df.to_csv(subset_validation_csv, index=False)

print(f"Training subset saved to {subset_train_csv} with {len(train_subset_df)} entries.")
# print(f"Validation subset saved to {subset_validation_csv} with {len(validation_subset_df)} entries.")



Training subset folders: {'Night Panther - Fire', "The Wrong'Uns - Rothko", 'Snowmine - Curfews', 'Alexander Ross - Velvet Curtain', 'Sweet Lights - You Let Me Down'}
Training subset matches found: 5
Training subset saved to /content/musdb_unzipped/train_subset/train_subset.csv with 5 entries.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df[col] = filtered_df[col].apply(lambda x: x.replace("train/", "train_subset/"))


### Edit the csv file so that the accompaniment is 3rd and the others are gone

In [56]:
import pandas as pd

# Path to the train subset CSV
subset_train_csv = "/content/musdb_unzipped/train_subset/train_subset.csv"

# Load the train subset CSV
train_subset_df = pd.read_csv(subset_train_csv)

# Keep only the necessary columns in the desired order
# Order: mix_path, vocals_path, accompaniment_path, duration
columns_to_keep = ["mix_path", "vocals_path", "accompaniment_path", "duration"]

# Add accompaniment_path if missing
if "accompaniment_path" not in train_subset_df.columns:
    train_subset_df["accompaniment_path"] = train_subset_df["mix_path"].apply(
        lambda x: x.replace("mixture.wav", "accompaniment.wav")
    )

# Reorder columns and drop unnecessary ones
train_subset_df = train_subset_df[columns_to_keep]

# Save the updated train subset CSV
train_subset_df.to_csv(subset_train_csv, index=False)

print(f"Updated training subset saved to {subset_train_csv} with {len(train_subset_df)} entries.")


Updated training subset saved to /content/musdb_unzipped/train_subset/train_subset.csv with 5 entries.


### Build the validation csv file

In [84]:
import pandas as pd
import os

# Paths
validation_subset_path = "/content/musdb_unzipped/validation_subset"
subset_validation_csv = "/content/musdb_unzipped/validation_subset/validation_subset.csv"

# Ensure directories exist
os.makedirs(os.path.dirname(subset_validation_csv), exist_ok=True)

# Placeholder function to estimate duration (e.g., 240 seconds as default)
def estimate_duration(file_path):
    """Estimate the duration of a file in seconds."""
    # You can use actual audio libraries (e.g., librosa) to calculate the duration if required
    return 240  # Placeholder value

# Generate validation subset entries
validation_data = []
for file in os.listdir(validation_subset_path):
    if file.endswith(".stem.mp4"):
        base_name = os.path.splitext(file)[0].replace(".stem", "")
        mix_path = os.path.join("validation_subset", file)
        vocals_path = os.path.join("validation_subset", f"{base_name}/vocals.wav")
        accompaniment_path = os.path.join("validation_subset", f"{base_name}/accompaniment.wav")
        duration = estimate_duration(mix_path)
        validation_data.append({
            "mix_path": mix_path,
            "vocals_path": vocals_path,
            "accompaniment_path": accompaniment_path,
            "duration": duration,
        })

# Save to validation_subset.csv
validation_subset_df = pd.DataFrame(validation_data)
validation_subset_df.to_csv(subset_validation_csv, index=False)

print(f"Validation subset created and saved to {subset_validation_csv} with {len(validation_subset_df)} entries.")


Validation subset created and saved to /content/musdb_unzipped/validation_subset/validation_subset.csv with 3 entries.


### Update the validation csv file to include the correct durations

In [85]:
import ffmpeg
import pandas as pd
import os

# Path to the validation subset CSV
subset_validation_csv = "/content/musdb_unzipped/validation_subset/validation_subset.csv"

# Base directory for unzipped dataset
dataset_root = "/content/musdb_unzipped"

# Function to compute duration using ffmpeg
def get_mp4_duration_ffmpeg(file_path):
    """Get the duration of an mp4 file in seconds using ffmpeg."""
    try:
        probe = ffmpeg.probe(file_path)
        duration = float(probe["format"]["duration"])
        return duration
    except Exception as e:
        print(f"Error processing {file_path}: {e}")
        return None

# Load the validation subset CSV
validation_subset_df = pd.read_csv(subset_validation_csv)

# Prepend the dataset root path to the mix_path
validation_subset_df["absolute_mix_path"] = validation_subset_df["mix_path"].apply(
    lambda x: os.path.join(dataset_root, x)
)

# Calculate and update the duration column
validation_subset_df["duration"] = validation_subset_df["absolute_mix_path"].apply(
    get_mp4_duration_ffmpeg
)

# Drop the temporary absolute_mix_path column
validation_subset_df.drop(columns=["absolute_mix_path"], inplace=True)

# Save the updated validation subset CSV
validation_subset_df.to_csv(subset_validation_csv, index=False)

print(f"Updated validation subset with durations saved to {subset_validation_csv}.")



Updated validation subset with durations saved to /content/musdb_unzipped/validation_subset/validation_subset.csv.


In [39]:
os.makedirs(model_dir, exist_ok=True)

### Separate the audio files into the wav components

In [89]:
!pip install musdb

Collecting musdb
  Downloading musdb-0.4.2-py2.py3-none-any.whl.metadata (10 kB)
Collecting stempeg>=0.2.3 (from musdb)
  Downloading stempeg-0.2.3-py3-none-any.whl.metadata (9.0 kB)
Collecting pyaml (from musdb)
  Downloading pyaml-24.9.0-py3-none-any.whl.metadata (11 kB)
Downloading musdb-0.4.2-py2.py3-none-any.whl (13 kB)
Downloading stempeg-0.2.3-py3-none-any.whl (963 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m963.5/963.5 kB[0m [31m20.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-24.9.0-py3-none-any.whl (24 kB)
Installing collected packages: pyaml, stempeg, musdb
Successfully installed musdb-0.4.2 pyaml-24.9.0 stempeg-0.2.3


In [90]:
import shutil
import os

# Paths to current subsets
train_subset_path = "/content/musdb_unzipped/train_subset"
validation_subset_path = "/content/musdb_unzipped/validation_subset"

# Paths to new "whole" directories
train_subset_whole_path = "/content/musdb_unzipped/train_subset_whole"
validation_subset_whole_path = "/content/musdb_unzipped/validation_subset_whole"

# Move the original directories
shutil.move(train_subset_path, train_subset_whole_path)
shutil.move(validation_subset_path, validation_subset_whole_path)

print(f"Moved train_subset to {train_subset_whole_path}")
print(f"Moved validation_subset to {validation_subset_whole_path}")


Moved train_subset to /content/musdb_unzipped/train_subset_whole
Moved validation_subset to /content/musdb_unzipped/validation_subset_whole


In [None]:
import os
import subprocess

# Paths
input_dir = "/content/musdb_unzipped/train_subset_whole"  # Input directory containing .stem.mp4 files
output_dir = "/content/musdb_unzipped/train_subset"       # Output directory for separated .wav files

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Process each .stem.mp4 file
for file_name in os.listdir(input_dir):
    if file_name.endswith(".stem.mp4"):
        input_path = os.path.join(input_dir, file_name)
        track_name = os.path.splitext(file_name)[0]
        track_output_dir = os.path.join(output_dir, track_name)
        os.makedirs(track_output_dir, exist_ok=True)

        print(f"Processing {input_path}")
        # Run FFmpeg command for each stem
        for i, stem in enumerate(["mix", "drums", "bass", "other", "vocals"]):
            output_path = os.path.join(track_output_dir, f"{stem}.wav")
            ffmpeg_command = [
                "ffmpeg", "-i", input_path, "-map", f"0:{i}", output_path, "-y"
            ]
            subprocess.run(ffmpeg_command, check=True)



In [105]:
# Paths
input_dir = "/content/musdb_unzipped/validation_subset_whole"  # Input directory containing .stem.mp4 files
output_dir = "/content/musdb_unzipped/validation_subset"       # Output directory for separated .wav files

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Process each .stem.mp4 file
for file_name in os.listdir(input_dir):
    if file_name.endswith(".stem.mp4"):
        input_path = os.path.join(input_dir, file_name)
        track_name = os.path.splitext(file_name)[0]
        track_output_dir = os.path.join(output_dir, track_name)
        os.makedirs(track_output_dir, exist_ok=True)

        print(f"Processing {input_path}")
        # Run FFmpeg command for each stem
        for i, stem in enumerate(["mix", "drums", "bass", "other", "vocals"]):
            output_path = os.path.join(track_output_dir, f"{stem}.wav")
            ffmpeg_command = [
                "ffmpeg", "-i", input_path, "-map", f"0:{i}", output_path, "-y"
            ]
            subprocess.run(ffmpeg_command, check=True)

Processing /content/musdb_unzipped/validation_subset_whole/Lyndsey Ollard - Catching Up.stem.mp4
Processing /content/musdb_unzipped/validation_subset_whole/The Long Wait - Dark Horses.stem.mp4
Processing /content/musdb_unzipped/validation_subset_whole/Carlos Gonzalez - A Place For Us.stem.mp4


In [107]:
import subprocess
import os
import json
from os.path import join

# Define paths for training
dataset_root = "/content/musdb_unzipped"
model_dir = "/content/spleeter_model"
cache_dir = "/content/spleeter_cache"
train_csv = join(dataset_root, "train_subset_whole/train_subset.csv")
validation_csv = join(dataset_root, "validation_subset_whole/validation_subset.csv")

# Define the complete training configuration
TRAIN_CONFIG = {
    "mix_name": "mix",
    "instrument_list": ["vocals", "accompaniment"],  # Updated for your use case
    "sample_rate": 44100,
    "frame_length": 4096,
    "frame_step": 1024,
    "T": 128,
    "F": 128,
    "model_dir": model_dir,
    "cache_dir": cache_dir,
    "train_csv": train_csv,
    "validation_csv": validation_csv,
    "train_path": join(dataset_root, "train_subset"),
    # "train_path": "train",
    "n_channels": 2,
    "chunk_duration": 4,
    "n_chunks_per_song": 1,
    "separation_exponent": 2,
    "mask_extension": "zeros",
    "learning_rate": 1e-4,
    "batch_size": 2,
    "train_max_steps": 10,  # Maximum number of training steps
    "throttle_secs": 20,  # How often summaries are written
    "save_checkpoints_steps": 100,  # Save checkpoints every 100 steps
    "save_summary_steps": 5,  # Save summaries every 5 steps
    "random_seed": 42,  # Fixed random seed for reproducibility
    "model": {
        "type": "unet.unet",
        "params": {
            "conv_activation": "sine",
            "deconv_activation": "sine",
        },
    },
}

# Save the training configuration to a file
config_path = "custom_train_config.json"
with open(config_path, "w") as config_file:
    json.dump(TRAIN_CONFIG, config_file)

# Run the training process using subprocess
try:
    result = subprocess.run(
    [
        "spleeter",
        "train",
        "-p", config_path,
        "-d", dataset_root,
        "--verbose"
    ],
    check=True,
    capture_output=True,
    text=True,
)
    print("Training output:")
    print(result.stdout)
except subprocess.CalledProcessError as e:
    print("Training failed with the following error:")
    print(e.stderr)


Training output:
INFO:tensorflow:Using config: {'_model_dir': '/content/spleeter_model', '_tf_random_seed': 42, '_save_summary_steps': 5, '_save_checkpoints_steps': 100, '_save_checkpoints_secs': None, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.45
}
, '_keep_checkpoint_max': 2, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 10, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
INFO:spleeter:Start model training
INFO:tensorflow:Not using Distribute Coordinator.
INFO:tensorflow:Running training and evaluation locally (non-di

# Below is for testing different features

In [71]:
ls -lh "/content/musdb_unzipped/validation_subset/Carlos Gonzalez - A Place For Us.stem.mp4"


-rw-r--r-- 1 root root 39M Nov 17 03:10 '/content/musdb_unzipped/validation_subset/Carlos Gonzalez - A Place For Us.stem.mp4'


In [32]:
!ls

audio_example_mono.mp3	conda	 images     paper.md	    README.md	    tests
audio_example.mp3	configs  LICENSE    poetry.lock     spleeter
CHANGELOG.md		docker	 paper.bib  pyproject.toml  spleeter.ipynb


In [98]:
mus = musdb.DB(root="/content/musdb_unzipped/")

In [104]:
import os
import subprocess

# Paths
input_dir = "/content/musdb_unzipped/train_subset_whole"  # Input directory containing .stem.mp4 files
output_dir = "/content/musdb_unzipped/train_subset"       # Output directory for separated .wav files

# Ensure the output directory exists
os.makedirs(output_dir, exist_ok=True)

# Process each .stem.mp4 file
for file_name in os.listdir(input_dir):
    if file_name.endswith(".stem.mp4"):
        input_path = os.path.join(input_dir, file_name)
        track_name = os.path.splitext(file_name)[0]
        track_output_dir = os.path.join(output_dir, track_name)
        os.makedirs(track_output_dir, exist_ok=True)

        print(f"Processing {input_path}")
        # Run FFmpeg command for each stem
        for i, stem in enumerate(["mix", "drums", "bass", "other", "vocals"]):
            output_path = os.path.join(track_output_dir, f"{stem}.wav")
            ffmpeg_command = [
                "ffmpeg", "-i", input_path, "-map", f"0:{i}", output_path, "-y"
            ]
            subprocess.run(ffmpeg_command, check=True)


Processing /content/musdb_unzipped/train_subset_whole/Night Panther - Fire.stem.mp4
Processing /content/musdb_unzipped/train_subset_whole/Sweet Lights - You Let Me Down.stem.mp4
Processing /content/musdb_unzipped/train_subset_whole/The Wrong'Uns - Rothko.stem.mp4
Processing /content/musdb_unzipped/train_subset_whole/Snowmine - Curfews.stem.mp4
Processing /content/musdb_unzipped/train_subset_whole/Alexander Ross - Velvet Curtain.stem.mp4
