# Bass Transcriber
This is a program that:
- splits an audio (music) file into instrument- or part-based files
- transcribes the bass into sheet music
to make transcription easier and smoother to access for practice purposes.

Big thank you to [demucs](https://github.com/facebookresearch/demucs) and [Spleeter](https://github.com/deezer/spleeter) for the amazing open-source source-separation libraries!
I have also looked into [this bass-boosting program](https://github.com/paarthmadan/bass-boost/blob/master/index.py) to learn about pydub.

By enhancing the bass before splitting the audio, the program addresses the issue that the bass gets lost or sounds wobbly/weak in the output generated by the original Spleeter sample.

In [None]:
import io
from pathlib import Path
import subprocess as sp
import sys
from typing import Dict, Tuple, Optional, IO
import os
import numpy as np
import librosa
import soundfile as sf
from pydub import AudioSegment
from IPython.display import Audio
import math
import select  # Add this import

# Install dependencies
!pip install demucs

In [None]:
def bass_line_freq(samples):
    """Estimate the lower cutoff (in Hz) of the band that gets boosted.

    The value is a heuristic derived from the amplitude statistics of
    ``samples``: ``(3 * std / sqrt(2) - mean) * 0.005``, rounded to an
    integer and floored at 60.

    Args:
        samples: Array-like of audio sample values (anything ``np.mean``
            and ``np.std`` accept).

    Returns:
        The cutoff as an ``int``, never lower than 60.

    Raises:
        ValueError: If ``samples`` is empty.
    """
    if np.size(samples) == 0:
        # Mean/std of an empty array are NaN, which int() would reject with
        # an opaque "cannot convert float NaN to integer" error.
        raise ValueError("samples must contain at least one value")

    est_mean = np.mean(samples)
    est_std = 3 * np.std(samples) / math.sqrt(2)
    bass_factor = int(round((est_std - est_mean) * 0.005))
    return max(bass_factor, 60)  # Ensure minimum of 60 Hz

def copy_process_streams(process: sp.Popen):
    """Forward a child's piped stdout/stderr to this process's stdout/stderr.

    Blocks until both of the child's streams report end-of-file. Output is
    decoded as UTF-8 with one incremental decoder per stream, so a
    multi-byte character split across two reads is reassembled instead of
    raising ``UnicodeDecodeError``; bytes that are not valid UTF-8 are
    replaced rather than aborting the copy.

    Args:
        process: Object whose ``stdout`` and ``stderr`` are readable binary
            streams with a ``fileno()`` usable by ``select.select``
            (e.g. a ``Popen`` started with both set to ``PIPE``).

    Raises:
        AssertionError: If either stream is ``None``.
    """
    import codecs  # local import keeps this cell independent of the import cell

    def raw(stream: Optional[IO[bytes]]) -> IO[bytes]:
        assert stream is not None
        # Read from the unbuffered layer so select() readiness matches what
        # read() can actually return.
        if isinstance(stream, io.BufferedIOBase):
            stream = stream.raw
        return stream

    def new_decoder() -> "codecs.IncrementalDecoder":
        return codecs.getincrementaldecoder("utf-8")(errors="replace")

    p_stdout, p_stderr = raw(process.stdout), raw(process.stderr)
    stream_by_fd: Dict[int, Tuple[IO[bytes], IO[str], "codecs.IncrementalDecoder"]] = {
        p_stdout.fileno(): (p_stdout, sys.stdout, new_decoder()),
        p_stderr.fileno(): (p_stderr, sys.stderr, new_decoder()),
    }
    fds = list(stream_by_fd)

    while fds:
        ready, _, _ = select.select(fds, [], [])
        for fd in ready:
            p_stream, std, decoder = stream_by_fd[fd]
            raw_buf = p_stream.read(2 ** 16)
            at_eof = not raw_buf  # b"" (or None) means nothing more to read
            # final=True at EOF flushes any dangling partial character.
            text = decoder.decode(raw_buf or b"", final=at_eof)
            if text:
                std.write(text)
                std.flush()
            if at_eof:
                fds.remove(fd)

def separate(input_file, output_dir):
    """Run Demucs on ``input_file`` in a child process, echoing its output.

    The command line is assembled from the notebook-level settings
    ``model``, ``mp3``, ``mp3_rate`` and ``two_stems``. A non-zero exit
    status is reported with a printed message; nothing is raised or
    returned.

    Args:
        input_file: Path of the audio file to separate.
        output_dir: Directory handed to Demucs through ``-o``.
    """
    cmd = ["python", "-m", "demucs.separate", "-o", str(output_dir), "-n", model, str(input_file)]
    if mp3:
        cmd += ["--mp3", f"--mp3-bitrate={mp3_rate}"]
    if two_stems is not None:
        cmd += [f"--two-stems={two_stems}"]

    print("Separating with command:", " ".join(cmd))
    # Using Popen as a context manager closes both pipes and waits for the
    # child on exit, including when forwarding the output raises.
    with sp.Popen(cmd, stdout=sp.PIPE, stderr=sp.PIPE) as p:
        copy_process_streams(p)
    if p.returncode != 0:
        print("Command failed, something went wrong.")

In [None]:
# Upload file
# Import Colab's upload/download helper in this cell: it is the first cell that
# uses `files`, and without the import running the notebook top to bottom
# stops here with a NameError.
from google.colab import files

print("Please upload an audio file:")
uploaded = files.upload()

if not uploaded:
    raise ValueError("No file was uploaded.")

# Only the first uploaded entry is processed; its key is used as the file name.
input_file = next(iter(uploaded))
input_path = f'/content/{input_file}'

print(f"Processing file: {input_file}")

In [None]:
# Configuration

# --- Demucs separation ---
model = "htdemucs"  # passed to demucs.separate via -n
two_stems = None  # Set to "vocals" if you want to separate vocals from accompaniment
mp3, mp3_rate = True, 320  # request MP3 stems and the bitrate to encode them at
output_dir = 'output/'

# --- Result listing ---
extensions = ["mp3", "wav", "ogg", "flac"]  # endings of output files to preview/download

# --- Bass enhancement ---
bass_boost = 6  # dB of bass boost

In [None]:
# Ask for an audio file through Colab's upload widget.
print("Please upload an audio file:")
from google.colab import files
uploaded = files.upload()

# Stop early when nothing came back; otherwise work on the first entry only.
if uploaded:
    input_file = next(iter(uploaded))
else:
    raise ValueError("No file was uploaded.")

input_path = f'/content/{input_file}'
print(f"Processing file: {input_file}")

# Load the audio file. pydub supplies the raw sample values that feed the
# cutoff heuristic; librosa supplies the waveform that is actually processed.
audio = AudioSegment.from_file(input_path)
samples = np.array(audio.get_array_of_samples())
# NOTE(review): librosa.load defaults to sr=22050 and mono=True, so the boosted
# file handed to Demucs is a resampled, downmixed copy — confirm this is intended.
y, sr = librosa.load(input_path)

# Calculate adaptive bass frequency
bass_freq = bass_line_freq(samples)
print(f"Detected bass frequency: {bass_freq} Hz")

# Compute the Short-time Fourier Transform (STFT)
D = librosa.stft(y)

# Convert to magnitude and phase
S, phase = librosa.magphase(D)

# Define the frequency range for bass (detected_freq to 250 Hz)
freq_range = librosa.fft_frequencies(sr=sr)
bass_range = (freq_range >= bass_freq) & (freq_range <= 250)
if not bass_range.any():
    # The detected cutoff is only floored at 60 Hz, so it can land above
    # 250 Hz and select no bins; say so instead of silently doing nothing.
    print(f"Warning: no frequency bins between {bass_freq} Hz and 250 Hz; the audio is left unboosted.")

# Boost the bass frequencies
S[bass_range] *= 10 ** (bass_boost / 20)  # Convert dB to magnitude

# Reconstruct the signal; length=len(y) keeps the output exactly as long as
# the input instead of whatever the frame count happens to round to.
y_boosted = librosa.istft(S * phase, length=len(y))

# Boosting can push peaks past full scale; scale back down so the written
# file is not clipped.
peak = float(np.max(np.abs(y_boosted))) if y_boosted.size else 0.0
if peak > 1.0:
    y_boosted = y_boosted / peak

# Save the bass-boosted audio
boosted_file = 'bass_boosted_' + input_file.replace(" ", "_").replace("(", "_").replace(")", "_")
boosted_path = f'/content/{boosted_file}'
sf.write(boosted_path, y_boosted, sr)

# Offer both versions for listening, input first and boosted second.
for caption, clip_path in (
    ("Original audio:", input_path),
    ("Bass-boosted audio:", boosted_path),
):
    print(caption)
    display(Audio(clip_path))

# Hand the boosted file to Demucs for source separation.
print("Separating bass-boosted audio...")
separate(boosted_path, output_dir)

# Display results
print("Separated audio files:")
# Expected location of the stems: <output_dir>/<model>/<boosted file name without extension>
model_output_dir = os.path.join(output_dir, model, os.path.splitext(boosted_file)[0])

stem_files = []
if os.path.isdir(model_output_dir):
    # Match real suffixes (".mp3"), not any name that merely ends in the
    # letters "mp3"; list once so the preview and the download agree.
    audio_suffixes = tuple(f".{ext}" for ext in extensions)
    stem_files = sorted(
        name for name in os.listdir(model_output_dir) if name.endswith(audio_suffixes)
    )

if stem_files:
    for name in stem_files:
        print(f"- {name}")
        display(Audio(os.path.join(model_output_dir, name)))

    # Offer to download the separated files
    print("\nDo you want to download the separated files? (yes/no)")
    if input().strip().lower().startswith('y'):
        for name in stem_files:
            files.download(os.path.join(model_output_dir, name))
else:
    # Covers both a missing directory and one that holds no audio files.
    print("No output files found. Separation may have failed.")