<a href="https://colab.research.google.com/github/youbkis/YoutubeAudioSeparator/blob/main/Youtube_Audio_Separator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Separate Vocal and Instrument Tracks from YouTube Videos


Based on [Hybrid Demucs](https://colab.research.google.com/drive/1dC9nVxk3V_VPjUADsnFu8EiT-xnU1tGH?usp=sharing) and [ytmp3-dl](https://github.com/poseidon-code/ytmp3-dl)


# Prerequisites

> Run the following cells, in order, to install the dependencies and configure the AI model.

In [None]:
# Install (or upgrade) Demucs straight from its GitHub repository, then yt-dlp from PyPI.
!python3 -m pip install -U git+https://github.com/facebookresearch/demucs#egg=demucs
!python -m pip install -U yt-dlp

In [None]:
# Customize the following options!
model = "htdemucs"  # model name passed to `demucs.separate` through `-n`
extensions = ["mp3", "wav", "ogg", "flac"]  # we will look for all those file types.
two_stems = None   # only separate one stem from the rest, for instance
# two_stems = "vocals"

# Options for the separated audio output.
mp3 = True
mp3_rate = 320  # passed to Demucs as `--mp3-bitrate` when 'mp3' is True
float32 = False  # save separated audio as float32 wavs, unused if 'mp3' is True.
int24 = False    # save separated audio as int24 wavs, unused if 'mp3' is True.
# You cannot set both `float32 = True` and `int24 = True` !!

In [None]:
import io
from pathlib import Path
import select
from shutil import rmtree,copy
import subprocess as sp
import sys
import os
from typing import Dict, Tuple, Optional, IO
import os
import platform
import shutil
import sys
from pathlib import Path
from typing import List, Tuple
import yt_dlp


# color codes
class color:
    """ANSI escape sequences wrapped around messages printed to the terminal."""

    # Emitted before an error message.
    ERROR = "\033[91m"
    # Emitted after a message to restore the default terminal style.
    ENDC = "\033[0m"


def get_ffmpeg_path():
    """Return the location of the ffmpeg binary to use (-f, --ffmpeg).

    Lookup order:
      1. an ``ffmpeg`` executable reachable through ``PATH``;
      2. a binary bundled with the project under
         ``<cwd>/ffmpeg/<windows|darwin|linux>/``.

    Returns:
        The path of the ffmpeg executable, as a string.

    Raises:
        SystemExit: with a non-zero status when no ffmpeg binary is found
            (no installed ffmpeg, no bundled binary for this platform).
    """
    # Prefer an ffmpeg that is already installed on the system.
    system_ffmpeg = shutil.which('ffmpeg')
    if system_ffmpeg is not None:
        return system_ffmpeg

    # Otherwise, use the ffmpeg binaries present with this project.
    bundled_binaries = {
        'Windows': 'windows/ffmpeg.exe',
        'Darwin': 'darwin/ffmpeg',
        'Linux': 'linux/ffmpeg',
    }
    relative_path = bundled_binaries.get(platform.system())
    if relative_path is not None:
        bundled_ffmpeg = f'{os.path.abspath(os.getcwd())}/ffmpeg/{relative_path}'
        # Only hand back a path that actually points at a file, so callers
        # never receive a location that cannot be executed.
        if os.path.isfile(bundled_ffmpeg):
            return bundled_ffmpeg

    # Neither an installed ffmpeg nor a usable bundled binary (e.g. the
    # "ytmp3-dl-base" release, which ships without ffmpeg binaries).
    print(f"{color.ERROR}ffmpeg NOT FOUND.{color.ENDC}")
    # Exit with a failure status; `sys.exit` also works where the `site`
    # module's `exit` helper is not available.
    sys.exit(1)

def download(url, path):
    """Download the audio of one YouTube video and convert it to mp3.

    Args:
        url: URL of the video to download.
        path: Folder the downloaded file is written to.

    The shared ``yt_dlp_options`` are used as-is except for ``outtmpl``,
    which is pointed at ``path`` so the destination argument is honoured.
    """
    # Build a per-call copy so the module-level options dict is not mutated.
    options = dict(yt_dlp_options, outtmpl=f'{path}/%(title)s.webm')
    with yt_dlp.YoutubeDL(options) as mp3:
        mp3.download([url])


# Module-level declarations. `cli_options` and `URLS` are annotated only:
# no value is assigned to them here.
status: List[str] = []
cli_options: List[Tuple[str, str]]
URLS: List[str]

# Post-processing step: extract the audio track as mp3 at quality '320'.
_extract_audio_postprocessor = {
    'key': 'FFmpegExtractAudio',
    'preferredcodec': 'mp3',
    'preferredquality': '320',
}

# Options handed to `yt_dlp.YoutubeDL` for every download.
yt_dlp_options = {
    # PERMANENT options
    'quiet': True,
    'format': 'bestaudio/best',
    'keepvideo': False,
    'postprocessors': [_extract_audio_postprocessor],
    # Resolved once, when this cell runs.
    'ffmpeg_location': get_ffmpeg_path(),
    'outtmpl': 'convertedmp3/%(title)s.webm',
    # OPTIONAL options
    'noplaylist': True,
    'noprogress': True,
}

def find_files(in_path):
    """Return the entries directly inside `in_path` with a wanted extension.

    An entry is kept when its suffix, lower-cased and without the leading
    dot, appears in the module-level `extensions` list. Sub-folders are not
    searched.
    """
    def has_wanted_extension(entry):
        return entry.suffix.lower().lstrip(".") in extensions

    return [entry for entry in Path(in_path).iterdir() if has_wanted_extension(entry)]

def copy_process_streams(process: sp.Popen):
    """Forward a child process's stdout and stderr to ours as data arrives.

    Args:
        process: A process started with ``stdout=PIPE`` and ``stderr=PIPE``.

    The function returns once both pipes have reached end-of-file. It does
    not wait for the process itself; callers still call ``process.wait()``.

    NOTE: this relies on ``select.select`` over pipe file descriptors, which
    the `select` module documents as unsupported on Windows.
    """
    import codecs  # function-scope import: only needed by this helper

    def raw(stream: Optional[IO[bytes]]) -> IO[bytes]:
        # Use the unbuffered stream so a read returns what is available now
        # instead of blocking until a full buffer has been filled.
        assert stream is not None
        if isinstance(stream, io.BufferedIOBase):
            stream = stream.raw
        return stream

    # One incremental decoder per pipe: a multi-byte UTF-8 character split
    # across two reads is held back until its remaining bytes arrive, rather
    # than raising UnicodeDecodeError as a per-chunk `bytes.decode()` would.
    make_decoder = codecs.getincrementaldecoder("utf-8")

    p_stdout, p_stderr = raw(process.stdout), raw(process.stderr)
    stream_by_fd: Dict[int, Tuple[IO[bytes], IO[str], codecs.IncrementalDecoder]] = {
        p_stdout.fileno(): (p_stdout, sys.stdout, make_decoder(errors="replace")),
        p_stderr.fileno(): (p_stderr, sys.stderr, make_decoder(errors="replace")),
    }
    fds = list(stream_by_fd.keys())

    while fds:
        # `select` syscall will wait until one of the file descriptors has content.
        ready, _, _ = select.select(fds, [], [])
        for fd in ready:
            p_stream, std, decoder = stream_by_fd[fd]
            raw_buf = p_stream.read(2 ** 16)
            # An empty read means the child closed this pipe.
            at_eof = not raw_buf
            # `final=True` at EOF flushes any bytes the decoder still holds.
            text = decoder.decode(raw_buf or b"", final=at_eof)
            if text:
                std.write(text)
                std.flush()
            if at_eof:
                fds.remove(fd)

def separate(inp, outp):
    """Run Demucs on every audio file found in `inp`.

    Args:
        inp: Folder scanned (non-recursively, via `find_files`) for audio files.
        outp: Folder passed to Demucs as its output directory (`-o`).

    Returns:
        True when the Demucs command exits with status 0; False when there
        is no audio file to process or the command fails.

    The command line is built from the module-level options `model`, `mp3`,
    `mp3_rate`, `float32`, `int24` and `two_stems`.
    """
    files = [str(f) for f in find_files(inp)]
    if not files:
        print(f"No valid audio files in {inp}")
        # Always report failure with a bool, like the other exit paths.
        return False

    # Run Demucs with the interpreter executing this code, so the package
    # installed for this environment is the one used; a bare "python3" may
    # be missing (e.g. on Windows) or belong to another environment.
    cmd = [sys.executable or "python3", "-m", "demucs.separate",
           "-o", str(outp), "-n", model]
    if mp3:
        cmd += ["--mp3", f"--mp3-bitrate={mp3_rate}"]
    if float32:
        cmd += ["--float32"]
    if int24:
        cmd += ["--int24"]
    if two_stems is not None:
        cmd += [f"--two-stems={two_stems}"]

    print("Going to separate the files:")
    print('\n'.join(files))
    print("With command: ", " ".join(cmd))
    p = sp.Popen(cmd + files, stdout=sp.PIPE, stderr=sp.PIPE)
    # Relay the child's output while it runs, then collect its exit status.
    copy_process_streams(p)
    p.wait()
    if p.returncode != 0:
        print("Command failed, something went wrong.")
        return False
    return True

def dl_mp3(link):
    """Download `link` as an mp3 into a freshly emptied ``convertedmp3`` folder.

    Returns:
        The relative path ``"convertedmp3/<file name>"`` of the downloaded
        file, or None when the download raised or produced no file.
    """
    clear_paths()
    try:
        download(link, "convertedmp3")
    except Exception as e:
        print("Error while downloading yt video", e)
        return None
    # List the same relative folder the download writes to. Sorting makes the
    # returned entry deterministic should the folder hold several files.
    downloaded = sorted(os.listdir("convertedmp3"))
    if not downloaded:
        return None
    print("MP3 download successful")
    return "convertedmp3/" + downloaded[0]


def download_mp3(link):
    """Download `link` as an mp3, then show where the file was saved.

    Does nothing more when the download fails.
    """
    mp3_file = dl_mp3(link)
    if mp3_file is None:
        return
    # `os.startfile` only exists on Windows; elsewhere, report the location.
    if hasattr(os, "startfile"):
        os.startfile("convertedmp3")
    else:
        print(f"MP3 saved to {os.path.abspath(mp3_file)}")


def clear_paths():
    """(Re)create ``convertedmp3`` in the working directory as an empty folder."""
    # A relative path resolves against the current working directory, which is
    # also where the download template ('convertedmp3/...') writes.
    in_path = Path('convertedmp3')

    if in_path.exists():
        rmtree(in_path)
    in_path.mkdir()
   

def separate_from_link(link, keep_original_mp3=False):
    """Download a YouTube video's audio and split it into separate tracks.

    Args:
        link: URL of the video to process.
        keep_original_mp3: When True, the downloaded audio is also copied
            into the output folder next to the separated tracks.

    The audio is downloaded into ``convertedmp3`` and the separated tracks
    are written to ``separatedaudio``, which is emptied first. Progress and
    failures are reported with `print`; nothing is returned.
    """
    in_path = Path('convertedmp3')
    out_path = Path('separatedaudio')

    # Start from an empty output folder.
    if out_path.exists():
        rmtree(out_path)
    out_path.mkdir()

    print(f"Downloading {link}")
    try:
        dl_mp3(link)
    except Exception as e:
        print("Halted download:", e)

    # Decide from the folder contents, i.e. exactly what `separate` will read,
    # whether there is anything to process.
    downloaded = (
        [entry for entry in in_path.iterdir() if entry.is_file()]
        if in_path.is_dir() else []
    )
    if not downloaded:
        print("Download failed, nothing to separate.")
        return

    print("Download finished.\nSeparating Tracks:")
    if not separate(in_path, out_path):
        print("Separation failed")
        return

    print("Separation finished.")
    if keep_original_mp3:
        for original in downloaded:
            copy(original, out_path)

    # `os.startfile` only exists on Windows; elsewhere, report the location.
    result_dir = str(out_path.resolve())
    if hasattr(os, "startfile"):
        os.startfile(result_dir)
    else:
        print(f"Separated tracks are in {result_dir}")





# Converters

In [None]:
#@title Edit the YouTube link and run the cell
# URL of the video to process; the cells below pass it to
# `separate_from_link` and `download_mp3`.
LINK = "https://www.youtube.com/watch?v=IL1vCQVZQMU"

In [None]:
#@title Download separated Audio Data
# When True, the downloaded mp3 is also copied into the "separatedaudio" folder.
keep_original_mp3=True
separate_from_link(LINK,keep_original_mp3)

In [None]:
#@title Download the full audio file without separation
download_mp3(LINK)