In [None]:
import os
from spleeter.separator import Separator
from pydub import AudioSegment
import numpy as np
import warnings
warnings.filterwarnings('ignore')
import shutil

# GLOBAL VARIABLES
# Length of one training clip: 30 seconds expressed in milliseconds.
segment_duration = 30_000
cwd = os.getcwd()
# Artist folder names expected under both the raw-audio and the vocal-separation trees.
artists = ['TaylorSwift', 'ArianaGrande', 'SZA']

# Where the voice conversion model looks for its per-speaker training clips.
dir_base_for_database = './voice_conversion/src'
database_path = f"{dir_base_for_database}/database"

# Raw songs (input) and separated stems (output) directories.
input = './raw_audio'
vocal = './vocal_separation'

# Make sure the output directories exist before any function writes into them.
os.makedirs(database_path, exist_ok=True)
os.makedirs(vocal, exist_ok=True)

def prepare_data_for_voice_conversion(database_path, artists, vocal_directory=vocal,
                                        num_songs=2, segment_duration=segment_duration):
    """
    Split each artist's separated vocal tracks into fixed-length clips and
    export them into per-speaker folders for the voice conversion model.

    For the artist at position ``k`` (0-based) in ``artists``, clips are written
    to ``<database_path>/spkr_<k + 1>``. Only full-length clips are kept; a
    trailing remainder shorter than ``segment_duration`` is discarded.

    Parameters
    ----------
    database_path: string
        the directory where we store our fixed-length audio pieces
        for voice conversion models, categorized by artists
    artists: list[string]
        list of artists involved in the model
    vocal_directory: string
        the directory where all vocal separated pieces are stored;
        vocals are read from ``<vocal_directory>/<artist>/vocals``
    num_songs: int
        maximum number of songs per artist to train the model on
        (for the sake of experimentally early training the model)
    segment_duration: int
        standard length (in milliseconds) of each audio piece fed into
        voice_conversion models

    Returns
    --------
    None
    """
    for speaker_index, artist in enumerate(artists, start=1):
        speaker = database_path + f"/spkr_{speaker_index}"
        # make an artist directory under database directory for voice_conversion models
        os.makedirs(speaker, exist_ok=True)
        artist_vocal = os.path.join(vocal_directory, artist, 'vocals')

        # Only WAV files are candidates: stray entries (hidden files, folders)
        # must neither be decoded as WAV nor consume the num_songs quota.
        # os.listdir returns entries in arbitrary order, so sort to make the
        # selected subset reproducible between runs.
        wav_files = sorted(
            name for name in os.listdir(artist_vocal)
            if name.lower().endswith('.wav')
        )

        # constrain number of songs trained on (a negative limit means "none")
        for wav_name in wav_files[:max(num_songs, 0)]:
            song = AudioSegment.from_file(os.path.join(artist_vocal, wav_name), format="wav")
            stem = os.path.splitext(wav_name)[0]

            # divide into smaller data chunks, standardize data length
            for segment_index, start in enumerate(range(0, len(song), segment_duration)):
                segment = song[start:start + segment_duration]
                if len(segment) != segment_duration:
                    # incomplete tail of the song; skip to keep clip length uniform
                    continue
                # export into wav files to correct artist directory under voice_conversion
                segment.export(
                    os.path.join(speaker, stem + str(segment_index) + '.wav'),
                    format="wav",
                )

def source_separation(artists, input_directory=input, output_directory=vocal):
    """
    Separate every artist's raw songs into vocals and accompaniment.

    Takes some artists' audio pieces under an input directory, runs Spleeter's
    2-stem separation on each WAV file, then moves the resulting stems into
    ``<output_directory>/<artist>/vocals`` and ``<output_directory>/<artist>/music``,
    each stored under the original file name.

    Parameters
    ----------
    artists: list[string]
        List of targeted artists involved in the model.
        Each artist name must match a sub-directory of ``input_directory``.

    input_directory: string
        Directory where all raw songs are stored

    output_directory: string
        Directory where all vocal separated audio pieces are stored

    Returns
    -------
    None
    """
    # Initialize the Spleeter separator
    separator = Separator('spleeter:2stems')

    # Iterate through each artist
    for artist in artists:
        artist_input_directory = os.path.join(input_directory, artist)
        artist_output_directory = os.path.join(output_directory, artist)
        artist_vocals_directory = os.path.join(artist_output_directory, 'vocals')
        artist_music_directory = os.path.join(artist_output_directory, 'music')

        # Create directories for vocals and music (this also creates the
        # artist output directory itself)
        os.makedirs(artist_vocals_directory, exist_ok=True)
        os.makedirs(artist_music_directory, exist_ok=True)

        # Iterate through each WAV file in the artist's directory, in a
        # deterministic order; the extension check is case-insensitive so
        # files named *.WAV are not silently skipped.
        for filename in sorted(os.listdir(artist_input_directory)):
            if not filename.lower().endswith('.wav'):
                continue

            input_file = os.path.join(artist_input_directory, filename)

            # Use Spleeter to separate vocals and music; results land in an
            # intermediate directory named after the file (without extension)
            separator.separate_to_file(input_file, artist_output_directory)
            print(f'Separated vocals and music for {artist} from {filename}')

            stems_directory = os.path.join(
                artist_output_directory, os.path.splitext(filename)[0]
            )
            vocals_file = os.path.join(stems_directory, 'vocals.wav')
            music_file = os.path.join(stems_directory, 'accompaniment.wav')
            shutil.move(vocals_file, os.path.join(artist_vocals_directory, filename))
            shutil.move(music_file, os.path.join(artist_music_directory, filename))

            print('File move completed.')

    print('Separation completed.')

# Run vocal/accompaniment separation for every artist in `artists`,
# using the default input and output directories.
source_separation(artists)
# Clip-splitting step is currently disabled; uncomment to run it.
# prepare_data_for_voice_conversion(database_path, artists)

  from pandas.core.computation.check import NUMEXPR_INSTALLED


INFO:tensorflow:Using config: {'_model_dir': 'pretrained_models/2stems', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': gpu_options {
  per_process_gpu_memory_fraction: 0.7
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_checkpoint_save_graph_def': True, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}
Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead
INFO:tensorflow:Calling model_fn.
INFO:tensorflo