In [None]:
# Librosa is our low-level audio processor
!pip install librosa

In [1]:
import os

import matplotlib.pyplot as plt
import numpy as np
from scipy import signal

import librosa
import librosa.display

import soundfile as sf # python's audio processor

# suppress=True makes numpy print floats in fixed-point instead of scientific notation
np.set_printoptions(suppress=True)

In [2]:
# Tempo (BPM) and musical key of every track, keyed by track name
track_dict = {
    'Billy Kenny & Huxley - Sweat': dict(BPM=125, Key='F major'),
    'Camelphat - Drop it (Mason Maynard Remix)': dict(BPM=125, Key='F# major'),
    'ANOTR - Help (Extended Mix)': dict(BPM=124, Key='F# major'),
    'Benihana – Quiero (Original Mix)': dict(BPM=125, Key='A# major'),
    'Dennis Cruz - El Sueño (feat Martina Camargo)': dict(BPM=124, Key='B minor'),
    'Dopamine Machine - Club Mix': dict(BPM=124, Key='D# minor'),
}

# Track names in insertion order; the position in this list is the track index
track_list = [*track_dict]

# Chorus start time per track, indexed like track_list
# (automatically extracted, known to be erroneous)
chorus_secs = [
    257.91957462503075,
    316.3291764290701,
    324.42883654218963,
    194.15972058622108,
    345.998458556607,
    215.5939096330522,
]

In [4]:
# Select one track, derive its timing constants from the BPM, load the audio
# and run the beat tracker on the same file.
i = 0 # index of the track to process (position in track_list / chorus_secs)

track_name = track_list[i]
chorus_start_sec = chorus_secs[i]

N_bars = 16 # chorus length in bars

BPM = track_dict[track_name]['BPM']
# 'F major' -> key='F', typ='major'
key, typ = track_dict[track_name]['Key'].split(' ')
# NOTE(review): `scale` is read later by the "Cut High Frequencies" cell, but its
# only assignment is commented out here and `scales` is not defined in any visible cell.
#scale = scales[key][typ]


# Simple arithmetic; we can go over these lines together if you like
beat_length = 60 / BPM  # seconds per beat
bar_length = 4 * beat_length  # seconds per bar, 4 beats per bar
chorus_length = N_bars * bar_length # sec

input_path = os.path.join("data","audio_clips",track_name+".mp3")

fs = 44100
track, sr = librosa.load(input_path, sr=fs) # load the track with target sampling rate fs

#Beat Tracking
# NOTE(review): RNNBeatProcessor / BeatTrackingProcessor are not imported in any
# visible cell - presumably madmom.features.beats; confirm and add the import.
beat_proc = RNNBeatProcessor()
tracking_proc = BeatTrackingProcessor(fps=100)

# The beat processor is run on the file path, not on the already loaded `track`
activations = beat_proc(input_path)
# Used downstream as an array of beat times in seconds
beat_positions = tracking_proc(activations)



**Align the chorus with the external beatgrid**

**Deal with pirated audio later**

In [6]:
def align_grid_to_bar(chorus_start_sec, beat_positions, beat_length):
    """
    Aligns the chorus start to a bar beginning on the beat grid.

    Assumes the track contains only 4 beat long sections and that beat index 0
    is a bar beginning, so bar beginnings are the beat indices divisible by 4.

        Parameters:
        -----------

            chorus_start_sec (float): start second of the chorus given by pychorus
            beat_positions (array-like, float): beat positions in seconds
            beat_length (float): length of a beat in seconds

        Returns:
        --------

           idx (int): index (into beat_positions) of the bar beginning chosen
                      for the chorus. It can point past the end of the grid when
                      the chorus starts within the last bar.

        Raises:
        -------

            ValueError: if no beat lies within one beat length of chorus_start_sec
    """

    # Accept lists as well as arrays
    beat_positions = np.asarray(beat_positions, dtype=float)

    # Beats lying strictly within one beat length of the chorus start
    indices = np.where(np.abs(chorus_start_sec - beat_positions) < beat_length)[0]

    if len(indices) == 0:
        raise ValueError(
            "No beat within {} s of the chorus start at {} s".format(beat_length, chorus_start_sec)
        )

    if len(indices) > 2:
        print("Too many beats returned!")

    # Choose the first even beat among the two earliest candidates
    even_candidates = [i for i in indices[:2] if i % 2 == 0]
    if even_candidates:
        idx = even_candidates[0]
    else:
        # Only odd candidates (e.g. a single beat in range because the tracked
        # beat interval is slightly longer than beat_length): step forward to the
        # following beat, which is even, matching the forward rounding below.
        idx = indices[0] + 1

    # if the even idx is not a multiple of 4, make it, by moving to the next bar beginning
    if idx % 4:
        idx += 2

    return int(idx)

def get_aligned_chorus_beats(bar_idx, N_bars, beat_positions):
    """
    Slices the beat grid to the beats spanned by the aligned chorus.

    Parameters:
    -----------

        bar_idx (int): index in the beat grid of the first beat of the chorus
        N_bars (int): number of bars to cover
        beat_positions (array, float): array of beat positions in seconds

    Returns:
    --------

        chorus_beat_positions (array): the 4*N_bars beats of the chorus plus the
            beat right after them (fewer if the grid ends earlier)

    """

    # 4 beats per bar, plus one extra beat closing the last bar
    n_beats = N_bars * 4 + 1
    first, last = bar_idx, bar_idx + n_beats

    return beat_positions[first:last]

In [None]:
# Snap the (erroneous) chorus start to a bar beginning on the tracked beat grid
bar_idx = align_grid_to_bar(chorus_start_sec, beat_positions, beat_length)

chorus_beat_positions = get_aligned_chorus_beats(bar_idx, N_bars, beat_positions)

# Bar beginnings for plotting: every 4th chorus beat. Kept as a numpy array so that
# the later `bar_times - bar_times[0]` is ordinary array arithmetic rather than
# relying on list/numpy-scalar coercion.
bar_times = np.asarray(chorus_beat_positions)[::4]

# convert time boundaries (seconds) to sample indices and cut the aligned chorus
chorus_aligned = track[int(chorus_beat_positions[0]*fs):int(chorus_beat_positions[-1]*fs)]

**Plot the Aligned Chorus Spectrogram and Waveform**

In [None]:
# Compute the spectrogram
n_fft = 4096*8
win_length = 4096*2
hop_length = int(win_length/2)

amplitude_spectrogram = np.abs(librosa.stft(chorus_aligned, n_fft=n_fft, win_length=win_length, hop_length=hop_length))

# `ref` has to be passed by keyword (it is keyword-only in librosa >= 0.10);
# ref=np.max scales the dB values relative to the spectrogram peak, as before.
dB_spectrogram = librosa.amplitude_to_db(amplitude_spectrogram, ref=np.max)

#power_spectrogram = librosa.db_to_power(dB_spectrogram, ref=1.0)

# `librosa.display.waveplot` was removed in librosa 0.10 in favour of `waveshow`;
# use whichever one the installed version provides.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

# Beat and bar times relative to the chorus start (the chorus audio starts at t=0)
chorus_beats_rel = np.asarray(chorus_beat_positions) - chorus_beat_positions[0]
bar_times_rel = np.asarray(bar_times) - bar_times[0]

# plot the waveform and the spectrogram
fig, ax = plt.subplots(figsize=(20,10), nrows=2, sharex=False)

img = librosa.display.specshow(dB_spectrogram, sr=fs, hop_length=hop_length, x_axis='time', y_axis='log', ax=ax[0])
ax[0].vlines(chorus_beats_rel, 0, 8192, alpha=0.8, color='w',linestyle='-')
ax[0].vlines(bar_times_rel, 0, 16384, alpha=0.8, color='g',linestyle='-')
ax[0].set_xlim([-0.2, chorus_length+0.2])
ax[0].xaxis.set_ticks(np.arange(0, chorus_length+0.2, 1))
ax[0].set_title('Aligned chorus spectrogram (white: beats, green: bars)')

plot_waveform(chorus_aligned, sr=fs, ax=ax[1])
ax[1].vlines(chorus_beats_rel, -0.9, 0.9, alpha=0.8, color='r',linestyle='-')
ax[1].vlines(bar_times_rel, -1.1, 1.1, alpha=0.8, color='g',linestyle='-')
ax[1].set_xlim([-0.2, chorus_length+0.2])
ax[1].xaxis.set_ticks(np.arange(0, chorus_length+0.2, 1))
ax[1].set_title('Aligned chorus waveform (red: beats, green: bars)')

plt.savefig('{}_waveform_spectrogram.png'.format(track_name))
plt.show()

# END OF CHORUS ALIGNMENT

# `librosa.display.waveplot` was removed in librosa 0.10 in favour of `waveshow`;
# use whichever one the installed version provides.
plot_waveform = getattr(librosa.display, "waveshow", None) or librosa.display.waveplot

# The original cell read `chorus_beats_time` / `beats_time`, which are not defined
# anywhere in the notebook (NameError on a fresh kernel). The chorus beat grid is
# `chorus_beat_positions`; shift it so that t=0 is the chorus start, matching the
# time axis of the chorus waveform and spectrogram.
chorus_beats_rel = np.asarray(chorus_beat_positions) - chorus_beat_positions[0]
bar_times_rel = np.asarray(bar_times) - bar_times[0]

# Waveform with beat (red) and bar (green) markers.
# Beats are drawn in red as in the combined figure; white is invisible on white axes.
fig, ax = plt.subplots(figsize=(20,8))

plot_waveform(chorus_aligned, sr=fs, ax=ax)
ax.vlines(chorus_beats_rel, -1, 1, alpha=0.8, color='r',linestyle='-')
ax.vlines(bar_times_rel, -1, 1, alpha=0.8, color='g',linestyle='-')
ax.set_xlim([-0.2, 32])

# Spectrogram with beat (white) and bar (green) markers
fig, ax = plt.subplots(figsize=(20,8))
img = librosa.display.specshow(dB_spectrogram, sr=fs, hop_length=hop_length, x_axis='time', y_axis='log', ax=ax)
#fig.colorbar(img, ax=ax[0], format="%+2.f dB")
ax.vlines(chorus_beats_rel, 0, 16384, alpha=0.8, color='w',linestyle='-')
ax.vlines(bar_times_rel, 0, 16384, alpha=0.8, color='g',linestyle='-')
ax.set_xlim([-0.2, 32])
#plt.savefig('Spec-Beats.png')

# Spectrogram (top) and beat markers (bottom) on a shared time axis
fig, ax = plt.subplots(figsize=(20,8), nrows=2, sharex=True)
img = librosa.display.specshow(dB_spectrogram, sr=fs, hop_length=hop_length, x_axis='time', y_axis='log', ax=ax[0])
#fig.colorbar(img, ax=ax[0], format="%+2.f dB")
# NOTE(review): chorus-relative beats are used because the x axis is shared with
# the chorus spectrogram - confirm this is what `beats_time` was meant to be.
ax[1].vlines(chorus_beats_rel, 0, 1, alpha=0.5, color='b',linestyle='--', label='Beats')


**Separate to Stems**

In [None]:
# Build the source separator from the 'spleeter:4stems' configuration.
# NOTE(review): `Separator` is not imported in any visible cell - presumably
# `from spleeter.separator import Separator`; confirm and add it to the imports cell.
separator = Separator('spleeter:4stems')

In [None]:
# The separator is fed a 2-D array, so a trailing channel axis is added to the mono
# chorus (required). Building it from `chorus_aligned` instead of overwriting
# `chorus_forced` with itself keeps the cell idempotent: re-running no longer adds
# yet another axis.
# NOTE(review): the original read `chorus_forced`, which is not defined in any
# visible cell; `chorus_aligned` is the only chorus audio available - confirm.
chorus_forced = np.expand_dims(chorus_aligned, 1)

# `audio_descriptor` is just an optional text label for the waveform
# (e.g. a file name); an empty string is fine.
prediction = separator.separate(chorus_forced, audio_descriptor='')

bassline = prediction['bass']
# Average over axis 1 (channels) to get a mono bassline
bassline_mono = np.mean(bassline,axis=1)

In [None]:
# Write out audio as 24bit PCM WAV
# NOTE(review): `output_name` and `output_dir` are not defined in any visible cell.
# NOTE: the in-place `+=` is not idempotent - re-running this cell appends
# "_bassline" again. A later cell appends "_LP" to the same variable.
output_name += "_bassline"
output_path = os.path.join(output_dir, output_name+".wav")

# Writes `bassline` (the separator's 'bass' output, not the mono mix) at sample rate sr
sf.write(output_path, bassline, sr, subtype='PCM_24')

**Cut High Frequencies**

In [None]:
# NOTE(review): `scale` is not defined in any visible cell (its assignment in the
# parameter cell is commented out) - restore it before running this cell.
fc = max(scale['frequencies']['2']) # adaptive cutoff Hz

print("Highest frequency in Bass Frequency Region: {} Hz".format(fc))

# firwin expects the cutoff as a fraction of the Nyquist frequency (fs/2), not in radians
wc = fc / (fs/2)

# 5000-tap low-pass FIR filter
lp = signal.firwin(5000, wc)

# mode='same' returns an output of the input's length, centred on the full
# convolution. The default full mode would make the signal 4999 samples longer and
# delay it by about 2500 samples (~57 ms at 44.1 kHz), which would shift the
# bassline off the beat grid it was aligned to.
bassline_cut = signal.convolve(bassline_mono, lp, mode='same')

In [None]:
# Write out the low-passed bassline as 24bit PCM WAV
# NOTE(review): `output_name` and `output_dir` are not defined in any visible cell.
# NOTE: "_LP" is appended to whatever `output_name` currently holds, so the file
# name depends on which cells ran before, and re-running appends "_LP" again.
output_name += "_LP"
output_path = os.path.join(output_dir, output_name+".wav")

sf.write(output_path, bassline_cut, sr, subtype='PCM_24')

In [None]:
# STFT parameters: 8192-sample window, zero-padded to a 32768-point FFT, 50 % overlap
n_fft = 4096*8
win_length = 4096*2
hop_length = int(win_length/2)


amplitude_spectrogram = np.abs(librosa.stft(bassline_cut, n_fft=n_fft, win_length=win_length, hop_length=hop_length))

# `ref` has to be passed by keyword (it is keyword-only in librosa >= 0.10);
# ref=np.max scales the dB values relative to the spectrogram peak, as before.
dB_spectrogram = librosa.amplitude_to_db(amplitude_spectrogram, ref=np.max)

power_spectrogram = librosa.db_to_power(dB_spectrogram, ref=1.0)

In [None]:
# Log-frequency spectrogram of the current dB_spectrogram with a dB colour bar
fig, ax = plt.subplots(figsize=(20, 8))
img = librosa.display.specshow(
    dB_spectrogram,
    sr=fs,
    hop_length=hop_length,
    x_axis='time',
    y_axis='log',
    ax=ax,
)
fig.colorbar(img, ax=ax, format="%+2.f dB")