In [6]:
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
from IPython.display import Audio, display
import scipy.signal as sps

In [7]:
# Analysis parameters shared by every feature extraction below:
# target sample rate, FFT window size and hop between frames (both in samples).
sr, n_fft, hop_length = 22050, 1024, 256

# Load the clip, asking librosa for the target sample rate;
# the rate returned by librosa.load replaces `sr`.
filename = 'audio_a/Bloop.wav'
y, sr = librosa.load(filename, sr=sr)

In [9]:
def normalize(x):
    """Min-max rescale ``x`` so its smallest value maps to 0 and its largest to ~1.

    A small epsilon (1e-8) is added to the value range so that a constant
    input yields all zeros instead of a division by zero.

    Parameters
    ----------
    x : array_like
        Values to rescale.

    Returns
    -------
    numpy.ndarray
        ``(x - min(x)) / (max(x) - min(x) + 1e-8)``, same shape as ``x``.
    """
    lowest = np.min(x)
    value_range = np.max(x) - lowest
    return (x - lowest) / (value_range + 1e-8)

def expand(mask, hop_length, length):
    """Stretch a per-frame mask to a per-sample mask of exactly ``length`` entries.

    Each frame value is repeated ``hop_length`` times. If the repeated mask is
    longer than ``length`` it is truncated; if it is shorter, the last frame's
    value is held to fill the remainder, so callers that slice the result in
    sample-sized chunks never hit an empty slice.

    Parameters
    ----------
    mask : array_like
        One value per analysis frame.
    hop_length : int
        Number of samples each frame value is repeated for.
    length : int
        Number of samples in the output.

    Returns
    -------
    numpy.ndarray
        1-D array of ``length`` values (empty if ``mask`` is empty, since
        there is no value to hold).
    """
    m = np.repeat(mask, hop_length)
    shortfall = length - m.size
    if shortfall > 0 and m.size > 0:
        # Not enough frames to cover the signal: extend with the last frame's value.
        m = np.concatenate([m, np.full(shortfall, m[-1], dtype=m.dtype)])
    return m[:length]

In [12]:
# Per-frame spectral centroid; [0] takes the first row of the array librosa returns.
centroid = librosa.feature.spectral_centroid(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)[0]
centroid_n = normalize(centroid)

# Per-sample mask: True where the frame's normalised centroid is above the median.
mask = expand(centroid_n > np.median(centroid_n), hop_length, len(y))

# 4th-order Butterworth filters; cutoffs (Hz) are divided by the Nyquist frequency sr / 2.
b_low, a_low = sps.butter(4, 800 / (sr / 2), btype='low')
b_high, a_high = sps.butter(4, 2000 / (sr / 2), btype='high')

# Filter the whole signal once with each filter. Calling lfilter separately on
# every chunk restarts the filter from zero state each time, which puts a
# transient at every chunk boundary; filtering the full signal keeps each
# filter's state continuous and avoids re-filtering inside the loop.
y_high = sps.lfilter(b_high, a_high, y)
y_low = sps.lfilter(b_low, a_low, y)

y1 = y.copy()
chunk = int(0.2 * sr)  # 0.2 s decision window, in samples

for i in range(0, len(y), chunk):
    window = slice(i, i + chunk)
    # Majority vote over the window: mostly "bright" frames -> high-passed audio,
    # otherwise low-passed audio.
    if np.mean(mask[window]) > 0.5:
        y1[window] = y_high[window]
    else:
        y1[window] = y_low[window]

# Peak-normalise; the epsilon guards against an all-zero signal.
y1 /= np.max(np.abs(y1)) + 1e-8
display(Audio(y1, rate=sr))



In [24]:
# Per-frame 95% spectral roll-off; [0] takes the first row of the array librosa returns.
rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.95, n_fft=n_fft, hop_length=hop_length)[0]

roll_n = normalize(rolloff)
# Per-sample mask: True where the normalised roll-off is below 0.45.
mask3 = expand(roll_n < 0.45, hop_length, len(y))

# (b, a) coefficients are still defined in case other cells reference them,
# but they are not used for filtering here: a 10th-order filter with such a
# low cutoff is numerically fragile in transfer-function form.
b_lp, a_lp = sps.butter(10, 200/(sr/2), btype='low')

# Same 10th-order, 200 Hz low-pass as second-order sections, the form SciPy
# recommends for filtering.
sos_lp = sps.butter(10, 200 / (sr / 2), btype='low', output='sos')

# Filter the whole signal once so the filter state is continuous; filtering
# each chunk separately would restart from zero state at every boundary.
y_lp = sps.sosfilt(sos_lp, y)

y3 = y.copy()
chunk = int(0.05*sr)  # 0.05 s decision window, in samples

for i in range(0, len(y), chunk):
    window = slice(i, i + chunk)
    # Majority vote over the window: replace with low-passed audio, else keep the original.
    if np.mean(mask3[window]) > 0.5:
        y3[window] = y_lp[window]

# Peak-normalise; the epsilon guards against an all-zero signal.
y3 /= np.max(np.abs(y3)) + 1e-8
display(Audio(y3, rate=sr))

In [25]:
# Spectral contrast, averaged over the first axis to get one value per frame.
contrast = librosa.feature.spectral_contrast(y=y, sr=sr, n_fft=n_fft, hop_length=hop_length)
contrast_mean = np.mean(contrast, axis=0)
contrast_n = normalize(contrast_mean)
# Per-sample mask: True where the normalised mean contrast is above 0.5.
mask5 = expand(contrast_n > 0.5, hop_length, len(y))

# 4th-order Butterworth high-pass; cutoff (Hz) is divided by the Nyquist frequency sr / 2.
b_hp, a_hp = sps.butter(4, 3000/(sr/2), btype='high')

# Filter the whole signal once so the filter state is continuous; calling
# lfilter on each chunk separately would restart from zero state and put a
# transient at every chunk boundary.
y_hp = sps.lfilter(b_hp, a_hp, y)

y5 = y.copy()
chunk = int(0.2*sr)  # 0.2 s decision window, in samples

for i in range(0, len(y), chunk):
    window = slice(i, i + chunk)
    # Majority vote over the window: replace with high-passed audio, else keep the original.
    if np.mean(mask5[window]) > 0.5:
        y5[window] = y_hp[window]

# Peak-normalise; the epsilon guards against an all-zero signal.
y5 /= np.max(np.abs(y5)) + 1e-8
display(Audio(y5, rate=sr))