In [None]:
# Install the audio dependencies with the %pip magic so they land in the
# environment of the running kernel (a bare `!pip` may resolve to a different
# interpreter). pydub is pinned to the version recorded in this cell's output;
# librosa is left unpinned because no version is recorded for it here.
%pip install pydub==0.25.1
%pip install librosa

Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [2]:
import bisect

import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import pydub
from scipy import signal
import librosa
import librosa.display

def get_samples(sound):
    """Return the samples of ``sound`` as a NumPy array, using one channel only.

    ``sound`` must expose ``channels``, ``get_array_of_samples()`` and
    ``split_to_mono()``. Single-channel audio is converted directly; for
    two-channel audio only the first element returned by ``split_to_mono()``
    is used and the second channel is discarded.

    Raises:
        Exception: if the channel count is neither 1 nor 2.
    """
    n_channels = sound.channels
    if n_channels not in (1, 2):
        raise Exception("More than 2 channels!")

    # For stereo input keep only the first of the split mono segments.
    source = sound if n_channels == 1 else sound.split_to_mono()[0]
    return np.array(source.get_array_of_samples())

def plot_stft(t, f, dB, anchors=None):
    """Draw a spectrogram with pyplot and optionally overlay anchor points.

    Args:
        t: values for the x axis of ``plt.pcolormesh`` (labelled as seconds).
        f: values for the y axis (labelled as kHz; the view is cut to 0-5).
        dB: the 2-D data to colour, passed as the third pcolormesh argument.
        anchors: optional iterable of ``(time, frequency)`` pairs drawn as
            black crosses. ``None`` or an empty iterable draws no markers.

    Side effects:
        Updates the global ``plt.rcParams`` (font sizes and a 25x6 figure
        size) and shows the figure with ``plt.show()``.
    """
    params = {'legend.fontsize': 'x-large','figure.figsize': (25, 6),
         'axes.labelsize': 'x-large','axes.titlesize':'x-large',
         'xtick.labelsize':'x-large','ytick.labelsize':'x-large'}
    plt.rcParams.update(params)

    fig = plt.figure()
    plt.pcolormesh(t, f, dB, cmap='YlOrRd', shading='auto')
    fig.suptitle('Short-time Fourier transform (window duration 50ms, hopsize 10 ms)')
    plt.xlabel('Time (s)')
    plt.ylabel('Frequency (kHz)')
    plt.colorbar(format='%+02.0f dB')
    plt.ylim([0, 5])

    # zip(*[]) yields nothing, so unpacking an empty anchor list would raise
    # ValueError; treat "no anchors" the same as "anchors not given".
    anchor_points = [] if anchors is None else list(anchors)
    if anchor_points:
        anchors_t, anchors_f = map(list, zip(*anchor_points))
        plt.scatter(anchors_t, anchors_f, marker='x', color='black')
    plt.show()
    
def generate_anchors(t, f, dB, delta_t=0.1, f_num_bands=25):
    """Pick one peak per time/frequency tile of a spectrogram.

    The spectrogram is cut into tiles that are ``delta_t`` wide along the time
    axis and ``len(f) / f_num_bands`` bins tall along the frequency axis. For
    each tile the position of the largest ``dB`` value becomes an anchor.

    Args:
        t: 1-D sequence of time values (indexed and measured with ``len``).
        f: 1-D sequence of frequency values.
        dB: 2-D array indexed as ``dB[frequency_index, time_index]``.
        delta_t: tile width, in the units of ``t``.
        f_num_bands: number of frequency bands the ``f`` axis is divided into.

    Returns:
        A list of ``(t_value, f_value)`` tuples, ordered by time tile and then
        by frequency tile. Empty when ``t`` has fewer than two points or ``f``
        is empty.
    """
    # A sampling rate cannot be derived from fewer than two time points.
    if len(t) < 2 or len(f) == 0:
        return []

    t_rate = len(t) / (t[-1] - t[0])
    # Both tile sizes must be at least one bin; a zero step would make
    # range() raise (this happens for f when len(f) < f_num_bands).
    delta_i = max(int(delta_t * t_rate), 1)
    delta_j = max(int(len(f) / f_num_bands), 1)

    anchors = []
    # The range stops are len - delta, so a trailing tile that starts at or
    # beyond that bound is not visited (unchanged from the previous behaviour).
    for i in range(0, len(t) - delta_i, delta_i):
        for j in range(0, len(f) - delta_j, delta_j):
            tile = dB[j:j + delta_j, i:i + delta_i]
            row, col = np.unravel_index(np.argmax(tile, axis=None), tile.shape)
            anchors.append((t[i + col], f[j + row]))

    return anchors

def generate_fingerprint(anchors):
    """Pair each anchor with later anchors in its target zone.

    For an anchor ``(t1, f1)`` the target zone contains every anchor
    ``(t2, f2)`` with ``t1 + 0.1 <= t2 <= t1 + 0.1 + 0.5`` and
    ``f1 * 2**-0.5 <= f2 <= f1 * 2**0.5``. Each such pair yields the hash
    ``(f1, f2, t2 - t1)``.

    Args:
        anchors: iterable of ``(time, frequency)`` pairs. It is not modified.

    Returns:
        A list of ``(t1, (f1, f2, t2 - t1))`` tuples, ordered by the time of
        the first anchor and then by the time of the second.
    """
    t_offset = 0.1
    t_window = 0.5
    f_factors = (2**-0.5, 2**0.5)

    # Sort a copy so the caller's list keeps its order.
    ordered = sorted(anchors, key=lambda x: x[0])
    times = [anchor[0] for anchor in ordered]

    F = []
    for t1, f1 in ordered:
        t_min = t1 + t_offset
        t_max = t1 + t_offset + t_window
        f_min = f1 * f_factors[0]
        f_max = f1 * f_factors[1]
        # The anchors are sorted by time, so the candidates with
        # t_min <= t2 <= t_max form one contiguous slice.
        first = bisect.bisect_left(times, t_min)
        last = bisect.bisect_right(times, t_max)
        for t2, f2 in ordered[first:last]:
            if f2 < f_min or f2 > f_max:
                continue
            F.append((t1, (f1, f2, t2 - t1)))

    return F

def generate_matches(F1, F2):
    """List the time pairs whose hashes are equal in two fingerprints.

    Args:
        F1: sequence of ``(t1, h1)`` entries, where ``h1`` is a hash tuple.
        F2: sequence of ``(t2, h2)`` entries of the same form.

    Returns:
        A list of ``(t1, t2)`` for every pair of entries with equal hashes,
        ordered by position in ``F1`` and then by position in ``F2``. Empty
        when either fingerprint is empty.

    Only the first ``len(F1[0][1])`` components of each hash are compared.
    The hash components must be hashable (they are floats when the entries
    come from ``generate_fingerprint``).
    """
    # F1[0] does not exist for an empty fingerprint; nothing can match anyway.
    if len(F1) == 0 or len(F2) == 0:
        return []

    width = len(F1[0][1])

    # Index F2 by hash once instead of rescanning it for every entry of F1.
    times_by_hash = {}
    for t2, h2 in F2:
        times_by_hash.setdefault(tuple(h2[:width]), []).append(t2)

    matches = []
    for t1, h1 in F1:
        for t2 in times_by_hash.get(tuple(h1[:width]), ()):
            matches.append((t1, t2))
    return matches

def generate_num_of_matches(F1, F2):
    """Count the pairs of entries with equal hashes in two fingerprints.

    Args:
        F1: sequence of ``(t1, h1)`` entries, where ``h1`` is a hash tuple.
        F2: sequence of ``(t2, h2)`` entries of the same form.

    Returns:
        The number of ``(entry of F1, entry of F2)`` pairs whose hashes are
        equal; 0 when either fingerprint is empty.

    Only the first ``len(F1[0][1])`` components of each hash are compared.
    The hash components must be hashable (they are floats when the entries
    come from ``generate_fingerprint``).
    """
    # F1[0] does not exist for an empty fingerprint; nothing can match anyway.
    if len(F1) == 0 or len(F2) == 0:
        return 0

    width = len(F1[0][1])

    # Tally how often each hash occurs in F2, then sum the tallies for F1.
    occurrences = {}
    for _, h2 in F2:
        key = tuple(h2[:width])
        occurrences[key] = occurrences.get(key, 0) + 1

    return sum(occurrences.get(tuple(h1[:width]), 0) for _, h1 in F1)

def plot_matches(matches, self_compare=False):
    """Scatter-plot matching time pairs of two fingerprints.

    Args:
        matches: iterable of ``(t1, t2)`` pairs; ``t1`` goes on the x axis and
            ``t2`` on the y axis. An empty iterable produces an empty plot.
        self_compare: when true, the y axis is labelled as the query signal
            as well; otherwise it is labelled as the document signal.

    Side effects:
        Updates the global ``plt.rcParams`` (font sizes and a 12x12 figure
        size) and shows the figure with ``plt.show()``.
    """
    params = {'legend.fontsize': 'x-large','figure.figsize': (12, 12),
            'axes.labelsize': 'x-large','axes.titlesize':'x-large',
            'xtick.labelsize':'x-large','ytick.labelsize':'x-large'}
    plt.rcParams.update(params)
    fig = plt.figure()

    # zip(*[]) yields nothing, so unpacking it would raise ValueError when
    # there are no matches; fall back to two empty coordinate lists.
    pairs = list(matches)
    if pairs:
        matches_t1, matches_t2 = map(list, zip(*pairs))
    else:
        matches_t1, matches_t2 = [], []

    plt.scatter(matches_t1, matches_t2, marker='x', color='black')
    plt.xlabel('Time in query signal (s)')
    if self_compare:
        plt.ylabel('Time in query signal (s)')
    else:
        plt.ylabel('Time in document signal (s)')
    plt.show()

In [3]:
def get_stft(snd_file, type = 0):
    """Load an audio file and compute its normalised log-magnitude STFT.

    Args:
        snd_file: file to load, passed to ``pydub.AudioSegment.from_file``.
        type: container selector; 0 loads the file as "wav", 1 as "m4a".

    Returns:
        A tuple ``(t, f, dB)``: the segment times and frequencies returned by
        ``scipy.signal.stft`` (frequencies divided by 1000) and
        ``20 * log10(|Zxx| / max|Zxx|)``, so the largest value is 0.

    Raises:
        ValueError: if ``type`` is neither 0 nor 1.
    """
    formats = {0: "wav", 1: "m4a"}
    if type not in formats:
        # Previously an unknown selector left `sound` unassigned and failed
        # later with an UnboundLocalError.
        raise ValueError("Unsupported type %r; expected 0 (wav) or 1 (m4a)" % (type,))

    sound = pydub.AudioSegment.from_file(snd_file, format=formats[type])
    samples = get_samples(sound)
    frame_rate = sound.frame_rate

    window_ms = 50.0
    hop_ms = 10.0
    # Segment sizes are sample counts, so make them explicit integers.
    nperseg = int((window_ms / 1000.0) * frame_rate)
    noverlap = int(((window_ms - hop_ms) / 1000.0) * frame_rate)
    # Zero-pad each segment to four times the window length.
    nfft = int(window_ms * 4 / 1000 * frame_rate)

    f, t, Zxx = signal.stft(samples, fs=frame_rate, window='hann',
                            nperseg=nperseg, noverlap=noverlap, nfft=nfft)
    f /= 1000.0
    # NOTE(review): bins with zero magnitude become -inf here, and an
    # all-zero signal divides by zero - confirm whether a floor is wanted.
    dB = 20 * np.log10(np.abs(Zxx / abs(Zxx).max()))
    return t, f, dB

In [4]:
def get_fingerprint(snd_file, type = 0):
    """Compute the fingerprint of an audio file.

    Runs the three stages in order: ``get_stft`` on the file,
    ``generate_anchors`` on the resulting ``(t, f, dB)`` and
    ``generate_fingerprint`` on the anchors.

    Args:
        snd_file: file to load, forwarded to ``get_stft``.
        type: format selector, forwarded to ``get_stft`` unchanged.

    Returns:
        Whatever ``generate_fingerprint`` returns for the file's anchors.
    """
    spectrogram = get_stft(snd_file, type)
    peaks = generate_anchors(*spectrogram)
    return generate_fingerprint(peaks)

In [5]:
import pickle

In [None]:
# Fingerprint kiki_0201.wav .. kiki_0500.wav in 30 batches of ten files and
# pickle each batch (a list of ten fingerprints) to kiki21.pckl .. kiki50.pckl.
for j in range(30):
    file = 'kiki' + str(j + 21) + '.pckl'
    ran_min = (j + 20) * 10 + 1
    ran_max = (j + 21) * 10 + 1

    F_kiki_tmp_arr = []
    for i in range(ran_min, ran_max):
        index = str(i).zfill(4)
        kiki_file = "kiki_" + index + ".wav"
        F_kiki = get_fingerprint(kiki_file, 0)
        print("fingerprint generated: kiki_" + index)
        F_kiki_tmp_arr.append(F_kiki)

    # Open the output only once the batch is complete, and let `with` close
    # it: a failure while fingerprinting no longer leaves an open handle or
    # an empty pickle file behind.
    with open(file, 'wb') as f:
        pickle.dump(F_kiki_tmp_arr, f)

fingerprint generated: kiki_0201
fingerprint generated: kiki_0202
fingerprint generated: kiki_0203
fingerprint generated: kiki_0204
fingerprint generated: kiki_0205
fingerprint generated: kiki_0206
fingerprint generated: kiki_0207
fingerprint generated: kiki_0208
fingerprint generated: kiki_0209
fingerprint generated: kiki_0210
fingerprint generated: kiki_0211
fingerprint generated: kiki_0212
fingerprint generated: kiki_0213
fingerprint generated: kiki_0214
fingerprint generated: kiki_0215
fingerprint generated: kiki_0216
fingerprint generated: kiki_0217
fingerprint generated: kiki_0218
fingerprint generated: kiki_0219
fingerprint generated: kiki_0220
fingerprint generated: kiki_0221
fingerprint generated: kiki_0222
fingerprint generated: kiki_0223
fingerprint generated: kiki_0224
fingerprint generated: kiki_0225
fingerprint generated: kiki_0226
fingerprint generated: kiki_0227
fingerprint generated: kiki_0228
fingerprint generated: kiki_0229
fingerprint generated: kiki_0230
fingerprin

In [None]:
# Add the pickled kiki fingerprint batches (kiki21.pckl .. kiki50.pckl) to the
# archive NewFileName1.zip, one shell `zip` invocation per file.
!zip -r 'NewFileName1.zip' 'kiki21.pckl'
!zip -r 'NewFileName1.zip' 'kiki22.pckl'
!zip -r 'NewFileName1.zip' 'kiki23.pckl'
!zip -r 'NewFileName1.zip' 'kiki24.pckl'
!zip -r 'NewFileName1.zip' 'kiki25.pckl'
!zip -r 'NewFileName1.zip' 'kiki26.pckl'
!zip -r 'NewFileName1.zip' 'kiki27.pckl'
!zip -r 'NewFileName1.zip' 'kiki28.pckl'
!zip -r 'NewFileName1.zip' 'kiki29.pckl'
!zip -r 'NewFileName1.zip' 'kiki30.pckl'
!zip -r 'NewFileName1.zip' 'kiki31.pckl'
!zip -r 'NewFileName1.zip' 'kiki32.pckl'
!zip -r 'NewFileName1.zip' 'kiki33.pckl'
!zip -r 'NewFileName1.zip' 'kiki34.pckl'
!zip -r 'NewFileName1.zip' 'kiki35.pckl'
!zip -r 'NewFileName1.zip' 'kiki36.pckl'
!zip -r 'NewFileName1.zip' 'kiki37.pckl'
!zip -r 'NewFileName1.zip' 'kiki38.pckl'
!zip -r 'NewFileName1.zip' 'kiki39.pckl'
!zip -r 'NewFileName1.zip' 'kiki40.pckl'
!zip -r 'NewFileName1.zip' 'kiki41.pckl'
!zip -r 'NewFileName1.zip' 'kiki42.pckl'
!zip -r 'NewFileName1.zip' 'kiki43.pckl'
!zip -r 'NewFileName1.zip' 'kiki44.pckl'
!zip -r 'NewFileName1.zip' 'kiki45.pckl'
!zip -r 'NewFileName1.zip' 'kiki46.pckl'
!zip -r 'NewFileName1.zip' 'kiki47.pckl'
!zip -r 'NewFileName1.zip' 'kiki48.pckl'
!zip -r 'NewFileName1.zip' 'kiki49.pckl'
!zip -r 'NewFileName1.zip' 'kiki50.pckl'

  adding: kiki21.pckl (deflated 75%)
  adding: kiki22.pckl (deflated 75%)
  adding: kiki23.pckl (deflated 75%)
  adding: kiki24.pckl (deflated 75%)
  adding: kiki25.pckl (deflated 75%)
  adding: kiki26.pckl (deflated 75%)
  adding: kiki27.pckl (deflated 75%)
  adding: kiki28.pckl (deflated 75%)
  adding: kiki29.pckl (deflated 75%)
  adding: kiki30.pckl (deflated 75%)
  adding: kiki31.pckl (deflated 75%)
  adding: kiki32.pckl (deflated 75%)
  adding: kiki33.pckl (deflated 75%)
  adding: kiki34.pckl (deflated 75%)
  adding: kiki35.pckl (deflated 75%)
  adding: kiki36.pckl (deflated 75%)
  adding: kiki37.pckl (deflated 75%)
  adding: kiki38.pckl (deflated 75%)
  adding: kiki39.pckl (deflated 75%)
  adding: kiki40.pckl (deflated 75%)
  adding: kiki41.pckl (deflated 75%)
  adding: kiki42.pckl (deflated 75%)
  adding: kiki43.pckl (deflated 75%)
  adding: kiki44.pckl (deflated 75%)
  adding: kiki45.pckl (deflated 75%)
  adding: kiki46.pckl (deflated 75%)
  adding: kiki47.pckl (deflated 75%)
 

In [6]:
# Fingerprint bouba_0201.wav .. bouba_0500.wav in 30 batches of ten files and
# pickle each batch (a list of ten fingerprints) to bouba21.pckl .. bouba50.pckl.
for j in range(30):
    file = 'bouba' + str(j + 21) + '.pckl'
    ran_min = (j + 20) * 10 + 1
    ran_max = (j + 21) * 10 + 1

    F_bouba_tmp_arr = []
    for i in range(ran_min, ran_max):
        index = str(i).zfill(4)
        bouba_file = "bouba_" + index + ".wav"
        F_bouba = get_fingerprint(bouba_file, 0)
        print("fingerprint generated: bouba_" + index)
        F_bouba_tmp_arr.append(F_bouba)

    # Open the output only once the batch is complete, and let `with` close
    # it: a failure while fingerprinting no longer leaves an open handle or
    # an empty pickle file behind.
    with open(file, 'wb') as f:
        pickle.dump(F_bouba_tmp_arr, f)

fingerprint generated: bouba_0201
fingerprint generated: bouba_0202
fingerprint generated: bouba_0203
fingerprint generated: bouba_0204
fingerprint generated: bouba_0205
fingerprint generated: bouba_0206
fingerprint generated: bouba_0207
fingerprint generated: bouba_0208
fingerprint generated: bouba_0209
fingerprint generated: bouba_0210
fingerprint generated: bouba_0211
fingerprint generated: bouba_0212
fingerprint generated: bouba_0213
fingerprint generated: bouba_0214
fingerprint generated: bouba_0215
fingerprint generated: bouba_0216
fingerprint generated: bouba_0217
fingerprint generated: bouba_0218
fingerprint generated: bouba_0219
fingerprint generated: bouba_0220
fingerprint generated: bouba_0221
fingerprint generated: bouba_0222
fingerprint generated: bouba_0223
fingerprint generated: bouba_0224
fingerprint generated: bouba_0225
fingerprint generated: bouba_0226
fingerprint generated: bouba_0227
fingerprint generated: bouba_0228
fingerprint generated: bouba_0229
fingerprint ge

In [7]:
# Add the pickled bouba fingerprint batches (bouba21.pckl .. bouba50.pckl) to
# the archive NewFileName1.zip, one shell `zip` invocation per file.
!zip -r 'NewFileName1.zip' 'bouba21.pckl'
!zip -r 'NewFileName1.zip' 'bouba22.pckl'
!zip -r 'NewFileName1.zip' 'bouba23.pckl'
!zip -r 'NewFileName1.zip' 'bouba24.pckl'
!zip -r 'NewFileName1.zip' 'bouba25.pckl'
!zip -r 'NewFileName1.zip' 'bouba26.pckl'
!zip -r 'NewFileName1.zip' 'bouba27.pckl'
!zip -r 'NewFileName1.zip' 'bouba28.pckl'
!zip -r 'NewFileName1.zip' 'bouba29.pckl'
!zip -r 'NewFileName1.zip' 'bouba30.pckl'
!zip -r 'NewFileName1.zip' 'bouba31.pckl'
!zip -r 'NewFileName1.zip' 'bouba32.pckl'
!zip -r 'NewFileName1.zip' 'bouba33.pckl'
!zip -r 'NewFileName1.zip' 'bouba34.pckl'
!zip -r 'NewFileName1.zip' 'bouba35.pckl'
!zip -r 'NewFileName1.zip' 'bouba36.pckl'
!zip -r 'NewFileName1.zip' 'bouba37.pckl'
!zip -r 'NewFileName1.zip' 'bouba38.pckl'
!zip -r 'NewFileName1.zip' 'bouba39.pckl'
!zip -r 'NewFileName1.zip' 'bouba40.pckl'
!zip -r 'NewFileName1.zip' 'bouba41.pckl'
!zip -r 'NewFileName1.zip' 'bouba42.pckl'
!zip -r 'NewFileName1.zip' 'bouba43.pckl'
!zip -r 'NewFileName1.zip' 'bouba44.pckl'
!zip -r 'NewFileName1.zip' 'bouba45.pckl'
!zip -r 'NewFileName1.zip' 'bouba46.pckl'
!zip -r 'NewFileName1.zip' 'bouba47.pckl'
!zip -r 'NewFileName1.zip' 'bouba48.pckl'
!zip -r 'NewFileName1.zip' 'bouba49.pckl'
!zip -r 'NewFileName1.zip' 'bouba50.pckl'

  adding: bouba21.pckl (deflated 74%)
  adding: bouba22.pckl (deflated 74%)
  adding: bouba23.pckl (deflated 74%)
  adding: bouba24.pckl (deflated 74%)
  adding: bouba25.pckl (deflated 74%)
  adding: bouba26.pckl (deflated 74%)
  adding: bouba27.pckl (deflated 74%)
  adding: bouba28.pckl (deflated 74%)
  adding: bouba29.pckl (deflated 74%)
  adding: bouba30.pckl (deflated 74%)
  adding: bouba31.pckl (deflated 74%)
  adding: bouba32.pckl (deflated 74%)
  adding: bouba33.pckl (deflated 74%)
  adding: bouba34.pckl (deflated 74%)
  adding: bouba35.pckl (deflated 74%)
  adding: bouba36.pckl (deflated 74%)
  adding: bouba37.pckl (deflated 74%)
  adding: bouba38.pckl (deflated 74%)
  adding: bouba39.pckl (deflated 74%)
  adding: bouba40.pckl (deflated 74%)
  adding: bouba41.pckl (deflated 74%)
  adding: bouba42.pckl (deflated 74%)
  adding: bouba43.pckl (deflated 74%)
  adding: bouba44.pckl (deflated 74%)
  adding: bouba45.pckl (deflated 74%)
  adding: bouba46.pckl (deflated 74%)
  adding: bo