In [None]:
import numpy as np
import urllib
import librosa
%matplotlib notebook
import matplotlib.pyplot as plt
import matplotlib.mlab as mlab
from microphone import record_audio
from IPython.display import Audio
from pathlib import Path
from scipy.ndimage.filters import maximum_filter
from scipy.ndimage.morphology import generate_binary_structure, binary_erosion 
from scipy.ndimage.morphology import iterate_structure
import pickle
from collections import Counter, defaultdict
# make this a glboal variable - it is false unless audio_to_digital changes
Checking=False 

In [None]:
def audio_to_digital(*file_path_or_music):
    if(len(file_path_or_music)!= 0):
        if(".mp3" in file_path_or_music[0]):
            #analyze file input if it is there
            from pathlib import Path
            local_song_path = Path(file_path_or_music[0])
            frames, sample_rate = librosa.load(local_song_path, sr=44100, mono=True, duration=5) #Seconds
            audio_data = np.hstack([np.frombuffer(i, np.int16) for i in frames])

        else:
            #analyze file input if it is there
            with open(file_path_or_music[0], 'r') as R:
            # each sample is written to a single line in the text file
            # this reads them in as a single integer-valued numpy array
                audio_data = np.asarray([int(i) for i in R])
    else:
        #or else we are recording what they say
        from microphone import record_audio
        listen_time = 5  # seconds
        frames, sample_rate = record_audio(listen_time)
        # read in the recorded audio, saved as a numpy array of 16-bit integers
        audio_data = np.hstack([np.frombuffer(i, np.int16) for i in frames])
    return audio_data

In [None]:
def digitalToSpecto(audio):
    S, freqs, times = mlab.specgram(audio, NFFT=4096, Fs=44100,
                                  window=mlab.window_hanning,
                                  noverlap=4096 // 2)
    return(S)

In [None]:
def spectrogram_to_peaks(arr):
    """ 
    Creates a boolean array showing peaks, given data from a spectrogram.
    
    Parameters:
        arr: The array produced by the spectrogram from digital_to_spectrogram with shape (N,M)
        
    Returns:
        peaks: A boolean array with shape (N,M). Peaks in the data are where peaks == True.
    """

    # Creating the histogram
    arr_flattened = np.log(arr.flatten() + 1e-20)
    
    N = arr_flattened.size # Number of elements in the array
    cnt, bin_edges = np.histogram(arr_flattened, bins=N//200, density=True)
    bin_width = np.diff(bin_edges) 
    
    ## print(np.sum(cnt*bin_width)) # check that summation = 1
    
    # Creating the cumulative distribution
    cumulative_distr = np.cumsum(cnt*bin_width)
    
    # Defining the cutoff
    frac_cut = 0.9
    bin_index_of_cutoff = np.searchsorted(cumulative_distr, frac_cut)
    
    # given the bin-index, we want the associated log-amplitude value for that bin
    cutoff_log_amplitude = bin_edges[bin_index_of_cutoff]
    
    # Defining the footprint
    fp = generate_binary_structure(rank=2,connectivity=2)
    fp = np.ones((4,5))
    
    peaks = ((arr > cutoff_log_amplitude) & (arr == maximum_filter(arr, footprint=fp)))
    
    return peaks

In [7]:
def peaks_to_dic_newsongs(local_peaks):
    """
    Creates a dictionary of fingerprints given the peaks from the previous function. Every time this function is called,
    it will initialize an empty dictionary and populate it with values from our pickled database
    to be compared against the fingerprint of the peaks that are currently being passed.
    
    Parameters:
        local_peaks: A (N,M) boolean array of peaks in the audio data
        
    Returns:
        None
    """
    
    # Compiling a new dictionary every time this function is called from the contents of the .pkl
    song_dic = {} 
    with open("songs.pkl", mode="rb") as opened_file:
        
        # while the file has values, iterate through the file until you reach the end, and update the empty dict along the way
        while True:
            try:
                entry = pickle.load(opened_file)
            except EOFError:
                break
            else:
                song_dic.update([entry])
    time,freq= np.where(local_peaks)
    song_name=input("Song Name= ")
    fanout = 15
    song_dic = defaultdict(list)
    for i in range(freq.size-fanout):
        for j in range(1,fanout+1):
            finger=(freq[i], freq[i+j], time[i+j]-time[i])
            if finger in song_dic.keys():
                song_dir = (song_name, time[i])
                song_dic[finger].append(song_dir)
#                 previousfingers.append(song_dir) ## this is all extraneous previous code
#                 print((finger, previousfingers))
#                 song_dic.update([(finger, previousfingers)])
            else:
                song_dic[finger]=[(song_name, time[i])]
            
    # open and save to a pkl file, mode = wb for binary storage w/ numpy array
    with open("songs.pkl", mode="ab") as opened_file:
        
        # For every entry in the new dictionary,pickle the entry.
        # TODO: Figure out how to JUST append new items, rather than 
        for key, value in song_dic.items():
            pickle.dump((key, value), opened_file)
    print("Done")

In [30]:
# test cell
song_dic = {1:0,8:0,8:9}
with open("songs.pkl", mode="ab") as opened_file:
    for key, value in song_dic.items():
        pickle.dump((key, value), opened_file)
        
song_dic={}
with open("songs.pkl", mode="rb") as opened_file:
    while True:
        try:
            entry = pickle.load(opened_file)
        except EOFError:
            break
        else:
            song_dic.update([entry])
            
print(song_dic)

{1: 0, 8: 9}


In [17]:
def peaks_to_dic_findsong(local_peaks):
    """
    Creates a dictionary of fingerprints given the peaks from the previous function. Every time this function is called,
    it will initialize an empty dictionary and populate it with values from our pickled database
    to be compared against the fingerprint of the peaks that are currently being passed.
    
    Parameters:
        local_peaks: A (N,M) boolean array of peaks in the audio data
        
    Returns:
        song: the most common song in the database which corresponds to the fingerprints of the audio sample
    """
    # Compiling a new dictionary every time this function is called from the contents of the .pkl
    song_dic2 = {}
    with open("songs.pkl", mode="rb") as opened_file:
        
        # while the file has values, iterate through the file until you reach the end, and update the empty dict along the way
        while True:
            try:
                key, value = pickle.load(opened_file)
            except EOFError:
                break
            else:
                song_dic2.update([(key, value)])
    time, freq= np.where(local_peaks)
    songList = []
    fingerList=[]
    abs_time=[]
    fanout=15
    for i in range(freq.size-fanout):
        for j in range(1,fanout+1):
            finger=(freq[i], freq[i+j], time[i+j]-time[i])
            abs_time.append(time[i])
            fingerList.append(finger)
    for i in range(len(fingerList)):
        if fingerList[i] in song_dic2:
            song_guess=song_dic2[fingerList[i]]
            # for every song associated with that fingerprint, find the absolute dt between that song's timebin
            # and the audio sample's time bin, and append both the name and dt to a list of songs
            for t in range(len(song_guess)):
                songList.append((song_guess[t][0], (song_guess[t][1]-abs_time[i])))
    print(songList[:100])
    Counter_songs= Counter(songList)
    song =Counter_songs.most_common(1)
    return song

In [9]:
def final_function(*song):
    if(len(song)!=0):
        audio=audio_to_digital(song[0])
        arr=digitalToSpecto(audio)
        local_peaks=spectrogram_to_peaks(arr)
        peaks_to_dic_newsongs(local_peaks)
    else:
        audio=audio_to_digital()
        arr=digitalToSpecto(audio)
        local_peaks=spectrogram_to_peaks(arr)
        return peaks_to_dic_findsong(local_peaks)
    

In [18]:
final_function()

Using input device 'Internal Microphone (Conexant S'
Recording ended
[("Don't Stop Me Now", 154), ("Don't Stop Me Now", 769), ("Don't Stop Me Now", 824), ("Don't Stop Me Now", 1325), ("Don't Stop Me Now", 1342), ("Don't Stop Me Now", 1822), ("Don't Stop Me Now", 2048), ("Don't Stop Me Now", 389), ("Don't Stop Me Now", 431), ("Don't Stop Me Now", 1342), ("Don't Stop Me Now", 2019), ("Don't Stop Me Now", 774), ("Don't Stop Me Now", 1267), ('Gondry', 170), ('Gondry', 451), ('Gondry', 1421), ('Gondry', 1958), ('Gondry', 378), ("Don't Stop Me Now", 1725), ('Janet', 2020), ("Don't Stop Me Now", 1342), ("Don't Stop Me Now", 1700), ("Don't Stop Me Now", 1926), ("Don't Stop Me Now", 226), ("Don't Stop Me Now", 558), ("Don't Stop Me Now", 546), ("Don't Stop Me Now", 427), ('Gondry', 1795), ('Gondry', 1072), ('Gondry', 1968), ("Don't Stop Me Now", 612), ("Don't Stop Me Now", 1505), ("Don't Stop Me Now", 521), ("Don't Stop Me Now", 818), ("Don't Stop Me Now", 1129), ("Don't Stop Me Now", 1177), ("

[(("Don't Stop Me Now", 77), 244)]