# Setup Database

This notebook pulls a collection of songs from my computer, computes audio fingerprints for each one, and saves the results to `database.pkl`.

In [1]:
%load_ext autoreload
%autoreload 2
import numpy as np
import matplotlib.pyplot as plt
from scipy.io import wavfile
from dsp import *

In [2]:
# --- STFT settings: handed to stft() as its window and hop arguments ---
# (presumably both are measured in samples -- confirm against dsp.stft)
win, hop = 1024, 512

# --- Constellation settings: handed to get_constellation() ---
freq_win, time_win = 8, 6
max_freq = 128
thresh = 1e3

# --- Fingerprint settings: handed to get_fingerprints() ---
width, height = 50, 21
d_center = 86

In [3]:
import subprocess
import glob
import os
import numpy as np
import pickle

def parse_track_name(path):
    """
    Extract the artist and title from an audio file path whose file name
    has the form "<artist> - <title>.<extension>"

    Parameters
    ----------
    path: string
        Path to an audio file

    Returns
    -------
    (artist, title): tuple of whitespace-stripped strings, or None if the
    file name (directory and extension removed) does not contain exactly
    one "-"
    """
    # basename/splitext drop only the directory and the final extension, so
    # dots inside the name (e.g. "Mr. Mister - Kyrie.mp3") are preserved and
    # the platform's path separator is respected
    name = os.path.splitext(os.path.basename(path))[0]
    parts = name.split("-")
    if len(parts) != 2:
        return None
    artist, title = parts
    return artist.strip(), title.strip()


data = [] # List of {"artist":string, "title":string, "fingerprints":(hashes, offsets)}

for idx, f in enumerate(glob.glob("Music/*/*.mp3")):
    parsed = parse_track_name(f)
    if parsed is None:
        # File is not named "<artist> - <title>", so it is left out
        continue
    artist, title = parsed

    ## Step 0: Convert to 22050hz sampling and mono, then load audio
    # Remove any stale conversion first so a failed ffmpeg run can never
    # cause the previous song's audio to be read again
    if os.path.exists("temp.wav"):
        os.remove("temp.wav")
    status = subprocess.call(["ffmpeg", "-i", f, "-ar", "22050", "-ac", "1", "temp.wav"],
                             stdout=subprocess.DEVNULL,
                             stderr=subprocess.STDOUT)
    if status != 0 or not os.path.exists("temp.wav"):
        # Report the bad file and keep going, rather than letting
        # wavfile.read abort the whole run on a missing temp.wav
        print("Skipping {}: ffmpeg conversion failed (exit code {})".format(f, status))
        continue
    sr, x = wavfile.read("temp.wav")
    ## Step 1: Compute STFT
    S = np.abs(stft(x, win, hop))
    ## Step 2: Get constellations
    I, J = get_constellation(S, freq_win, time_win, max_freq, thresh)
    ## Step 3: Get fingerprints
    hashes, offsets = get_fingerprints(I, J, width, height, d_center)
    print(artist, title, "({} points, {} hashes)".format(I.size, hashes.size))
    ## Step 4: Save data
    data.append({"artist":artist, "title":title, "fingerprints":(hashes, offsets)})
    if idx%20 == 0:
        # Periodic checkpoint; the with-block guarantees the file is
        # flushed and closed even if pickling raises
        with open("database.pkl", "wb") as fout:
            pickle.dump(data, fout)

ABC Poison Arrow (4325 points, 61592 hashes)
Alphaville Forever Young (5043 points, 78166 hashes)
Animotion Obsession (5222 points, 72664 hashes)
Bananarama Cruel Summer (4281 points, 57912 hashes)
Chaz Jankel Number One (5382 points, 84254 hashes)
Corey Hart Sunglasses At Night (5181 points, 77275 hashes)
Duran Duran Girls On Film (4577 points, 64764 hashes)
Duran Duran Notorious (4507 points, 58287 hashes)
Duran Duran Rio (7094 points, 99696 hashes)
Go West Call Me (5038 points, 68258 hashes)
Human League Don't You Want Me (4689 points, 62946 hashes)
Kenny Loggins Danger Zone (4462 points, 61935 hashes)
Mr Mister Broken Wings (5870 points, 83022 hashes)
Mr Mister Kyrie (5421 points, 75749 hashes)
Oingo Boingo Just Another Day (7021 points, 102307 hashes)
Peter Gabriel Big Time (5259 points, 71176 hashes)
Reflex Politics of Dancing (4973 points, 71346 hashes)
Squeeze Tempted (4609 points, 58938 hashes)
Talk Talk Life's What You Make It (4679 points, 67345 hashes)
Tears for Fears Pale 

Michael Jackson & The Jackson Five My Girl (3284 points, 38728 hashes)
Michael Jackson Wanna Be Starting Something (7217 points, 94558 hashes)
Michael Jackson Another Part Of Me (4493 points, 57889 hashes)
Michael Jackson Bad (4826 points, 61104 hashes)
Michael Jackson Beat It (5099 points, 69381 hashes)
Michael Jackson Billy Jean (6073 points, 84533 hashes)
Michael Jackson Black or White (5318 points, 75669 hashes)
Michael Jackson Dangerous (7789 points, 91450 hashes)
ACDC Back In Black (4672 points, 56604 hashes)
Aerosmith Duke Looks Like A lady (5397 points, 74920 hashes)
Goldfinger Superman (4078 points, 57945 hashes)
Guns N Roses Welcome To the Jungle (5643 points, 78378 hashes)
Jimi Hendrix Purple Haze (3815 points, 45710 hashes)
Led Zeppelin Nobody's Fault But Mine (7445 points, 96156 hashes)
Led Zeppelin Stairway To Heaven (9708 points, 134056 hashes)
Led Zepplin All of My Love (6689 points, 87169 hashes)
LIT My Own Worst Enemy (3655 points, 52537 hashes)
Lynyrd Skynyrd Tuesday

In [4]:
# Final save of the complete database. The build loop only writes a
# checkpoint when idx%20 == 0, so the last few songs may not be on disk yet.
# The with-block ensures the file is flushed and closed.
with open("database.pkl", "wb") as fout:
    pickle.dump(data, fout)

In [5]:
# Sanity check: how many songs ended up in the database list
print(len(data))

277
