In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from libs.db_sqlite import SqliteDatabase
from argparse import RawTextHelpFormatter
from itertools import zip_longest
from termcolor import colored
from libs.config import get_config
from libs.reader_file import FileReader
from libs import fingerprint

import matplotlib.pyplot as plt

# Deal with db

In [3]:
config = get_config()
db = SqliteDatabase()

[2m[37msqlite - connection opened[0m


# Get file

In [4]:
filename = '00 - Easily.mp3'
path = 'mp3/'

In [5]:
reader = FileReader(path + filename)
audio = reader.parse_audio()
song = db.get_song_by_filehash(audio['file_hash'])
song_id = db.add_song(filename, audio['file_hash'])

In [6]:
data = audio['channels']
Fs = fingerprint.DEFAULT_FS
channel_amount = len(data)

In [45]:
def grouper(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return [list(filter(None, values)) for values
            in zip_longest(fillvalue=fillvalue, *args)]

def find_matches(samples, Fs=fingerprint.DEFAULT_FS):
    hashes = fingerprint.fingerprint(samples, Fs=Fs)
    return return_matches(hashes)

def return_matches(hashes):
    mapper = {}
    for hash, offset in hashes:
        mapper[hash.upper()] = offset
    values = mapper.keys()
    
    for split_values in grouper(values, 1000):
        # @todo move to db related files
        query = """
            SELECT upper(hash), song_fk, offset
            FROM fingerprints
            WHERE upper(hash) IN (%s)
          """
        query = query % ', '.join('?' * len(split_values))
        x = db.executeAll(query, split_values)
        matches_found = len(x)
        
        if matches_found > 0:
            msg = '   ** found %d hash matches (step %d/%d)'
            print(colored(msg, 'green') % (
              matches_found,
              -1,
              len(values)
            ))
        else:
            msg = '   ** not matches found (step %d/%d)'
            print(colored(msg, 'red') % (
              len(split_values),
              len(values)
            ))
        for hash, sid, offset in x:
            # (sid, db_offset - song_sampled_offset)
            yield (sid, offset - mapper[hash])

def align_matches(matches):
    diff_counter = {}
    largest = 0
    largest_count = 0
    song_id = -1
    for tup in matches:
        sid, diff = tup
        
        if diff not in diff_counter:
            diff_counter[diff] = {}
        
        if sid not in diff_counter[diff]:
            diff_counter[diff][sid] = 0
        
        diff_counter[diff][sid] += 1
        
        if diff_counter[diff][sid] > largest_count:
            largest = diff
            largest_count = diff_counter[diff][sid]
            song_id = sid
    
    songM = db.get_song_by_id(song_id)
    
    nseconds = round(float(largest) / fingerprint.DEFAULT_FS *
                     fingerprint.DEFAULT_WINDOW_SIZE *
                     fingerprint.DEFAULT_OVERLAP_RATIO, 5)

    return {
        "SONG_ID" : song_id,
        "SONG_NAME" : songM[1],
        "CONFIDENCE" : largest_count,
        "OFFSET" : int(largest),
        "OFFSET_SECS" : nseconds
    }

# Manual

In [35]:
sample = [11,50]

In [36]:
hashes = list(fingerprint.fingerprint(data[0][Fs*sample[0]: Fs*sample[1]], Fs=Fs))

[2m   local_maxima: 1354 of frequency & time pairs[0m


In [37]:
matches = return_matches(hashes=hashes)

In [38]:
m = [h for h in matches]

[32m   ** found 4 hash matches (step -1/18767)[0m
[32m   ** found 2 hash matches (step -1/18767)[0m
[31m   ** not matches found (step 1000/18767)[0m
[32m   ** found 3 hash matches (step -1/18767)[0m
[32m   ** found 1 hash matches (step -1/18767)[0m
[32m   ** found 1 hash matches (step -1/18767)[0m
[32m   ** found 3 hash matches (step -1/18767)[0m
[32m   ** found 1 hash matches (step -1/18767)[0m
[32m   ** found 2 hash matches (step -1/18767)[0m
[32m   ** found 2 hash matches (step -1/18767)[0m
[31m   ** not matches found (step 1000/18767)[0m
[32m   ** found 3 hash matches (step -1/18767)[0m
[31m   ** not matches found (step 1000/18767)[0m
[32m   ** found 5 hash matches (step -1/18767)[0m
[32m   ** found 2 hash matches (step -1/18767)[0m
[32m   ** found 4 hash matches (step -1/18767)[0m
[32m   ** found 1 hash matches (step -1/18767)[0m
[32m   ** found 3 hash matches (step -1/18767)[0m
[32m   ** found 3 hash matches (step -1/18767)[0m


In [46]:
align_matches(m)

{'SONG_ID': 1,
 'SONG_NAME': '00 - Easily.mp3',
 'CONFIDENCE': 1,
 'OFFSET': 961,
 'OFFSET_SECS': 44.62875}

In [40]:
dict_hashes = {h: hv for (h, hv) in hashes}

In [12]:
nb = 100000
query = """
            SELECT upper(hash), song_fk, offset
            FROM fingerprints
            WHERE upper(hash) IN (%s)
          """
query = query % ', '.join('?' * nb)
db.executeAll(query, [k[0] for k, _ in zip(hashes, range(nb))])

[('48752978135356367235', 1, b'\xe9\x02\x00\x00\x00\x00\x00\x00'),
 ('05835073779111655026', 1, b'\x99\x03\x00\x00\x00\x00\x00\x00'),
 ('90195636954544455046', 1, b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 ('03522669584382427413', 1, b'\x13\x04\x00\x00\x00\x00\x00\x00'),
 ('73021731275182487565', 1, b'\x0b\x04\x00\x00\x00\x00\x00\x00'),
 ('23921454263754211485', 1, b'\x98\x03\x00\x00\x00\x00\x00\x00'),
 ('65299988631383985027', 1, b'\xf7\x06\x00\x00\x00\x00\x00\x00')]

In [13]:
[k[0] for k, _ in zip(hashes, range(nb))]

['0f1c672282b5bd31d61e',
 '146371d65e18cd933439',
 '92f9d27f3d30df5c008b',
 '9d4b94570705f57a6a71',
 '2930e18edff7df47135e',
 '5017991fe520d5f1c9d8',
 'e8c34c9de10293473a5c',
 '92b4adb682473546e183',
 '36dccc22a96aa2f6a89d',
 'f86d955ebc8aa05834db',
 'b9b4d269b9c20a48a57f',
 'acd9435169a3a95af0aa',
 '4cd9f46ea6ecb47ffa9e',
 'ff31d9d098f528271835',
 '3fb25445f6b210c08093',
 'b89dd72f4ac60a2f7909',
 '6e5d722e3cda4aafd1e6',
 'c1683ac4f8de135a887e',
 'a402dc5a5c378f2def61',
 'b83e53c53b3215576d94',
 '9cc9cb6d6090098298d8',
 '36db9d1ed125f10c35cf',
 '1214843364687d987a96',
 'b58b21fd996372190441',
 '036cae402432696a3cb1',
 '58a4de5e594724d2dc7f',
 '1ec3e3a176f20d90654a',
 'eab415127364dcacb679',
 'ca07ffe02ae616ff993e',
 '7cbdd6a78115aa793300',
 '11877b8707c68cf91d0e',
 '3aa5db84a1b3e3dbda45',
 '77271232258d88313803',
 'c4463582578becee091a',
 '177e6aebe07b58452271',
 '43e8bc268ecbd8756ee2',
 'e9b39446ec6d5ef48187',
 '851d138a9ff15deb4ede',
 'eab83ecf78e97c5481e9',
 '59528a5fcd98c6bb2101',


In [15]:
db.findOne(db.TABLE_FINGERPRINTS, {'song_fk':10})

In [10]:
db.findAll(db.TABLE_FINGERPRINTS, {'offset': b'\xe6\x00\x00\x00\x00\x00\x00\x00'})

[(2087, 1, '893ace4ef6414e388911', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (2369, 1, '203d17007dfe84698dbe', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (11433, 1, '5ef56b3236bb8c35b5e4', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (12841, 1, '5ac12ce784b0c0a0e111', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (13913, 1, '4641fa4396a428d9969a', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (14147, 1, 'b15da05ddba9ccd1392d', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (16864, 1, 'fdeaf05816ecaa39ef4a', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (17179, 1, 'a63a9565403672f3052d', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (17652, 1, '45d66c369f8178ee4688', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (19437, 1, '91cb1d3ebff71cc44b3f', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (22024, 1, '555d74a992439520cf1f', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (22980, 1, '903405b28e47d39f7791', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (25074, 1, '075156f14df2404fb34f', b'\xe6\x00\x00\x00\x00\x00\x00\x00'),
 (26163, 1, '8540c8ccd3d48506be42', b'\x

In [31]:
int('\xe6\x00\x00\x00\x00\x00\x00\x00')

ValueError: invalid literal for int() with base 10: 'æ\x00\x00\x00\x00\x00\x00\x00'

In [8]:
tmp = db.cur.execute('PRAGMA TABLE_INFO(fingerprints)')
tmp.fetchall()

[(0, 'id', 'INTEGER', 0, None, 1),
 (1, 'song_fk', 'INTEGER', 0, None, 0),
 (2, 'hash', 'TEXT', 0, None, 0),
 (3, 'offset', 'INTEGER', 0, None, 0)]

In [36]:
int('0x00', 16)

0

In [55]:
db.insertMany('fingerprints', ['song_fk', 'hash', 'offset'], [(10, '0000000000000000000{0}'.format(s+1),1) for s in range(6)])

In [9]:
db.findAll('fingerprints', {'hash':'00000000000000000004'})

[]

# MWE ?

In [14]:
song_id = 10
values = [(song_id, hash, offset) for hash, offset in hashes]
# [h for h, _ in zip(values, range(10))]

In [15]:
db.store_fingerprints(values)

In [17]:
from tqdm import tqdm_notebook as tqdm
import numpy as np
tmp = []
nb = len(values)
for ind in tqdm(np.random.choice(nb, 300), total = nb):
    _, tmp_hash, tmp_offset = values[ind]
    r = db.findOne(db.TABLE_FINGERPRINTS, {'song_fk': 10, 'hash': tmp_hash})[3]
    tmp.append([r, tmp_offset])

HBox(children=(IntProgress(value=0, max=100765), HTML(value='')))




In [18]:
binay = [t[0] for t in tmp]
decimal = [t[1] for t in tmp]

In [19]:
for s, s_ref,_ in zip(binay, decimal, np.random.choice(nb, 300)):
    db.cur.execute("SELECT CAST(? as numerical)", (s,))
    print(db.cur.fetchall()[0][0], s_ref)

0 1644
0 1572
0 1253
6 1078
0 1003
0 1090
0 121
0 1035
0 1277
0 644
0 367
0 92
0 620
0 805
0 9
0 1205
0 1133
0 1038
0 1423
0 144
0 378
8 1336
0 276
0 681
0 170
0 1035
0 1584
0 993
0 910
0 688
0 1719
0 1387
0 797
0 329
0 926
9 1337
0 932
0 291
0 1072
0 1790
0 249
0 1022
0 765
7 1079
0 168
0 1206
4 52
0 1427
0 71
0 1068
0 253
0 1088
0 764
0 368
0 1448
0 183
0 39
0 217
0 767
0 792
0 705
0 454
0 122
0 1443
-0.0 1581
0 165
0 770
0 1421
0 1447
0 911
0 1177
0 1061
0 1418
0 666
2 306
0 214
0 849
0 835
0 959
0 1780
0 1143
0 1040
0 577
0 103
0 144
0 326
0 1532
0 1204
0 383
0 97
0 272
0 1165
0 1183
0 1003
0 833
0 962
0 144
0 1314
0 1067
0 710
0 1114
0 797
0 1529
0 892
0 708
0 123
0 501
0 1446
0 445
0 553
0 892
0 1112
0 1708
0 723
0 1341
0 79
0 667
0 1744
1 1329
0 519
0 1403
0 107
0 161
0 1723
-0.0 1325
0 757
0 479
0 1205
9 569
0 1071
0 687
0 289
0 146
0 324
0 705
0 775
0 734
0 847
0 1361
0 1235
0 1264
0 526
0 1487
0 558
0 920
0 1708
0 1797
0 1740
0 193
0 1340
0 1810
0 956
0 849
0 1374
0 909
0 145

In [29]:
db.insertMany('fingerprints', ['song_fk', 'hash', 'offset'], [(10, '0000000000000000000{0}'.format(s),int(o))
                                                              for o,s in zip(decimal, range(6))])

In [30]:
db.findAll('fingerprints', {'hash':'00000000000000000005'})

[(539101, 10, '00000000000000000005', b'B\x04\x00\x00\x00\x00\x00\x00'),
 (539107, 10, '00000000000000000005', 1),
 (539113, 10, '00000000000000000005', 1090)]

In [22]:
db.insertMany('fingerprints', ['song_fk', 'hash', 'offset'], [(10, '0000000000000000000{0}'.format(s),1)
                                                              for o,s in zip(decimal, range(6))])

In [28]:
print(decimal[0:6])
db.cur.execute("SELECT CAST(? as blob)", (1090,))
db.cur.fetchall()

[1644, 1572, 1253, 1078, 1003, 1090]


[(b'1090',)]

In [73]:
':\x07\x00\x00\x00\x00\x00\x00'.split("\x")

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-1: truncated \xXX escape (<ipython-input-73-1b1a6828059f>, line 1)

In [90]:
b':\x07\x00\x00\x00\x00\x00\x00'.decode('utf8')[1:].replace('\\x', '')

SyntaxError: (unicode error) 'unicodeescape' codec can't decode bytes in position 0-1: truncated \xXX escape (<ipython-input-90-18907b142bd8>, line 1)