## Inference

In [1]:
# imports
import numpy as np
import random
import pickle

In [2]:
# fix the RNG seed up front so the random draws made later are repeatable
random.seed(42)
# experiment directory: the tokenizer and embeddings are loaded from paths under it
experiment_name = "experiment_defaults"

### Load Tokenizer and Embeddings

In [3]:
# load tokenizer
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# point this at experiment directories whose contents you trust.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() call left it open for the life of the kernel).
with open(experiment_name + "/data/tokenizer.pkl", "rb") as tokenizer_file:
    tokenizer = pickle.load(tokenizer_file)

# show five randomly sampled keys of the tokenizer's word_index as a sanity check
random.sample(list(tokenizer.word_index.keys()), 5)

['the pigeon detectives - unforgettable',
 'young money - bedrock',
 'you me at six - fireworks',
 'gin wigmore - devil in me',
 'snow patrol - chasing cars']

In [4]:
# search for tracks / artists
# Prints every key of tokenizer.word_index that contains `search_query` as a substring.
# The loop variable is deliberately NOT called `track_name`: that name holds the
# query track used by the similarity lookup further down, and rebinding it here
# would silently change that lookup whenever cells are re-run out of order.
search_query = "bloc party"
for vocab_entry in tokenizer.word_index.keys():
    if search_query in vocab_entry:
        print(vocab_entry)

bloc party - one month off
bloc party - helicopter
bloc party - hunting for witches
bloc party - i still remember
bloc party - on
bloc party - signs


In [5]:
# load embeddings
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# load embedding files from experiment directories you trust.
# The context manager closes the file handle as soon as loading finishes
# (the previous bare open() call left it open for the life of the kernel).
with open(experiment_name + "/embeddings.pkl", "rb") as embeddings_file:
    embedding_weights = pickle.load(embeddings_file)

# report the shape of the loaded array as a quick sanity check
print("embeddings shape:", embedding_weights.shape)

embeddings shape: (8630, 100)


### Get top-n most similar tracks

In [6]:
# query settings: how many neighbours to list, and the track to look them up for
n = 10
track_name = "bloc party - helicopter"

In [7]:
# get track embedding
track_idx = tokenizer.word_index[track_name]
track_vector = embedding_weights[track_idx, :].reshape(1, -1)

# compute cosine similarities against every embedding row
# (a zero-norm row yields NaN/inf here; those are filtered out of the ranking below)
norm_products = np.linalg.norm(track_vector) * np.linalg.norm(embedding_weights, axis=1)
with np.errstate(divide="ignore", invalid="ignore"):
    similarities = (np.dot(track_vector, embedding_weights.T) / norm_products).reshape(-1)

# Rank on a masked copy so `similarities` keeps the raw values for printing.
# Excluded from the ranking:
#   * non-finite scores - NaN sorts last in argpartition/argsort and would
#     otherwise be reported as the "most similar" track;
#   * the query track itself - removed by index rather than by assuming it is
#     ranked first, which breaks on ties or duplicate vectors;
#   * rows with no entry in tokenizer.index_word (presumably a padding row at
#     index 0 - TODO confirm), which would raise KeyError when printed.
ranking_scores = np.where(np.isfinite(similarities), similarities, -np.inf)
ranking_scores[track_idx] = -np.inf
has_track_name = np.array(
    [row in tokenizer.index_word for row in range(ranking_scores.shape[0])], dtype=bool
)
ranking_scores[~has_track_name] = -np.inf

# get most similar tracks' indices (never ask for more than there are candidates)
n_candidates = int(np.count_nonzero(np.isfinite(ranking_scores)))
top_k = min(n, n_candidates)
if top_k > 0:
    most_similar_idxs = np.argpartition(ranking_scores, -top_k)[-top_k:]
    # argpartition only isolates the top_k block; sort it in descending order
    most_similar_idxs = most_similar_idxs[np.argsort(ranking_scores[most_similar_idxs])][::-1]
else:
    most_similar_idxs = np.array([], dtype=int)

# print most similar tracks, along with their positions in training data
print("top {} tracks most similar to '{}' (pos. {}):".format(n, track_name, track_idx))
for idx in most_similar_idxs:
    print("- (sim. {:.3f}): '{}' (pos. {})".format(similarities[idx], tokenizer.index_word[idx], idx))

top 10 tracks most similar to 'bloc party - helicopter' (pos. 5053):
- (sim. 0.492): 'pierce the veil - hell above' (pos. 657)
- (sim. 0.411): 'kanye west - hell of a life' (pos. 5055)
- (sim. 0.382): 'markus krunegård - hela livet var ett disco' (pos. 5050)
- (sim. 0.344): 'filthy - magnolian' (pos. 6351)
- (sim. 0.335): 'massive attack - angel' (pos. 273)
- (sim. 0.335): 'foo fighters - cheer up, boys (your make up is running)' (pos. 3663)
- (sim. 0.323): 'the novaks - rain, rain, rain' (pos. 7297)
- (sim. 0.318): 'daft punk - teachers' (pos. 1448)
- (sim. 0.315): 'the airborne toxic event - hell and back' (pos. 5054)
- (sim. 0.310): 'foster the people - helena beat' (pos. 5052)
