# **Importing Necessary Libraries**

In [1]:
import numpy as np

In [2]:
from utils.utils import load_json, write_json
from utils.similarity_score import cosine, match, manhattan_distance, inverse_manhattan_distance

In [3]:
# Load the song records. Later cells index `songs` by integer position and
# treat each entry as a mapping of feature name -> feature value.
songs = load_json('data/vector_data.json')

# **Creating the Similarity Function**

1. in_movie - `match`
2. lyrics - `cosine`
3. language - `cosine`
4. topics - `cosine`
5. sentiment - `cosine`
6. artist - `match`
7. album_type - `match`
8. album_name - `match`
9. album_artist - `match percentage`
10. popularity - `inverse manhattan distance`
11. age - `inverse manhattan distance`
12. explicit - `match`
13. duration - `inverse manhattan distance`
14. listener_play_ratio - `manhattan distance` 
15. genre - `cosine`
16. acousticness - `manhattan distance`
17. danceability - `manhattan distance`
18. energy - `manhattan distance`
19. instrumentalness - `manhattan distance`
20. key - `match`
21. liveness - `manhattan distance`
22. loudness - `inverse manhattan distance`
23. mode - `match`
24. speechiness - `manhattan distance`
25. tempo - `inverse manhattan distance`
26. valence - `manhattan distance`
27. num_sections - `scaled manhattan distance`
28. num_segments - `scaled manhattan distance`
29. artist_vector - `cosine`

In [4]:
# Scoring function applied to each song feature when two songs are compared.
# `similarity` looks a feature up here by name; entries are grouped by function.
function_map = {
    # Features scored with `match`.
    'in_movie': match,
    'title': match,
    'artist': match,
    'album_type': match,
    'album_name': match,
    'album_artist': match,
    'explicit': match,
    'key': match,
    'mode': match,

    # Features scored with `cosine`.
    'lyrics': cosine,
    'language': cosine,
    'topics': cosine,
    'sentiment': cosine,
    'genre': cosine,
    'artist_vector': cosine,
    # NOTE(review): the feature list in the markdown cell above labels these
    # two as `scaled manhattan distance`, but they are mapped to `cosine`
    # here -- confirm which one is intended.
    'num_sections': cosine,
    'num_segments': cosine,

    # Features scored with `manhattan_distance`.
    'listener_play_ratio': manhattan_distance,
    'acousticness': manhattan_distance,
    'danceability': manhattan_distance,
    'energy': manhattan_distance,
    'instrumentalness': manhattan_distance,
    'liveness': manhattan_distance,
    'speechiness': manhattan_distance,
    'valence': manhattan_distance,

    # Features scored with `inverse_manhattan_distance`; each of these also
    # has an entry in the `scale` mapping.
    'popularity': inverse_manhattan_distance,
    'age': inverse_manhattan_distance,
    'duration': inverse_manhattan_distance,
    'loudness': inverse_manhattan_distance,
    'tempo': inverse_manhattan_distance,
}

In [5]:
# Per-feature value forwarded as the `scale=` keyword argument to that
# feature's scoring function; features absent from this mapping are scored
# without a `scale` argument.
scale = dict(
    tempo=100,
    loudness=10,
    duration=1e5,
    age=25,
    popularity=100,
)

In [6]:
def similarity(song1, song2, *, functions=None, scales=None, verbose=True):
    """Return the mean per-feature similarity score of two songs.

    Every feature present in both songs is scored with the function
    registered for it, and the scores are averaged with equal weight.

    Args:
        song1: Mapping of feature name to feature value.
        song2: Mapping of feature name to feature value.
        functions: Mapping of feature name to a scoring callable
            ``func(value1, value2)``. Defaults to the notebook-level
            ``function_map``.
        scales: Mapping of feature name to the value passed as the ``scale``
            keyword argument of that feature's scoring callable. Defaults to
            the notebook-level ``scale``.
        verbose: When true (the default), print one ``feature: score`` line
            per shared feature, followed by a blank line.

    Returns:
        The arithmetic mean of the per-feature scores, or ``0.0`` when the
        two songs have no feature in common.

    Raises:
        KeyError: If a shared feature has no scoring function registered.
    """
    if functions is None:
        functions = function_map
    if scales is None:
        scales = scale

    # Sorted so that the printed report and the floating-point summation
    # order are the same on every run; plain set iteration order is not.
    features = sorted(set(song1.keys()).intersection(song2.keys()))
    if not features:
        # Nothing to compare: avoid dividing by zero below.
        return 0.0

    scores = []
    for feature in features:
        try:
            func = functions[feature]
        except KeyError:
            raise KeyError(
                f'no similarity function registered for feature {feature!r}'
            ) from None

        if feature in scales:
            score = func(song1[feature], song2[feature], scale=scales[feature])
        else:
            score = func(song1[feature], song2[feature])

        if verbose:
            print(f'{feature}: {score}')
        scores.append(score)

    if verbose:
        print()

    return sum(scores) / len(scores)

In [20]:
# Spot-check on one pair of songs. In the recorded output below the pair
# scores 1 on title, artist and lyrics but 0 on album_name and album_type.
similarity(songs[1966], songs[6829])

genre: 1.0
album_name: 0
listener_play_ratio: 0.9999915392733356
album_artist: 0.5
lyrics: 1.0
title: 1
topics: 1.0
duration: 1.0
sentiment: 0.9999999999999999
language: 1.0
in_movie: 1
artist_vector: 0.9999999999574619
artist: 1
explicit: 1
album_type: 0
age: 0.9259259259259258
popularity: 0.9615384615384615



0.8463209368644226