# Collaborative filtering model

In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
materials_root = "../materials"
working_root = "../working"

## Load data

**Load `userid-artist-counts.csv`** for training the model.

In [3]:
data = pd.read_csv(os.path.join(working_root, "userid-artist-counts.csv"))
data.head()

Unnamed: 0,userid,artist_mbids,count
0,1,000fc734-b7e1-4a01-92d1-f544261b43f5,1
1,1,00eb9d25-0465-49e3-8e7a-3eacbd9ebb0d,3
2,1,012b5f71-b079-48f1-892e-af645c6576cb,1
3,1,01ce0542-1e29-4ab6-bffe-ba0e2fb61ce8,2
4,1,0383dadf-2a4e-4d10-a46a-e9e041da8eb3,1


**Load `musicbrainz_artist.csv` for `artist_mbids` to `name` mapping.**

In [4]:
musicbrainz_artist = pd.read_csv(os.path.join(materials_root, "musicbrainz_artist.csv"))
musicbrainz_artist.set_index("artist_mbid", inplace=True)
musicbrainz_artist.head()

Unnamed: 0_level_0,name
artist_mbid,Unnamed: 1_level_1
fadeb38c-833f-40bc-9d8c-a6383b38b1be,Доктор Сатана
49add228-eac5-4de8-836c-d75cde7369c3,Pete Moutso
165a49a0-2b3b-4078-a3c1-905afdc07c0a,Babyglock
7b4a548e-a01a-49b7-82e7-b49efeb9732c,Aric Leavitt
60aca66f-e91a-4cb5-9308-b6e293cd833e,Fonograff


**Process the date for training**

In [5]:
users = data["userid"].astype("category")
artists = data["artist_mbids"].astype("category")
plays = coo_matrix(
        (data["count"].astype(np.float32),
        (
            users.cat.codes.copy(),
            artists.cat.codes.copy()
        ),
    )
).tocsr()

artist_user_plays = bm25_weight(plays, K1=100, B=0.8)
user_plays = artist_user_plays.T.tocsr()

## Model training

In [6]:
model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
model.fit(artist_user_plays.tocsr())

100%|███████████████████████████████████████████████████████████████████████████████████| 15/15 [00:03<00:00,  4.81it/s]


## Generate recommendations

**Recommendation example 1**

In [7]:
userid = 123
ids, scores = model.recommend(userid, user_plays[userid], N=10)

In [8]:
print(f'Recommendations for user: {userid}')
recommendation = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
recommendation.reset_index(drop=True, inplace=True)
recommendation

Recommendations for user: 123


Unnamed: 0,artist,score
0,Siouxsie Sioux,1.225429
1,John Butler Trio,1.173827
2,Sharon Van Etten,1.164155
3,Electric Guest,1.152812
4,Paul Johnson,1.148144
5,RÜFÜS DU SOL,1.094499
6,Coi Leray,1.085531
7,William Orbit,1.074847
8,The Outcasts,1.052629
9,KAUAN,1.035424


**Recommendation example 2**

In [9]:
userid = 1234
ids, scores = model.recommend(userid, user_plays[userid], N=10)

In [10]:
print(f'Recommendations for user: {userid}')
recommendation = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
recommendation.reset_index(drop=True, inplace=True)
recommendation

Recommendations for user: 1234


Unnamed: 0,artist,score
0,Jean‐Michel Blais,1.206659
1,Happy Go Lumpy's Bass & Friendship Club,1.177446
2,Yung Joc,1.146596
3,Godspeed You! Black Emperor,1.118446
4,Terry Riley,1.112939
5,Joey Bada$$,1.103775
6,Rocketship,1.095238
7,Wizzard,1.093191
8,PVRIS,1.089574
9,New Order,1.085535


## Find similar artists

**Similar artists example 1**

In [11]:
artistid = "03f93de6-6d62-4710-bcc7-9b3d7c3d95f5"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [12]:
ids, scores = model.similar_items(artist_idx, N=10)

In [13]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to Z‐Trip


Unnamed: 0,artist,score
0,Z‐Trip,1.0
1,近藤浩治,0.99914
2,Rupie Edwards,0.997418
3,Roxy Music,0.995829
4,Van Halen,0.992228
5,Irène Drésel,0.968606
6,Tantric,0.968323
7,Iron & Wine,0.96476
8,AlgoRythmiK,0.964217
9,Pearl Jam,0.957564


## Tests

**Find similar artists to A. R. Rahman `MBID:e0bba708-bdd3-478d-84ea-c706413bedab`**

<img src='../scratch/AR_Rahman_At_The_‘Marvel_Anthem’_Launch_(3x4_cropped).jpg' width=200>

In [22]:
artistid = "e0bba708-bdd3-478d-84ea-c706413bedab"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [23]:
ids, scores = model.similar_items(artist_idx, N=10)

In [24]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to A. R. Rahman


Unnamed: 0,artist,score
0,A. R. Rahman,1.0
1,JMSN,0.857455
2,Giraffage,0.857455
3,The Merry Thoughts,0.857455
4,Faith No More,0.857454
5,Katy Carr,0.857454
6,Lustre,0.857454
7,Low Roar,0.857454
8,Mokhov,0.854862
9,Limp Bizkit,0.850914


**Find similar artists to Goran Bregović `MBID:883ece23-2779-4091-b527-62eb07ee79d4`**

<img src='../scratch/20220710-Rudolstadt-Festival-2022-Goran-Bregovic-7849_(cropped).jpg' width=200>

In [25]:
artistid = "883ece23-2779-4091-b527-62eb07ee79d4"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [26]:
ids, scores = model.similar_items(artist_idx, N=10)

In [27]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to Goran Bregović


Unnamed: 0,artist,score
0,Goran Bregović,1.0
1,T. Rex,0.975317
2,Pomplamoose,0.975315
3,The Mavericks,0.975315
4,Marshall Jefferson,0.975315
5,DNCE,0.975315
6,Waldeck,0.975315
7,Technohead,0.975315
8,Julee Cruise,0.975315
9,Alejandro Escovedo,0.973412


**Find similar artists to Linkin park `MBID:f59c5520-5f46-4d2c-b2c4-822eabf53419`**

<img src='../scratch/1920px-LinkinParkBerlin2010.jpg' width=400>

In [31]:
artistid = "f59c5520-5f46-4d2c-b2c4-822eabf53419"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [32]:
ids, scores = model.similar_items(artist_idx, N=10)

In [33]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to Linkin Park


Unnamed: 0,artist,score
0,Linkin Park,1.0
1,Kid Cudi,0.913123
2,Stone Sour,0.913123
3,Wau Wau Collectif,0.91284
4,Ray Charles,0.871346
5,Elwood,0.625813
6,Jonathan Groff,0.612072
7,Gorgon,0.563189
8,London Grammar,0.555969
9,Infected Mushroom,0.533872
