# Collaborative filtering model

In [2]:
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares
import os

In [3]:
materials_root = "../materials"
working_root = "../working"

## Load data

**Load `userid-artist-counts.csv`** for training the model.

In [4]:
data = pd.read_csv(os.path.join(working_root, "userid-artist-counts.csv"))
data.head()

Unnamed: 0,userid,artist_mbids,count
0,1,000fc734-b7e1-4a01-92d1-f544261b43f5,1
1,1,00eb9d25-0465-49e3-8e7a-3eacbd9ebb0d,3
2,1,012b5f71-b079-48f1-892e-af645c6576cb,1
3,1,01ce0542-1e29-4ab6-bffe-ba0e2fb61ce8,2
4,1,0383dadf-2a4e-4d10-a46a-e9e041da8eb3,1


**Load `musicbrainz_artist.csv` for `artist_mbids` to `name` mapping.**

In [5]:
musicbrainz_artist = pd.read_csv(os.path.join(materials_root, "musicbrainz_artist.csv"))
musicbrainz_artist.set_index("artist_mbid", inplace=True)
musicbrainz_artist.head()

Unnamed: 0_level_0,name
artist_mbid,Unnamed: 1_level_1
fadeb38c-833f-40bc-9d8c-a6383b38b1be,Доктор Сатана
49add228-eac5-4de8-836c-d75cde7369c3,Pete Moutso
165a49a0-2b3b-4078-a3c1-905afdc07c0a,Babyglock
7b4a548e-a01a-49b7-82e7-b49efeb9732c,Aric Leavitt
60aca66f-e91a-4cb5-9308-b6e293cd833e,Fonograff


**Process the date for training**

In [6]:
users = data["userid"].astype("category")
artists = data["artist_mbids"].astype("category")
plays = coo_matrix(
        (data["count"].astype(np.float32),
        (
            users.cat.codes.copy(),
            artists.cat.codes.copy()
        ),
    )
).tocsr()

artist_user_plays = bm25_weight(plays, K1=100, B=0.8)
user_plays = artist_user_plays.T.tocsr()

## Model training

In [7]:
model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
model.fit(artist_user_plays.tocsr())

100%|███████████████████████████████████████████████████████████████████████████████████| 15/15 [00:03<00:00,  4.08it/s]


## Generate recommendations

**Recommendation example 1**

In [8]:
userid = 123
ids, scores = model.recommend(userid, user_plays[userid], N=10)

In [9]:
print(f'Recommendations for user: {userid}')
recommendation = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
recommendation.reset_index(drop=True, inplace=True)
recommendation

Recommendations for user: 123


Unnamed: 0,artist,score
0,Siouxsie Sioux,1.139609
1,John Butler Trio,1.064256
2,RÜFÜS DU SOL,1.053225
3,Sharon Van Etten,1.036735
4,The Outcasts,1.017945
5,Clara Luzia,1.015443
6,Mudvayne,1.00767
7,＊(Asterisk),1.004273
8,Happyness,1.003862
9,KAUAN,0.988825


**Recommendation example 2**

In [10]:
userid = 1234
ids, scores = model.recommend(userid, user_plays[userid], N=10)

In [11]:
print(f'Recommendations for user: {userid}')
recommendation = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
recommendation.reset_index(drop=True, inplace=True)
recommendation

Recommendations for user: 1234


Unnamed: 0,artist,score
0,Jean‐Michel Blais,1.180534
1,Happy Go Lumpy's Bass & Friendship Club,1.176325
2,New Order,1.158957
3,Yung Joc,1.143705
4,Terry Riley,1.137661
5,Wizzard,1.120222
6,Godspeed You! Black Emperor,1.107837
7,Joey Bada$$,1.092055
8,Aquilo,1.085626
9,Andy Williams,1.072765


## Find similar artists

**Similar artists example 1**

In [12]:
artistid = "03f93de6-6d62-4710-bcc7-9b3d7c3d95f5"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [13]:
ids, scores = model.similar_items(artist_idx, N=10)

In [14]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to Z‐Trip


Unnamed: 0,artist,score
0,Z‐Trip,1.0
1,近藤浩治,0.999216
2,Rupie Edwards,0.997636
3,Roxy Music,0.996187
4,Van Halen,0.992861
5,Tantric,0.970684
6,AlgoRythmiK,0.966787
7,Iron & Wine,0.965888
8,Irène Drésel,0.96357
9,nobigdyl.,0.96289


**Similar artists example 2**

In [15]:
artistid = "8f92558c-2baa-4758-8c38-615519e9deda"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [16]:
ids, scores = model.similar_items(artist_idx, N=10)

In [17]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to The Clash


Unnamed: 0,artist,score
0,Im Baek Hun,1.0
1,Nanowar of Steel,1.0
2,The Doors,1.0
3,Raekwon,1.0
4,The Ronettes,1.0
5,The Moody Blues,1.0
6,bignic,1.0
7,Kid Thomas and His Creole Jazz Band,1.0
8,Marc van Linden,1.0
9,Goldie,1.0
