# Collaborative filtering model

In [55]:
import numpy as np
import pandas as pd
from scipy.sparse import coo_matrix
from implicit.nearest_neighbours import bm25_weight
from implicit.als import AlternatingLeastSquares
import os

In [56]:
materials_root = "materials"
working_root = "working"

## Load data

**Load `userid-artist-counts.csv`** for training the model.

In [57]:
data = pd.read_csv(os.path.join(working_root, "userid-artist-counts.csv"))
data.head()

Unnamed: 0,userid,artist_mbids,count
0,1,002e9f6e-13af-4347-83c5-f5ace70e0ec4,1
1,1,01252145-c9e8-4de5-a480-9b2bed05450a,14
2,1,02a7de68-2681-4d7e-8c36-6f2fdb37c07d,1
3,1,0383dadf-2a4e-4d10-a46a-e9e041da8eb3,12
4,1,03f93de6-6d62-4710-bcc7-9b3d7c3d95f5,1


**Load `musicbrainz_artist.csv` for `artist_mbids` to `name` mapping.**

In [58]:
musicbrainz_artist = pd.read_csv(os.path.join(materials_root, "musicbrainz_artist.csv"))
musicbrainz_artist.set_index("artist_mbid", inplace=True)
musicbrainz_artist.head()

Unnamed: 0_level_0,name
artist_mbid,Unnamed: 1_level_1
fadeb38c-833f-40bc-9d8c-a6383b38b1be,Доктор Сатана
49add228-eac5-4de8-836c-d75cde7369c3,Pete Moutso
165a49a0-2b3b-4078-a3c1-905afdc07c0a,Babyglock
7b4a548e-a01a-49b7-82e7-b49efeb9732c,Aric Leavitt
60aca66f-e91a-4cb5-9308-b6e293cd833e,Fonograff


**Process the date for training**

In [59]:
users = data["userid"].astype("category")
artists = data["artist_mbids"].astype("category")
plays = coo_matrix(
        (data["count"].astype(np.float32),
        (
            users.cat.codes.copy(),
            artists.cat.codes.copy()
        ),
    )
).tocsr()

artist_user_plays = bm25_weight(plays, K1=100, B=0.8)
user_plays = artist_user_plays.T.tocsr()

## Model training

In [60]:
model = AlternatingLeastSquares(factors=64, regularization=0.05, alpha=2.0)
model.fit(artist_user_plays.tocsr())

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 15/15 [00:01<00:00, 11.81it/s]


## Generate recommendations

**Recommendation example 1**

In [61]:
userid = 123
ids, scores = model.recommend(userid, user_plays[userid], N=10)

In [62]:
print(f'Recommendations for user: {userid}')
recommendation = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
recommendation.reset_index(drop=True, inplace=True)
recommendation

Recommendations for user: 123


Unnamed: 0,artist,score
0,Телевизор,1.20232
1,Heart,0.934325
2,TSHA,0.917736
3,Max Cooper,0.915201
4,上海アリス幻樂団,0.883574
5,Victor Ruiz,0.865356
6,The Felice Brothers,0.857405
7,J Mascis,0.853091
8,Lit,0.847989
9,Siouxsie and the Banshees,0.826713


**Recommendation example 2**

In [63]:
userid = 1234
ids, scores = model.recommend(userid, user_plays[userid], N=10)

In [64]:
print(f'Recommendations for user: {userid}')
recommendation = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
recommendation.reset_index(drop=True, inplace=True)
recommendation

Recommendations for user: 1234


Unnamed: 0,artist,score
0,The National,1.091855
1,Cole Porter,1.034392
2,Jeuce,0.980928
3,Beastie Boys,0.930126
4,Sybreed,0.924293
5,Omar Khorshid,0.923141
6,The Jesus and Mary Chain,0.896257
7,Silicone Soul,0.896157
8,Beyoncé,0.884745
9,Billy Idol,0.880577


## Find similar artists

**Similar artists example 1**

In [65]:
artistid = "03f93de6-6d62-4710-bcc7-9b3d7c3d95f5"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [66]:
ids, scores = model.similar_items(artist_idx, N=10)

In [67]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to Z‐Trip


Unnamed: 0,artist,score
0,STÖNER,1.0
1,Pardon Moi,1.0
2,The Zutons,1.0
3,Kunal Ganjawala,1.0
4,Alizée,1.0
5,DJ Krush,1.0
6,Skarazula,1.0
7,Orquesta Sublime,1.0
8,Des’ree,1.0
9,N*E*R*D,1.0


**Similar artists example 2**

In [68]:
artistid = "8f92558c-2baa-4758-8c38-615519e9deda"
artistname = musicbrainz_artist.loc[artistid]['name']
artist_idx = artists.index[artists == artistid].tolist()[0]

In [69]:
ids, scores = model.similar_items(artist_idx, N=10)

In [70]:
print(f'Artists similar to {artistname}')
similar_artists = pd.DataFrame({"artist": musicbrainz_artist.loc[artists[ids]]['name'].tolist(), "score": scores})
similar_artists.reset_index(drop=True, inplace=True)
similar_artists

Artists similar to The Clash


Unnamed: 0,artist,score
0,The Clash,1.0
1,Labrinth,1.0
2,Peggy Gou,1.0
3,Cappella,1.0
4,Endel,1.0
5,Otu,1.0
6,Greg Puciato,1.0
7,Fleet Foxes,1.0
8,Merle Haggard,1.0
9,OceanLab,1.0
