In [207]:
import os

import numpy as np
from scipy import sparse
from sklearn.neighbors import NearestNeighbors

# Round-off trick that evaluates to the double-precision machine epsilon
# (about 2.2e-16); added to denominators to avoid division by zero.
eps = 7. / 3 - 4. / 3 - 1


def L2_norm_row(X):
    """Scale every row of ``X`` to unit Euclidean (L2) norm.

    Parameters
    ----------
    X : 2-D array-like or scipy sparse matrix, shape (n_rows, n_cols)

    Returns
    -------
    Row-normalised copy of ``X``: a dense ndarray for dense input, a sparse
    matrix for sparse input. All-zero rows stay zero, because ``eps`` keeps
    the denominator non-zero.
    """
    if sparse.issparse(X):
        # Elementwise square; `X * X` would be a matrix product for sparse matrices.
        sq_norms = np.asarray(X.multiply(X).sum(axis=1)).ravel()
    else:
        X = np.asarray(X, dtype=float)
        sq_norms = np.square(X).sum(axis=1)

    n_rows = X.shape[0]
    scale = 1. / (np.sqrt(sq_norms) + eps)
    # Left-multiplying by diag(scale) rescales each row independently.
    return sparse.spdiags(scale, 0, n_rows, n_rows).dot(X)


def tr(A, B):
    """Multiply ``A`` and ``B`` elementwise and sum the product along axis 0, twice.

    For two 2-D arrays of equal shape this is the sum of all entries of the
    elementwise product, i.e. trace(A.T @ B).
    """
    return np.multiply(A, B).sum(axis=0).sum(axis=0)


def construct_A(X, k, binary=False):
    """Build a nearest-neighbour graph over the rows of ``X``.

    Parameters
    ----------
    X : feature matrix, one sample per row.
    k : number of neighbours; the estimator is fitted with ``k + 1``.
    binary : when true, return the graph produced by ``kneighbors_graph``
        with its default mode; otherwise request ``mode='distance'``.

    Returns
    -------
    Whatever ``NearestNeighbors.kneighbors_graph`` returns for ``X``.
    """
    # NOTE(review): presumably the extra neighbour accounts for every sample
    # being returned as its own nearest neighbour when the training data is
    # queried -- confirm against the scikit-learn documentation.
    knn = NearestNeighbors(n_neighbors=k + 1).fit(X)

    if not binary:
        return knn.kneighbors_graph(X, mode='distance')
    return knn.kneighbors_graph(X)


def LCE(R, Xu, A, k=15, alpha=0.1, beta=0.05, lamb=0.001, epsilon=0.01, maxiter=150, verbose=True,
        random_state=None):
    """Jointly factorise ``R ~ W.Hi`` and ``Xu ~ W.Hu`` with multiplicative updates.

    The objective evaluated after every iteration is::

        alpha       * ||R  - W.Hi||^2
      + (1 - alpha) * ||Xu - W.Hu||^2
      + beta        * trace(W.T (D - A) W)
      + lamb        * (||W||^2 + ||Hi||^2 + ||Hu||^2)

    where ``D`` is the diagonal matrix holding the column sums of ``A``.

    Parameters
    ----------
    R : array, shape (n, v1)
    Xu : array, shape (n, v2)
    A : (n, n) matrix (sparse in this notebook) used in the graph term.
    k : number of latent factors (columns of ``W``).
    alpha : weight of the ``R`` reconstruction term; ``Xu`` gets ``1 - alpha``.
    beta : weight of the graph term.
    lamb : weight of the squared-norm penalty on ``W``, ``Hi`` and ``Hu``.
    epsilon : stop once the absolute change of the objective between two
        consecutive iterations drops below this value.
    maxiter : maximum number of update iterations (at least one is run).
    verbose : print iteration number, objective and delta from iteration 2 on.
    random_state : optional seed for the random initialisation. ``None``
        (default) draws from NumPy's global random state.

    Returns
    -------
    W : ndarray, shape (n, k)
    Hu : ndarray, shape (k, v2)
    Hi : ndarray, shape (k, v1)

    Raises
    ------
    ValueError
        If ``R``, ``Xu`` and ``A`` do not have the same number of rows.
    """
    n = R.shape[0]
    v1 = R.shape[1]
    v2 = Xu.shape[1]

    if Xu.shape[0] != n or A.shape[0] != n:
        raise ValueError(
            'R, Xu and A must have the same number of rows, got {}, {} and {}'.format(
                n, Xu.shape[0], A.shape[0]))

    # Same draw order (W, Hi, Hu) whether or not a seed is given.
    rng = np.random if random_state is None else np.random.RandomState(random_state)
    W = np.abs(rng.rand(n, k))
    Hi = np.abs(rng.rand(k, v1))
    Hu = np.abs(rng.rand(k, v2))

    D = sparse.dia_matrix((A.sum(axis=0), 0), A.shape)

    gamma = 1. - alpha
    # Constant parts of the two reconstruction errors.
    trRtR = tr(R, R)
    trXutXu = tr(Xu, Xu)

    # Products reused by both the update rules and the objective.
    WtW = W.T.dot(W)
    WtR = W.T.dot(R)
    WtXu = W.T.dot(Xu)
    WtWHi = WtW.dot(Hi)
    WtWHu = WtW.dot(Hu)
    DW = D.dot(W)
    AW = A.dot(W)

    ObjHist = []

    # Exactly `maxiter` iterations at most (the previous `itNum > maxiter`
    # check, evaluated after the update, allowed one extra iteration).
    for itNum in range(1, max(int(maxiter), 1) + 1):

        # update H (denominators are floored at 1e-10 to avoid division by zero)
        Hi_1 = np.divide(
            (alpha * WtR), np.maximum(alpha * WtWHi + lamb * Hi, 1e-10))
        Hi = np.multiply(Hi, Hi_1)

        Hu_1 = np.divide(
            (gamma * WtXu), np.maximum(gamma * WtWHu + lamb * Hu, 1e-10))
        Hu = np.multiply(Hu, Hu_1)

        # update W
        W_t1 = alpha * R.dot(Hi.T) + gamma * Xu.dot(Hu.T) + beta * AW
        W_t2 = alpha * W.dot(Hi.dot(Hi.T)) + gamma * \
            W.dot(Hu.dot(Hu.T)) + beta * DW + lamb * W
        W_t3 = np.divide(W_t1, np.maximum(W_t2, 1e-10))
        W = np.multiply(W, W_t3)

        # refresh the cached products with the new W
        WtW = W.T.dot(W)
        WtR = W.T.dot(R)
        WtXu = W.T.dot(Xu)
        WtWHi = WtW.dot(Hi)
        WtWHu = WtW.dot(Hu)
        DW = D.dot(W)
        AW = A.dot(W)

        # calculate objective function
        tr1 = alpha * (trRtR - 2. * tr(Hi, WtR) + tr(Hi, WtWHi))
        tr2 = gamma * (trXutXu - 2. * tr(Hu, WtXu) + tr(Hu, WtWHu))
        tr3 = beta * (tr(W, DW) - tr(W, AW))
        tr4 = lamb * (np.trace(WtW) + tr(Hi, Hi) + tr(Hu, Hu))

        Obj = tr1 + tr2 + tr3 + tr4
        ObjHist.append(Obj)

        # The convergence test needs two objective values.
        if itNum > 1:
            delta = abs(ObjHist[-1] - ObjHist[-2])
            if verbose:
                print ("Iteration: ", itNum, "Objective: ", Obj, "Delta: ", delta)
            if delta < epsilon:
                break

    return W, Hu, Hi

def get_recommendations(x_u, Hu, Hi, topk=10):
    """Rank items for one feature vector using the learned factors.

    The latent vector ``w`` is the least-squares solution of ``Hu.T w = x_u``;
    item scores are ``Hi.T w``.

    Parameters
    ----------
    x_u : feature vector with ``Hu.shape[1]`` entries.
    Hu : array, shape (k, v2)
    Hi : array, shape (k, v1)
    topk : number of item indices to return. Values ``<= 0`` yield an empty
        result (previously ``topk=0`` returned every item because ``[-0:]``
        selects the whole array).

    Returns
    -------
    Indices of the ``topk`` highest-scoring items, best first.
    """
    w = np.linalg.lstsq(Hu.T, x_u, rcond=-1)[0]
    scores = (Hi.T.dot(w)).T
    # Ascending argsort reversed -> best item first.
    ranking = np.argsort(scores)[::-1]
    return ranking[:max(topk, 0)]

In [9]:
import pandas as pd

In [101]:
# Directory containing the input CSV files. The default is the original
# machine-specific location; set the PLAYLIST_DATA_DIR environment variable to
# run the notebook elsewhere without editing this cell.
data_dir = os.environ.get('PLAYLIST_DATA_DIR', '/home/vadim/playlist_generation/data/random_data')

In [151]:
# Load the five input tables from `data_dir`. `index_col=0` uses the first CSV
# column as the DataFrame index, so it is not counted among the data columns.
tracks = pd.read_csv('{}/tracks.csv'.format(data_dir), index_col=0)
artists = pd.read_csv('{}/artists.csv'.format(data_dir), index_col=0)
albums = pd.read_csv('{}/albums.csv'.format(data_dir), index_col=0)
transactions = pd.read_csv('{}/transactions.csv'.format(data_dir), index_col=0)
playlists = pd.read_csv('{}/playlists.csv'.format(data_dir), index_col=0)

In [152]:
albums.shape

(2089, 2)

In [153]:
tracks.shape

(2200, 4)

In [154]:
artists.shape

(1817, 2)

In [155]:
playlists.name.unique().shape

(854,)

In [156]:
transactions.shape

(5706, 2)

In [157]:
def get_new_items(items):
    """Replace the id column (first column) of ``items`` with consecutive ids 0..n-1.

    Parameters
    ----------
    items : DataFrame whose first column holds the original ids.

    Returns
    -------
    new_items : copy of ``items`` with a fresh 0..n-1 index and the first
        column overwritten by the new ids. The other columns keep their
        original dtypes (stacking everything through ``np.hstack`` used to
        coerce all columns to one common dtype).
    items_new_old : DataFrame with columns ``new`` and ``old`` mapping each
        new id to the original id, in row order.
    """
    new_ids = np.arange(items.shape[0])
    id_column = items.columns[0]

    items_new_old = pd.DataFrame({'new': new_ids, 'old': items.iloc[:, 0].values})

    # reset_index returns a new frame, so the caller's `items` is left untouched.
    new_items = items.reset_index(drop=True)
    new_items[id_column] = new_ids.copy()
    return new_items, items_new_old

In [158]:
def get_new_tracks(tracks, albums_old_new, artists_old_new):
    """Translate the artist/album ids of ``tracks`` to new ids, then re-index the tracks.

    Parameters
    ----------
    tracks : DataFrame with ``artistid`` and ``albumid`` columns holding old ids.
    albums_old_new, artists_old_new : DataFrames with ``old`` and ``new``
        columns (the mapping returned by ``get_new_items``).

    Returns
    -------
    The result of ``get_new_items`` applied to the translated copy of ``tracks``.

    Raises
    ------
    IndexError
        If a track references an old id that is missing from its mapping.
    """
    def _old_to_new(old_new):
        # Reversed so that the first row wins when an old id is duplicated.
        return dict(zip(old_new.old.values[::-1], old_new.new.values[::-1]))

    # One dictionary lookup per row instead of a full boolean-mask scan of the
    # mapping table for every track.
    artist_new = _old_to_new(artists_old_new)
    album_new = _old_to_new(albums_old_new)
    try:
        ar_ids = [artist_new[old] for old in tracks.artistid.values]
        al_ids = [album_new[old] for old in tracks.albumid.values]
    except KeyError as exc:
        raise IndexError('no new id found for old id {!r}'.format(exc.args[0])) from exc

    new_tracks = tracks.copy()
    new_tracks.artistid = ar_ids
    new_tracks.albumid = al_ids
    return get_new_items(new_tracks)

In [159]:
def get_new_transactions(transactions, tracks_old_new, playlists_old_new):
    """Translate the old playlist/track ids of every transaction to new ids.

    Parameters
    ----------
    transactions : DataFrame with ``userid`` (old playlist id) and ``trackid``
        (old track id) columns.
    tracks_old_new, playlists_old_new : DataFrames with ``old`` and ``new``
        columns.

    Returns
    -------
    DataFrame with columns ``pid`` and ``trackid`` holding the new ids, one
    row per transaction, in the original order.

    Raises
    ------
    IndexError
        If a transaction references an old id that is missing from its mapping.
    """
    def _old_to_new(old_new):
        # Reversed so that the first row wins when an old id is duplicated.
        return dict(zip(old_new.old.values[::-1], old_new.new.values[::-1]))

    # One dictionary lookup per row instead of a full boolean-mask scan of the
    # mapping table for every transaction.
    playlist_new = _old_to_new(playlists_old_new)
    track_new = _old_to_new(tracks_old_new)
    try:
        p_ids = [playlist_new[old] for old in transactions.userid.values]
        t_ids = [track_new[old] for old in transactions.trackid.values]
    except KeyError as exc:
        raise IndexError('no new id found for old id {!r}'.format(exc.args[0])) from exc

    return pd.DataFrame({'pid': p_ids, 'trackid': t_ids})

In [175]:
def get_R_and_Xu(tracks, albums, artists, playlists, transactions):
    """Build the binary playlist-track matrix ``R`` and playlist-artist matrix ``Xu``.

    Parameters
    ----------
    tracks : DataFrame with ``trackid`` and ``artistid`` columns.
    albums : unused; kept for interface compatibility.
    artists : DataFrame; only its row count is used (number of ``Xu`` columns).
    playlists : DataFrame with a ``pid`` column; its row count gives the
        number of rows of both matrices.
    transactions : DataFrame with ``pid`` and ``trackid`` columns.

    All ids are used directly as row/column indices.

    Returns
    -------
    R : ndarray, shape (n_playlists, n_tracks); ``R[p, t] == 1`` when a
        transaction links playlist ``p`` and track ``t``.
    Xu : ndarray, shape (n_playlists, n_artists); ``Xu[p, a] == 1`` when
        playlist ``p`` contains at least one track of artist ``a``.
    """
    n_playlists = playlists.shape[0]

    R = np.zeros((n_playlists, tracks.shape[0]))
    tx_pids = transactions['pid'].values.astype(np.intp)
    tx_tids = transactions['trackid'].values.astype(np.intp)
    R[tx_pids, tx_tids] = 1

    Xu = np.zeros((n_playlists, artists.shape[0]))
    pids = playlists['pid'].values.astype(np.intp)
    tids = tracks['trackid'].values.astype(np.intp)
    aids = tracks['artistid'].values.astype(np.intp)
    # Every (listed playlist, listed track) pair present in R marks the
    # track's artist for that playlist -- done in one vectorised pass instead
    # of a Python loop over all playlist x track combinations.
    rows, cols = np.nonzero(R[np.ix_(pids, tids)])
    Xu[pids[rows], aids[cols]] = 1
    return R, Xu

In [161]:
# Re-number the album ids to 0..n-1 and keep the new -> old id mapping.
new_albums, albums_old_new = get_new_items(albums)

In [162]:
new_albums.head()

Unnamed: 0,albumid,popular
0,0,19
1,1,960
2,2,242
3,3,22750
4,4,2


In [163]:
albums_old_new.head()

Unnamed: 0,new,old
0,0,65575
1,1,7353
2,2,13527
3,3,357
4,4,148399


In [164]:
# Re-number the artist ids to 0..n-1 and keep the new -> old id mapping.
new_artists, artists_old_new = get_new_items(artists)

In [165]:
# Re-number the playlist ids to 0..n-1 and keep the new -> old id mapping.
new_playlists, playlists_old_new = get_new_items(playlists)

In [166]:
new_playlists.head()

Unnamed: 0,pid,name,num_followers
0,0,going out,1
1,1,FALL '16,5
2,2,content,1
3,3,Rap,1
4,4,Woods,1


In [167]:
playlists_old_new.head()

Unnamed: 0,new,old
0,0,822314
1,1,822595
2,2,822608
3,3,732129
4,4,732405


In [170]:
# Translate each track's artist/album ids with the mappings built above, then
# re-number the track ids themselves.
new_tracks, tracks_old_new = get_new_tracks(tracks, albums_old_new, artists_old_new)

In [171]:
new_tracks.head()

Unnamed: 0,trackid,popular,artistid,albumid
0,0,4862,385,1279
1,1,7706,1643,1710
2,2,473,1295,1086
3,3,174,1313,484
4,4,6656,66,833


In [172]:
# Express every (playlist, track) transaction in the new id space.
new_transactions = get_new_transactions(transactions, tracks_old_new, playlists_old_new)

In [173]:
new_transactions.head()

Unnamed: 0,pid,trackid
0,0,243
1,0,389
2,0,2084
3,1,178
4,2,869


In [176]:
# Dense 0/1 matrices: R is playlists x tracks, Xu is playlists x artists.
R, Xu = get_R_and_Xu(new_tracks, new_albums, new_artists, new_playlists, new_transactions)

In [177]:
R.shape

(1000, 2200)

In [178]:
Xu.shape

(1000, 1817)

In [179]:
# Nearest-neighbour graph between playlists, computed on their artist
# features with k=10 and the non-distance (binary=True) graph mode.
A = construct_A(Xu, 10, binary=True)

In [182]:
# Fit the factorisation with the default hyper-parameters.
# NOTE(review): in the recorded run below the objective is not monotonically
# decreasing and is negative at iterations 3-4 -- worth investigating (the
# only term of the objective that can be negative is the graph term).
W, Hu, Hi = LCE(R, Xu, A)

Iteration:  2 Objective:  955.5861173056187 Delta:  2386.6267236437625
Iteration:  3 Objective:  -392.00176742361793 Delta:  1347.5878847292365
Iteration:  4 Objective:  -439.4319381218032 Delta:  47.43017069818529
Iteration:  5 Objective:  128.69620109775713 Delta:  568.1281392195604
Iteration:  6 Objective:  928.6899217561408 Delta:  799.9937206583837
Iteration:  7 Objective:  1699.5117826112364 Delta:  770.8218608550956
Iteration:  8 Objective:  2343.3830283454517 Delta:  643.8712457342153
Iteration:  9 Objective:  2859.5645931555346 Delta:  516.1815648100828
Iteration:  10 Objective:  3278.7073536108546 Delta:  419.14276045532006
Iteration:  11 Objective:  3630.394854737739 Delta:  351.6875011268844
Iteration:  12 Objective:  3934.1572927571947 Delta:  303.76243801945566
Iteration:  13 Objective:  4200.539636110372 Delta:  266.3823433531775
Iteration:  14 Objective:  4434.553436545745 Delta:  234.013800435373
Iteration:  15 Objective:  4638.586912757755 Delta:  204.0334762120101
It

Iteration:  119 Objective:  4516.909367505341 Delta:  0.31869449725672894
Iteration:  120 Objective:  4516.499742073666 Delta:  0.40962543167461263
Iteration:  121 Objective:  4515.988275523314 Delta:  0.511466550352452
Iteration:  122 Objective:  4515.364576398482 Delta:  0.6236991248315462
Iteration:  123 Objective:  4514.614339549944 Delta:  0.7502368485384068
Iteration:  124 Objective:  4513.7138302123885 Delta:  0.900509337555377
Iteration:  125 Objective:  4512.634892520332 Delta:  1.07893769205657
Iteration:  126 Objective:  4511.329212679315 Delta:  1.3056798410170813
Iteration:  127 Objective:  4509.728917935614 Delta:  1.6002947437009425
Iteration:  128 Objective:  4507.798918100378 Delta:  1.9299998352362309
Iteration:  129 Objective:  4505.624734439524 Delta:  2.174183660853487
Iteration:  130 Objective:  4503.356816461068 Delta:  2.267917978456353
Iteration:  131 Objective:  4501.112494661745 Delta:  2.2443217993231883
Iteration:  132 Objective:  4498.9756884993385 Delta: 

In [227]:
# Top-10 track indices for playlist 5, predicted from its artist feature row.
get_recommendations(Xu[5], Hu, Hi, topk=10)

array([2140,  402, 1286,  988,  875,    8, 1415, 1762,  367, 1963])

In [228]:
# Tracks actually present in playlist 5, for comparison with the recommendations.
new_transactions[new_transactions.pid == 5]

Unnamed: 0,pid,trackid
15,5,420
16,5,2026
17,5,629


In [206]:
# Sanity check of np.linalg.lstsq (as used in get_recommendations) on a small
# 3x3 system; the output below reports rank 2 and the solution [1, 1, 1].
np.linalg.lstsq([[1,2,3], [4,5,6], [7,8,9]], [6,15,24], rcond=-1)

(array([1., 1., 1.]),
 array([], dtype=float64),
 2,
 array([1.68481034e+01, 1.06836951e+00, 4.41842475e-16]))