In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from scipy import sparse

# Tiny positive constant (the double-precision rounding error of 7/3 - 4/3 - 1,
# about 2.2e-16). It is added to norms so that all-zero rows do not divide by zero.
eps = 7. / 3 - 4. / 3 - 1


def L2_norm_row(X):
    """Scale every row of ``X`` to unit Euclidean (L2) length.

    Parameters
    ----------
    X : 2-D numpy array (or array-like) or scipy sparse matrix
        Matrix whose rows are normalised.

    Returns
    -------
    Matrix with the same shape as ``X`` in which row ``i`` has been divided by
    ``sqrt(sum_j X[i, j] ** 2) + eps``.  Sparse input gives a sparse result,
    dense input gives a dense array.  All-zero rows stay all-zero.
    """
    if sparse.issparse(X):
        # `multiply` is the element-wise product; `X * X` would be a matrix product.
        squared_row_sums = np.asarray(X.multiply(X).sum(axis=1)).ravel()
    else:
        X = np.asarray(X)
        squared_row_sums = np.square(X).sum(axis=1)

    n_rows = X.shape[0]
    inv_norms = 1. / (np.sqrt(squared_row_sums) + eps)
    # Left-multiplying by a diagonal matrix rescales each row by its own factor.
    return sparse.spdiags(inv_norms, 0, n_rows, n_rows).dot(X)


def tr(A, B):
    """Total of the element-wise product of ``A`` and ``B``.

    Mathematically this equals ``trace(A.T @ B)``.  ``A`` must provide a
    ``multiply`` method (scipy sparse matrices do).  The product is reduced
    along axis 0 and then along axis 1, so the result keeps whatever container
    those two reductions produce rather than being converted to a Python float.
    """
    hadamard = A.multiply(B)
    column_totals = hadamard.sum(axis=0)
    return column_totals.sum(axis=1)


def construct_A(X, k, binary=False):
    """Build a nearest-neighbour graph over the rows of ``X``.

    A ``NearestNeighbors`` estimator is fitted on ``X`` with ``k + 1``
    neighbours and then queried with the same ``X``.
    (Presumably the extra neighbour accounts for each point matching itself;
    confirm against the scikit-learn documentation.)

    Parameters
    ----------
    X : feature matrix accepted by ``NearestNeighbors.fit``.
    k : number of neighbours requested (before the ``+ 1``).
    binary : if true, ``kneighbors_graph`` is called with its default mode;
        otherwise it is called with ``mode='distance'``.

    Returns
    -------
    The graph returned by ``kneighbors_graph``.
    """
    knn = NearestNeighbors(n_neighbors=1 + k)
    knn = knn.fit(X)
    graph_options = {} if binary else {'mode': 'distance'}
    return knn.kneighbors_graph(X, **graph_options)


def LCE(R, Xu, A, k=15, alpha=0.1, beta=0.05, lamb=0.001, epsilon=0.01, maxiter=150, verbose=True):
    """Jointly factorise ``R`` and ``Xu`` with a shared factor ``W``.

    Multiplicative updates are iterated for ``R ~ W.Hi`` and ``Xu ~ W.Hu`` with
    a graph term built from ``A`` and a squared-norm penalty on all factors.

    Parameters
    ----------
    R : matrix with ``n`` rows and ``v1`` columns (sizes are read from ``R.shape``).
    Xu : matrix with ``v2`` columns; it is multiplied by ``W.T``, so it is
        presumably expected to have the same ``n`` rows as ``R`` (confirm).
    A : square graph matrix applied to ``W``; its column sums form the diagonal
        matrix ``D`` used in the graph term.
    k : number of latent dimensions (columns of ``W``, rows of ``Hi``/``Hu``).
    alpha : weight of the ``R`` reconstruction term; ``Xu`` gets ``1 - alpha``.
    beta : weight of the graph term ``tr(W, D W) - tr(W, A W)``.
    lamb : weight of the squared-norm penalty on ``W``, ``Hi`` and ``Hu``.
    epsilon : stop once the absolute change of the objective drops below this.
    maxiter : iteration bound; the loop stops when ``itNum > maxiter``.
    verbose : print the objective and its change from the second iteration on.

    Returns
    -------
    tuple ``(W, Hu, Hi)``. Note the order: ``Hu`` comes before ``Hi``.

    NOTE(review): ``tr`` in this file calls ``.multiply`` on its first argument,
    while ``W``, ``Hi`` and ``Hu`` are NumPy arrays here, which do not provide
    that method. Confirm the objective computation works for the intended
    input types.
    """

    n = R.shape[0]
    v1 = R.shape[1]
    v2 = Xu.shape[1]

    # Non-negative random starting point (unseeded, so runs are not repeatable).
    W = np.abs(np.random.rand(n, k))
    Hi = np.abs(np.random.rand(k, v1))
    Hu = np.abs(np.random.rand(k, v2))

    # Diagonal matrix holding the column sums of A.
    D = sparse.dia_matrix((A.sum(axis=0), 0), A.shape)

    gamma = 1. - alpha
    # Constant parts of the two reconstruction errors.
    trRtR = tr(R, R)
    trXutXu = tr(Xu, Xu)

    # Products shared by the update rules and the objective; they are refreshed
    # at the end of every iteration, after W has changed.
    WtW = W.T.dot(W)
    WtR = W.T.dot(R)
    WtXu = W.T.dot(Xu)
    WtWHi = WtW.dot(Hi)
    WtWHu = WtW.dot(Hu)
    DW = D.dot(W)
    AW = A.dot(W)

    itNum = 1
    # Overwritten before it is read: delta is recomputed from ObjHist below.
    delta = 2.0 * epsilon

    ObjHist = []

    while True:

        # Multiplicative update of Hi and Hu using the products computed with
        # the current W; denominators are clamped at 1e-10 to avoid division by zero.
        Hi_1 = np.divide(
            (alpha * WtR), np.maximum(alpha * WtWHi + lamb * Hi, 1e-10))
        Hi = np.multiply(Hi, Hi_1)

        Hu_1 = np.divide(
            (gamma * WtXu), np.maximum(gamma * WtWHu + lamb * Hu, 1e-10))
        Hu = np.multiply(Hu, Hu_1)

        # Multiplicative update of W with the freshly updated Hi/Hu; AW and DW
        # still refer to the W from before this update.
        W_t1 = alpha * R.dot(Hi.T) + gamma * Xu.dot(Hu.T) + beta * AW
        W_t2 = alpha * W.dot(Hi.dot(Hi.T)) + gamma * \
            W.dot(Hu.dot(Hu.T)) + beta * DW + lamb * W
        W_t3 = np.divide(W_t1, np.maximum(W_t2, 1e-10))
        W = np.multiply(W, W_t3)

        # Refresh the shared products for the objective and the next iteration.
        WtW = W.T.dot(W)
        WtR = W.T.dot(R)
        WtXu = W.T.dot(Xu)
        WtWHi = WtW.dot(Hi)
        WtWHu = WtW.dot(Hu)
        DW = D.dot(W)
        AW = A.dot(W)

        # Objective = weighted reconstruction errors + graph term + norm penalty.
        tr1 = alpha * (trRtR - 2. * tr(Hi, WtR) + tr(Hi, WtWHi))
        tr2 = gamma * (trXutXu - 2. * tr(Hu, WtXu) + tr(Hu, WtWHu))
        tr3 = beta * (tr(W, DW) - tr(W, AW))
        tr4 = lamb * (np.trace(WtW) + tr(Hi, Hi) + tr(Hu, Hu))

        Obj = tr1 + tr2 + tr3 + tr4
        ObjHist.append(Obj)

        # The stopping test needs two objective values, so it starts at iteration 2.
        # NOTE(review): because the test is `itNum > maxiter`, up to maxiter + 1
        # update rounds are executed. Confirm this is intended.
        if itNum > 1:
            delta = abs(ObjHist[-1] - ObjHist[-2])
            if verbose:
                print ("Iteration: ", itNum, "Objective: ", Obj, "Delta: ", delta)
            if itNum > maxiter or delta < epsilon:
                break

        itNum += 1

    return W, Hu, Hi

def get_recommendations(x_u, Hu, Hi, topk=10):
    """Rank the columns of ``Hi`` for one feature vector ``x_u``.

    The latent vector ``w`` is the least-squares solution of ``Hu.T w = x_u``.
    Scores are ``Hi.T w``, and the indices of the ``topk`` largest scores are
    returned from highest to lowest.

    Parameters
    ----------
    x_u : right-hand side of the least-squares problem.
    Hu : factor matrix whose transpose is the least-squares design matrix.
    Hi : factor matrix used to turn the latent vector into scores.
    topk : number of indices to return.

    Returns
    -------
    Array of indices into the score vector, best first.
    """
    lstsq_result = np.linalg.lstsq(Hu.T, x_u, rcond=-1)
    latent = lstsq_result[0]
    scores = Hi.T.dot(latent).T
    ascending = np.argsort(scores)
    best_last = ascending[-topk:]
    return np.flip(best_last, axis=0)

In [2]:
import pandas as pd

In [3]:
# Directory that holds the CSV files loaded by this notebook.
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
data_dir = '/home/vadim/playlist_generation/data/random_data'

In [4]:
# Load the five CSV tables from `data_dir`, in this order, using the first
# column of each file as the DataFrame index.
tracks, artists, albums, transactions, playlists = (
    pd.read_csv(path_template.format(data_dir), index_col=0)
    for path_template in (
        '{}/tracks.csv',
        '{}/artists.csv',
        '{}/albums.csv',
        '{}/transactions.csv',
        '{}/playlists.csv',
    )
)

In [8]:
def get_full_dataset(transactions, tracks, playlists):
    """Attach track and playlist columns to every transaction row.

    ``tracks`` is left-joined on ``'trackid'`` first, then ``playlists`` is
    left-joined on ``'pid'``.  Every row of ``transactions`` is therefore kept,
    and keys without a match get missing values in the joined columns.

    Returns
    -------
    pandas.DataFrame with the merged columns.
    """
    with_tracks = transactions.merge(tracks, how='left', on='trackid')
    return with_tracks.merge(playlists, how='left', on='pid')

In [9]:
# Join transactions with their track and playlist columns, then preview the
# first rows of the merged table.
full_dataset = get_full_dataset(transactions, tracks, playlists)
full_dataset.head()

Unnamed: 0,pid,trackid,popular,artistid,albumid,name,num_followers
0,822314,1149,6600,636,821,going out,1
1,822314,230,5313,95,138,going out,1
2,822314,4937,4965,876,3355,going out,1
3,822595,23973,96,7340,14054,FALL '16,5
4,822595,4171,694,1845,2862,FALL '16,5


In [10]:
from polara.recommender.models import RecommenderModel

In [25]:
class LocalCollectiveEmbedings(RecommenderModel):
    """LCE-style joint factorisation fitted on a flat interaction table.

    The table handed to ``get_data`` must contain the columns ``'pid'``,
    ``'trackid'``, ``'artistid'`` and ``'albumid'``.  After ``build`` the
    learned factors are available as ``self.W``, ``self.Hu`` and ``self.Hi``.
    """
    # Class-level import kept as-is; the methods use the module-level `sparse`.
    import scipy.sparse

    def __init__(self, *args, **kwargs):
        """Create an empty model.

        Positional and keyword arguments are accepted but ignored, and the
        base-class initialiser is not called.
        """
        self.method = 'LCE'

    def get_data(self, data):
        """Attach the interaction table.

        The DataFrame is stored by reference (no copy), so an in-place
        ``reindex`` also modifies the caller's object.
        """
        self.data = data

    def reindex(self, col, sort=True, inplace=True):
        """Map the values of column ``col`` to integer codes ``0..n_unique-1``.

        Parameters
        ----------
        col : name of the column in ``self.data`` to encode.
        sort : if true, codes follow the sorted order of the unique values;
            otherwise they follow the order of first appearance.
        inplace : if true, overwrite ``self.data[col]`` with the codes and
            return only the mapping; otherwise leave the data untouched and
            return ``(codes, mapping)``.

        Returns
        -------
        ``mapping`` is a DataFrame with columns ``'old'`` (original value) and
        ``'new'`` (integer code).
        """
        # pd.factorize is the public API for "codes + unique values"; it avoids
        # depending on groupby internals that differ between pandas versions.
        new_data, old_val = pd.factorize(self.data[col], sort=sort)
        new_val = np.arange(len(old_val))
        val_transform = pd.DataFrame({'old': old_val, 'new': new_val})

        if inplace:
            result = val_transform
            self.data.loc[:, col] = new_data
        else:
            result = (new_data, val_transform)
        return result

    def reindex_some_columns(self, columns):
        """Reindex every column in ``columns`` in place.

        The old->new mappings are stored in ``self.index``, keyed by column
        name.  Any previously stored mappings are discarded.
        """
        self.index = {}
        for col in columns:
            self.index[col] = self.reindex(col)

    def get_matrixs(self):
        """Build the sparse matrices used by ``build``.

        Returns
        -------
        R : CSR matrix indexed by (pid, trackid) with a 1 contributed by every
            row of ``self.data``.
        Xu : horizontal stack of a (pid, artistid) matrix and a (pid, albumid)
            matrix built the same way.

        Matrix sizes are ``max id + 1`` per axis, so this presumably expects the
        id columns to have been reindexed to small contiguous integers first
        (see ``reindex_some_columns``); confirm with the caller.
        """
        idx_R = self.data[['pid', 'trackid']].values
        val = np.ones(self.data.shape[0])
        shp = tuple(idx_R.max(axis=0) + 1)

        R = sparse.csr_matrix((val, (idx_R[:, 0], idx_R[:, 1])),
                              shape=shp,
                              dtype=val.dtype)

        idx_artist = self.data['artistid'].values
        idx_album = self.data['albumid'].values
        idx_pid = self.data['pid'].values

        Xu1 = sparse.csr_matrix((val, (idx_pid, idx_artist)),
                                shape=(idx_pid.max() + 1, idx_artist.max() + 1))

        Xu2 = sparse.csr_matrix((val, (idx_pid, idx_album)),
                                shape=(idx_pid.max() + 1, idx_album.max() + 1))

        Xu = sparse.hstack((Xu1, Xu2))

        return R, Xu

    def construct_A(self, X, k=15, binary=False):
        """Return the nearest-neighbour graph of the rows of ``X``.

        The estimator is fitted with ``k + 1`` neighbours and queried with the
        same ``X``.  With ``binary`` true, ``kneighbors_graph`` is called with
        its default mode; otherwise ``mode='distance'`` is used.
        """
        nbrs = NearestNeighbors(n_neighbors=1 + k).fit(X)
        if binary:
            A = nbrs.kneighbors_graph(X)
        else:
            A = nbrs.kneighbors_graph(X, mode='distance')

        return A

    def build(self, k=15, alpha=0.1, beta=0.05, lamb=0.001, epsilon=0.01, maxiter=150, verbose=True):
        """Fit the factors ``W``, ``Hu`` and ``Hi`` by multiplicative updates.

        Parameters
        ----------
        k : number of latent dimensions.  The neighbour graph is built with
            ``construct_A``'s own default and is not affected by this value.
        alpha : weight of the ``R`` reconstruction term; ``Xu`` gets ``1 - alpha``.
        beta : weight of the graph term ``tr(W, D W) - tr(W, A W)``.
        lamb : weight of the squared-norm penalty on all three factors.
        epsilon : stop when the absolute objective change falls below this.
        maxiter : iteration bound; the loop stops when ``itNum > maxiter``.
        verbose : print objective and change from the second iteration on.

        Returns
        -------
        None.  Results are stored on ``self.W``, ``self.Hu`` and ``self.Hi``.
        """
        R, Xu = self.get_matrixs()
        A = self.construct_A(X=Xu)

        n = R.shape[0]
        v1 = R.shape[1]
        v2 = Xu.shape[1]

        # Sparse random start with density 0.5 (unseeded).
        # NOTE(review): entries that start at zero are only ever multiplied, so
        # they appear to stay zero for the whole fit; confirm this is intended.
        W = np.abs(sparse.rand(n, k, 0.5, 'csr', dtype=R.dtype))
        Hi = np.abs(sparse.rand(k, v1, 0.5, 'csr', dtype=R.dtype))
        Hu = np.abs(sparse.rand(k, v2, 0.5, 'csr', dtype=R.dtype))

        # Diagonal matrix holding the column sums of A.
        D = sparse.dia_matrix((A.sum(axis=0), 0), A.shape)

        gamma = 1. - alpha

        # Constant parts of the two reconstruction errors.
        trRtR = tr(R, R)
        trXutXu = tr(Xu, Xu)

        # Products shared by the update rules and the objective.
        WtW = W.T.dot(W)
        WtR = W.T.dot(R)
        WtXu = W.T.dot(Xu)
        WtWHi = WtW.dot(Hi)
        WtWHu = WtW.dot(Hu)
        DW = D.dot(W)
        AW = A.dot(W)

        itNum = 1
        # Overwritten before it is read: delta is recomputed from ObjHist below.
        delta = 2.0 * epsilon

        ObjHist = []

        while True:

            # Update Hi and Hu; denominators are clamped at 1e-6.
            Hi_1 = np.divide(
                (alpha * WtR), (alpha * WtWHi + lamb * Hi).maximum(1e-6))

            Hi = Hi.multiply(Hi_1)

            Hu_1 = np.divide(
                (gamma * WtXu), (gamma * WtWHu + lamb * Hu).maximum(1e-6))
            Hu = Hu.multiply(Hu_1)

            # Update W with the new Hi/Hu; AW and DW still use the previous W.
            W_t1 = alpha * R.dot(Hi.T) + gamma * Xu.dot(Hu.T) + beta * AW

            W_t2 = alpha * W.dot(Hi.dot(Hi.T)) + gamma * \
                W.dot(Hu.dot(Hu.T)) + beta * DW + lamb * W

            W_t3 = np.divide(W_t1, (W_t2).maximum(1e-6))
            W = W.multiply(W_t3)

            # Refresh shared products for the objective and the next iteration.
            WtW = W.T.dot(W)
            WtR = W.T.dot(R)
            WtXu = W.T.dot(Xu)
            WtWHi = WtW.dot(Hi)
            WtWHu = WtW.dot(Hu)
            DW = D.dot(W)
            AW = A.dot(W)

            # Objective = weighted reconstruction errors + graph term + penalty.
            tr1 = alpha * (trRtR - 2. * tr(Hi, WtR) + tr(Hi, WtWHi))
            tr2 = gamma * (trXutXu - 2. * tr(Hu, WtXu) + tr(Hu, WtWHu))
            tr3 = beta * (tr(W, DW) - tr(W, AW))
            tr4 = lamb * (WtW.diagonal().sum() + tr(Hi, Hi) + tr(Hu, Hu))

            Obj = tr1 + tr2 + tr3 + tr4
            ObjHist.append(Obj)

            # Two objective values are needed, so the test starts at iteration 2.
            # NOTE(review): with `itNum > maxiter` up to maxiter + 1 update
            # rounds are executed. Confirm this is intended.
            if itNum > 1:
                delta = abs(ObjHist[-1] - ObjHist[-2])
                if verbose:
                    print ("Iteration: ", itNum, "Objective: ", Obj, "Delta: ", delta)
                if itNum > maxiter or delta < epsilon:
                    break

            itNum += 1

        self.W = W
        self.Hu = Hu
        self.Hi = Hi

In [26]:
# Create the model, attach the merged table, and replace the raw ids in the
# listed columns with contiguous integer codes.
# NOTE: get_data stores a reference, so reindexing rewrites `full_dataset` itself.
a = LocalCollectiveEmbedings()
a.get_data(full_dataset)
a.reindex_some_columns(['pid', 'trackid', 'artistid', 'albumid'])

In [27]:
# Fit with the default hyper-parameters; verbose defaults to True, so the
# objective and its change are printed for every iteration from the second on.
a.build()



Iteration:  2 Objective:  [[15734.60134476]] Delta:  [[1817.22545502]]
Iteration:  3 Objective:  [[58587.79573407]] Delta:  [[42853.19438931]]
Iteration:  4 Objective:  [[43357.07760429]] Delta:  [[15230.71812978]]
Iteration:  5 Objective:  [[17927.41089491]] Delta:  [[25429.66670939]]
Iteration:  6 Objective:  [[14309.87755584]] Delta:  [[3617.53333907]]
Iteration:  7 Objective:  [[13410.89965857]] Delta:  [[898.97789726]]
Iteration:  8 Objective:  [[12975.60548507]] Delta:  [[435.2941735]]
Iteration:  9 Objective:  [[12895.08783363]] Delta:  [[80.51765144]]
Iteration:  10 Objective:  [[13121.5947088]] Delta:  [[226.50687518]]
Iteration:  11 Objective:  [[13313.13270016]] Delta:  [[191.53799135]]
Iteration:  12 Objective:  [[13279.15791689]] Delta:  [[33.97478327]]
Iteration:  13 Objective:  [[13169.75454478]] Delta:  [[109.40337211]]
Iteration:  14 Objective:  [[13072.60758285]] Delta:  [[97.14696193]]
Iteration:  15 Objective:  [[13031.56884844]] Delta:  [[41.03873441]]
Iteration:  

Iteration:  126 Objective:  [[12595.73287561]] Delta:  [[0.55855934]]
Iteration:  127 Objective:  [[12595.55199901]] Delta:  [[0.18087661]]
Iteration:  128 Objective:  [[12594.98517522]] Delta:  [[0.56682378]]
Iteration:  129 Objective:  [[12594.77096767]] Delta:  [[0.21420755]]
Iteration:  130 Objective:  [[12594.14754377]] Delta:  [[0.62342389]]
Iteration:  131 Objective:  [[12593.8479214]] Delta:  [[0.29962238]]
Iteration:  132 Objective:  [[12593.11515318]] Delta:  [[0.73276822]]
Iteration:  133 Objective:  [[12592.70404375]] Delta:  [[0.41110943]]
Iteration:  134 Objective:  [[12591.864459]] Delta:  [[0.83958476]]
Iteration:  135 Objective:  [[12591.31234911]] Delta:  [[0.55210988]]
Iteration:  136 Objective:  [[12590.27216723]] Delta:  [[1.04018188]]
Iteration:  137 Objective:  [[12589.4533652]] Delta:  [[0.81880204]]
Iteration:  138 Objective:  [[12588.09474999]] Delta:  [[1.35861521]]
Iteration:  139 Objective:  [[12586.92204685]] Delta:  [[1.17270314]]
Iteration:  140 Objectiv