In [1]:
import argparse
from utils.io import load_numpy
import numpy as np
from experiment.latent_analysis import latent_analysis
from experiment.popular_analysis import popular_overlapping
from utils.argument import shape
from utils.argument import check_float_positive, check_int_positive, shape

from models.lrec import embedded_lrec_items
from models.weighted_lrec import weighted_lrec_items
from models.pure_svd import pure_svd, eigen_boosted_pure_svd
from models.als import als
from models.pmi_lrec import pmi_lrec_items
from models.weighted_pmi_lrec import weighted_pmi_lrec_items
from models.chainitemitem import chain_item_item
from models.predictor import predict
from experiment.latent_analysis import latent_case_study
import pandas as pd

  from ._conv import register_converters as _register_converters

Widget registration using a string name has been deprecated. Widget registration now uses a plain `@register` decorator.



In [2]:
argpath = 'datax/'
argtrain = 'Rtrain.npz'
argvalid = 'Rvalid.npz'
argindex = 'Index.npy'
argside = 'ml-1m/movies.dat'

In [3]:
R_train = load_numpy(path=argpath, name=argtrain)
Index = np.load(argpath+argindex)
Side_info = pd.read_csv(argpath+argside, delimiter='::', names=['index', 'name', 'type'], encoding='utf-8')





In [4]:
Side_info['notes'] = Side_info['name'] + '<br>' + Side_info['type']

In [5]:
Side_info

Unnamed: 0,index,name,type,notes
0,1,Toy Story (1995),Animation|Children's|Comedy,Toy Story (1995)<br>Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy,Jumanji (1995)<br>Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance,Grumpier Old Men (1995)<br>Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama,Waiting to Exhale (1995)<br>Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy,Father of the Bride Part II (1995)<br>Comedy
5,6,Heat (1995),Action|Crime|Thriller,Heat (1995)<br>Action|Crime|Thriller
6,7,Sabrina (1995),Comedy|Romance,Sabrina (1995)<br>Comedy|Romance
7,8,Tom and Huck (1995),Adventure|Children's,Tom and Huck (1995)<br>Adventure|Children's
8,9,Sudden Death (1995),Action,Sudden Death (1995)<br>Action
9,10,GoldenEye (1995),Action|Adventure|Thriller,GoldenEye (1995)<br>Action|Adventure|Thriller


In [6]:
params = {
    'models': {"PLR": embedded_lrec_items,
               "PMI-PLR": pmi_lrec_items,
               "ALS": als
               },
    'alphas': 1,
    'rank': 50,
    'lambda': 1,
    'topK': 10,
    'iter': 7,
    'metric': ['R-Precision', 'NDCG'],
}


In [21]:
item_popularity = np.array(np.sum(R_train, axis=0)).flatten()

In [7]:
RQ, Yt, _ = params['models']['PMI-PLR'](R_train, embeded_matrix=np.empty((0)),
                                         iteration=params['iter'], rank=params['rank'],
                                         lam=params['lambda'], alpha=params['alphas'], seed=1)

Y = Yt.T

  4%|▍         | 256/6040 [00:00<00:02, 2556.50it/s]

############### Create PMI matrix ################



100%|██████████| 6040/6040 [00:02<00:00, 2979.96it/s]


################# Randomized SVD #################

Elapsed: 00:00:02
######## Closed-Form Linear Optimization #########

Elapsed: 00:00:00


In [8]:
Y

array([[-0.00290444, -0.00323393, -0.00190585, ..., -0.0062295 ,
         0.01954152,  0.00492285],
       [ 0.00286207, -0.00462039, -0.00052791, ...,  0.00522759,
        -0.00407772,  0.01437379],
       [ 0.00250544, -0.0036631 , -0.00324649, ...,  0.00043443,
         0.00968267, -0.0053311 ],
       ...,
       [ 0.00029565,  0.00050795, -0.00020962, ...,  0.0045831 ,
         0.00085058,  0.00160361],
       [ 0.00032323,  0.00097482, -0.00014559, ...,  0.00527033,
         0.0011431 ,  0.00117179],
       [ 0.00056806,  0.00090544, -0.00122308, ...,  0.00311419,
         0.00293191,  0.01422698]])

In [9]:
from sklearn.manifold import TSNE

In [10]:
Y_embedded = TSNE(n_components=2).fit_transform(Y)

In [11]:
import plotly.plotly as py
import plotly
plotly.tools.set_credentials_file(username='wuga', api_key='gJlTOHX9OA6h7rlUlkWo')
import plotly.graph_objs as go

In [23]:
item_popularity

array([7019, 1553,  877, ...,   70,  101,  854])

In [27]:
trace0 = go.Scatter(
    x=Y_embedded[:, 0],
    y=Y_embedded[:, 1],
    mode='markers',
        marker = dict(
        size = 8,
        color = item_popularity,
        line = dict(
            width = 2,
        )
    ),
    #text=Side_info[Side_info['index'].isin(Index)]['type'].as_matrix()
    text=Side_info[Side_info['index'].isin(Index)]['notes'].str.decode('iso-8859-1').str.encode('utf-8').tolist()
)

data = [trace0]
py.iplot(data, filename='text')


High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~wuga/0 or inside your plot.ly account where it is named 'text'


In [13]:
RQ, Yt, _ = params['models']['ALS'](R_train, embeded_matrix=np.empty((0)),
                                         iteration=params['iter'], rank=params['rank'],
                                         lam=params['lambda'], alpha=params['alphas'], seed=1)

Y = Yt.T

###### Alternative Item-wised Optimization #######

------------------ Iteration: 0 ------------------



100%|██████████| 6040/6040 [00:12<00:00, 482.22it/s]
100%|██████████| 3706/3706 [00:18<00:00, 205.44it/s]
  1%|          | 46/6040 [00:00<00:13, 453.80it/s]

------------------ Iteration: 1 ------------------



100%|██████████| 6040/6040 [00:13<00:00, 451.31it/s]
100%|██████████| 3706/3706 [00:15<00:00, 240.94it/s]
  1%|          | 56/6040 [00:00<00:10, 555.38it/s]

------------------ Iteration: 2 ------------------



100%|██████████| 6040/6040 [00:10<00:00, 574.46it/s]
100%|██████████| 3706/3706 [00:16<00:00, 220.79it/s]
  1%|          | 44/6040 [00:00<00:13, 436.68it/s]

------------------ Iteration: 3 ------------------



100%|██████████| 6040/6040 [00:11<00:00, 526.62it/s]
100%|██████████| 3706/3706 [00:15<00:00, 233.35it/s]
  1%|          | 56/6040 [00:00<00:10, 553.48it/s]

------------------ Iteration: 4 ------------------



100%|██████████| 6040/6040 [00:11<00:00, 522.02it/s]
100%|██████████| 3706/3706 [00:12<00:00, 294.84it/s]
  1%|          | 53/6040 [00:00<00:11, 529.81it/s]

------------------ Iteration: 5 ------------------



100%|██████████| 6040/6040 [00:12<00:00, 472.69it/s]
100%|██████████| 3706/3706 [00:16<00:00, 224.83it/s]
  1%|          | 50/6040 [00:00<00:12, 492.11it/s]

------------------ Iteration: 6 ------------------



100%|██████████| 6040/6040 [00:10<00:00, 563.59it/s]
100%|██████████| 3706/3706 [00:15<00:00, 240.17it/s]


In [14]:
Y_als_embedded = TSNE(n_components=2).fit_transform(Y)

In [29]:
trace0 = go.Scatter(
    x=Y_als_embedded[:, 0],
    y=Y_als_embedded[:, 1],
    mode='markers',
    marker = dict(
        size = 8,
        color = item_popularity,
        line = dict(
            width = 2,
        )
    ),
    #text=Side_info[Side_info['index'].isin(Index)]['type'].as_matrix()
    text=Side_info[Side_info['index'].isin(Index)]['notes'].str.decode('iso-8859-1').str.encode('utf-8').tolist()
)

data = [trace0]
py.iplot(data, filename='text')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~wuga/0 or inside your plot.ly account where it is named 'text'


In [18]:
RQ, Yt, _ = params['models']['PLR'](R_train, embeded_matrix=np.empty((0)),
                                         iteration=params['iter'], rank=params['rank'],
                                         lam=params['lambda'], alpha=params['alphas'], seed=1)

Y = Yt.T

################# Randomized SVD #################

Elapsed: 00:00:01
######## Closed-Form Linear Optimization #########

Elapsed: 00:00:00


In [19]:
Y_plrec_embedded = TSNE(n_components=2).fit_transform(Y)

In [28]:
trace0 = go.Scatter(
    x=Y_plrec_embedded[:, 0],
    y=Y_plrec_embedded[:, 1],
    mode='markers',
    marker = dict(
        size = 8,
        color = item_popularity,
        line = dict(
            width = 2,
        )
    ),
    #text=Side_info[Side_info['index'].isin(Index)]['type'].as_matrix()
    text=Side_info[Side_info['index'].isin(Index)]['notes'].str.decode('iso-8859-1').str.encode('utf-8').tolist()
)

data = [trace0]
py.iplot(data, filename='text')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~wuga/0 or inside your plot.ly account where it is named 'text'
