In [1]:
import argparse
from utils.io import load_numpy
import numpy as np
from experiment.latent_analysis import latent_analysis
from experiment.popular_analysis import popular_overlapping
from utils.argument import shape
from utils.argument import check_float_positive, check_int_positive, shape

from models.lrec import embedded_lrec_items
from models.weighted_lrec import weighted_lrec_items
from models.pure_svd import pure_svd, eigen_boosted_pure_svd
from models.als import als
from models.pmi_lrec import pmi_lrec_items
from models.weighted_pmi_lrec import weighted_pmi_lrec_items
from models.chainitemitem import chain_item_item
from models.predictor import predict
import pandas as pd


Widget registration using a string name has been deprecated. Widget registration now uses a plain `@register` decorator.



In [2]:
# Input locations. argindex and argside are appended to argpath with plain
# string concatenation in the loading cell, so argpath must keep its trailing '/'.
argpath = 'datax/'
argtrain = 'Rtrain.npz'  # training matrix, read through load_numpy
argvalid = 'Rvalid.npz'  # not read by any cell shown in this notebook
argindex = 'Index.npy'  # array later used to filter Side_info['index']
argside = 'ml-1m/movies.dat'  # '::'-delimited file read into Side_info

In [3]:
# Load the training matrix, the index array and the per-movie side information.
R_train = load_numpy(path=argpath, name=argtrain)
Index = np.load(argpath+argindex)
# A multi-character separator ('::') is only handled by pandas' Python parser.
# Requesting that engine explicitly avoids the ParserWarning raised when the
# default C engine has to fall back; the parsed result is the same.
Side_info = pd.read_csv(argpath+argside, delimiter='::', names=['index', 'name', 'type'],
                        encoding='utf-8', engine='python')





In [4]:
R_train.shape

(6038, 3533)

In [5]:
Side_info.size

11649

In [6]:
np.asarray(np.sum(R_train,axis=0)).reshape(-1).shape

(3533,)

In [8]:
# Keep only the rows whose 'index' value appears in Index, then renumber rows from 0.
# NOTE(review): later cells address Side_info by position using R_train column
# numbers, which assumes the surviving rows are in the same order as R_train's
# columns — confirm against how Index was built.
Side_info = Side_info[Side_info['index'].isin(Index)].reset_index(drop=True)

In [9]:
# Popularity of each item = column sum of R_train, flattened to a 1-D array.
# The values are assigned by position, so Side_info needs exactly one row per R_train column.
Side_info['popularity'] = np.asarray(np.sum(R_train,axis=0)).reshape(-1)

In [10]:
# Label string per movie: "<name><br><type><br>Popularity:<popularity>".
# It is passed as the `text` of the plotly scatter traces and is also the string
# that getNearestNeighbors searches with its `query` argument.
Side_info['notes'] = Side_info['name'] + '<br>' + Side_info['type'] + '<br>Popularity:' +  Side_info['popularity'].astype(str)

In [11]:
Side_info

Unnamed: 0,index,name,type,popularity,notes
0,1,Toy Story (1995),Animation|Children's|Comedy,1047.0,Toy Story (1995)<br>Animation|Children's|Comed...
1,2,Jumanji (1995),Adventure|Children's|Fantasy,108.0,Jumanji (1995)<br>Adventure|Children's|Fantasy...
2,3,Grumpier Old Men (1995),Comedy|Romance,59.0,Grumpier Old Men (1995)<br>Comedy|Romance<br>P...
3,4,Waiting to Exhale (1995),Comedy|Drama,18.0,Waiting to Exhale (1995)<br>Comedy|Drama<br>Po...
4,5,Father of the Bride Part II (1995),Comedy,28.0,Father of the Bride Part II (1995)<br>Comedy<b...
5,6,Heat (1995),Action|Crime|Thriller,364.0,Heat (1995)<br>Action|Crime|Thriller<br>Popula...
6,7,Sabrina (1995),Comedy|Romance,72.0,Sabrina (1995)<br>Comedy|Romance<br>Popularity...
7,8,Tom and Huck (1995),Adventure|Children's,7.0,Tom and Huck (1995)<br>Adventure|Children's<br...
8,9,Sudden Death (1995),Action,12.0,Sudden Death (1995)<br>Action<br>Popularity:12.0
9,10,GoldenEye (1995),Action|Adventure|Thriller,219.0,GoldenEye (1995)<br>Action|Adventure|Thriller<...


In [131]:
# Experiment configuration shared by the training cells.
params = {
    # Name -> training function; training cells fetch one with params['models'][<name>].
    'models': {"PLRec": embedded_lrec_items,
               "PmiPLRec": pmi_lrec_items,
               "ALS": als
               },
    'alphas': 10,  # forwarded to the model as alpha=
    'rank': 100,  # forwarded as rank=; also used to build the ALS factor file names
    'lambda': 0.01,  # forwarded as lam=
    'topK': 10,  # not read by any cell shown in this notebook
    'iter': 7,  # forwarded as iteration=
    'metric': ['R-Precision', 'NDCG'],  # not read by any cell shown in this notebook
}


In [13]:
# Column sums of R_train as a flat 1-D array (same quantity as Side_info['popularity']);
# used as the marker colour in the scatter plots.
item_popularity = np.array(np.sum(R_train, axis=0)).flatten()

In [14]:
# Fit the model registered under 'PmiPLRec' (pmi_lrec_items) on the training matrix.
# The call returns three values; the third one is discarded here.
# NOTE(review): 'embeded_matrix' is the keyword spelling used at this call site —
# check the callee's signature before "correcting" it.
RQ, Yt, _ = params['models']['PmiPLRec'](R_train, embeded_matrix=np.empty((0)),
                                         iteration=params['iter'], rank=params['rank'],
                                         lam=params['lambda'], alpha=params['alphas'], seed=1)
# Keep copies under model-specific names because RQ / Yt are reassigned by later training cells.
RQ_pmi = RQ
# Transposed so that rows can be addressed per item, which is how
# getNearestNeighbors and getRecommendation consume it.
Y_pmi = Yt.T

  0%|          | 0/6038 [00:00<?, ?it/s]

############### Create PMI matrix ################



100%|██████████| 6038/6038 [00:03<00:00, 1713.21it/s]


################# Randomized SVD #################

Elapsed: 00:00:00
######## Closed-Form Linear Optimization #########

Elapsed: 00:00:00


In [15]:
from sklearn.manifold import TSNE

In [16]:
# Project the PMI-PLRec item factors to 2-D for plotting.
# t-SNE is stochastic; pinning random_state makes the layout reproducible across
# kernel restarts (1 matches the seed passed to the model training calls).
Y_embedded = TSNE(n_components=2, random_state=1).fit_transform(Y_pmi)

In [17]:
import os

import plotly.plotly as py
import plotly
import plotly.graph_objs as go

# SECURITY: never commit API keys to a notebook. The key that used to be
# hardcoded in this cell must be treated as leaked and regenerated in the
# plotly account settings. Credentials are now taken from the environment:
#   PLOTLY_USERNAME (optional, defaults to the original account name)
#   PLOTLY_API_KEY  (required; a missing variable raises KeyError here rather
#                    than failing later at upload time)
plotly.tools.set_credentials_file(
    username=os.environ.get('PLOTLY_USERNAME', 'wuga'),
    api_key=os.environ['PLOTLY_API_KEY'],
)

In [18]:
item_popularity

array([1047.,  108.,   59., ...,    6.,   15.,  130.])

In [19]:
# Scatter plot of the 2-D t-SNE projection of the PMI-PLRec item factors.
# Marker colour follows item popularity; each point carries the movie's notes string as its text.
trace0 = go.Scatter(
    x=Y_embedded[:, 0],
    y=Y_embedded[:, 1],
    mode='markers',
    marker={
        'size': 8,
        'color': item_popularity,
        'line': {'width': 2},
    },
    text=(Side_info['notes']
          .str.decode('iso-8859-1')
          .str.encode('utf-8')
          .tolist()),
)

data = [trace0]
py.iplot(data, filename='PMIPLREC')


High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~wuga/0 or inside your plot.ly account where it is named 'PMIPLREC'


In [20]:
# In-notebook ALS training, intentionally left disabled: the ALS factors used by
# the later cells are read from .npy files under 'latent/' in the next code cell.
# Un-comment the lines below to retrain instead of loading.
# RQ, Yt, _ = params['models']['ALS'](R_train, embeded_matrix=np.empty((0)),
#                                          iteration=params['iter'], rank=params['rank'],
#                                          lam=params['lambda'], alpha=params['alphas'], seed=1)
# RQ_als = RQ
# Y_als = Yt.T

###### Alternative Item-wised Optimization #######

------------------ Iteration: 0 ------------------



100%|██████████| 6038/6038 [00:08<00:00, 728.65it/s]
100%|██████████| 3533/3533 [00:07<00:00, 469.26it/s]
  1%|          | 75/6038 [00:00<00:07, 746.19it/s]

------------------ Iteration: 1 ------------------



100%|██████████| 6038/6038 [00:08<00:00, 751.13it/s]
100%|██████████| 3533/3533 [00:07<00:00, 478.78it/s]
  1%|          | 75/6038 [00:00<00:07, 745.41it/s]

------------------ Iteration: 2 ------------------



100%|██████████| 6038/6038 [00:07<00:00, 755.91it/s]
100%|██████████| 3533/3533 [00:07<00:00, 475.52it/s]
  1%|▏         | 76/6038 [00:00<00:07, 753.64it/s]

------------------ Iteration: 3 ------------------



100%|██████████| 6038/6038 [00:07<00:00, 765.76it/s]
100%|██████████| 3533/3533 [00:07<00:00, 460.10it/s]
  1%|          | 71/6038 [00:00<00:08, 703.53it/s]

------------------ Iteration: 4 ------------------



100%|██████████| 6038/6038 [00:08<00:00, 719.89it/s]
100%|██████████| 3533/3533 [00:07<00:00, 466.13it/s]
  1%|          | 69/6038 [00:00<00:08, 687.15it/s]

------------------ Iteration: 5 ------------------



100%|██████████| 6038/6038 [00:08<00:00, 751.62it/s]
100%|██████████| 3533/3533 [00:07<00:00, 492.13it/s]
  1%|▏         | 79/6038 [00:00<00:07, 787.44it/s]

------------------ Iteration: 6 ------------------



100%|██████████| 6038/6038 [00:07<00:00, 756.18it/s]
100%|██████████| 3533/3533 [00:07<00:00, 466.15it/s]


In [None]:
# Load previously saved ALS factors; with params['rank'] == 100 the paths resolve
# to 'latent/U_ALS_100.npy' and 'latent/V_ALS_100.npy'.
# NOTE(review): presumably U holds the user factors and V the item factors with one
# row per item, since that is how RQ_als / Y_als are consumed below — confirm
# against the code that wrote these files.
RQ_als = np.load('latent/U_{0}_{1}.npy'.format("ALS", params['rank']))
Y_als = np.load('latent/V_{0}_{1}.npy'.format("ALS", params['rank']))

In [21]:
# Project the ALS item factors to 2-D for plotting.
# t-SNE is stochastic; pinning random_state makes the layout reproducible across
# kernel restarts (1 matches the seed passed to the model training calls).
Y_als_embedded = TSNE(n_components=2, random_state=1).fit_transform(Y_als)

In [22]:
# Scatter plot of the 2-D t-SNE projection of the ALS item factors.
# Marker colour follows item popularity; each point carries the movie's notes string as its text.
trace1 = go.Scatter(
    x=Y_als_embedded[:, 0],
    y=Y_als_embedded[:, 1],
    mode='markers',
    marker={
        'size': 8,
        'color': item_popularity,
        'line': {'width': 2},
    },
    text=(Side_info['notes']
          .str.decode('iso-8859-1')
          .str.encode('utf-8')
          .tolist()),
)

data = [trace1]
py.iplot(data, filename='ALS')

In [23]:
# Fit the PLRec model (embedded_lrec_items) on the training matrix.
# The lookup key has to match an entry of params['models']; that dict registers
# this model as "PLRec", so the former 'PLR' key raised KeyError on a fresh run.
RQ, Yt, _ = params['models']['PLRec'](R_train, embeded_matrix=np.empty((0)),
                                      iteration=params['iter'], rank=params['rank'],
                                      lam=params['lambda'], alpha=params['alphas'], seed=1)
# Keep copies under model-specific names; Yt is transposed so rows can be addressed per item.
RQ_lrec = RQ
Y_lrec = Yt.T

################# Randomized SVD #################

Elapsed: 00:00:00
######## Closed-Form Linear Optimization #########

Elapsed: 00:00:00


In [24]:
# Project the PLRec item factors to 2-D for plotting.
# t-SNE is stochastic; pinning random_state makes the layout reproducible across
# kernel restarts (1 matches the seed passed to the model training calls).
Y_plrec_embedded = TSNE(n_components=2, random_state=1).fit_transform(Y_lrec)

In [25]:
# Scatter plot of the 2-D t-SNE projection of the PLRec item factors.
# Marker colour follows item popularity; each point carries the movie's notes string as its text.
trace2 = go.Scatter(
    x=Y_plrec_embedded[:, 0],
    y=Y_plrec_embedded[:, 1],
    mode='markers',
    marker={
        'size': 8,
        'color': item_popularity,
        'line': {'width': 2},
    },
    text=(Side_info['notes']
          .str.decode('iso-8859-1')
          .str.encode('utf-8')
          .tolist()),
)

data = [trace2]
py.iplot(data, filename='PLREC')

# KNN

In [26]:
def getNearestNeighbors(query, embedding, k):
    """Return the notes of the items closest to the first item matching `query`.

    Parameters
    ----------
    query : str
        Substring searched for in the notes strings built from the module-level
        ``Side_info['notes']`` column; the first item containing it is the anchor.
    embedding : array
        2-D array with one row per item, in the same order as ``Side_info``.
    k : int
        Number of neighbours wanted in addition to the anchor itself.

    Returns
    -------
    list
        ``k + 1`` notes strings ordered by increasing squared Euclidean
        distance to the anchor row. The anchor has distance zero to itself,
        so it is normally the first entry.

    Raises
    ------
    IndexError
        If no notes string contains `query` (same exception type as before,
        now with a message that names the failing query).
    """
    names = (Side_info['notes']
             .str.decode('iso-8859-1')
             .str.encode('utf-8').tolist())
    # Stop scanning at the first hit instead of collecting every matching position.
    anchor = next((i for i, s in enumerate(names) if query in s), None)
    if anchor is None:
        raise IndexError("no item in Side_info['notes'] contains %r" % (query,))
    # Squared Euclidean distance from every item row to the anchor row.
    distances = np.sum(np.square(embedding - embedding[anchor]), axis=1)
    rank = np.argsort(distances)
    return [names[n] for n in rank[:k+1]]

## Titanic

In [27]:
getNearestNeighbors('Titanic (1997)', Y_pmi, 10)

['Titanic (1997)<br>Drama|Romance<br>Popularity:502.0',
 'Jerry Maguire (1996)<br>Drama|Romance<br>Popularity:412.0',
 'Ever After: A Cinderella Story (1998)<br>Drama|Romance<br>Popularity:118.0',
 'Bridges of Madison County, The (1995)<br>Drama|Romance<br>Popularity:64.0',
 'Far and Away (1992)<br>Drama|Romance<br>Popularity:84.0',
 'Rob Roy (1995)<br>Drama|Romance|War<br>Popularity:147.0',
 'Ghost (1990)<br>Comedy|Romance|Thriller<br>Popularity:215.0',
 'Notting Hill (1999)<br>Comedy|Romance<br>Popularity:182.0',
 'Pretty Woman (1990)<br>Comedy|Romance<br>Popularity:239.0',
 'Sabrina (1995)<br>Comedy|Romance<br>Popularity:72.0',
 'It Could Happen to You (1994)<br>Drama|Romance<br>Popularity:50.0']

In [28]:
getNearestNeighbors('Titanic (1997)', Y_als, 10)

['Titanic (1997)<br>Drama|Romance<br>Popularity:502.0',
 'Jerry Maguire (1996)<br>Drama|Romance<br>Popularity:412.0',
 "Angela's Ashes (1999)<br>Drama<br>Popularity:65.0",
 'Ever After: A Cinderella Story (1998)<br>Drama|Romance<br>Popularity:118.0',
 'Liar Liar (1997)<br>Comedy<br>Popularity:119.0',
 'Few Good Men, A (1992)<br>Crime|Drama<br>Popularity:331.0',
 'Sense and Sensibility (1995)<br>Drama|Romance<br>Popularity:359.0',
 'City of Angels (1998)<br>Romance<br>Popularity:50.0',
 "Mr. Holland's Opus (1995)<br>Drama<br>Popularity:125.0",
 'Dumb & Dumber (1994)<br>Comedy<br>Popularity:116.0',
 'Dances with Wolves (1990)<br>Adventure|Drama|Western<br>Popularity:484.0']

In [29]:
getNearestNeighbors('Titanic (1997)', Y_lrec, 10)

['Titanic (1997)<br>Drama|Romance<br>Popularity:502.0',
 'Jerry Maguire (1996)<br>Drama|Romance<br>Popularity:412.0',
 'Ever After: A Cinderella Story (1998)<br>Drama|Romance<br>Popularity:118.0',
 'American President, The (1995)<br>Comedy|Drama|Romance<br>Popularity:326.0',
 "Mr. Holland's Opus (1995)<br>Drama<br>Popularity:125.0",
 'Sliding Doors (1998)<br>Drama|Romance<br>Popularity:123.0',
 'Notting Hill (1999)<br>Comedy|Romance<br>Popularity:182.0',
 'City of Angels (1998)<br>Romance<br>Popularity:50.0',
 'It Could Happen to You (1994)<br>Drama|Romance<br>Popularity:50.0',
 'Sabrina (1995)<br>Comedy|Romance<br>Popularity:72.0',
 'Bridges of Madison County, The (1995)<br>Drama|Romance<br>Popularity:64.0']

## Star Wars

In [30]:
getNearestNeighbors('Star War', Y_pmi, 10)

['Star Wars: Episode IV - A New Hope (1977)<br>Action|Adventure|Fantasy|Sci-Fi<br>Popularity:1894.0',
 'Star Wars: Episode V - The Empire Strikes Back (1980)<br>Action|Adventure|Drama|Sci-Fi|War<br>Popularity:1839.0',
 'Smoke (1995)<br>Drama<br>Popularity:45.0',
 'Death and the Maiden (1994)<br>Drama|Thriller<br>Popularity:27.0',
 'True Grit (1969)<br>Adventure|Western<br>Popularity:84.0',
 'Beautiful Thing (1996)<br>Drama|Romance<br>Popularity:39.0',
 "Darby O'Gill and the Little People (1959)<br>Adventure|Children's|Fantasy<br>Popularity:30.0",
 'Man Who Would Be King, The (1975)<br>Adventure<br>Popularity:115.0',
 "Nobody's Fool (1994)<br>Drama<br>Popularity:49.0",
 'My Own Private Idaho (1991)<br>Drama<br>Popularity:37.0',
 'He Got Game (1998)<br>Drama<br>Popularity:27.0']

In [31]:
getNearestNeighbors('Star War', Y_als, 10)

['Star Wars: Episode IV - A New Hope (1977)<br>Action|Adventure|Fantasy|Sci-Fi<br>Popularity:1894.0',
 'Star Wars: Episode V - The Empire Strikes Back (1980)<br>Action|Adventure|Drama|Sci-Fi|War<br>Popularity:1839.0',
 'Raiders of the Lost Ark (1981)<br>Action|Adventure<br>Popularity:1588.0',
 'Alien (1979)<br>Action|Horror|Sci-Fi|Thriller<br>Popularity:1094.0',
 'Terminator, The (1984)<br>Action|Sci-Fi|Thriller<br>Popularity:1015.0',
 'Matrix, The (1999)<br>Action|Sci-Fi|Thriller<br>Popularity:1490.0',
 'Duel in the Sun (1946)<br>Western<br>Popularity:26.0',
 'Picnic at Hanging Rock (1975)<br>Drama|Mystery<br>Popularity:53.0',
 'Blowup (1966)<br>Drama|Mystery<br>Popularity:90.0',
 'Our Town (1940)<br>Drama<br>Popularity:13.0',
 'Jurassic Park (1993)<br>Action|Adventure|Sci-Fi<br>Popularity:1133.0']

In [32]:
getNearestNeighbors('Star War', Y_lrec, 10)

['Star Wars: Episode IV - A New Hope (1977)<br>Action|Adventure|Fantasy|Sci-Fi<br>Popularity:1894.0',
 "Willy Wonka and the Chocolate Factory (1971)<br>Adventure|Children's|Comedy|Fantasy<br>Popularity:384.0",
 'Lawrence of Arabia (1962)<br>Adventure|War<br>Popularity:404.0',
 'Great Escape, The (1963)<br>Adventure|War<br>Popularity:311.0',
 'Treasure of the Sierra Madre, The (1948)<br>Adventure<br>Popularity:206.0',
 'Man Who Would Be King, The (1975)<br>Adventure<br>Popularity:115.0',
 'Dazed and Confused (1993)<br>Comedy<br>Popularity:195.0',
 'Splash (1984)<br>Comedy|Fantasy|Romance<br>Popularity:223.0',
 'Jerk, The (1979)<br>Comedy<br>Popularity:129.0',
 'Breaking Away (1979)<br>Drama<br>Popularity:196.0',
 'Sixteen Candles (1984)<br>Comedy<br>Popularity:182.0']

## Lion King

In [33]:
getNearestNeighbors('Lion King', Y_pmi, 10)

["Lion King, The (1994)<br>Animation|Children's|Musical<br>Popularity:325.0",
 "Mulan (1998)<br>Animation|Children's<br>Popularity:135.0",
 "Tarzan (1999)<br>Animation|Children's<br>Popularity:129.0",
 "Beauty and the Beast (1991)<br>Animation|Children's|Musical<br>Popularity:366.0",
 "Hunchback of Notre Dame, The (1996)<br>Animation|Children's|Musical<br>Popularity:77.0",
 "Little Mermaid, The (1989)<br>Animation|Children's|Comedy|Musical|Romance<br>Popularity:286.0",
 "Robin Hood (1973)<br>Animation|Children's<br>Popularity:87.0",
 "James and the Giant Peach (1996)<br>Animation|Children's|Musical<br>Popularity:129.0",
 "Aladdin (1992)<br>Animation|Children's|Comedy|Musical<br>Popularity:482.0",
 "Antz (1998)<br>Animation|Children's<br>Popularity:142.0",
 "Alice in Wonderland (1951)<br>Animation|Children's|Musical<br>Popularity:136.0"]

In [34]:
getNearestNeighbors('Lion King', Y_als, 10)

["Lion King, The (1994)<br>Animation|Children's|Musical<br>Popularity:325.0",
 "Beauty and the Beast (1991)<br>Animation|Children's|Musical<br>Popularity:366.0",
 "Mulan (1998)<br>Animation|Children's<br>Popularity:135.0",
 "Antz (1998)<br>Animation|Children's<br>Popularity:142.0",
 "Tarzan (1999)<br>Animation|Children's<br>Popularity:129.0",
 "Hunchback of Notre Dame, The (1996)<br>Animation|Children's|Musical<br>Popularity:77.0",
 "Cinderella (1950)<br>Animation|Children's|Musical<br>Popularity:143.0",
 "Aladdin (1992)<br>Animation|Children's|Comedy|Musical<br>Popularity:482.0",
 "Anastasia (1997)<br>Animation|Children's|Musical<br>Popularity:71.0",
 "Sleeping Beauty (1959)<br>Animation|Children's|Musical<br>Popularity:112.0",
 "Muppet Christmas Carol, The (1992)<br>Children's|Musical<br>Popularity:51.0"]

In [35]:
getNearestNeighbors('Lion King', Y_lrec, 10)

["Lion King, The (1994)<br>Animation|Children's|Musical<br>Popularity:325.0",
 "Mulan (1998)<br>Animation|Children's<br>Popularity:135.0",
 "Beauty and the Beast (1991)<br>Animation|Children's|Musical<br>Popularity:366.0",
 "Peter Pan (1953)<br>Animation|Children's|Fantasy|Musical<br>Popularity:151.0",
 "Tarzan (1999)<br>Animation|Children's<br>Popularity:129.0",
 "Hunchback of Notre Dame, The (1996)<br>Animation|Children's|Musical<br>Popularity:77.0",
 "Little Mermaid, The (1989)<br>Animation|Children's|Comedy|Musical|Romance<br>Popularity:286.0",
 "Cinderella (1950)<br>Animation|Children's|Musical<br>Popularity:143.0",
 "Robin Hood (1973)<br>Animation|Children's<br>Popularity:87.0",
 "Charlotte's Web (1973)<br>Animation|Children's<br>Popularity:129.0",
 "Antz (1998)<br>Animation|Children's<br>Popularity:142.0"]

## Mask

In [36]:
getNearestNeighbors('Mask', Y_pmi, 10)

['Mask, The (1994)<br>Comedy|Crime|Fantasy<br>Popularity:217.0',
 "Santa Clause, The (1994)<br>Children's|Comedy|Fantasy<br>Popularity:115.0",
 'Nutty Professor, The (1996)<br>Comedy|Fantasy|Romance|Sci-Fi<br>Popularity:137.0',
 'Liar Liar (1997)<br>Comedy<br>Popularity:119.0',
 'Heart and Souls (1993)<br>Comedy|Fantasy<br>Popularity:57.0',
 'Legend (1985)<br>Adventure|Fantasy|Romance<br>Popularity:63.0',
 'Cool Runnings (1993)<br>Comedy<br>Popularity:53.0',
 'Mother (1996)<br>Comedy<br>Popularity:38.0',
 'Cable Guy, The (1996)<br>Comedy<br>Popularity:25.0',
 'Black Sheep (1996)<br>Comedy<br>Popularity:23.0',
 'Man Who Knew Too Little, The (1997)<br>Comedy|Mystery<br>Popularity:29.0']

In [37]:
getNearestNeighbors('Mask', Y_als, 10)

['Mask, The (1994)<br>Comedy|Crime|Fantasy<br>Popularity:217.0',
 "Santa Clause, The (1994)<br>Children's|Comedy|Fantasy<br>Popularity:115.0",
 'Liar Liar (1997)<br>Comedy<br>Popularity:119.0',
 'Nutty Professor, The (1996)<br>Comedy|Fantasy|Romance|Sci-Fi<br>Popularity:137.0',
 "Small Soldiers (1998)<br>Animation|Children's|Fantasy|War<br>Popularity:40.0",
 "Borrowers, The (1997)<br>Adventure|Children's|Comedy|Fantasy<br>Popularity:28.0",
 "Jumanji (1995)<br>Adventure|Children's|Fantasy<br>Popularity:108.0",
 "FairyTale: A True Story (1997)<br>Children's|Drama|Fantasy<br>Popularity:22.0",
 'That Thing You Do! (1996)<br>Comedy<br>Popularity:96.0',
 "Indian in the Cupboard, The (1995)<br>Adventure|Children's|Fantasy<br>Popularity:69.0",
 'Legend (1985)<br>Adventure|Fantasy|Romance<br>Popularity:63.0']

In [38]:
getNearestNeighbors('Mask', Y_lrec, 10)

['Mask, The (1994)<br>Comedy|Crime|Fantasy<br>Popularity:217.0',
 "Santa Clause, The (1994)<br>Children's|Comedy|Fantasy<br>Popularity:115.0",
 'Cool Runnings (1993)<br>Comedy<br>Popularity:53.0',
 "Small Soldiers (1998)<br>Animation|Children's|Fantasy|War<br>Popularity:40.0",
 'Sister Act (1992)<br>Comedy|Crime<br>Popularity:89.0',
 'Nutty Professor, The (1996)<br>Comedy|Fantasy|Romance|Sci-Fi<br>Popularity:137.0',
 'Toys (1992)<br>Action|Comedy|Fantasy<br>Popularity:47.0',
 "Flubber (1997)<br>Children's|Comedy|Fantasy<br>Popularity:29.0",
 "Borrowers, The (1997)<br>Adventure|Children's|Comedy|Fantasy<br>Popularity:28.0",
 "Jumanji (1995)<br>Adventure|Children's|Fantasy<br>Popularity:108.0",
 "Indian in the Cupboard, The (1995)<br>Adventure|Children's|Fantasy<br>Popularity:69.0"]

## Amityville

In [39]:
getNearestNeighbors('Amityville Horror', Y_pmi, 10)

['Amityville Horror, The (1979)<br>Horror<br>Popularity:41.0',
 'Exorcist III, The (1990)<br>Horror<br>Popularity:15.0',
 'Masque of the Red Death, The (1964)<br>Horror<br>Popularity:11.0',
 'Hunger, The (1983)<br>Horror<br>Popularity:28.0',
 'Dracula (1958)<br>Horror<br>Popularity:28.0',
 'House on Haunted Hill, The (1999)<br>Horror<br>Popularity:22.0',
 "Mummy's Curse, The (1944)<br>Horror<br>Popularity:6.0",
 'Curse of Frankenstein, The (1957)<br>Horror<br>Popularity:10.0',
 'Mummy, The (1959)<br>Horror<br>Popularity:16.0',
 'Tales from the Crypt Presents: Bordello of Blood (1996)<br>Horror<br>Popularity:17.0',
 'Friday the 13th: The Final Chapter (1984)<br>Horror<br>Popularity:7.0']

In [40]:
getNearestNeighbors('Amityville Horror', Y_als, 10)

['Amityville Horror, The (1979)<br>Horror<br>Popularity:41.0',
 'Masque of the Red Death, The (1964)<br>Horror<br>Popularity:11.0',
 'Pit and the Pendulum (1961)<br>Horror<br>Popularity:25.0',
 'Exorcist III, The (1990)<br>Horror<br>Popularity:15.0',
 'Hunger, The (1983)<br>Horror<br>Popularity:28.0',
 'Nightmare on Elm Street 4: The Dream Master, A (1988)<br>Horror<br>Popularity:12.0',
 'Fog, The (1980)<br>Horror<br>Popularity:21.0',
 'Tales from the Crypt Presents: Bordello of Blood (1996)<br>Horror<br>Popularity:17.0',
 'Friday the 13th: The Final Chapter (1984)<br>Horror<br>Popularity:7.0',
 'Friday the 13th (1980)<br>Horror<br>Popularity:45.0',
 'Mummy, The (1932)<br>Horror|Romance<br>Popularity:27.0']

In [41]:
getNearestNeighbors('Amityville Horror', Y_lrec, 10)

['Amityville Horror, The (1979)<br>Horror<br>Popularity:41.0',
 'Dracula (1958)<br>Horror<br>Popularity:28.0',
 'Exorcist III, The (1990)<br>Horror<br>Popularity:15.0',
 'Hunger, The (1983)<br>Horror<br>Popularity:28.0',
 'Masque of the Red Death, The (1964)<br>Horror<br>Popularity:11.0',
 'Cujo (1983)<br>Horror|Thriller<br>Popularity:12.0',
 'Pet Sematary (1989)<br>Horror<br>Popularity:34.0',
 'Cemetery Man (Dellamorte Dellamore) (1994)<br>Comedy|Horror<br>Popularity:27.0',
 'Fog, The (1980)<br>Horror<br>Popularity:21.0',
 'Curse of Frankenstein, The (1957)<br>Horror<br>Popularity:10.0',
 'Hellbound: Hellraiser II (1988)<br>Horror<br>Popularity:23.0']

## Star Trek

In [42]:
getNearestNeighbors('Star Trek', Y_pmi, 10)

['Star Trek: Generations (1994)<br>Action|Adventure|Sci-Fi<br>Popularity:238.0',
 'Star Trek: Insurrection (1998)<br>Action|Sci-Fi<br>Popularity:187.0',
 'Star Trek VI: The Undiscovered Country (1991)<br>Action|Adventure|Sci-Fi<br>Popularity:283.0',
 'Star Trek III: The Search for Spock (1984)<br>Action|Adventure|Sci-Fi<br>Popularity:204.0',
 'Star Trek V: The Final Frontier (1989)<br>Action|Adventure|Sci-Fi<br>Popularity:57.0',
 'Stargate (1994)<br>Action|Adventure|Sci-Fi<br>Popularity:297.0',
 'Star Trek: The Motion Picture (1979)<br>Action|Adventure|Sci-Fi<br>Popularity:137.0',
 'Lost in Space (1998)<br>Action|Sci-Fi|Thriller<br>Popularity:59.0',
 'X-Files: Fight the Future, The (1998)<br>Mystery|Sci-Fi|Thriller<br>Popularity:237.0',
 'Sphere (1998)<br>Adventure|Sci-Fi|Thriller<br>Popularity:48.0',
 'Moonraker (1979)<br>Action|Romance|Sci-Fi<br>Popularity:106.0']

In [43]:
getNearestNeighbors('Star Trek', Y_als, 10)

['Star Trek: Generations (1994)<br>Action|Adventure|Sci-Fi<br>Popularity:238.0',
 'Star Trek: Insurrection (1998)<br>Action|Sci-Fi<br>Popularity:187.0',
 'Star Trek VI: The Undiscovered Country (1991)<br>Action|Adventure|Sci-Fi<br>Popularity:283.0',
 'Star Trek: First Contact (1996)<br>Action|Adventure|Sci-Fi<br>Popularity:485.0',
 'Stargate (1994)<br>Action|Adventure|Sci-Fi<br>Popularity:297.0',
 'X-Files: Fight the Future, The (1998)<br>Mystery|Sci-Fi|Thriller<br>Popularity:237.0',
 'Star Trek V: The Final Frontier (1989)<br>Action|Adventure|Sci-Fi<br>Popularity:57.0',
 'Star Trek: The Motion Picture (1979)<br>Action|Adventure|Sci-Fi<br>Popularity:137.0',
 'Timecop (1994)<br>Action|Sci-Fi<br>Popularity:59.0',
 'Lost in Space (1998)<br>Action|Sci-Fi|Thriller<br>Popularity:59.0',
 'Star Trek III: The Search for Spock (1984)<br>Action|Adventure|Sci-Fi<br>Popularity:204.0']

In [44]:
getNearestNeighbors('Star Trek', Y_lrec, 10)

['Star Trek: Generations (1994)<br>Action|Adventure|Sci-Fi<br>Popularity:238.0',
 'Star Trek: Insurrection (1998)<br>Action|Sci-Fi<br>Popularity:187.0',
 'Star Trek VI: The Undiscovered Country (1991)<br>Action|Adventure|Sci-Fi<br>Popularity:283.0',
 'Stargate (1994)<br>Action|Adventure|Sci-Fi<br>Popularity:297.0',
 'Star Trek III: The Search for Spock (1984)<br>Action|Adventure|Sci-Fi<br>Popularity:204.0',
 'Star Trek: The Motion Picture (1979)<br>Action|Adventure|Sci-Fi<br>Popularity:137.0',
 'Star Trek V: The Final Frontier (1989)<br>Action|Adventure|Sci-Fi<br>Popularity:57.0',
 'X-Files: Fight the Future, The (1998)<br>Mystery|Sci-Fi|Thriller<br>Popularity:237.0',
 'Lost in Space (1998)<br>Action|Sci-Fi|Thriller<br>Popularity:59.0',
 'Moonraker (1979)<br>Action|Romance|Sci-Fi<br>Popularity:106.0',
 'Star Trek IV: The Voyage Home (1986)<br>Action|Adventure|Sci-Fi<br>Popularity:372.0']

In [108]:
# Pick ten of the least-active users among those with at least two training interactions.
# argpartition returns positions within the array it is given, so when the
# activity vector is filtered first those positions must be mapped back through
# `eligible_users` to obtain real R_train row numbers (the values later used as `case`).
user_activity = np.asarray(np.sum(R_train, axis=1)).reshape(-1)
eligible_users = np.flatnonzero(user_activity >= 2)
eligible_users[np.argpartition(user_activity[eligible_users], 10)[:10]]

array([1614, 5150, 4544, 3957, 3235, 4293, 4362, 1620,   20, 3487])

## Case Study 1

In [110]:
case = 1614

In [111]:
Side_info['notes'].values[R_train[case].nonzero()[1]]

array(['North by Northwest (1959)<br>Drama|Thriller<br>Popularity:799.0',
       'Almost Famous (2000)<br>Comedy|Drama<br>Popularity:429.0',
       'Requiem for a Dream (2000)<br>Drama<br>Popularity:64.0'],
      dtype=object)

In [112]:
# Stop pandas from truncating long strings in displayed output.
# NOTE(review): -1 is the legacy "no limit" value; newer pandas releases expect
# None here instead — confirm against the installed pandas version before changing.
pd.set_option('display.max_colwidth', -1)

In [113]:
def getRecommendation(R_train, RQ, Y, index, k):
    """Return the notes of the top-`k` items for one user, skipping items already in R_train.

    Items are scored by the dot product of each row of `Y` with row `index`
    of `RQ` and ranked by descending score. Columns that are non-zero in
    ``R_train[index]`` are removed from the ranking before the first `k`
    entries of the module-level ``Side_info['notes']`` are looked up.
    """
    scores = Y.dot(RQ[index])
    ranked = np.argsort(-scores)
    # Column numbers the user already interacted with in the training matrix.
    seen = R_train[index].nonzero()[1]
    unseen_ranked = ranked[~np.isin(ranked, seen)]
    return Side_info['notes'].values[unseen_ranked][:k]

In [114]:
getRecommendation(R_train, RQ_pmi, Y_pmi, case, 5)

array(['Airplane! (1980)<br>Comedy<br>Popularity:750.0',
       'High Fidelity (2000)<br>Comedy<br>Popularity:565.0',
       'Magnolia (1999)<br>Drama<br>Popularity:471.0',
       'Best in Show (2000)<br>Comedy<br>Popularity:237.0',
       'Rear Window (1954)<br>Mystery|Thriller<br>Popularity:505.0'],
      dtype=object)

In [115]:
getRecommendation(R_train, RQ_als, Y_als, case, 5)

array(['Best in Show (2000)<br>Comedy<br>Popularity:237.0',
       'Nurse Betty (2000)<br>Comedy|Thriller<br>Popularity:173.0',
       'Meet the Parents (2000)<br>Comedy<br>Popularity:258.0',
       'High Fidelity (2000)<br>Comedy<br>Popularity:565.0',
       'Contender, The (2000)<br>Drama|Thriller<br>Popularity:130.0'],
      dtype=object)

In [116]:
getRecommendation(R_train, RQ_lrec, Y_lrec, case, 5)

array(['High Fidelity (2000)<br>Comedy<br>Popularity:565.0',
       'Airplane! (1980)<br>Comedy<br>Popularity:750.0',
       'Magnolia (1999)<br>Drama<br>Popularity:471.0',
       'Rear Window (1954)<br>Mystery|Thriller<br>Popularity:505.0',
       'Stand by Me (1986)<br>Adventure|Comedy|Drama<br>Popularity:886.0'],
      dtype=object)

## Case Study 2

In [119]:
case = 4544

In [120]:
Side_info['notes'].values[R_train[case].nonzero()[1]]

array(['Jurassic Park (1993)<br>Action|Adventure|Sci-Fi<br>Popularity:1133.0',
       u"Bug's Life, A (1998)<br>Animation|Children's|Comedy<br>Popularity:761.0",
       'American Beauty (1999)<br>Comedy|Drama<br>Popularity:2265.0'],
      dtype=object)

In [121]:
getRecommendation(R_train, RQ_pmi, Y_pmi, case, 10)

array(['Braveheart (1995)<br>Action|Drama|War<br>Popularity:1391.0',
       'Men in Black (1997)<br>Action|Adventure|Comedy|Sci-Fi<br>Popularity:950.0',
       u"Toy Story (1995)<br>Animation|Children's|Comedy<br>Popularity:1047.0",
       'Terminator 2: Judgment Day (1991)<br>Action|Sci-Fi|Thriller<br>Popularity:1419.0',
       'Being John Malkovich (1999)<br>Comedy<br>Popularity:1270.0',
       u"Toy Story 2 (1999)<br>Animation|Children's|Comedy<br>Popularity:803.0",
       'Stand by Me (1986)<br>Adventure|Comedy|Drama<br>Popularity:886.0',
       'American Pie (1999)<br>Comedy<br>Popularity:527.0',
       'Shakespeare in Love (1998)<br>Comedy|Romance<br>Popularity:1243.0',
       'Clerks (1994)<br>Comedy<br>Popularity:628.0'], dtype=object)

In [122]:
getRecommendation(R_train, RQ_als, Y_als, case, 10)

array(['Braveheart (1995)<br>Action|Drama|War<br>Popularity:1391.0',
       u"Toy Story (1995)<br>Animation|Children's|Comedy<br>Popularity:1047.0",
       'Men in Black (1997)<br>Action|Adventure|Comedy|Sci-Fi<br>Popularity:950.0',
       'Being John Malkovich (1999)<br>Comedy<br>Popularity:1270.0',
       u"Toy Story 2 (1999)<br>Animation|Children's|Comedy<br>Popularity:803.0",
       'Saving Private Ryan (1998)<br>Action|Drama|War<br>Popularity:1673.0',
       'Terminator 2: Judgment Day (1991)<br>Action|Sci-Fi|Thriller<br>Popularity:1419.0',
       'Star Wars: Episode VI - Return of the Jedi (1983)<br>Action|Adventure|Romance|Sci-Fi|War<br>Popularity:1585.0',
       "Babe (1995)<br>Children's|Comedy|Drama<br>Popularity:759.0",
       'American Pie (1999)<br>Comedy<br>Popularity:527.0'], dtype=object)

In [123]:
getRecommendation(R_train, RQ_lrec, Y_lrec, case, 10)

array(['Men in Black (1997)<br>Action|Adventure|Comedy|Sci-Fi<br>Popularity:950.0',
       u"Toy Story (1995)<br>Animation|Children's|Comedy<br>Popularity:1047.0",
       u"Toy Story 2 (1999)<br>Animation|Children's|Comedy<br>Popularity:803.0",
       'American Pie (1999)<br>Comedy<br>Popularity:527.0',
       'Terminator 2: Judgment Day (1991)<br>Action|Sci-Fi|Thriller<br>Popularity:1419.0',
       'Austin Powers: The Spy Who Shagged Me (1999)<br>Comedy<br>Popularity:421.0',
       'Forrest Gump (1994)<br>Comedy|Romance|War<br>Popularity:961.0',
       'Total Recall (1990)<br>Action|Adventure|Sci-Fi|Thriller<br>Popularity:770.0',
       'Casablanca (1942)<br>Drama|Romance|War<br>Popularity:967.0',
       'Abyss, The (1989)<br>Action|Adventure|Sci-Fi|Thriller<br>Popularity:632.0'],
      dtype=object)

## Case Study 3

In [124]:
case = 3957

In [125]:
Side_info['notes'].values[R_train[case].nonzero()[1]]

array(["One Flew Over the Cuckoo's Nest (1975)<br>Drama<br>Popularity:1097.0",
       'Gladiator (2000)<br>Action|Drama<br>Popularity:867.0',
       'Shanghai Noon (2000)<br>Action<br>Popularity:298.0'], dtype=object)

In [126]:
getRecommendation(R_train, RQ_pmi, Y_pmi, case, 10)

array(['Patriot, The (2000)<br>Action|Drama|War<br>Popularity:460.0',
       'X-Men (2000)<br>Action|Sci-Fi<br>Popularity:612.0',
       'Mission: Impossible 2 (2000)<br>Action|Thriller<br>Popularity:330.0',
       u"Chicken Run (2000)<br>Animation|Children's|Comedy<br>Popularity:589.0",
       'Erin Brockovich (2000)<br>Drama<br>Popularity:536.0',
       'U-571 (2000)<br>Action|Thriller<br>Popularity:263.0',
       'Godfather, The (1972)<br>Action|Crime|Drama<br>Popularity:1438.0',
       'Perfect Storm, The (2000)<br>Action|Adventure|Thriller<br>Popularity:236.0',
       'Braveheart (1995)<br>Action|Drama|War<br>Popularity:1391.0',
       'High Fidelity (2000)<br>Comedy<br>Popularity:565.0'], dtype=object)

In [127]:
getRecommendation(R_train, RQ_als, Y_als, case, 10)

array(['Patriot, The (2000)<br>Action|Drama|War<br>Popularity:460.0',
       'X-Men (2000)<br>Action|Sci-Fi<br>Popularity:612.0',
       u"Chicken Run (2000)<br>Animation|Children's|Comedy<br>Popularity:589.0",
       'Mission: Impossible 2 (2000)<br>Action|Thriller<br>Popularity:330.0',
       'Perfect Storm, The (2000)<br>Action|Adventure|Thriller<br>Popularity:236.0',
       "Schindler's List (1993)<br>Drama|War<br>Popularity:1505.0",
       'High Fidelity (2000)<br>Comedy<br>Popularity:565.0',
       'U-571 (2000)<br>Action|Thriller<br>Popularity:263.0',
       'Remember the Titans (2000)<br>Drama<br>Popularity:159.0',
       'Galaxy Quest (1999)<br>Adventure|Comedy|Sci-Fi<br>Popularity:735.0'],
      dtype=object)

In [128]:
getRecommendation(R_train, RQ_lrec, Y_lrec, case, 10)

array(['Patriot, The (2000)<br>Action|Drama|War<br>Popularity:460.0',
       'X-Men (2000)<br>Action|Sci-Fi<br>Popularity:612.0',
       u"Chicken Run (2000)<br>Animation|Children's|Comedy<br>Popularity:589.0",
       'Mission: Impossible 2 (2000)<br>Action|Thriller<br>Popularity:330.0',
       'Erin Brockovich (2000)<br>Drama<br>Popularity:536.0',
       'L.A. Confidential (1997)<br>Crime|Film-Noir|Mystery|Thriller<br>Popularity:1268.0',
       'Perfect Storm, The (2000)<br>Action|Adventure|Thriller<br>Popularity:236.0',
       'U-571 (2000)<br>Action|Thriller<br>Popularity:263.0',
       'Braveheart (1995)<br>Action|Drama|War<br>Popularity:1391.0',
       'To Kill a Mockingbird (1962)<br>Drama<br>Popularity:473.0'],
      dtype=object)

In [65]:
Side_info['notes'][1058]

'GoodFellas (1990)<br>Crime|Drama<br>Popularity:857.0'