### Goal: Build functions that create the user and rating matrices needed to generate recommendations for new users.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Do not truncate wide cells (the `features` lists and image URLs are long).
# `None` is the supported "no limit" value; the `-1` sentinel has been
# deprecated since pandas 1.0.
pd.set_option('display.max_colwidth', None)

## Get modeled item factors

In [3]:
# Load the modeled item (comic) factors. Rows are looked up by comic id in the
# cells below and carry a `features` list plus the `comic_title`.
# NOTE: unpickling can execute arbitrary code -- only load files you trust.
comic_factors = pd.read_pickle('support_data/comics_factors_20190922.pkl')

## Get user's comics

Run through a few IDs to get the lay of the land.

In [4]:
# Sample comic ids used as the example user's comics throughout this notebook
comic_ids = [2171, 2637]

In [5]:
comic_factors.loc[2171]

features       [0.342639684677124, 0.03126022219657898, 0.0, 0.0, 0.0, 0.045879755169153214, 0.17573542892932892, 0.0, 0.0, 0.0, 0.0, 0.05083069950342178, 0.05866716429591179, 0.28402474522590637, 0.06713326275348663, 0.0, 0.0, 0.07417029142379761, 0.08532290160655975, 0.02406342327594757, 0.0, 0.07196555286645889, 0.03717091679573059, 0.0, 0.0, 0.07209593802690506, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.10203561186790466, 0.04380370303988457, 0.0, 0.0, 0.9630250930786133, 0.011226463131606579, 0.1936074048280716, 0.0, 0.0, 0.07123088091611862, 0.0, 0.026249762624502182, 0.0, 0.0, 0.0, 0.0, 0.03487923741340637, 0.05583704262971878, 0.0, 0.0, 0.16586895287036896, 0.0, 0.0, 0.0, 0.0, 0.0, 0.04705991595983505, 0.0, 0.0, 0.0, 0.04671696573495865, 0.0, 0.04685097932815552, 0.13326099514961243, 0.012874503619968891, 0.0, 0.21109816431999207, 0.0, 0.09588998556137085, 0.07766803354024887, 0.0, 0.09917107969522476, 0.17262506484985352]
comic_title    Fables (Vertigo)                                  

In [6]:
comic_factors.loc[2637]

features       [0.0, 0.0, 0.08305294811725616, 0.012130191549658775, 0.0, 0.0, 0.0031040313187986612, 0.0289621502161026, 0.34123972058296204, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.10401508957147598, 0.04464671388268471, 0.6006190776824951, 0.0, 0.026817915961146355, 0.0, 0.4803149700164795, 0.04294275864958763, 0.003088663099333644, 0.0, 0.0, 0.05587412044405937, 0.0, 0.0977565124630928, 0.0, 0.05319792777299881, 0.07775762677192688, 0.01118086650967598, 0.0, 0.0, 0.08575610816478729, 0.0, 0.0, 0.0, 0.08722236007452011, 0.0, 0.0, 0.0, 0.0, 0.05645575001835823, 0.6026164293289185, 0.0, 0.0, 0.0, 0.05969555303454399, 0.020829834043979645, 0.0, 0.0, 0.06889747083187103, 0.0, 0.0, 0.0, 0.008961442857980728, 0.04409479349851608, 0.0, 0.04654395207762718, 0.0, 0.29225966334342957, 0.028942612931132317, 0.043887875974178314, 0.0, 0.0024360367096960545, 0.0, 0.0, 0.0, 0.2581982910633087, 0.0, 0.15150916576385498, 0.10771816223859787]
comic_title    Gideon Falls (Image)                         

## Support Functions

In [7]:
def create_user_item_matrix(comic_ids, comic_factors):
    """
    Stack the latent-factor vectors of a user's comics into one matrix.

    Parameters
    ----------
    comic_ids = list of comic ids; each must be a label in the index
        of comic_factors
    comic_factors = pandas dataframe with a 'features' column holding
        one factor vector per comic

    Results
    -------
    numpy array of shape (len(comic_ids), number of latent factors);
    row i holds the factors of comic_ids[i]
    """
    # The rank is read off the first row's feature vector
    n_factors = len(comic_factors['features'].iloc[0])

    # Pre-allocate, then fill one row per requested comic
    matrix = np.zeros((len(comic_ids), n_factors))
    for row, comic_id in zip(matrix, comic_ids):
        # `row` is a view into `matrix`, so this writes in place
        row[:] = np.array(comic_factors.loc[comic_id, 'features'])

    return matrix

##### Let's test

In [8]:
item_mtx = create_user_item_matrix(comic_ids=comic_ids,
                                   comic_factors=comic_factors)

In [9]:
item_mtx.shape

(2, 75)

In [10]:
def create_user_impl_rate_matrix(comic_ids, ratings_list=None):
    """
    Build the ratings column vector for a user's comics.

    Parameters
    ----------
    comic_ids = list of comic ids the user has selected
    ratings_list = optional list with one rating per comic id; when
        omitted, every comic gets an implicit rating of 1

    Results
    -------
    numpy array of shape (len(comic_ids), 1)

    Raises
    ------
    ValueError if ratings_list is given and its length differs from
    the length of comic_ids
    """
    if ratings_list is None:
        # Implicit feedback: selecting a comic counts as a rating of 1
        return np.ones((len(comic_ids), 1), 'int')

    # Fail early with a clear message; a mismatched vector would otherwise
    # only surface later as a dimension error in the least-squares solve.
    if len(ratings_list) != len(comic_ids):
        raise ValueError(
            f"ratings_list has {len(ratings_list)} entries "
            f"but comic_ids has {len(comic_ids)}"
        )

    # Wrap in a 1-tuple and transpose to get a column vector
    return np.array((ratings_list, )).T

##### Let's test

In [11]:
rate_mtx = create_user_impl_rate_matrix(comic_ids=comic_ids, )

In [12]:
rate_mtx

array([[1],
       [1]])

In [13]:
def create_user_util_matrix(comics_matrix, user_rating_matrix):
    """
    Solve for the user's factor vector by least squares.

    Finds the vector x that minimises
    ||comics_matrix @ x - user_rating_matrix|| via numpy.linalg.lstsq.

    Parameters
    ----------
    comics_matrix = 2-D numpy array, one row of factors per comic
    user_rating_matrix = column vector with one rating per comic

    Results
    -------
    1-D numpy array of length comics_matrix.shape[1]
    """
    n_factors = comics_matrix.shape[1]

    # lstsq returns (solution, residuals, rank, singular values);
    # only the solution is needed here
    solution = np.linalg.lstsq(comics_matrix, user_rating_matrix, rcond=None)[0]

    # Flatten the (n_factors, 1) column into a plain vector
    return solution.reshape((n_factors, ))

##### Let's test

In [14]:
util_mtx = create_user_util_matrix(comics_matrix=item_mtx,
                                   user_rating_matrix=rate_mtx)

In [15]:
util_mtx

array([0.20815568, 0.01899077, 0.05363835, 0.00783408, 0.        ,
       0.02787223, 0.10876502, 0.01870472, 0.22038392, 0.        ,
       0.        , 0.03087996, 0.03564066, 0.17254675, 0.04078386,
       0.        , 0.06717639, 0.07389323, 0.43973388, 0.01461868,
       0.0173199 , 0.04371951, 0.33278488, 0.02773386, 0.00199476,
       0.04379872, 0.        , 0.03608536, 0.        , 0.06313439,
       0.        , 0.03435698, 0.1122057 , 0.03383199, 0.        ,
       0.        , 0.64042771, 0.00682014, 0.11761767, 0.        ,
       0.05633109, 0.04327319, 0.        , 0.01594689, 0.        ,
       0.03646099, 0.38918966, 0.        , 0.02118935, 0.03392134,
       0.03855337, 0.0134526 , 0.10076639, 0.        , 0.04449627,
       0.        , 0.        , 0.        , 0.03437678, 0.02847788,
       0.        , 0.03005963, 0.02838084, 0.18875097, 0.04715435,
       0.10930111, 0.00782134, 0.00157327, 0.12824341, 0.        ,
       0.05825375, 0.21393681, 0.        , 0.15809667, 0.17443

In [16]:
util_mtx.shape

(75,)

## Candidate Functions

In [17]:
def make_n_comic_recommendations(comics, comic_factors, top_n):
    """
    First draft: return the top_n highest-scoring comics for a user.

    NOTE: a later cell in this notebook defines a function with the same
    name; once that cell has run, the later definition is the one in effect.

    Parameters
    ----------
    comics = list of comic ids (integers)
    comic_factors = pandas dataframe with comic factors
    top_n = integer, # of rows to return

    Results
    -------
    Pandas Dataframe with columns comic_id, comic_title, img_url,
    sorted descending by predicted score (the user's own comics are
    not filtered out)
    """
    # Item matrix and ratings vector for the user's comics
    item_matrix = create_user_item_matrix(comic_ids=comics,
                                          comic_factors=comic_factors)
    ratings = create_user_impl_rate_matrix(comic_ids=comics)

    # Least-squares user factors
    user_factors = create_user_util_matrix(item_matrix, ratings)

    # Score every comic on a copy so the caller's frame is untouched
    scored = comic_factors.copy()
    scored['pred'] = scored['features'].apply(
        lambda feats: np.dot(feats, user_factors))

    # Rank, truncate, and expose the index as a column
    # (the selection below relies on that column being named 'comic_id')
    ranked = scored.sort_values(by='pred', ascending=False).head(top_n)
    ranked = ranked.reset_index()

    return ranked[['comic_id', 'comic_title', 'img_url']].copy()

In [18]:
# Development variant: also returns the score so predictions can be compared
def make_n_comic_recommendations_dev(comics, comic_factors, top_n):
    """
    Return the top_n highest-scoring comics together with their scores.

    Parameters
    ----------
    comics = list of comic ids (integers)
    comic_factors = pandas dataframe with comic factors
    top_n = integer, # of rows to return

    Results
    -------
    Pandas Dataframe with columns comic_id, comic_title, img_url, pred,
    sorted descending by pred (the user's own comics are not filtered out)
    """
    # Build the user's factor vector from their comics
    user_factors = create_user_util_matrix(
        create_user_item_matrix(comic_ids=comics, comic_factors=comic_factors),
        create_user_impl_rate_matrix(comic_ids=comics),
    )

    def score(feats):
        # Predicted score = comic factors . user factors
        return np.dot(feats, user_factors)

    # Work on a copy so the caller's frame is not modified
    scored = comic_factors.copy()
    scored['pred'] = scored['features'].apply(score)

    # Best first; reset_index exposes the index as a column
    # (the selection below relies on that column being named 'comic_id')
    best = scored.sort_values(by='pred', ascending=False).head(top_n)
    best = best.reset_index()

    wanted = ['comic_id', 'comic_title', 'img_url', 'pred']
    return best[wanted].copy()

In [19]:
def make_n_comic_recommendations(comics, comic_factors, top_n):
    """
    Make n comic recommendations

    Parameters
    ----------
    comics = list of comic ids (integers)
    comic_factors = pandas dataframe with comic factors
    top_n = integer, # of comic recommendations desired by user

    Results
    -------
    Pandas Dataframe of n comic recommendations (fewer only if the
    catalogue runs out), sorted descending by utility, with columns
    comic_id, comic_title, img_url, pred. Comics passed in `comics`
    are never returned.
    """
    # Create item matrix
    comic_matrix = create_user_item_matrix(comic_ids=comics,
                                           comic_factors=comic_factors
                                           )
    # Create user matrix
    user_matrix = create_user_impl_rate_matrix(comic_ids=comics)
    # Create utility matrix
    utility_matrix = create_user_util_matrix(comic_matrix, user_matrix)

    # Score every comic on a copy so the caller's dataframe is untouched
    cf = comic_factors.copy()
    cf['pred'] = cf['features'].apply(lambda x: np.dot(x, utility_matrix))

    # Rank the whole catalogue; reset_index exposes the index as a column
    # (the selection below relies on that column being named 'comic_id')
    ranked = cf.sort_values(by=['pred'], ascending=False).reset_index()
    ranked = ranked.loc[:, ['comic_id', 'comic_title', 'img_url', 'pred']]

    # Drop the user's own comics BEFORE truncating. Truncating first to a
    # fixed top_n + 3 window returned fewer than top_n rows whenever more
    # than three of the user's comics ranked inside that window.
    unseen = ranked[~ranked['comic_id'].isin(comics)]

    return unseen.head(top_n).copy()

##### Let's test

In [33]:
comic_ids

[2171, 2637]

In [39]:
# Top-10 recommendations for the sample user
t_df = make_n_comic_recommendations(
    comics=comic_ids,
    comic_factors=comic_factors,
    top_n=10,
)

In [42]:
all_recs = t_df['comic_id'].to_list()

In [43]:
all_recs

[3917, 3908, 3913, 5177, 3912, 87, 2170, 804, 88, 375]

In [44]:
# Sanity check: ids that are both in the user's comics and in the recommendations
list(set(comic_ids).intersection(set(all_recs)))

[]

In [45]:
t_df.loc[t_df['comic_id'].isin(comic_ids)]

Unnamed: 0,comic_id,comic_title,img_url,pred


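Well, the model definitely got the actuals correct! (Note: the current `make_n_comic_recommendations` filters out the user's own comics, so the check above returns no rows; use `make_n_comic_recommendations_dev` to see how the actuals score.)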

What about the other preferred comics?

In [47]:
t_df.sort_values(by=['pred'],ascending=False).head(20)

Unnamed: 0,comic_id,comic_title,img_url,pred
2,3917,Locke & Key Omega (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.825663
3,3908,Locke & Key Alpha (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.810999
4,3913,Locke & Key Guide To Known Ke (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.798321
5,5177,Royal City (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/royal_city.jpg,0.786595
6,3912,Locke & Key Grindhouse One Sh (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.777381
7,87,Ad After Death Book 01 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.759182
8,2170,Fables the Wolf Among Us (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/fables_the_wolf_among_us.jpg,0.749015
9,804,Black Hammer (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/black_hammer.jpg,0.740564
10,88,Ad After Death Book 02 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.728179
11,375,Ascender (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/ascender.jpg,0.71943


### Get recommendations!


In [48]:
top_n = 8

In [50]:
results = make_n_comic_recommendations(comic_ids, comic_factors, top_n)

results

Unnamed: 0,comic_id,comic_title,img_url,pred
2,3917,Locke & Key Omega (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.825663
3,3908,Locke & Key Alpha (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.810999
4,3913,Locke & Key Guide To Known Ke (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.798321
5,5177,Royal City (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/royal_city.jpg,0.786595
6,3912,Locke & Key Grindhouse One Sh (IDW),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.777381
7,87,Ad After Death Book 01 (of 3 (Image),https://comrx.s3-us-west-2.amazonaws.com/covers_large/_no_cover_.jpg,0.759182
8,2170,Fables the Wolf Among Us (Vertigo),https://comrx.s3-us-west-2.amazonaws.com/covers_large/fables_the_wolf_among_us.jpg,0.749015
9,804,Black Hammer (Dark Horse),https://comrx.s3-us-west-2.amazonaws.com/covers_large/black_hammer.jpg,0.740564
