In [1]:
import pickle
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def _load_pickle(path):
    """Return the object deserialized from the pickle file at `path`."""
    # NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
    with open(path, "rb") as handle:
        return pickle.load(handle)


# Artifacts saved alongside the best model.
model = _load_pickle("data/best_model/lfm_with_itemfeats_best.pkl")
train_user_items = _load_pickle("data/best_model/train_user_items.pkl")
item_features = _load_pickle("data/best_model/item_features.pkl")
encode_streamer = _load_pickle("data/best_model/encode_streamer.pkl")

# encode_streamer maps streamer name -> item index; build the inverse lookup.
decode_streamer = {item_index: name for name, item_index in encode_streamer.items()}

# Number of items scored by the recommenders below.
# NOTE(review): hard-coded; presumably matches the item dimension of the loaded artifacts - confirm.
n_items = 1904



In [2]:
# I retrieved my follows from the Twitch API and saved them to `data/my_follows.txt`
# (one streamer name per line).
with open("data/my_follows.txt", "r") as f:
    # Strip surrounding whitespace and skip blank lines so a trailing newline in
    # the file does not produce an empty-string "follow".
    my_follows = [line.strip() for line in f if line.strip()]

Note: Unlike the Implicit package, LightFM does not provide a built-in method for retrieving similar items. We implement one below, following the approach suggested in https://github.com/lyst/lightfm/issues/244.

In [3]:
def similar_items(item_id, item_features, model, N=5):
    """Return the names of the items most similar to `item_id`.

    Item representations are computed as `item_features.dot(model.item_embeddings)`
    and ranked by cosine similarity to the representation of `item_id`.

    Args:
        item_id: Row index of the query item in the item representations.
        item_features: Item-feature matrix; only its `.dot` method is used.
        model: Object exposing an `item_embeddings` array
            (presumably a fitted LightFM model - confirm).
        N: Maximum number of similar items to return.

    Returns:
        A list of at most N names (looked up in the global `decode_streamer`),
        most similar first. The query item itself is never included. Items
        whose cosine similarity is undefined (zero-norm representation) are
        ranked last instead of surfacing as NaN at the top.
    """
    item_representations = item_features.dot(model.item_embeddings)

    # Cosine similarity = dot product / (norm of candidate * norm of query).
    item_norms = np.linalg.norm(item_representations, axis=1)
    dots = item_representations.dot(item_representations[item_id, :])
    denom = item_norms * item_norms[item_id]

    # Undefined similarities (zero denominator) get -inf so they sort last.
    scores = np.full(dots.shape, -np.inf)
    valid = denom > 0
    scores[valid] = dots[valid] / denom[valid]

    # Drop the query item up front instead of over-fetching N+1 and filtering,
    # which could return N+1 names when ties pushed the query out of the top set.
    candidates = np.delete(np.arange(scores.shape[0]), item_id)
    cand_scores = scores[candidates]

    k = min(N, candidates.size)  # never ask argpartition for more than exists
    if k <= 0:
        return []

    top = np.argpartition(cand_scores, -k)[-k:]
    top = top[np.argsort(-cand_scores[top], kind="stable")]
    return [decode_streamer[int(i)] for i in candidates[top]]

# Example: streamers most similar to xQcOW.
query_item_id = encode_streamer["xQcOW"]
similar_items(item_id=query_item_id, item_features=item_features, model=model, N=5)

['Trainwreckstv', 'pokelawls', 'Mizkif', 'm0xyy', 'Lacari']

In [4]:
def recommend_by_user_CF(follows_ID, N=5):
    """Recommend streamers based on the single most similar training user.

    Builds a binary interaction vector from `follows_ID`, finds the row of the
    global `train_user_items` with the highest cosine similarity to it, and
    ranks every item by `model.predict` for that user.

    Args:
        follows_ID: Iterable of item indices (values of `encode_streamer`)
            the user already follows.
        N: Number of recommendations to return.

    Returns:
        A list of up to N streamer names (via the global `decode_streamer`),
        highest predicted score first, excluding items in `follows_ID`.
        Returns an empty list when nothing is left to recommend.
    """
    followed = set(follows_ID)  # constant-time membership for the filter below

    # generate interaction vector
    interactions = np.zeros(n_items)
    for ind in followed:
        interactions[ind] = 1

    # get most similar user in training data
    user_sim_scores = cosine_similarity(interactions.reshape(1, -1), train_user_items)
    most_sim_user_ID = np.argmax(user_sim_scores)

    # score every item for that user
    scores = model.predict(user_ids=[most_sim_user_ID], item_ids=list(range(n_items)),
                           user_features=None, item_features=item_features)

    # convert streamer_ID back to streamer_name, skipping already-followed items
    ranked = sorted(
        ((decode_streamer[i], score) for i, score in enumerate(scores) if i not in followed),
        key=lambda pair: -pair[1],
    )

    # A comprehension (rather than zip(*...)[0]) stays valid when `ranked` is empty.
    return [name for name, _ in ranked[:N]]

# Example: recommendations for my own follows (names missing from the encoder are skipped).
my_follow_ids = [encode_streamer[name] for name in my_follows if name in encode_streamer]
recommend_by_user_CF(follows_ID=my_follow_ids)

['Voyboy', 'Scarra', 'Jankos', 'Gosu', 'shroud']

In [5]:
def recommend_by_user_CF2(follows_ID, N=5, m=5):
    r""" A generalization of recommend_by_user_CF where we identify the top m similar users
        and compute final recommendations by weighting each similar user's recommendations. We use the
        formula: $final_recs = \sum_{u=1}^m \frac{recs_u}{u}$. That is, users which are deemed less
        similar are weighted less.

        When m=1, this is equivalent to recommend_by_user_CF (up to tie-breaking
        between equally similar users).

        Args:
            follows_ID: Iterable of item indices (values of `encode_streamer`)
                the user already follows.
            N: Number of recommendations to return.
            m: Number of most-similar training users to blend.

        Returns:
            A list of up to N streamer names (via the global `decode_streamer`),
            highest blended score first, excluding items in `follows_ID`.
            Returns an empty list when nothing is left to recommend.
    """
    followed = set(follows_ID)  # constant-time membership for the filter below

    # generate interaction vector
    interactions = np.zeros(n_items)
    for ind in followed:
        interactions[ind] = 1

    # get the m most similar users in training data
    user_sim_scores = cosine_similarity(interactions.reshape(1, -1), train_user_items).reshape(-1)
    most_sim_user_IDs = np.argsort(-user_sim_scores)[:m]  # top m similar users

    # blend predictions: the u-th most similar user contributes with weight 1/u
    all_item_ids = np.arange(n_items)  # loop-invariant, built once
    blended = np.zeros(n_items)
    for rank, user_id in enumerate(most_sim_user_IDs, start=1):
        user_scores = model.predict(user_ids=user_id, item_ids=all_item_ids,
                                    user_features=None, item_features=item_features)
        blended += user_scores / rank

    # convert streamer_ID back to streamer_name, skipping already-followed items
    ranked = sorted(
        ((decode_streamer[i], score) for i, score in enumerate(blended) if i not in followed),
        key=lambda pair: -pair[1],
    )

    # A comprehension (rather than zip(*...)[0]) stays valid when `ranked` is empty.
    return [name for name, _ in ranked[:N]]

# Example: blend the 10 most similar users (names missing from the encoder are skipped).
my_follow_ids = [encode_streamer[name] for name in my_follows if name in encode_streamer]
recommend_by_user_CF2(follows_ID=my_follow_ids, m=10)

['shroud', 'Voyboy', 'Gosu', 'Jankos', 'tarzaned']

In [24]:
def recommend(follows, N=5):
    """Entry point: recommend streamers for a list of followed streamer names.

    Dispatches on how many of `follows` are known to `encode_streamer`:
    none given -> a prompt string; none known -> a "not found" string;
    exactly one known -> item-to-item similarity via `similar_items`;
    more than one -> `recommend_by_user_CF`.

    Returns either a message string or whatever the chosen recommender returns.
    """
    if len(follows) == 0:
        return "Enter a streamer"

    # Keep only the names the encoder knows, translated to item indices.
    known_ids = []
    for name in follows:
        if name in encode_streamer:
            known_ids.append(encode_streamer[name])

    if len(known_ids) == 0:
        return "No streamers found in our database"

    if len(known_ids) == 1:
        return similar_items(item_id=known_ids[0], item_features=item_features, model=model, N=N)

    return recommend_by_user_CF(known_ids, N=N)

In [25]:
# Smoke-test each branch of recommend(); one printed line per case.
recommend_cases = [
    [],          # test 1: 0 follows
    ["xQcOW"],   # test 2: 1 follow
    my_follows,  # test 3: > 1 follow
]
for case in recommend_cases:
    print(recommend(case))

Enter a streamer
['Trainwreckstv', 'pokelawls', 'Mizkif', 'm0xyy', 'Lacari']
['Voyboy', 'Scarra', 'Jankos', 'Gosu', 'shroud']
