In [12]:
import numpy as np
import scipy.stats as st
from tqdm.notebook import trange

from itertools import combinations
import torch.nn.functional as F
import torch

In [2]:
class StaticVars:
    """Numeric extremes used in this notebook as sentinel / "worst case" values."""

    # Largest finite value representable as a 32-bit float.
    FLOAT_MAX = np.finfo(np.dtype('float32')).max
    # Largest value representable as a signed 32-bit integer.
    INT_MAX = np.iinfo(np.dtype('int32')).max

In [11]:
class InteractionsInfo:
    """Record of the counterfactual search state for one (user, item) pair.

    Two recording paths exist:

    * ``set_values`` ranks the predictions itself and takes the loss as a
      ``[yloss, proximity]`` pair. It stores the kept items directly in
      ``interactions`` (replacing the dict created in ``__init__``) and the
      full history in ``complete_interactions``.
    * ``update_values`` takes a precomputed ranking and a scalar loss and
      fills the ``interactions`` / ``loss`` / ``iter_no`` dicts.

    ``__str__`` works after either path, and also before any solution was
    recorded.
    """

    # Class-level defaults; shadowed on the instance once `set_values` runs.
    score = 0
    y_loss = 1.0
    proximity_loss = StaticVars.FLOAT_MAX

    def __init__(self, uid, iid=-1, p=-1, fobj=True, fconstraint=True):
        """Create an empty record.

        Args:
            uid: user identifier.
            iid: index of the tracked item in the prediction vector.
            p: unused; kept so existing positional callers keep working.
            fobj: whether ``needs_update`` must enforce the proximity check.
            fconstraint: whether ``needs_update`` must enforce the yloss check.
        """
        self.user_id = uid
        self.item_id = iid

        self.satisfy_objective = fobj
        self.satisfy_contraints = fconstraint

        self.recommendation = None
        self.interactions = dict(original=None, initial=None, best=None)
        self.loss = dict(initial=StaticVars.FLOAT_MAX, best=StaticVars.FLOAT_MAX)
        self.iter_no = dict(initial=-1, best=-1, total=0)

        self.solution_found = False
        self.pos = StaticVars.INT_MAX
        self.cfs_dist = 0

    @staticmethod
    def _as_scalar(value):
        """Unwrap a torch tensor to its first element; return anything else unchanged."""
        if isinstance(value, torch.Tensor):
            return value.detach().numpy().flatten()[0]
        return value

    def __str__(self):
        # `recommendation` is None until a solution is stored; show an empty
        # list instead of raising on iteration.
        # Either slot of a stored tuple may hold a tensor, so unwrap both.
        sorted_recommended_items = [
            (self._as_scalar(n[0]), self._as_scalar(n[1]))
            for n in (self.recommendation or [])
        ]

        # After `set_values`, `interactions` is the raw kept-item sequence
        # rather than the dict built in `__init__`.
        if isinstance(self.interactions, dict):
            best_interactions = self.interactions["best"]
        else:
            best_interactions = self.interactions

        return (f'\n'
                f'user_id: {self.user_id}, item_id: {self.item_id}\n'
                f'yloss: {round(self.y_loss, 4)}, proximity_loss: {int(self.proximity_loss)}\n'
                f'Item {self.item_id} is in position {self.pos} now!!!\n'
                f'Found in iteration {self.iter_no["best"]} and the interacted items are {best_interactions}\n'
                f'10-best recommended items {sorted_recommended_items}\n')

    def set_flags(self, do_objective, do_contraints):
        """Switch the two checks applied by ``needs_update`` on or off."""
        self.satisfy_objective = do_objective
        self.satisfy_contraints = do_contraints

    def needs_update(self, loss):
        """Tell whether a candidate should replace the stored solution.

        Args:
            loss: mapping with keys ``'yloss'`` and ``'proximity'``; an empty
                mapping always yields ``False``.

        Returns:
            ``True`` when every enabled check passes: yloss strictly lower
            than the stored one, proximity not higher than the stored one.
            A disabled check always passes.
        """
        if len(loss):
            does_contraints = (not self.satisfy_contraints or self.y_loss > loss['yloss'])
            does_objective = (not self.satisfy_objective or self.proximity_loss >= loss['proximity'])

            if does_contraints and does_objective:
                return True

        return False

    def set_values(self, predictions, interacted_items, tot_interacted_items, loss, iter_no, k=10):
        """Store a candidate as the current solution.

        Args:
            predictions: score per item; higher scores rank first.
            interacted_items: items kept in the candidate.
            tot_interacted_items: the full original interaction sequence.
            loss: indexable as ``loss[0]`` (yloss) and ``loss[1]`` (proximity).
            iter_no: iteration counter at which the candidate was produced.
            k: size of the recommendation list to keep.
        """
        # Ordinal ranks of the negated scores: rank 1 is the highest score.
        rk_data = st.rankdata(-predictions, method='ordinal')
        self.pos = rk_data[self.item_id]

        # (score, index) tuples for the k best-ranked items, best first.
        accepted_preds = (rk_data <= k).nonzero()
        self.recommends = sorted(
            zip(predictions[accepted_preds], *accepted_preds),
            key=lambda x: x[0], reverse=True)
        # Mirror into the attributes `__str__` reads so the record is printable.
        self.recommendation = self.recommends
        self.iter_found = iter_no
        self.iter_no['best'] = iter_no

        self.y_loss = loss[0]
        self.proximity_loss = loss[1]
        # NOTE: this replaces the dict from `__init__`; downstream code in this
        # notebook reads `interactions` / `complete_interactions` as sequences.
        self.interactions = interacted_items
        self.complete_interactions = tot_interacted_items

        self.solution_found = True

    def update_values(self, predictions, ranking, interacted_items, original_input, loss, iter_no, k=10):
        """Record a candidate if it pushes the item out of the top-k with a lower loss.

        The candidate is accepted only when the item ranks below position
        ``k``, its rank does not exceed the stored ``pos``, and ``loss`` is
        strictly lower than the best loss so far. ``iter_no['total']`` is
        updated on every call, accepted or not.
        """
        if ranking[self.item_id] > k and ranking[self.item_id] <= self.pos and loss < self.loss['best']:
            self.pos = ranking[self.item_id]

            # (score, index) tuples for the k best-ranked items, best first.
            accepted_preds = (ranking <= k).nonzero()
            self.recommendation = sorted(
                zip(predictions[accepted_preds], *accepted_preds),
                key=lambda x: x[0], reverse=True)

            self.iter_no['best'] = iter_no
            self.loss['best'] = loss
            self.interactions['best'] = interacted_items
            # The first accepted candidate is also remembered as the initial one.
            if not self.solution_found:
                self.iter_no['initial'] = iter_no
                self.loss['initial'] = loss
                self.interactions['initial'] = interacted_items

            self.interactions['original'] = original_input
            # Number of interactions dropped relative to the original input.
            self.cfs_dist = len(self.interactions['original']) - len(self.interactions['best'])

            self.solution_found = True

        self.iter_no['total'] = iter_no

In [4]:
def load_model(model_type='pooling', path='../models'):
    """Load a saved model from ``<path>/<model_type>_model_1m_20interactions.pt``.

    Args:
        model_type: prefix of the file name.
        path: directory containing the file.

    Returns:
        Whatever object ``torch.load`` deserialises from the file.
    """
    # Local import: `os` is not part of the notebook's visible import cell,
    # so the original call raised NameError on a fresh kernel.
    import os

    ofile = f'{model_type}_model_1m_20interactions.pt'
    # SECURITY: torch.load unpickles the file; only load trusted checkpoints.
    # NOTE(review): recent PyTorch releases default to weights_only=True, which
    # may reject whole-model pickles - confirm against the installed version.
    return torch.load(os.path.join(path, ofile))

In [5]:
def save_model(model, model_type='pooling', path='../models'):
    """Serialise ``model`` to ``<path>/<model_type>_model_1m_20interactions.pt``.

    Args:
        model: object handed to ``torch.save``.
        model_type: prefix of the file name.
        path: directory to write into.

    Returns:
        The return value of ``torch.save``.
    """
    # Local import: `os` is not part of the notebook's visible import cell,
    # so the original call raised NameError on a fresh kernel.
    import os

    ofile = f'{model_type}_model_1m_20interactions.pt'
    return torch.save(model, os.path.join(path, ofile))

In [6]:
def find_cfs(dataset, model, excluded_item_pos, no_users=None, max_allowed_permutations=None, top_k=10, total_CFs=1):
    """Brute-force search for interaction subsets that demote a recommended item.

    For every user and every fully populated sequence, the item at position
    ``excluded_item_pos`` of the initial recommendation list is selected.
    Subsets of the sequence (largest first) are then scored with the model,
    and the best candidate according to ``InteractionsInfo.needs_update`` is
    recorded.

    Args:
        dataset: object exposing ``user_ids``, ``sequences`` and
            ``max_sequence_length`` (presumably a sequence-interactions
            dataset - confirm against the caller).
        model: object whose ``predict(items)`` returns one score per item.
        excluded_item_pos: 1-based position of the item to demote in the
            initial recommendation list; capped at ``top_k``.
        no_users: exclusive upper bound of the user ids to process; defaults
            to ``max(dataset.user_ids) + 1``.
        max_allowed_permutations: bounds how small the kept subsets may get;
            defaults to ``dataset.max_sequence_length``.
        top_k: length of the recommendation list.
        total_CFs: forwarded to ``compute_yloss`` / ``compute_proximity_loss``.

    Returns:
        ``(best_tot_loss_data, best_yloss_data)``: two lists with one entry
        per user, each entry a list holding an ``InteractionsInfo`` (or
        ``None`` for skipped sequences) per sequence. The yloss records are
        created but never updated by this function.
    """
    # Fixed: the attribute was misspelled `users_ids`; every other access in
    # this notebook uses `user_ids`, so the default path raised AttributeError.
    num_users = no_users or max(dataset.user_ids) + 1
    max_perms = max_allowed_permutations or dataset.max_sequence_length

    best_tot_loss_data = []
    best_yloss_data = []

    for user_id in trange(1, num_users):
        # Select this user's sequences once instead of re-masking per access.
        user_sequences = dataset.sequences[dataset.user_ids == user_id]
        seq_size = len(user_sequences)
        _total_loss = [None] * seq_size
        _yloss = [None] * seq_size

        for j in range(seq_size):
            items_interacted = user_sequences[j]

            # Only sequences whose entries are all > 0 are searched
            # (presumably 0 is padding - confirm).
            if not all(v > 0 for v in items_interacted):
                continue

            # Negate so that an ascending argsort lists the best items first;
            # already-seen items are pushed to the very end.
            predictions = -model.predict(items_interacted)
            predictions[items_interacted] = StaticVars.FLOAT_MAX

            ranked_items = predictions.argsort()
            kth_item = ranked_items[top_k - 1]
            rec_item_exclude = ranked_items[min(top_k, int(excluded_item_pos)) - 1]

            _total_loss[j] = InteractionsInfo(user_id, rec_item_exclude)
            _yloss[j] = InteractionsInfo(user_id, rec_item_exclude, fobj=False)

            counter = 1
            seq_len = len(items_interacted)

            # Try larger subsets first; stop growing the removal set as soon
            # as some subset size produced a solution.
            for subset_size in range(seq_len - 1, max(0, seq_len - max_perms), -1):
                if _total_loss[j].solution_found:
                    break

                for kept_items in combinations(items_interacted, subset_size):
                    # Model output is treated as logits and normalised with softmax.
                    logits = torch.from_numpy(model.predict(kept_items)).float()
                    preds = F.softmax(logits, dim=0)
                    preds_np = preds.numpy()

                    yloss = compute_yloss(preds_np[rec_item_exclude], preds_np[kth_item], total_CFs)
                    proximity_loss = compute_proximity_loss(
                        np.asarray(kept_items)[np.newaxis, :], items_interacted, total_CFs)

                    # Keep the candidate if it beats the stored one.
                    if _total_loss[j].needs_update(dict(yloss=yloss, proximity=proximity_loss)):
                        _total_loss[j].set_values(
                            preds, kept_items, items_interacted, [yloss, proximity_loss], counter, top_k)

                    counter += 1

        best_tot_loss_data.append(_total_loss)
        best_yloss_data.append(_yloss)

    return (best_tot_loss_data, best_yloss_data)

In [1]:
def gpu_embeddings_to_cosine_similarity_matrix(E):
    """Return the (n, n) matrix of pairwise cosine similarities of the rows of ``E``.

    The Gram matrix ``E @ E.T`` is divided by the row L2 norms, first along
    the columns and then along the rows. No epsilon is applied, so a
    zero-norm row produces non-finite entries.
    """
    row_norms = E.norm(2, 1)
    gram = E @ E.t()
    # Same order as two successive divisions: by norm[j] first, then by norm[i].
    return (gram / row_norms) / row_norms.unsqueeze(-1)

In [7]:
from torch.nn.functional import cosine_similarity


def embeddings_to_cosine_similarity_matrix(E):
    """Return the (n, n) tensor of pairwise cosine similarities of the rows of ``E``.

    Entry ``[i, j]`` is ``cosine_similarity(E[j], E[i], dim=0)``, computed
    pair by pair.
    """
    matrix_rows = []
    for b in E:
        # One row: similarity of every embedding against `b`.
        row_values = [cosine_similarity(a, b, dim=0) for a in E]
        matrix_rows.append(torch.stack(row_values, dim=-1))
    return torch.stack(matrix_rows)

In [1]:
from scipy.spatial.distance import pdist, squareform


def compute_sim_matrix(dataset, metric='jaccard', adjusted=False):
    """Compute an item-item similarity matrix from implicit feedback.

    A (num_users, num_items) boolean matrix is built that marks which items
    each user interacted with; similarity is ``1 - distance`` between the
    item columns.

    Args:
        dataset: object exposing ``num_users``, ``num_items``, ``user_ids``,
            ``item_ids`` and ``ratings``.
        metric: distance metric name forwarded to ``scipy``'s ``pdist``.
        adjusted: when true, subtract each user's mean from that user's row
            before computing distances.

    Returns:
        A square (num_items, num_items) array of similarities.
    """
    # `np.bool` was only an alias of the builtin and was removed in NumPy 1.24;
    # the builtin `bool` yields the same dtype on every NumPy version.
    M = np.zeros((dataset.num_users, dataset.num_items), dtype=bool)
    for u in trange(1, dataset.num_users):
        user_mask = dataset.user_ids == u
        # Accumulate into the boolean row: a non-zero rating marks the item.
        # NOTE(review): relies on ufunc.at casting the sum back to bool - confirm.
        np.add.at(M[u], dataset.item_ids[user_mask], dataset.ratings[user_mask])

    if adjusted:
        M_u = M.mean(axis=1)
        M = M - M_u[:, np.newaxis]

    similarity_matrix = 1 - squareform(pdist(M.T, metric))

    return similarity_matrix

In [12]:
from collections import Counter


def rank_interactions_to_excluded_item_per_user(cfs, sims_matrix):
    """Tally the similarity rank of a removed interaction for each solved record.

    Every item of a record's full history is ranked by its similarity to the
    tracked item (rank 1 = most similar). Of the removed items - those in the
    history but not in the kept set - the rank of the one appearing last in
    the history is counted.

    Args:
        cfs: iterable of per-user lists of records; ``None`` entries are skipped.
        sims_matrix: item-item similarity matrix indexed as ``[item, items]``.

    Returns:
        ``(Counter of ranks, list of user ids whose records have no solution)``.
    """
    unsolved_users = []
    rank_tally = Counter()

    for user_records in cfs:
        for record in user_records:
            if record is None:
                continue

            if not record.solution_found:
                unsolved_users.append(record.user_id)
                continue

            history = record.complete_interactions

            # Ascending ranks flipped so that the most similar item gets rank 1.
            ascending_rank = st.rankdata(sims_matrix[record.item_id, history])
            similarity_rank = len(history) - ascending_rank + 1

            # Positions in the history of the items that were dropped.
            removed_items = list(set(history) - set(record.interactions))
            removed_positions = np.nonzero(np.isin(history, removed_items))

            removed_ranks = similarity_rank[removed_positions].astype(int)
            # Only the last removed position contributes to the tally.
            rank_tally.update(sorted(removed_ranks[-1:]))

    return (rank_tally, unsolved_users)