## Import local packages, e.g., dice, spotlight, and contant variables setting

In [4]:
import os
import sys

for p in ['../spotlight_ext', '../dice_ext']:
    module_path = os.path.abspath(os.path.join(p))
    if module_path not in sys.path:
        sys.path.append(module_path)

In [5]:
import numpy as np
import scipy.stats as st
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer

import torch

In [6]:
models_path = '../models'

## Usage example for spotlight

In [7]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import mrr_score
from spotlight.factorization.implicit import ImplicitFactorizationModel

In [8]:
# dataset = get_movielens_dataset(variant='100K')

# train, test = random_train_test_split(dataset)

# model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
# model.fit(train)

# mrr = mrr_score(model, test)

## **Sequential models** (candidate for our problem)

In [9]:
from spotlight.cross_validation import user_based_train_test_split
from spotlight.datasets.synthetic import generate_sequential
from spotlight.evaluation import sequence_mrr_score
from spotlight.sequence.implicit import ImplicitSequenceModel

# dataset = generate_sequential(num_users=100,
#                               num_items=1000,
#                               num_interactions=10000,
#                               concentration_parameter=0.01,
#                               order=3)

# train, test = user_based_train_test_split(dataset)

dataset = get_movielens_dataset(variant='1M')
train, test = random_train_test_split(dataset)

train = train.to_sequence()
test = test.to_sequence()

## train model

In [26]:
model = ImplicitSequenceModel(
    batch_size=256,
    embedding_dim=32,
    l2=0.0,
    learning_rate=0.05,
    n_iter=11,
    representation='lstm',
    loss='adaptive_hinge',
#     use_cuda=torch.cuda.is_available(),
    random_state=np.random.RandomState(2020)
)
model.fit(train)

mrr = sequence_mrr_score(model, test)

## save model

In [49]:
ofile = 'entire_model_1m.pt'

torch.save(model, os.path.join(models_path, ofile))

## or load a saved model

In [12]:
ofile = 'entire_model_1m.pt'

model = torch.load(os.path.join(models_path, ofile))

In [10]:
items_interacted = test.sequences[test.user_ids==2][0]

In [14]:
predictions = -model.predict(items_interacted[:-1])
print(f'Item to predict: {items_interacted[-1]}')

Item to predict: 134


In [32]:
next_item_pos = st.rankdata(predictions, method='ordinal')[items_interacted[-1]]
next_item_pos

402

In [33]:
sorted(enumerate(predictions), key=lambda x: x[1])[int(next_item_pos) - 1]

(134, -0.9794686)

In [14]:
param_grid = dict(
    n_iter=st.randint(10, 20),
    representation=['cnn', 'lstm', 'mixture'],
    loss=['adaptive_hinge', 'hinge', 'bpr'],
    embedding_dim=[32, 64, 128, 256],
    batch_size=[32, 64, 128, 256],
    learning_rate=st.expon(loc=0.0001, scale=0.1),
    l2=st.expon(loc=0.0, scale=0.1)
)
score = make_scorer(sequence_mrr_score)
                    
grid = RandomizedSearchCV(
    estimator=ImplicitSequenceModel(), param_distributions=param_grid, n_jobs=4, cv=3,
    scoring=score, verbose=1, n_iter=100
)
grid_result = grid.fit(train)

TypeError: Singleton array array(<Sequence interactions dataset (8413 sequences x 10 sequence length)>,
      dtype=object) cannot be considered a valid collection.

## Causal convolutions for sequence-based recommendations

In [70]:
hyperparameters = {
    'embedding_dim': 128,
    'kernel_width': 5,
    'dilation': [1, 2, 4],
    'num_layers': 5,
    'nonlinearity': 'relu',
    'residual': True,
    'loss': 'adaptive_hinge',
    'batch_size': 128,
    'learning_rate': 0.01,
    'l2': 0,
    'n_iter': 50
}

In [71]:
import torch

from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.sequence.representations import CNNNet
from spotlight.evaluation import sequence_mrr_score


net = CNNNet(train.num_items,
             embedding_dim=hyperparameters['embedding_dim'],
             kernel_width=hyperparameters['kernel_width'],
             dilation=hyperparameters['dilation'],
             num_layers=hyperparameters['num_layers'],
             nonlinearity=hyperparameters['nonlinearity'],
             residual_connections=hyperparameters['residual'])

model = ImplicitSequenceModel(loss=hyperparameters['loss'],
                              representation=net,
                              batch_size=hyperparameters['batch_size'],
                              learning_rate=hyperparameters['learning_rate'],
                              l2=hyperparameters['l2'],
                              n_iter=hyperparameters['n_iter'],
                              use_cuda=torch.cuda.is_available(),
#                               random_state=random_state
                             )

model.fit(train)

test_mrr = sequence_mrr_score(model, test)
# val_mrr = sequence_mrr_score(model, validation)

In [74]:
print(f'Test MRR {test_mrr}')

Test MRR [0.01612903 0.0060241  0.00290698 0.00218818 0.05263158 0.00220751
 0.01052632 0.0052356  0.03333333 0.00108696 0.05       0.33333333
 0.00411523 0.00108696 0.00116009 0.01754386 0.00115875 0.00174216
 1.         0.00273224 0.00103199 0.01149425 0.01470588 0.00150376
 0.00115741 0.00134771 0.00526316 0.00189036 0.00181488 0.00409836
 0.00537634 0.00101833 0.01075269 0.00168067 1.         0.00232019
 0.00315457 0.01020408 0.00111235 0.00290698 0.125      0.00584795
 0.01149425 0.00970874 0.0013624  0.00161812 0.00175439 0.00308642
 0.00275482 1.         0.00137931 0.00423729 0.16666667 0.00187266
 0.00138122 0.00154799 0.00485437 0.00121951 0.00127877 0.125
 0.00117647 0.00507614 0.00564972 1.         0.01960784 0.00169205
 0.00172414 0.00198807 0.01282051 0.0010989  0.00555556 0.00116686
 0.00247525 0.00689655 0.01492537 0.0037037  0.01315789 0.001321
 0.00143472 0.001287   0.00662252 0.00168919 0.00280899 0.00298507
 0.004      0.03333333 0.00204499 0.0013245  0.00181818 0.00

## Usage example for DiCE with torch

In [16]:
import dice_ml
from dice_ml.utils import helpers # helper functions

backend = 'PYT'
# Dataset for training an ML model
d = dice_ml.Data(dataframe=helpers.load_adult_income_dataset(),
                 continuous_features=['age', 'hours_per_week'],
                 outcome_name='income')
# Pre-trained ML model
m = dice_ml.Model(model_path=dice_ml.utils.helpers.get_adult_income_modelpath(backend=backend), backend=backend)
# DiCE explanation instance
exp = dice_ml.Dice(d, m)

In [53]:
query_instance = {
    'age':22,
    'workclass':'Private',
    'education':'HS-grad',
    'marital_status':'Single',
    'occupation':'Service',
    'race': 'White',
    'gender':'Female',
    'hours_per_week': 45
}

In [55]:
# Generate counterfactual examples
dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=4, desired_class="opposite")
# Visualize counterfactual explanation
dice_exp.visualize_as_dataframe()

Diverse Counterfactuals found! total time taken: 00 min 03 sec
Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,22.0,Private,HS-grad,Single,Service,White,Female,45.0,4.2e-05



Diverse Counterfactual set (new outcome : 1)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,57.0,Private,Doctorate,Married,White-Collar,White,Female,45.0,0.993
1,33.0,Private,Prof-school,Married,Service,White,Male,39.0,0.964
2,22.0,Self-Employed,Prof-school,Married,Service,White,Female,45.0,0.748
3,49.0,Private,Masters,Married,Service,White,Female,62.0,0.957


# Brute-force example on Sequential model

In [37]:
# load trained model
ofile = 'entire_model_1m.pt'
model = torch.load(os.path.join(models_path, ofile))

user_id = 2
test.sequences[test.user_ids==user_id]

In [28]:
items_interacted = test.sequences[test.user_ids==user_id][0]
predictions = -model.predict(items_interacted[:-5])
st.rankdata(predictions, method='ordinal')[items_interacted[-5:]]

array([ 76,  40,  45, 641, 402])

In [31]:
items_interacted[-5]

126

In [36]:
sorted(enumerate(predictions), key=lambda x: x[1])[:10]

[(118, -3.549449),
 (196, -3.5017548),
 (151, -3.497812),
 (121, -3.4975362),
 (94, -3.4891403),
 (190, -3.4395144),
 (714, -3.363245),
 (265, -3.3596818),
 (63, -3.3133578),
 (59, -3.2280397)]

In [88]:
class SelectedInteractions:
    def __str__(self):
        return f'found in iter {self.counter_found_best} with score/in pos {self.score} with interactions {self.interactions}\n10-best proposed {self.items_order}'
    
    score = 0
    interactions = []
    items_order = []
    counter_found_best = -1

In [95]:
from itertools import permutations

no_interactions = 5
user_id = 8

items_interacted = test.sequences[test.user_ids==user_id][0]
perm = permutations(items_interacted[:no_interactions], no_interactions - 1)

print(f'Complete interactions for user {user_id} are {items_interacted}')
print(f'Searching for removing 1 of {no_interactions} interacted item that makes next item {items_interacted[no_interactions]} harder to be selected')
print(f'Current pos of next item ({items_interacted[no_interactions]}) is {st.rankdata(predictions, method="ordinal")[items_interacted[no_interactions]]}\n')

counter = 1

best_inter = SelectedInteractions()
for i in list(perm):
    preds = -model.predict(i)
    item_pos = st.rankdata(preds, method='ordinal')[items_interacted[no_interactions]]    
    if item_pos > best_inter.score:
        best_inter.score = item_pos
        best_inter.interactions = i
        best_inter.items_order = sorted(enumerate(preds), key=lambda x: x[1])[:10]
        best_inter.counter_found_best = counter
        
    counter += 1
    
print(best_inter)

Complete interactions for user 8 are [446 452 509 449  37 417 119 512 444 442]
Searching for removing 1 of 5 interacted item that makes next item 417 harder to be selected
Current pos of next item (417) is 309

found in iter 70 with score/in pos 732 with interactions (509, 37, 452, 449)
10-best proposed [(485, -1.8162483), (126, -1.7537876), (490, -1.737611), (28, -1.7062362), (439, -1.6843892), (789, -1.6683452), (124, -1.6451916), (2214, -1.6287087), (909, -1.60427), (353, -1.6042694)]
