## Import local packages, e.g., dice, spotlight

In [17]:
import os
import sys

for p in ['../spotlight_ext', '../dice_ext']:
    module_path = os.path.abspath(os.path.join(p))
    if module_path not in sys.path:
        sys.path.append(module_path)

## Usage example for spotlight

In [8]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.evaluation import mrr_score
from spotlight.factorization.implicit import ImplicitFactorizationModel

In [18]:
dataset = get_movielens_dataset(variant='100K')

train, test = random_train_test_split(dataset)

model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
model.fit(train)

mrr = mrr_score(model, test)

## Sequential models

In [113]:
from spotlight.cross_validation import user_based_train_test_split
from spotlight.datasets.synthetic import generate_sequential
from spotlight.evaluation import sequence_mrr_score
from spotlight.sequence.implicit import ImplicitSequenceModel

# dataset = generate_sequential(num_users=100,
#                               num_items=1000,
#                               num_interactions=10000,
#                               concentration_parameter=0.01,
#                               order=3)

# train, test = user_based_train_test_split(dataset)

dataset = get_movielens_dataset(variant='100K')
train, test = random_train_test_split(dataset)

train = train.to_sequence()
test = test.to_sequence()

model = ImplicitSequenceModel(n_iter=3,
                              representation='cnn',
                              loss='bpr')
model.fit(train)

mrr = sequence_mrr_score(model, test)

In [114]:
predictions = model.predict([1,2,3])
sorted(enumerate(predictions), reverse=True, key=lambda x: x[1])[:10]

[(748, 16.099224),
 (286, 15.949971),
 (294, 15.349819),
 (100, 15.261223),
 (174, 15.060287),
 (258, 15.014128),
 (50, 14.754667),
 (98, 14.642721),
 (300, 14.281036),
 (181, 14.238572)]

In [130]:
predictions = model.predict(test.sequences[test.user_ids==940])
sorted(enumerate(predictions), reverse=True, key=lambda x: x[1])[:10]

[(748, 18.539179),
 (286, 18.417192),
 (294, 17.75017),
 (100, 17.592972),
 (258, 17.393171),
 (174, 17.379957),
 (50, 17.03463),
 (98, 16.864065),
 (300, 16.497986),
 (181, 16.462158)]

## Causal convolutions for sequence-based recommendations

In [70]:
hyperparameters = {
    'embedding_dim': 128,
    'kernel_width': 5,
    'dilation': [1, 2, 4],
    'num_layers': 5,
    'nonlinearity': 'relu',
    'residual': True,
    'loss': 'adaptive_hinge',
    'batch_size': 128,
    'learning_rate': 0.01,
    'l2': 0,
    'n_iter': 50
}

In [71]:
import torch

from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.sequence.representations import CNNNet
from spotlight.evaluation import sequence_mrr_score


net = CNNNet(train.num_items,
             embedding_dim=hyperparameters['embedding_dim'],
             kernel_width=hyperparameters['kernel_width'],
             dilation=hyperparameters['dilation'],
             num_layers=hyperparameters['num_layers'],
             nonlinearity=hyperparameters['nonlinearity'],
             residual_connections=hyperparameters['residual'])

model = ImplicitSequenceModel(loss=hyperparameters['loss'],
                              representation=net,
                              batch_size=hyperparameters['batch_size'],
                              learning_rate=hyperparameters['learning_rate'],
                              l2=hyperparameters['l2'],
                              n_iter=hyperparameters['n_iter'],
                              use_cuda=torch.cuda.is_available(),
#                               random_state=random_state
                             )

model.fit(train)

test_mrr = sequence_mrr_score(model, test)
# val_mrr = sequence_mrr_score(model, validation)

In [74]:
print(f'Test MRR {test_mrr}')

Test MRR [0.01612903 0.0060241  0.00290698 0.00218818 0.05263158 0.00220751
 0.01052632 0.0052356  0.03333333 0.00108696 0.05       0.33333333
 0.00411523 0.00108696 0.00116009 0.01754386 0.00115875 0.00174216
 1.         0.00273224 0.00103199 0.01149425 0.01470588 0.00150376
 0.00115741 0.00134771 0.00526316 0.00189036 0.00181488 0.00409836
 0.00537634 0.00101833 0.01075269 0.00168067 1.         0.00232019
 0.00315457 0.01020408 0.00111235 0.00290698 0.125      0.00584795
 0.01149425 0.00970874 0.0013624  0.00161812 0.00175439 0.00308642
 0.00275482 1.         0.00137931 0.00423729 0.16666667 0.00187266
 0.00138122 0.00154799 0.00485437 0.00121951 0.00127877 0.125
 0.00117647 0.00507614 0.00564972 1.         0.01960784 0.00169205
 0.00172414 0.00198807 0.01282051 0.0010989  0.00555556 0.00116686
 0.00247525 0.00689655 0.01492537 0.0037037  0.01315789 0.001321
 0.00143472 0.001287   0.00662252 0.00168919 0.00280899 0.00298507
 0.004      0.03333333 0.00204499 0.0013245  0.00181818 0.00

## Usage example for DiCE with torch

In [16]:
import dice_ml
from dice_ml.utils import helpers # helper functions

backend = 'PYT'
# Dataset for training an ML model
d = dice_ml.Data(dataframe=helpers.load_adult_income_dataset(),
                 continuous_features=['age', 'hours_per_week'],
                 outcome_name='income')
# Pre-trained ML model
m = dice_ml.Model(model_path=dice_ml.utils.helpers.get_adult_income_modelpath(backend=backend), backend=backend)
# DiCE explanation instance
exp = dice_ml.Dice(d,m)

In [53]:
query_instance = {
    'age':22,
    'workclass':'Private',
    'education':'HS-grad',
    'marital_status':'Single',
    'occupation':'Service',
    'race': 'White',
    'gender':'Female',
    'hours_per_week': 45
}

In [55]:
# Generate counterfactual examples
dice_exp = exp.generate_counterfactuals(query_instance, total_CFs=4, desired_class="opposite")
# Visualize counterfactual explanation
dice_exp.visualize_as_dataframe()

Diverse Counterfactuals found! total time taken: 00 min 03 sec
Query instance (original outcome : 0)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,22.0,Private,HS-grad,Single,Service,White,Female,45.0,4.2e-05



Diverse Counterfactual set (new outcome : 1)


Unnamed: 0,age,workclass,education,marital_status,occupation,race,gender,hours_per_week,income
0,57.0,Private,Doctorate,Married,White-Collar,White,Female,45.0,0.993
1,33.0,Private,Prof-school,Married,Service,White,Male,39.0,0.964
2,22.0,Self-Employed,Prof-school,Married,Service,White,Female,45.0,0.748
3,49.0,Private,Masters,Married,Service,White,Female,62.0,0.957
