# Initialization

In [142]:
import os, sys
import time
import numpy as np
import pandas as pd
import random
from scipy import stats as st
import itertools
import operator

import torch

from tqdm.notebook import trange
from tqdm import tqdm

# Seeded legacy NumPy RandomState; it is passed to random_train_test_split further down
# so that the train/test split is reproducible.
random_state = np.random.RandomState(2020)

In [143]:
# Use the current working directory as the base for the relative paths below.
base_dir = os.getcwd()

# Execute helpers.ipynb through IPython's %run magic.
# NOTE(review): names used later but not defined in this notebook (load_model, StaticVars)
# presumably come from this helper notebook - confirm.
helpers_file = os.path.join(base_dir, 'helpers.ipynb')
%run $helpers_file

# Append the spotlight_ext directory (resolved relative to base_dir) to sys.path.
# The membership check keeps the path from being added twice when the cell is re-run.
for p in ['../spotlight_ext']:
    module_path = os.path.abspath(os.path.join(base_dir, p))
    if module_path not in sys.path:
        sys.path.append(module_path)

# Load Models and Dataset

## Models

In [144]:
# Load the two pretrained models through load_model (not defined in this notebook;
# presumably provided by helpers.ipynb - confirm).
lstm_model = load_model(model_type='entire')
pooling_model = load_model('pooling')

# Name -> model lookup for the loaded models.
pretrained_models = {
    'lstm': lstm_model,
    'pooling': pooling_model,
}

## Dataset

In [145]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset

# Fetch the MovieLens '1M' variant and split it into train/test with the seeded random_state.
dataset = get_movielens_dataset(variant='1M')
train, test = random_train_test_split(dataset, random_state=random_state)

# Convert both splits with to_sequence, using the same max_sequence_length for each.
max_sequence_length = 20
train = train.to_sequence(max_sequence_length=max_sequence_length)
test = test.to_sequence(max_sequence_length=max_sequence_length)

# Genetic Search

## Initialization

The target item is 14 in this test case, and the top-k cutoff is 10.

In [146]:
# Take the first test sequence whose user id is 3; .copy() keeps later edits away from the dataset.
test_interaction = test.sequences[test.user_ids == 3][0].copy()
# Drop zero entries (presumably sequence padding - confirm).
test_interaction = test_interaction[test_interaction != 0]
test_interaction

array([197, 124, 125, 190, 114, 186, 196,  59, 200, 191, 177], dtype=int32)

In [147]:
# Number of items left in the selected interaction sequence after removing zeros.
len_test_interaction = len(test_interaction)
len_test_interaction

11

In [148]:
# Score items with the LSTM model for this sequence, then overwrite the scores of the items
# already in the sequence with -StaticVars.FLOAT_MAX so they rank last.
prediction = lstm_model.predict(test_interaction)
prediction[test_interaction] = -StaticVars.FLOAT_MAX
# Ordinal ranks of the negated scores: rank 1 is the highest score and ties get distinct ranks.
rk_data = st.rankdata(-prediction, method='ordinal')
# Index holding rank 2; its value (shown below) is the target item used in the loss call later on.
index = np.where(rk_data == 2)
index[0][0]

14

## Random CF candidate selection

In [149]:
import numpy as np

def generate_random_sublists(original_list, sublists_info, seed=2020):
    """Draw unique random sub-sequences of ``original_list``.

    Args:
        original_list: 1-D sequence/array to sample from (sampling is without
            replacement within a single sub-sequence).
        sublists_info: Mapping ``{length: count}``; for every ``length`` exactly
            ``count`` distinct sub-sequences of that length are generated.
            Two draws with the same items in a different order count as distinct.
        seed: Seed for the NumPy generator. The default (2020) reproduces the
            results obtained before this parameter existed.

    Returns:
        list of ``np.ndarray``: all generated sub-sequences, grouped by the
        iteration order of ``sublists_info``.

    Raises:
        ValueError: if ``count`` is larger than the number of ordered draws of
            ``length`` items that exist for ``original_list`` (the request could
            never be satisfied and the sampling loop would not terminate).
    """
    population_size = len(original_list)
    rng = np.random.default_rng(seed=seed)  # Seed for reproducibility
    result_sublists = []

    for length, count in sublists_info.items():
        # Number of ordered selections of `length` items out of `population_size`
        # (n * (n-1) * ... ); this is an upper bound on the distinct tuples we can draw.
        # NOTE: if original_list contains repeated values the true number of distinct
        # tuples is smaller, so this guard is necessary but not sufficient in that case.
        max_distinct = 1
        for offset in range(length):
            max_distinct *= max(population_size - offset, 0)
        if count > max_distinct:
            raise ValueError(
                f"Cannot generate {count} unique sublists of length {length} "
                f"from {population_size} items (at most {max_distinct} exist)."
            )

        generated_sublists_for_length = set()

        # Rejection sampling: keep drawing until enough *distinct* tuples were seen.
        while len(generated_sublists_for_length) < count:
            sublist = tuple(rng.choice(original_list, length, replace=False))
            generated_sublists_for_length.add(sublist)

        result_sublists.extend(
            np.array(list(sublist)) for sublist in generated_sublists_for_length
        )

    return result_sublists


In [150]:
# Candidate lengths 5 through 10, with five unique sub-sequences requested for each length.
sublists_info = {length: 5 for length in range(5, 11)}

In [151]:
# Build the initial candidate population: random sub-sequences of the test interaction,
# with the lengths and counts given by sublists_info.
first_gen = generate_random_sublists(test_interaction, sublists_info)

In [152]:
# Display the generated first generation.
first_gen

[array([ 59, 197, 190, 196, 200], dtype=int32),
 array([190, 186, 196, 197,  59], dtype=int32),
 array([125, 190, 177,  59, 186], dtype=int32),
 array([190, 197, 124, 191, 196], dtype=int32),
 array([200, 125, 196, 124, 191], dtype=int32),
 array([197, 124, 196, 177, 191, 186], dtype=int32),
 array([197, 190,  59, 124, 177, 191], dtype=int32),
 array([196, 177, 197, 125, 114,  59], dtype=int32),
 array([191, 190, 186, 114, 200, 197], dtype=int32),
 array([ 59, 200, 177, 191, 114, 186], dtype=int32),
 array([191, 177,  59, 124, 196, 186, 125], dtype=int32),
 array([125, 196, 124, 114, 197, 191, 177], dtype=int32),
 array([114, 177, 125, 196, 124, 186,  59], dtype=int32),
 array([124, 186, 177, 196, 200,  59, 190], dtype=int32),
 array([186, 191, 125, 190, 114, 200, 197], dtype=int32),
 array([124, 125, 177,  59, 190, 186, 114, 197], dtype=int32),
 array([197, 196, 125, 191, 124, 177, 114, 186], dtype=int32),
 array([125, 124, 197, 196, 177, 190, 200,  59], dtype=int32),
 array([196, 124

## Crossover and Mutation

In [153]:
import numpy as np

def crossover(first_list, second_list, p_cross, rng=None):
    """Exchange a fraction of elements between two candidate sequences.

    ``int(min(len(first_list), len(second_list)) * p_cross)`` positions are drawn
    independently for each parent (without replacement, restricted to the length
    of the shorter parent). After sorting both index sets, the i-th chosen
    position of the first sequence is swapped with the i-th chosen position of
    the second one.

    The inputs are left untouched; the swaps are performed on copies, so a
    population list holding the parents is not corrupted by the operation.

    Args:
        first_list: First parent (``np.ndarray`` or ``list``).
        second_list: Second parent (``np.ndarray`` or ``list``).
        p_cross: Fraction of the shorter parent's length to exchange.
        rng: Optional ``np.random.Generator``. Pass a shared generator to get
            different crossover points on successive calls. When omitted, a
            generator seeded with 2020 is created, which makes every call pick
            the same positions (the historical behaviour).

    Returns:
        tuple: ``(first_child, second_child)`` of the same types as the inputs.
    """
    if rng is None:
        rng = np.random.default_rng(seed=2020)

    # Work on copies so the caller's sequences are not modified.
    first_child = first_list.copy()
    second_child = second_list.copy()

    # Crossover positions must be valid in both sequences.
    shorter_length = min(len(first_child), len(second_child))
    num_crossovers = int(shorter_length * p_cross)

    # Two independent draws, in this order, so the random stream matches the original.
    crossover_indices_first = np.sort(rng.choice(shorter_length, num_crossovers, replace=False))
    crossover_indices_second = np.sort(rng.choice(shorter_length, num_crossovers, replace=False))

    # Pairwise swap; the right-hand side is evaluated before either assignment.
    for index_first, index_second in zip(crossover_indices_first, crossover_indices_second):
        first_child[index_first], second_child[index_second] = (
            second_child[index_second],
            first_child[index_first],
        )

    return first_child, second_child

def mutate_array(org_arr, arr_to_mutate, mutation_probability, rng=None):
    """Return a copy of ``arr_to_mutate`` with some entries replaced at random.

    ``int(mutation_probability * len(arr_to_mutate))`` distinct positions are
    selected and each one is overwritten with an element drawn uniformly from
    ``org_arr``. The replacement may equal a value already present in the
    sequence, so the result can contain duplicates.

    The input sequence is not modified; the mutated copy is returned.

    Args:
        org_arr: Pool of values that replacements are drawn from.
        arr_to_mutate: Sequence to mutate (``np.ndarray`` or ``list``).
        mutation_probability: Fraction of positions to overwrite.
        rng: Optional ``np.random.Generator``. Pass a shared generator to get
            different mutations on successive calls. When omitted, a generator
            seeded with 2020 is created, which makes every call deterministic
            (the historical behaviour).

    Returns:
        The mutated copy, of the same type as ``arr_to_mutate``.
    """
    if rng is None:
        rng = np.random.default_rng(seed=2020)

    # Work on a copy so the caller's sequence is not modified.
    mutated = arr_to_mutate.copy()

    # Number of positions to overwrite (truncated towards zero).
    num_mutations = int(mutation_probability * len(mutated))
    indices_to_mutate = rng.choice(len(mutated), size=num_mutations, replace=False)

    # One draw per position, in order, to keep the random stream identical to the original.
    for idx in indices_to_mutate:
        mutated[idx] = rng.choice(org_arr)

    return mutated


def remove_duplicates(arr):
    """Drop repeated values from a NumPy array, keeping first occurrences in their original order."""
    # np.unique reports, for every distinct value, the position where it first appears.
    first_positions = np.unique(arr, return_index=True)[1]
    # Restoring ascending position order recovers the original ordering of the kept items.
    first_positions.sort()
    return arr[first_positions]

In [154]:
# Cross the last two first-generation candidates. Copies are passed so that first_gen is left
# untouched regardless of whether crossover/mutate_array modify their arguments, which keeps
# this cell re-runnable with the same result.
first, second = crossover(first_gen[-1].copy(), first_gen[-2].copy(), 0.3)
# Replace a 0.2 fraction of the first child's positions with items from the original interaction.
first = mutate_array(test_interaction, first, 0.2)
# Crossover and mutation can introduce repeated items; keep only the first occurrence of each.
first = remove_duplicates(first)
second = remove_duplicates(second)
first

array([186, 190, 114, 177,  59, 191, 197, 196], dtype=int32)

## Loss Functions

In [155]:
from itertools import chain, combinations
import numpy as np

def all_subsets(arr):
    """Iterate over the non-empty proper subsets of ``arr`` as tuples.

    Subset sizes run from 1 to ``len(arr) - 1``, so neither the empty tuple nor
    the full sequence is produced. Tuples are emitted size by size, each in the
    order given by ``itertools.combinations``.
    """
    subset_sizes = range(1, len(arr))
    return chain.from_iterable(map(lambda size: combinations(arr, size), subset_sizes))

def compute_yloss(target_score, kth_score):
    """Hinge-style loss on the ratio of the target score to the k-th ranked score.

    Returns ``target_score / kth_score - 1.0`` when that quantity is positive,
    and ``0`` otherwise.
    """
    ratio_excess = target_score / kth_score - 1.0
    return ratio_excess if ratio_excess > 0 else 0

def compute_distance(x, y):
    """Count the distinct values of ``x`` that do not occur in ``y`` (not symmetric in x and y)."""
    return len(np.setdiff1d(x, y))

def _yloss_for_sequence(sequence, model, target_item, top_k):
    """Score ``sequence`` with ``model`` and return the yloss of ``target_item`` vs. the rank-``top_k`` item."""
    scores = model.predict(sequence)
    # Items already in the sequence get the lowest possible score so they rank last.
    scores[sequence] = -StaticVars.FLOAT_MAX
    # Ordinal ranking of negated scores: rank 1 is the best-scored item.
    ranks = st.rankdata(-scores, method='ordinal')
    kth_item = np.where(ranks == top_k)[0][0]
    return compute_yloss(scores[target_item], scores[kth_item])


def compute_loss(old_cf, new_cf, model, target_item, top_k):
    """Evaluate a candidate sequence ``new_cf`` against the reference ``old_cf``.

    Args:
        old_cf: Reference sequence of item ids.
        new_cf: Candidate sequence of item ids (array usable as an index).
        model: Object whose ``predict(sequence)`` returns a per-item score array.
        target_item: Index of the item whose score is compared.
        top_k: Rank whose score serves as the comparison threshold.

    Returns:
        tuple: ``(yloss, dis, subset_yloss)`` where
            * ``yloss`` is the yloss of ``new_cf`` itself,
            * ``dis`` is ``compute_distance(old_cf, new_cf)``,
            * ``subset_yloss`` is the sum of the yloss over every sequence
              produced by ``all_subsets(new_cf)`` (one ``model.predict`` call
              per subset).
    """
    yloss = _yloss_for_sequence(new_cf, model, target_item, top_k)
    dis = compute_distance(old_cf, new_cf)

    subset_yloss = sum(
        _yloss_for_sequence(np.array(subset), model, target_item, top_k)
        for subset in all_subsets(new_cf)
    )

    return (yloss, dis, subset_yloss)

In [156]:
# Evaluate the (yloss, dis, subset_yloss) tuple of candidate `first` against the original
# interaction, using the LSTM model with target item 14 and top_k = 10.
compute_loss(test_interaction, first, lstm_model, 14, 10)

(0, 3, 0.041111111640930176)