# Initialization

In [1]:
import os, sys
import time
import numpy as np
import pandas as pd
import random
from scipy import stats as st
import itertools
import operator

import torch

from tqdm.notebook import trange
from tqdm import tqdm

# Seeded NumPy RNG; it is passed to random_train_test_split below so the train/test split is repeatable.
random_state = np.random.RandomState(2020)

In [4]:
# Resolve all paths relative to the kernel's current working directory.
base_dir = os.getcwd()

# Execute helpers.ipynb (expected to sit next to this notebook) so that the names
# it defines are available here.
# NOTE(review): load_model and StaticVars, used in later cells, are not defined in
# this notebook -- presumably they come from helpers.ipynb; confirm.
helpers_file = os.path.join(base_dir, 'helpers.ipynb')
%run $helpers_file

# Put ../spotlight_ext (resolved to an absolute path) on sys.path.
# The membership test keeps sys.path free of duplicates when this cell is re-run.
for p in ['../spotlight_ext']:
    module_path = os.path.abspath(os.path.join(base_dir, p))
    if module_path not in sys.path:
        sys.path.append(module_path)

# Load Models and Dataset

## Models

In [5]:
# Load the two pretrained recommenders through the load_model helper.
# NOTE(review): the first call passes model_type by keyword and the second passes
# 'pooling' positionally -- presumably the same parameter; confirm against load_model.
lstm_model = load_model(model_type='entire')
pooling_model = load_model('pooling')

# Name -> model lookup so later code can select a model by key.
pretrained_models = {
    'lstm': lstm_model,
    'pooling': pooling_model,
}

## Dataset

In [6]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset

# Fetch the MovieLens '1M' variant and split it using the seeded RNG.
dataset = get_movielens_dataset(variant='1M')
train, test = random_train_test_split(dataset, random_state=random_state)

# Convert both splits to sequence form with the same max_sequence_length
# (train first, then test).
max_sequence_length = 20
train, test = (
    split.to_sequence(max_sequence_length=max_sequence_length)
    for split in (train, test)
)

# Genetic Search

The target item is 930 in this test case.

In [84]:
# Take the first test sequence whose user id is 2; .copy() gives an array that is
# independent of test.sequences.
test_interaction = test.sequences[test.user_ids == 2][0].copy()
test_interaction

array([  0,   0, 128, 166, 168, 102,  77,  54, 144, 165, 140,  55, 173,
        57,  98,  91, 118,  75, 155, 127], dtype=int32)

In [86]:
# Get the LSTM model's prediction array for this sequence
# (presumably one score per item id -- confirm against the model's predict()).
prediction = lstm_model.predict(test_interaction)
# Overwrite the entries indexed by the values in test_interaction (this includes
# index 0) with -StaticVars.FLOAT_MAX so they sort last.
# NOTE(review): StaticVars is not defined in this notebook -- presumably it comes
# from helpers.ipynb and FLOAT_MAX is a very large float; confirm.
prediction[test_interaction] = -StaticVars.FLOAT_MAX
# Rank on the negated scores: rank 1 is the highest score, and 'ordinal' gives
# tied values distinct ranks in order of occurrence.
rk_data = st.rankdata(-prediction, method='ordinal')
# rk_data is aligned with prediction's indices, so this shows the ranks of
# indices 0-9, not the ten best-ranked entries.
rk_data[0:10]

array([3689,  930,  823, 2014,  783,  454,  314,  644,  836, 1246])

## Random CF candidate selection

In [98]:
import numpy as np

def generate_random_sublists(original_list, sublists_info, random_state=None):
    """Draw random sublists of the requested sizes from ``original_list``.

    Every sublist is sampled independently from the *full* ``original_list``.
    Within one sublist no position of ``original_list`` is used twice
    (``replace=False``), but different sublists may share elements, and a
    sublist can still hold equal values if ``original_list`` contains duplicates.

    Parameters
    ----------
    original_list : array-like
        One-dimensional collection to sample from.
    sublists_info : dict
        Maps ``sublist length -> number of sublists of that length``.
    random_state : numpy.random.RandomState or numpy.random.Generator, optional
        Source of randomness. With the default ``None`` the global NumPy RNG
        (``np.random``) is used, exactly as before. Pass a seeded instance to
        make the output reproducible.

    Returns
    -------
    list of numpy.ndarray
        The sampled sublists, grouped in the iteration order of
        ``sublists_info`` (all sublists of the first length, then the next, ...).

    Raises
    ------
    ValueError
        If the combined size of all requested sublists,
        ``sum(length * count)``, exceeds ``len(original_list)``.
    """
    # Guard on the combined size of all requested sublists. This is stricter than
    # sampling needs (each draw only requires length <= len(original_list)) but
    # is kept so existing callers see the same ValueError as before.
    total_length_required = sum(length * count for length, count in sublists_info.items())
    if total_length_required > len(original_list):
        raise ValueError("Original list is not large enough for the requested sublists.")

    # Fall back to the module-level RNG when no explicit source is supplied.
    rng = np.random if random_state is None else random_state

    return [
        rng.choice(original_list, length, replace=False)
        for length, count in sublists_info.items()
        for _ in range(count)
    ]


In [99]:
# Sampling plan: {sublist length: number of sublists of that length}.
# Here: one sublist of length 2 and two sublists of length 3.
sublists_info = {2: 1, 3: 2}

In [100]:
# Inspect the container type of the sequence before sampling from it.
type(test_interaction)

numpy.ndarray

In [101]:
# Sample sublists from the user's sequence according to sublists_info.
# NOTE(review): the sequence contains 0 entries (possibly padding -- confirm), and
# they are drawn like any other element, so sampled sublists can include 0.
generate_random_sublists(test_interaction, sublists_info)

[array([155,   0], dtype=int32),
 array([  0, 173, 140], dtype=int32),
 array([54, 75, 98], dtype=int32)]