In [1]:
import numpy as np
import matplotlib.pyplot as plt

# Rating environment

In [2]:
# Default length of the feature vector generated for each user and each movie.
N_FEATURES = 10
# Default number of simulated users.
N_USERS = 5
# Default number of simulated movies.
N_MOVIES = 300

## Helper functions

In [3]:
def generate_users(n_users=N_USERS, n_features=N_FEATURES):
    """Sample a random ``(n_users, n_features)`` matrix of user feature vectors.

    Every row starts as uniform values in [0, 1) and is then raised
    element-wise to its own integer exponent drawn from 1..19. Larger
    exponents shrink most entries toward zero. Entries that end up below
    1e-5 are set to exactly 0.

    Randomness comes from NumPy's global ``np.random`` state.
    """
    profiles = np.random.random((n_users, n_features))
    # Rows are views into `profiles`, so the in-place writes below update it.
    for row in profiles:
        exponent = np.random.randint(1, 20)
        row[:] = row ** exponent
        row[row < 1e-5] = 0
    return profiles

In [4]:
def generate_movies(n_movies=N_MOVIES, n_features=N_FEATURES):
    """Sample a random ``(n_movies, n_features)`` matrix of movie feature vectors.

    Each row is drawn uniformly from [0, 1), raised element-wise to a
    per-row integer exponent in 1..19, and then has every entry smaller
    than 1e-5 replaced by 0.

    Randomness comes from NumPy's global ``np.random`` state.
    """
    catalogue = np.random.random((n_movies, n_features))
    for idx in range(catalogue.shape[0]):
        exponent = np.random.randint(1, 20)
        powered = catalogue[idx] ** exponent
        # Zero out negligible entries before writing the row back.
        powered[powered < 1e-5] = 0
        catalogue[idx] = powered
    return catalogue

In [5]:
def reward_func(user, movie):
    """Return a rating in 1..5 derived from the cosine similarity of two vectors.

    The cosine similarity of ``user`` and ``movie`` is clamped below at 0,
    mapped linearly onto [1, 5] and rounded to the nearest whole number.

    Parameters:
        user: 1-D feature vector of the user.
        movie: 1-D feature vector of the movie, same length as ``user``.

    Returns:
        The rating as a float with an integral value between 1.0 and 5.0.
    """
    norm_product = np.linalg.norm(user) * np.linalg.norm(movie)
    if norm_product == 0:
        # An all-zero vector has no direction, so the cosine is undefined
        # (0/0 -> NaN). Treat it as "no similarity" and give the lowest rating.
        return np.float64(1.0)
    similarity = np.dot(user, movie) / norm_product
    # max() needs two arguments here: clamp negative similarities to 0 so the
    # rating never drops below 1.
    return np.round(1 + 4 * max(similarity, 0.0), 0)

In [6]:
def get_rewards_matrix(users, movies, reward_f=reward_func):
    """Build the ``(len(users), len(movies))`` table of rewards.

    Entry ``[i, j]`` is ``reward_f(users[i], movies[j])``.
    """
    rewards = np.zeros((len(users), len(movies)))
    # ndindex walks every (user, movie) index pair in row-major order.
    for user_idx, movie_idx in np.ndindex(rewards.shape):
        rewards[user_idx, movie_idx] = reward_f(users[user_idx], movies[movie_idx])
    return rewards

In [7]:
def get_available_matrix(n_users, n_movies, history):
    """Turn per-user movie histories into a 0/1 indicator matrix.

    ``history`` is a sequence whose element ``u`` lists the movie indices
    recorded for user ``u``. The result has shape ``(n_users, n_movies)``
    with a 1 at ``[u, m]`` for every ``m`` in ``history[u]`` and 0 elsewhere.
    """
    flags = np.zeros((n_users, n_movies))
    for user, seen_movies in enumerate(history):
        for movie in seen_movies:
            flags[user, movie] = 1
    return flags

## Environment

In [8]:
class Environment:
    """Simulated movie-rating environment.

    Holds randomly generated user and movie feature vectors, the table of
    rewards for every (user, movie) pair, and a per-user history of the
    movies recorded so far.

    Attributes set by ``reset``:
        users, movies: generated feature matrices.
        rewards_matrix: ``(n_users, n_movies)`` table of rewards.
        history: list with one list of movie indices per user.
        available_matrix: ``(n_users, n_movies)`` 0/1 matrix with a 1 for
            every movie present in the corresponding user's history.
        rng: ``numpy.random.Generator`` used by ``get_next_step``.
    """

    def __init__(self, n_users=N_USERS, n_features=N_FEATURES, n_movies=N_MOVIES, history=None, reward_f=reward_func, seed=None):
        self.reset(n_users, n_features, n_movies, history, reward_f, seed)

    def reset(self, n_users=N_USERS, n_features=N_FEATURES, n_movies=N_MOVIES, history=None, reward_f=reward_func, seed=None):
        """(Re)generate users, movies and rewards and (re)initialize the history.

        Parameters:
            n_users, n_features, n_movies: sizes of the simulation.
            history: optional sequence with one iterable of movie indices per
                user; it is copied, so the caller's object is never mutated.
                When omitted, every user starts with an empty history.
            reward_f: callable ``(user_vector, movie_vector) -> reward``.
            seed: seed for ``self.rng``.
        """
        self.rng = np.random.default_rng(seed)
        self.n_users = n_users
        self.n_features = n_features
        self.n_movies = n_movies
        # NOTE: the generators below draw from NumPy's global random state,
        # not from self.rng, so `seed` alone does not make the generated
        # users/movies reproducible.
        self.users = generate_users(n_users, n_features)
        self.movies = generate_movies(n_movies, n_features)
        self.rewards_matrix = get_rewards_matrix(self.users, self.movies, reward_f)
        if history is not None:
            # Copy each per-user list: update_history appends in place and
            # must not leak into the caller's data or into a later reset.
            self.history = [list(hist) for hist in history]
        else:
            # Size by the requested n_users, not the module-level default.
            self.history = [[] for _ in range(n_users)]
        self.available_matrix = get_available_matrix(self.n_users, self.n_movies, self.history)

    def get_reward(self, user_idx, movie_idx):
        """Return the reward for (user_idx, movie_idx) and record it in the history."""
        reward = self.rewards_matrix[user_idx][movie_idx]
        self.update_history(user_idx, movie_idx)
        return reward

    def get_next_step(self, context_size=10):
        """Pick a random user and a random sample of movies from their history.

        Parameters:
            context_size: maximum number of movie indices to return.

        Returns:
            ``(user_idx, context)`` where ``context`` is an integer array of
            at most ``context_size`` distinct movie indices, in random order,
            taken from the movies flagged in ``available_matrix`` for that
            user. It is empty when the user has no history yet.
        """
        user_idx = self.rng.choice(self.n_users)
        flags = self.available_matrix[user_idx]
        # flatnonzero yields the movie *indices* whose flag is set; boolean
        # masking of `flags` itself would only return the flag values.
        available = np.flatnonzero(flags > 0)
        self.rng.shuffle(available)
        context = available[:context_size]
        return user_idx, context

    def update_history(self, user_idx, movie_idx):
        """Append movie_idx to the user's history and flag it in available_matrix."""
        self.history[user_idx].append(movie_idx)
        self.available_matrix[user_idx][movie_idx] = 1

## Agents

In [9]:
class RandomAgent:
    """Baseline agent that recommends a movie index drawn at random."""

    def __init__(self, n_users=N_USERS, n_features=N_FEATURES, n_movies=N_MOVIES, seed=None):
        """Store the problem sizes and create the agent's own random generator."""
        self.n_users, self.n_features, self.n_movies = n_users, n_features, n_movies
        self.rng = np.random.default_rng(seed)

    def recommend(self, user_idx):
        """Return a random movie index in ``range(n_movies)``; ``user_idx`` is ignored."""
        pick = self.rng.choice(self.n_movies)
        return pick

In [10]:
class GDAgent:
    """Agent holding all-ones user and movie feature matrices.

    ``recommend`` does not use these matrices; it returns a random movie index.
    """

    def __init__(self, n_users=N_USERS, n_features=N_FEATURES, n_movies=N_MOVIES, seed=None):
        """Store the problem sizes, create all-ones feature matrices and a generator."""
        self.n_users, self.n_features, self.n_movies = n_users, n_features, n_movies

        # Feature matrices start as all ones, one row per user / per movie.
        user_shape = (n_users, n_features)
        movie_shape = (n_movies, n_features)
        self.users = np.ones(user_shape)
        self.movies = np.ones(movie_shape)

        self.rng = np.random.default_rng(seed)

    def recommend(self, user_idx):
        """Return a random movie index in ``range(n_movies)``; ``user_idx`` is ignored."""
        pick = self.rng.choice(self.n_movies)
        return pick

In [253]:
# Build the simulation with the default sizes and an agent that picks movies
# at random. No seed is passed, so every run produces different results.
env = Environment()
agent = RandomAgent()

In [254]:
# Run 50 interaction steps: draw a user, let the agent choose a movie, and
# record that choice in the user's history.
# The returned context is not used here, and the reward is not read
# (update_history is called directly rather than get_reward).
for i in range(50):
    user_idx, context = env.get_next_step()
    movie_idx = agent.recommend(user_idx)
    env.update_history(user_idx, movie_idx)

In [255]:
# Show the per-user lists of movie indices recorded by the loop above.
env.history

[[269, 284, 295, 241, 42, 135, 180, 53],
 [72, 215, 194, 268, 158, 265, 19, 49, 42],
 [64, 162, 254, 116, 189, 147, 63, 90, 120, 229, 170, 40],
 [19, 285, 38, 67, 52, 268, 57, 26, 249, 24, 235],
 [24, 185, 136, 205, 202, 259, 217, 43, 199, 79]]