# Load data

In [2]:
import numpy as np
import pandas as pd
import os

In [3]:
print('Data loading...')

ROOT_DIR = '/Users/vivianyan/Desktop/Reinforcement-Learning/Project/RL-Movie-Rec-Sys'
DATA_DIR = os.path.join(ROOT_DIR, 'ml-100k')

ratings_list = [i.strip().split("\t") for i in open(os.path.join(DATA_DIR,'mod_ratings.csv'), 'r').readlines()]
ratings_df = pd.DataFrame(ratings_list[1:], columns = ['userId', 'movieId', 'rating', 'timestamp'])
ratings_df['userId'] = ratings_df['userId'].apply(pd.to_numeric)
ratings_df['movieId'] = ratings_df['movieId'].apply(pd.to_numeric)
ratings_df['rating'] = ratings_df['rating'].astype(float)

movies_list = [i.strip().split("\t") for i in open(os.path.join(DATA_DIR,'mod_movies.csv'),encoding='latin-1').readlines()]
movies_df = pd.DataFrame(movies_list[1:], columns = ['movieId', 'title', 'genres'])
movies_df['movieId'] = movies_df['movieId'].apply(pd.to_numeric)

users_list = [i.strip().split("\t") for i in open(os.path.join(DATA_DIR,'users.csv'), 'r').readlines()]
users_df = pd.DataFrame(users_list[1:], columns=['userId', 'gender', 'age', 'occupation', 'zip'])

tags_list = [i.strip().split("\t") for i in open(os.path.join(DATA_DIR,'mod_tags.csv'), 'r').readlines()]
tags_df = pd.DataFrame(tags_list[1:], columns=['userId', 'movieId', 'tag', 'timestamp'])
tags_df['userId'] = tags_df['userId'].apply(pd.to_numeric)
tags_df['movieId'] = tags_df['movieId'].apply(pd.to_numeric)

print("Data loading complete!")
print("Data preprocessing...")

Data loading...
Data loading complete!
Data preprocessing...


# Generate evaluation data
* movies_id_to_movies={movieId: [title, genres],}
* users_dict={userId:[movieId, rating],}
* users_history_lens=[length,]

In [4]:
print(len(movies_list))
movies_id_to_movies = {movie[0]: movie[1:] for movie in movies_list[1:]}
dict(list(movies_id_to_movies.items())[:10])

9743


{'1': ['Toy Story (1995)', 'Adventure|Animation|Children|Comedy|Fantasy'],
 '2': ['Jumanji (1995)', 'Adventure|Children|Fantasy'],
 '3': ['Grumpier Old Men (1995)', 'Comedy|Romance'],
 '4': ['Waiting to Exhale (1995)', 'Comedy|Drama|Romance'],
 '5': ['Father of the Bride Part II (1995)', 'Comedy'],
 '6': ['Heat (1995)', 'Action|Crime|Thriller'],
 '7': ['Sabrina (1995)', 'Comedy|Romance'],
 '8': ['Tom and Huck (1995)', 'Adventure|Children'],
 '9': ['Sudden Death (1995)', 'Action'],
 '10': ['GoldenEye (1995)', 'Action|Adventure|Thriller']}

In [5]:
users_dict = {user: [] for user in set(ratings_df["userId"])}
filtered_users_dict = users_dict.copy()

# sort the dataset by time
ratings_df = ratings_df.sort_values(by='timestamp', ascending=True)

for idx, row in ratings_df.iterrows():
    users_dict[row['userId']].append((row['movieId'], row['rating']))
    if row['rating'] >= 4.0:
        filtered_users_dict[row['userId']].append((row['movieId'], row['rating']))

print(f'User dict (10 examples of UserId=1): {users_dict[1][:10]}')

users_history_lens = [len(filtered_users_dict[u]) for u in set(ratings_df["userId"])]
print(f'Length of user dict history: {len(users_history_lens)}')

User dict (10 examples of UserId=1): [(1210, 5.0), (1210, 5.0), (804, 4.0), (804, 4.0), (2018, 5.0), (2018, 5.0), (2826, 4.0), (2826, 4.0), (2628, 4.0), (2628, 4.0)]
Length of user dict history: 610


In [6]:
np.save(os.path.join(os.getcwd(), "data/user_dict.npy"), users_dict)
np.save(os.path.join(os.getcwd(), "data/users_histroy_len.npy"), users_history_lens)

# Train evaluation split

In [7]:
users_num = max(ratings_df['userId']) + 1
movies_num = max(ratings_df['movieId']) + 1
print(f'# users: {users_num}, # movies: {movies_num}')

# users: 611, # movies: 193610


In [8]:
train_users_num = int(users_num * 0.8)
train_movies_num = movies_num
print(f'Training set: \n# users: {train_users_num}, # movies: {train_movies_num}')

train_users_dict = {k: users_dict[k] for k in range(1, train_users_num+1)}
train_users_history_lens = users_history_lens[:train_users_num]
print(f'Length of users dict: {len(train_users_dict)}, Length of users history lens: {len(train_users_history_lens)}')

Training set: 
# users: 488, # movies: 193610
Length of users dict: 488, Length of users history lens: 488


In [9]:
eval_users_num = int(users_num * 0.2)
eval_movies_num = movies_num
print(f'Evaluation set: \n# users: {eval_users_num}, # movies: {eval_movies_num}')

eval_users_dict = {k: users_dict[k] for k in range(users_num-eval_users_num, users_num)}
eval_users_history_lens = users_history_lens[-eval_users_num:]
print(f'Length of users dict: {len(eval_users_dict)}, Length of users history lens: {len(eval_users_history_lens)}')

Evaluation set: 
# users: 122, # movies: 193610
Length of users dict: 122, Length of users history lens: 122


# Evaluation
Variables:
* movies_id_to_movies={movieId: [title, genres],}
* users_dict={userId:[movieId, rating],}
* users_history_lens=[length,]
* users_num=611
* movies_num=193610

In [None]:
import tensorflow as tf
import itertools
import time

from envs import OfflineEnv
from ddpg import DDPGAgent

In [None]:
def evaluate(recommender, env, check_movies=False, top_k=10):
    episode_reward = 0
    steps = 0
    mean_precision = 0
    mean_ndcg = 0
    mean_recall = 0
    mean_ap = 0

    user_id, items_ids, done = env.reset()
    if check_movies:
        print(f'user_id : {user_id}, rated_items_length:{len(env.user_items)}')
        print('history items : \n', np.array(env.get_items_names(items_ids)))

    while not done:
        user_eb = recommender.embedding_network.get_layer('user_embedding')(np.array(user_id))
        items_eb = recommender.embedding_network.get_layer('movie_embedding')(np.array(items_ids))
        
        state = recommender.srm_ave([np.expand_dims(user_eb, axis=0), np.expand_dims(items_eb, axis=0)])
        
        action = recommender.actor.network(state)
        
        recommended_item = recommender.recommend_item(action, env.recommended_items, top_k=top_k)
        if check_movies:
            print(f'recommended items ids : {recommended_item}')
            print(f'recommened items : \n {np.array(env.get_items_names(recommended_item), dtype=object)}')
        
        next_items_ids, reward, done, _ = env.step(recommended_item, top_k=top_k)
        if top_k:
            correct_list = [1 if r > 0 else 0 for r in reward]
            # ndcg
            dcg, idcg = calculate_ndcg(correct_list, [1 for _ in range(len(reward))])
            mean_ndcg += dcg/idcg
            
            #precision
            correct_num = top_k-correct_list.count(0)
            mean_precision += correct_num/top_k

            # recall
            if hasattr(env, 'num_relevant_items') and env.num_relevant_items > 0:
                recall_k = correct_num / env.num_relevant_items
            else:
                recall_k = 0
            mean_recall += recall_k

            # AP@k (average precision at k)
            num_hits = 0
            ap_sum = 0.0
            for idx, rel in enumerate(correct_list):
                if rel == 1:
                    num_hits += 1
                    ap_sum += num_hits / (idx + 1)
            average_precision = ap_sum / max(correct_list.count(1), 1)
            mean_ap += average_precision

            
        reward = np.sum(reward)
        items_ids = next_items_ids
        episode_reward += reward
        steps += 1
        
        if check_movies:
            print(f'precision : {correct_num/top_k}, dcg : {dcg:0.3f}, idcg : {idcg:0.3f}, ndcg : {dcg/idcg:0.3f}, reward : {reward}')
            print()
        break
    
    if check_movies:
        print(f'precision : {mean_precision/steps}, ngcg : {mean_ndcg/steps}, recall : {mean_recall/steps}, MAP : {mean_ap/steps}, episode_reward : {episode_reward}')
        print()
    
    return mean_precision/steps, mean_ndcg/steps, mean_recall/steps, mean_ap/steps

def calculate_ndcg(rel, irel):
    dcg = 0
    idcg = 0
    rel = [1 if r>0 else 0 for r in rel]
    for i, (r, ir) in enumerate(zip(rel, irel)):
        dcg += (r)/np.log2(i+2)
        idcg += (ir)/np.log2(i+2)
    return dcg, idcg

In [None]:
# get the saved model dir
saved_actor = './save_model/trail-2022-12-13-04/actor_10_fixed.h5'
saved_critic = './save_model/trail-2022-12-13-04/critic_10_fixed.h5'

In [None]:
tf.keras.backend.set_floatx('float64')

STATE_SIZE = 10
TOP_K = 10 

sum_precision = 0
sum_ndcg = 0
sum_recall = 0
sum_map = 0

end_evaluation = 10

for i, user_id in enumerate(eval_users_dict.keys()):
    env = OfflineEnv(eval_users_dict, eval_users_history_lens, movies_id_to_movies, STATE_SIZE, fix_user_id=user_id)
    recommender = DDPGAgent(env, users_num, movies_num, STATE_SIZE)
    recommender.actor.build_networks()
    recommender.critic.build_networks()
    recommender.load_model(saved_actor, saved_critic)

    precision, ndcg, recall, map = evaluate(recommender, env, check_movies=True, top_k=TOP_K) # if check movies is true, you can check the recommended movies
    sum_precision += precision
    sum_ndcg += ndcg
    sum_recall += recall
    sum_map += map
    
    if i > end_evaluation:
        break
    
print(f'precision@{TOP_K} : {sum_precision/len(eval_users_dict)}, ndcg@{TOP_K} : {sum_ndcg/len(eval_users_dict)}, recall@{TOP_K} : {sum_recall/len(eval_users_dict)}, MAP@{TOP_K} : {sum_map/len(eval_users_dict)}')