## Dataset

In [None]:
# Shell escape: extract the ml-100k archive into sample_data/ (the files land in sample_data/ml-100k/).
!unzip sample_data/ml-100k.zip -d sample_data

Archive:  sample_data/ml-100k.zip
   creating: sample_data/ml-100k/
  inflating: sample_data/ml-100k/allbut.pl  
  inflating: sample_data/ml-100k/mku.sh  
  inflating: sample_data/ml-100k/README  
  inflating: sample_data/ml-100k/u.data  
  inflating: sample_data/ml-100k/u.genre  
  inflating: sample_data/ml-100k/u.info  
  inflating: sample_data/ml-100k/u.item  
  inflating: sample_data/ml-100k/u.occupation  
  inflating: sample_data/ml-100k/u.user  
  inflating: sample_data/ml-100k/u1.base  
  inflating: sample_data/ml-100k/u1.test  
  inflating: sample_data/ml-100k/u2.base  
  inflating: sample_data/ml-100k/u2.test  
  inflating: sample_data/ml-100k/u3.base  
  inflating: sample_data/ml-100k/u3.test  
  inflating: sample_data/ml-100k/u4.base  
  inflating: sample_data/ml-100k/u4.test  
  inflating: sample_data/ml-100k/u5.base  
  inflating: sample_data/ml-100k/u5.test  
  inflating: sample_data/ml-100k/ua.base  
  inflating: sample_data/ml-100k/ua.test  
  inflating: sample_data/ml-

In [None]:
import pandas as pd
import numpy as np
import os
import time
from datetime import datetime
import json

# Folder holding the extracted ml-100k files. Named `data_dir` so the builtin
# `dir()` is not shadowed for the rest of the notebook session.
data_dir = 'sample_data/ml-100k'

# u.data is tab-separated with no header row: one rating event per line.
col_names = ['user id', 'item id', 'rating', 'timestamp']
data = pd.read_csv(os.path.join(data_dir, 'u.data'), delimiter='\t', names=col_names, header=None)
# Timestamps are epoch seconds. Converting with pandas is vectorised and always
# interprets them as UTC, so the result does not depend on the machine's local
# timezone the way datetime.fromtimestamp() does.
data['timestamp'] = pd.to_datetime(data['timestamp'], unit='s')

# u.item is pipe-separated and not UTF-8; let pandas decode it directly.
movie = pd.read_csv(os.path.join(data_dir, 'u.item'), delimiter='|', header=None,
                    encoding='ISO-8859-1')

movie.columns = ['item id', 'title' ,'release date','video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
 'Animation', 'Children\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy',
 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

# u.user is pipe-separated as well.
user = pd.read_csv(os.path.join(data_dir, 'u.user'), delimiter='|', header=None,
                   encoding='ISO-8859-1')

user.columns = ['user id', 'age', 'gender', 'occupation', 'zip code']

# Attach the movie title to every rating event.
ratings = data.merge(movie[['item id', 'title']], on='item id')

# Binary feedback: a rating strictly above 3 counts as a "like".
ratings['like'] = ratings['rating'] > 3

In [None]:
# Peek at the first ten rows once the ratings are ordered by user id.
ratings.sort_values('user id').head(10)

Unnamed: 0,user id,item id,rating,timestamp,title,like
43606,1,12,5,1997-11-03 07:42:40,"Usual Suspects, The (1995)",True
79998,1,254,1,1997-11-03 07:16:32,Batman & Robin (1997),False
35906,1,189,3,1998-03-01 06:15:28,"Grand Day Out, A (1992)",False
69127,1,87,5,1997-11-03 07:52:21,Searching for Bobby Fischer (1993),True
51383,1,187,4,1997-09-22 22:01:18,"Godfather: Part II, The (1974)",True
71026,1,180,3,1997-09-24 03:42:53,Apocalypse Now (1979),False
97387,1,46,4,1997-10-15 05:27:10,Exotica (1994),True
6611,1,201,3,1997-11-03 07:42:40,Evil Dead II (1987),False
36126,1,64,5,1997-09-24 03:40:04,"Shawshank Redemption, The (1994)",True
6751,1,241,4,1997-11-03 07:45:33,"Last of the Mohicans, The (1992)",True


In [None]:
# Random 90/10 split of the rating events; the fixed seed keeps it repeatable.
train_ratio = 0.9
train_size = int(train_ratio * len(ratings))
ratings_train = ratings.sample(n=train_size, random_state=42)
# Everything that was not sampled into the training set becomes the test set.
ratings_test = ratings.drop(index=ratings_train.index)

## Recall (MF)

In [None]:
pip install implicit

Collecting implicit
  Downloading implicit-0.7.2-cp310-cp310-manylinux2014_x86_64.whl (8.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m16.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: implicit
Successfully installed implicit-0.7.2


In [None]:
from scipy.sparse import csr_matrix

# Matrix dimensions are taken from the largest ids present in the training split.
n_users = ratings_train['user id'].max()
n_item = ratings_train['item id'].max()
# Only "liked" interactions are kept as positive feedback.
ratings_train_pos = ratings_train[ratings_train['like']]
ratings_test_pos = ratings_test[ratings_test['like']]

# Ids are shifted by -1 to obtain 0-based row/column indices.
row = ratings_train_pos['user id'].values - 1
col = ratings_train_pos['item id'].values - 1
# Named `confidence` rather than `data` so the ratings DataFrame `data` created
# by the loading cell is not silently replaced by an array of ones.
confidence = np.ones(len(ratings_train_pos))
user_item_data = csr_matrix((confidence, (row, col)), shape=(n_users, n_item))

In [None]:
import implicit

# Matrix-factorisation recall model: ALS with 50 factors, seeded for repeatability.
model = implicit.als.AlternatingLeastSquares(
    factors=50,
    random_state=42,
)

# Fit on the sparse user-by-item matrix built from the positive training interactions.
model.fit(user_item_data)

  0%|          | 0/15 [00:00<?, ?it/s]

In [None]:
from sklearn.metrics import dcg_score, ndcg_score

def precision_k(actuals, recs, k=5):
  """Precision@k: number of distinct relevant items among the first `k` recs, divided by `k`."""
  top_k = set(recs[:k])
  relevant = set(actuals)
  hits = top_k & relevant
  return len(hits) / k

def recall_k(actuals, recs, k=5):
  """Recall@k: distinct relevant items among the first `k` recs, divided by `len(actuals)`.

  Returns 0.0 when `actuals` is empty (a user with no relevant items) instead of
  raising ZeroDivisionError.
  """
  if len(actuals) == 0:
    return 0.0
  hits = set(recs[0:k]).intersection(set(actuals))
  return len(hits)/len(actuals)

def dcg_k(actuals, recs, k=5):
  """DCG@k of `recs`, using binary relevance (1.0 if the item is in `actuals`).

  The recommendation order is handed to `dcg_score` through a strictly
  decreasing synthetic score vector. That vector is sized to the number of
  recommendations actually available, so a list shorter than `k` no longer
  causes a shape mismatch. An empty list scores 0.0.
  """
  relevant = set(actuals)  # constant-time membership instead of scanning `actuals` per item
  top_k = recs[0:k]
  n = len(top_k)
  if n == 0:
    return 0.0
  relevance = np.array([[float(i in relevant) for i in top_k]])
  # n, n-1, ..., 1: higher score for earlier positions keeps the given order.
  score = n - np.arange(n)
  return dcg_score(relevance, score.reshape(1,-1), k=k)

def ndcg_k(actuals, recs, k=5):
  """NDCG@k of `recs`, using binary relevance (1.0 if the item is in `actuals`).

  The recommendation order is handed to `ndcg_score` through a strictly
  decreasing synthetic score vector. That vector is sized to the number of
  recommendations actually available, so a list shorter than `k` no longer
  causes a shape mismatch. An empty list scores 0.0.
  """
  relevant = set(actuals)  # constant-time membership instead of scanning `actuals` per item
  top_k = recs[0:k]
  n = len(top_k)
  if n == 0:
    return 0.0
  relevance = np.array([[float(i in relevant) for i in top_k]])
  # n, n-1, ..., 1: higher score for earlier positions keeps the given order.
  score = n - np.arange(n)
  return ndcg_score(relevance, score.reshape(1,-1), k=k)

def recall_stage(model, user_id, user_item_data, ratings_train, N):
  """Return the top-`N` recommended item ids for one user.

  Every item the user has in `ratings_train` is passed to the model as
  `filter_items`, so it is excluded from the recommendations.

  Args:
    model: object exposing `recommend(userid, user_items, filter_items=..., N=...)`
      that returns `(ids, scores)`.
    user_id: user id as stored in `ratings_train['user id']`; it is shifted by -1
      before being used to index `user_item_data` and the model.
    user_item_data: user-by-item matrix, indexed by the shifted user id.
    ratings_train: DataFrame with 'user id' and 'item id' columns.
    N: number of recommendations to request.

  Returns:
    Flat array of recommended item ids, shifted back by +1.
  """
  filter_items = ratings_train[ratings_train['user id']==user_id]['item id'].values
  filter_items = filter_items - 1
  user_id = user_id - 1

  # Use the `N` argument; the previous code read the global `N_recall` and
  # silently ignored what the caller asked for.
  recs, scores = model.recommend(user_id,
                                 user_item_data[user_id],
                                 filter_items=filter_items,
                                 N=N)
  recs = recs.flatten() + 1
  return recs

def evaluate(user_id, ratings_test_pos, recs, k=5):
  """Score `recs` for one user against that user's items in `ratings_test_pos`.

  Returns a `(precision@k, recall@k, DCG@k)` tuple.
  """
  is_user = ratings_test_pos['user id'] == user_id
  actuals = ratings_test_pos.loc[is_user, 'item id'].values
  precision = precision_k(actuals, recs, k)
  recall = recall_k(actuals, recs, k)
  dcg = dcg_k(actuals, recs, k)
  return precision, recall, dcg

In [None]:
# Fetch 30 MF candidates for one user, then score the first 20 of them
# against that user's liked items in the test split.
N_recall = 30
user_id = 1
recs = recall_stage(model, user_id, user_item_data, ratings_train, N=N_recall)
evaluate(user_id, ratings_test_pos, recs, k=20)

(0.2, 0.26666666666666666, 1.8389804011272912)

## Ranking (GPT)

In [None]:
pip install langchain openai==0.27.0

Collecting langchain
  Downloading langchain-0.1.0-py3-none-any.whl (797 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m798.0/798.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting openai==0.27.0
  Downloading openai-0.27.0-py3-none-any.whl (70 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m70.1/70.1 kB[0m [31m4.8 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.3-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.9 (from langchain)
  Downloading langchain_community-0.0.11-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1.7 (from langchain)
  Downloading langchain_core-0.1.8-py3-none-any.whl (215 kB)
[

In [None]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.chains import LLMChain
import openai
import os
from google.colab import userdata

# Only read the key from Colab's secret store when the environment does not
# already provide one.
if os.environ.get("OPENAI_API_KEY") is None:
    os.environ["OPENAI_API_KEY"] = userdata.get('OPENAI_API_KEY')

In [None]:
# Chat model used for the ranking stage; temperature 0.0 is requested for the calls.
llm_model = "gpt-3.5-turbo"
llm = ChatOpenAI(temperature=0.0, model=llm_model)

# Prompt template. `{movies_liked}`, `{movies_disliked}` and `{movies_candidates}`
# are filled in per request; `{{` / `}}` are literal braces.
# The backslashes inside the JSON sketch are written as `\\` so they are valid
# escape sequences; the resulting prompt text is unchanged (a single backslash).
# A backslash at the very end of a line is a line continuation and is kept as is.
prompt = ChatPromptTemplate.from_template(
"""The person has a list of liked movies: {movies_liked}. \
The person has a list of disliked movies: {movies_disliked}. \
Tell me if this person likes each of the candidate movies: {movies_candidates}.\
Return a list of boolean values and explain why the person likes or dislikes.

<< FORMATTING >>
Return a markdown code snippet with a list of JSON object formatted to look like:
{{
    "title": string \\ the name of the movie in candidate movies
    "like": boolean \\ true or false
    "explanation": string \\ explain why the person likes or dislikes the candidate movie
}}


REMEMBER: Each boolean and explanation for each element in candidate movies.
REMEMBER: The explanation must relate to the person's liked and disliked movies.
"""
)

# Chain that formats the prompt and sends it to the chat model.
chain = LLMChain(llm=llm, prompt=prompt)

  warn_deprecated(


In [None]:
def _parse_llm_json(result):
  """Turn the chat model's reply into a list of JSON objects (dicts)."""
  text = result.strip()
  # The prompt asks for a markdown code snippet, so drop a surrounding
  # ``` / ```json fence when the model actually produces one.
  if text.startswith('```'):
    text = text[3:]
    if text.endswith('```'):
      text = text[:-3]
    if text[:4].lower() == 'json':
      text = text[4:]
    text = text.strip()
  try:
    parsed = json.loads(text)
  except json.JSONDecodeError:
    # Not a single JSON document: fall back to the previous handling of bare
    # objects separated by commas with no enclosing list.
    flat = text.replace('\n', '').replace('},', '}\n,')
    return [json.loads(chunk) for chunk in flat.split('\n,')]
  return parsed if isinstance(parsed, list) else [parsed]


def ranking_stage(chain, user_id, ratings_train, pre_recs, movie, batch_size=10):
  """Ask the LLM chain whether the user would like each recalled movie and re-rank.

  Args:
    chain: object exposing `run(movies_liked=..., movies_disliked=..., movies_candidates=...)`
      that returns the model's reply as a string.
    user_id: user whose training ratings are used as liked/disliked examples.
    ratings_train: DataFrame with 'user id', 'title' and boolean 'like' columns.
    pre_recs: item ids produced by the recall stage, in recall order.
    movie: DataFrame with 'item id' and 'title' columns.
    batch_size: number of candidate titles sent per chain call.

  Returns:
    DataFrame built from the parsed replies plus an 'item id' column, with the
    rows the model marked as liked first and the remaining rows after them;
    the recall order is kept inside each group.

  Raises:
    ValueError: if a reply does not contain exactly one object per candidate in
      the batch, since item ids are attached to replies by position.
  """
  few_shot = ratings_train[(ratings_train['user id']==user_id)]
  # Cap the history put into the prompt at 300 titles (seeded sample).
  if len(few_shot) >= 300:
    few_shot = few_shot.sample(300, random_state=42)
  recall_recs = movie.set_index('item id').loc[pre_recs].reset_index()

  movies_liked = ','.join(few_shot[few_shot['like']]['title'].values.tolist())
  movies_disliked = ','.join(few_shot[~few_shot['like']]['title'].values.tolist())

  n_batch = int(np.ceil(len(recall_recs)/batch_size))
  candidates = recall_recs[['item id', 'title']]
  result_json = []

  for i in range(n_batch):
    candidates_batch = candidates.iloc[i*batch_size: (i+1)*batch_size]
    movies_candidates = ','.join(candidates_batch['title'].values.tolist())
    result = chain.run(movies_liked=movies_liked, movies_disliked=movies_disliked, movies_candidates=movies_candidates)
    result_json_batch = _parse_llm_json(result)
    # Fail at the offending batch rather than mis-assigning item ids later.
    if len(result_json_batch) != len(candidates_batch):
      raise ValueError(
        'LLM returned %d results for %d candidates in batch %d'
        % (len(result_json_batch), len(candidates_batch), i))
    result_json.extend(result_json_batch)

  result_rank = pd.DataFrame.from_dict(result_json)
  # Replies are matched to candidates by position.
  result_rank['item id'] = recall_recs['item id'].values
  result_rank = pd.concat([result_rank[result_rank['like']], result_rank[~result_rank['like']]])

  return result_rank

In [None]:
# Re-rank the recalled candidates for the current user with the LLM chain.
rank_result = ranking_stage(chain, user_id, ratings_train, recs, movie)
rank_recs = rank_result['item id'].to_numpy()

# evaluate() returns (precision@k, recall@k, DCG@k); the third value is stored as `ndcg`.
p, r, ndcg = evaluate(user_id, ratings_test_pos, rank_recs, k=5)

In [None]:
# Side-by-side metrics at k=20: MF recall order first, LLM re-ranked order second.
k = 20
(
    evaluate(user_id, ratings_test_pos, recs, k),
    evaluate(user_id, ratings_test_pos, rank_recs, k),
)

((0.2, 0.26666666666666666, 1.8389804011272912),
 (0.25, 0.3333333333333333, 2.083630943245517))

## Evaluation


In [None]:
import joblib

In [None]:
# Number of distinct users with at least one liked item, in train and in test.
tuple(frame['user id'].nunique() for frame in (ratings_train_pos, ratings_test_pos))

(942, 854)

In [None]:
# True when every user with a liked test item also has a liked training item.
set(ratings_test_pos['user id'].unique()) <= set(ratings_train_pos['user id'].unique())

True

### Generate recs for sample users

In [None]:
# Draw 20 distinct evaluation users from those with liked test items (seeded).
np.random.seed(42)
candidate_users = np.unique(ratings_test_pos['user id'])  # sorted unique user ids
eval_users = np.random.choice(candidate_users, size=20, replace=False)
eval_users

array([ 73, 474, 221, 236, 875, 591, 308, 325, 399, 891, 776, 695, 771,
       345, 342, 568, 483, 628, 450, 929])

Get recall recs...

In [None]:
# Run the recall stage for every evaluation user and keep the item ids per user.
N_recall = 30
recall_results_dict = {}
for user_id in eval_users:
  recs = recall_stage(model, user_id, user_item_data, ratings_train, N_recall)
  recall_results_dict[user_id] = recs

# Persist the recall output so later cells can reload it.
path = 'drive/MyDrive/colab/'
joblib.dump(recall_results_dict, f'{path}recall_results_dict.pkl')

['drive/MyDrive/colab/recall_results_dict.pkl']

Get ranking recs...

In [None]:
# Ranking stage for every evaluation user, with running metrics printed per user.
rank_results_dict = {}
# Per-user result frames are collected here and concatenated once after the
# loop, instead of growing a DataFrame with pd.concat on every iteration.
rank_result_frames = []

eval_recall = []
eval_rank = []
k = 20

# enumerate() supplies the progress index directly. The previous
# np.where(eval_users==user_id) lookup only works while `eval_users` is a numpy
# array and fails once it has been turned into a plain list.
for i, user_id in enumerate(eval_users):
  print(i, user_id)
  recs = recall_results_dict.get(user_id)
  rank_result = ranking_stage(chain, user_id, ratings_train, recs, movie)
  rank_result['user id'] = user_id
  rank_result['rank'] = list(range(len(rank_result)))

  rank_recs = rank_result['item id'].values
  rank_results_dict.update({user_id: rank_recs})
  rank_result_frames.append(rank_result)

  # evaluate() returns (precision@k, recall@k, DCG@k); DCG is stored under 'n'.
  p, r, n = evaluate(user_id, ratings_test_pos, recs, k)
  eval_recall.append({'p': p, 'r': r, 'n': n})
  p_rank, r_rank, n_rank = evaluate(user_id, ratings_test_pos, rank_recs, k)
  eval_rank.append({'p': p_rank, 'r': r_rank, 'n': n_rank})

  eval_recall_df = pd.DataFrame.from_records(eval_recall)
  eval_rank_df = pd.DataFrame.from_records(eval_rank)

  # Running means over the users processed so far: recall order vs re-ranked order.
  print(eval_recall_df.mean().values, eval_rank_df.mean().values)

rank_results_raw = pd.concat(rank_result_frames) if rank_result_frames else pd.DataFrame()

joblib.dump(rank_results_dict, path+'rank_results_dict.pkl')
rank_results_raw.to_csv(path+'rank_results_raw.csv', index=False)

0 73
[0.05       0.2        0.23981247] [0.05 0.2  0.25]
1 474
[0.15       0.225      1.21504605] [0.15       0.225      1.21599661]
2 221
[0.1       0.15      0.8100307] [0.1        0.15       0.81066441]
3 236
[0.075      0.1125     0.60752302] [0.075      0.1125     0.60799831]
4 875
[0.08       0.19       0.60027205] [0.07       0.14       0.54949162]
5 591
[0.075      0.21388889 0.56470217] [0.06666667 0.17222222 0.52968911]
6 308
[0.07142857 0.19285714 0.53491718] [0.07142857 0.16666667 0.54980338]
7 325
[0.0875     0.24017857 0.67777413] [0.08125    0.19940476 0.69116253]
8 399
[0.07777778 0.21349206 0.6024659 ] [0.07222222 0.17724868 0.61436669]
9 891
[0.08       0.25880952 0.68090459] [0.075      0.22619048 0.69599768]
10 776
[0.08181818 0.26125541 0.68515601] [0.08181818 0.24458874 0.73722856]
11 695
[0.07916667 0.26031746 0.65057952] [0.08333333 0.26587302 0.71986595]
12 771
[0.07692308 0.25567766 0.6236911 ] [0.08846154 0.29157509 0.73627154]
13 345
[0.08214286 0.25272109 0

### Consolidate results

In [None]:
import joblib
# Reload the artefacts written by the recall and ranking cells.
path = 'drive/MyDrive/colab/'

recall_results_dict = joblib.load(f'{path}recall_results_dict.pkl')
rank_results_dict = joblib.load(f'{path}rank_results_dict.pkl')
rank_results_raw = pd.read_csv(f'{path}rank_results_raw.csv')

# The evaluation users are the keys of the saved ranking results.
eval_users = list(rank_results_dict)

In [None]:
pd.set_option("display.precision", 4)

# One row per method; three metric columns are appended for every cutoff k.
k_val = [5, 10, 15, 20]
eval_results = pd.DataFrame({'Method': ['MF', 'MF+GPT']})

for k in k_val:

  eval_recall, eval_rank = [], []

  for user_id in eval_users:

    recall_recs = recall_results_dict.get(user_id)
    rank_recs = rank_results_dict.get(user_id)

    # evaluate() returns (precision@k, recall@k, DCG@k).
    p, r, d = evaluate(user_id, ratings_test_pos, recall_recs, k)
    p_rank, r_rank, d_rank = evaluate(user_id, ratings_test_pos, rank_recs, k)

    eval_recall.append(dict(p=p, r=r, d=d))
    eval_rank.append(dict(p=p_rank, r=r_rank, d=d_rank))

  # Average over users, rounded to four decimals.
  eval_recall = pd.DataFrame.from_records(eval_recall).mean().round(4)
  eval_rank = pd.DataFrame.from_records(eval_rank).mean().round(4)
  eval_all = pd.DataFrame({
      f'P@{k}': [eval_recall['p'], eval_rank['p']],
      f'R@{k}': [eval_recall['r'], eval_rank['r']],
      f'DCG@{k}': [eval_recall['d'], eval_rank['d']],
  })

  eval_results = pd.concat([eval_results, eval_all], axis=1)

eval_results.to_csv(f'{path}eval.csv', index=False)
eval_results

Unnamed: 0,Method,P@5,R@5,DCG@5,P@10,R@10,DCG@10,P@15,R@15,DCG@15,P@20,R@20,DCG@20
0,MF,0.13,0.1299,0.3948,0.115,0.2117,0.5583,0.1067,0.2661,0.6768,0.095,0.3092,0.7473
1,MF+GPT,0.16,0.1502,0.4823,0.125,0.2176,0.6223,0.1133,0.2673,0.7405,0.1025,0.317,0.8225


## Explanation Example

In [None]:
# Pick the second evaluation user for the worked example.
user_id = eval_users[1]
mf_items = recall_results_dict.get(user_id)
# MF rank is simply the position of the item in the recall output.
recall_results = pd.DataFrame({'item id': mf_items,
                               'MF rank': np.arange(len(mf_items))})
# Item ids this user liked in the test split.
liked_by_user = (ratings_test['user id'] == user_id) & ratings_test['like']
actuals = ratings_test.loc[liked_by_user, 'item id'].values

In [None]:
# Build the per-user explanation table: LLM output joined with the MF rank.
rank_result = (
    rank_results_raw[rank_results_raw['user id'] == user_id]
    .drop(columns=['user id'])
    .rename(columns={'rank': 'MF+GPT rank', 'like': 'GPT like'})
    .merge(recall_results, on='item id')
)
# Flag the candidates that the user liked in the test split.
rank_result['actual like'] = rank_result['item id'].isin(actuals)
cols = ['title', 'explanation', 'MF+GPT rank']
rank_result[cols].to_csv(f'{path}user_1_demo.csv', index=False)
rank_result[cols]

Unnamed: 0,title,explanation,MF+GPT rank
0,"Right Stuff, The (1983)",The person likes this movie because they enjoy...,0
1,His Girl Friday (1940),The person likes this movie because they appre...,1
2,Harold and Maude (1971),The person likes this movie because they enjoy...,2
3,Sling Blade (1996),The person likes this movie because they appre...,3
4,Good Will Hunting (1997),The person likes this movie because they enjoy...,4
5,My Life as a Dog (Mitt liv som hund) (1985),The person likes this movie because they appre...,5
6,"Birds, The (1963)",The person likes this movie because they enjoy...,6
7,Mr. Holland's Opus (1995),The person likes this movie because they appre...,7
8,"Full Monty, The (1997)","The person likes 'Full Monty, The (1997)' beca...",8
9,Much Ado About Nothing (1993),The person likes 'Much Ado About Nothing (1993...,9
