In [13]:
import cornac
import pandas as pd

from cornac.data.reader import read_text

In [14]:
# Show which cornac release this notebook was executed with.
cornac.__version__

'1.14.2'

In [15]:
# Input files, given as paths relative to the notebook's working directory.
# REVIEWS_FILE_PATH is loaded with pandas further down to obtain the review text;
# TRAINING_FILE_PATH is parsed by cornac's Reader as (user, item, rating) rows.
REVIEWS_FILE_PATH = "../../data/original/wine_reviews.csv"
TRAINING_FILE_PATH = "../../data/train_ratings_seen.csv"

In [16]:
# Parse the training CSV into (user, item, rating) tuples.
# The first line of the file is skipped (presumably a header row - confirm against the file).
reader = cornac.data.Reader()
ratings = reader.read(
    TRAINING_FILE_PATH,
    fmt="UIR",
    sep=",",
    skip_lines=1,
)

In [17]:
# Tabular view of the parsed ratings.
train_df = pd.DataFrame(ratings, columns=["user_id", "item_id", "ratings"])

# Tag every rating with a unit weight, then add the weights up per user to get
# the number of ratings each user contributed.
count_user = train_df.assign(count=1)
counted = count_user.groupby("user_id", as_index=False)["count"].sum()

# Users with fewer than 3 ratings are collected for removal.
low_activity_mask = counted["count"] < 3
to_remove_users = counted.loc[low_activity_mask, "user_id"].unique()

In [26]:
# Number of distinct item ids in the training frame (missing values, if any, count as one value).
train_df["item_id"].nunique(dropna=False)

39520

In [18]:
# Size of the rating list before the low-activity users are filtered out in the following cell.
len(ratings)

119922

In [19]:
# Drop every rating whose user id (first tuple field) is listed in `to_remove_users`.
# `to_remove_users` is a NumPy array, and `x in array` compares x against every
# element for each rating. Converting it to a set once gives the same
# membership result with a hash lookup per rating instead of a full scan.
users_to_drop = set(to_remove_users)
ratings = [r for r in ratings if r[0] not in users_to_drop]

# Display how many ratings remain after filtering.
len(ratings)

119921

In [22]:
# (user, item) pairs from the filtered ratings that still need a review text.
# Pairs are removed from this set as soon as a matching review row is found.
train_set = {(r[0], r[1]) for r in ratings}

review_df = pd.read_csv(REVIEWS_FILE_PATH)

reviews = []
# Columns are accessed by position: the first two are matched against the
# (user, item) pairs and the third is used as the review text
# (assumes the CSV column order is user, item, text - confirm against the file header).
for row in review_df.itertuples(index=False, name=None):
    key = (str(row[0]), str(row[1]))
    if key not in train_set:
        continue
    # Only the first review row for a pair is kept; later duplicates no longer match.
    reviews.append((key[0], key[1], str(row[2])))
    train_set.remove(key)

# Every pair left without a review gets a placeholder text.
reviews.extend((user, item, 'dummy') for user, item in train_set)

In [33]:
# Wrap the (user, item, text) triples in a cornac review modality.
# The tokenizer is created separately so its configuration is visible on its own line.
review_tokenizer = cornac.data.text.BaseTokenizer(stop_words="english")
review_modality = cornac.data.ReviewModality(
    data=reviews,
    tokenizer=review_tokenizer,
    max_vocab=3000,
    max_doc_freq=0.5,
)

In [35]:
# Shared run settings.
SEED = 42
VERBOSE = True

# Per-user stratified train/test split (10% of the ratings go to the test set),
# with the review modality attached.
# NOTE: the name `eval` shadows Python's built-in `eval`; it is kept because the
# later experiment cell passes `eval` to cornac.Experiment.
eval = cornac.eval_methods.StratifiedSplit(
    ratings,
    fmt="UIR",
    group_by="user",
    test_size=0.1,
    rating_threshold=1.0,
    exclude_unknowns=True,
    review_text=review_modality,
    seed=SEED,
    verbose=VERBOSE,
)

# Empty mapping: no pretrained word vectors are supplied to the model.
pretrained_word_embeddings = {}

# NARRE hyperparameters gathered in one place, then unpacked into the constructor.
narre_params = dict(
    embedding_size=100,
    id_embedding_size=32,
    n_factors=32,
    attention_size=16,
    kernel_sizes=[3],
    n_filters=64,
    dropout_rate=0.5,
    max_text_length=50,
    batch_size=64,
    max_iter=10,
    init_params={'pretrained_word_embeddings': pretrained_word_embeddings},
    verbose=VERBOSE,
    seed=SEED,
)
narre = cornac.models.NARRE(**narre_params)

# One rating metric (RMSE) plus ranking metrics; the cut-off metrics share TOP_K.
TOP_K = 20
metrics = [
    cornac.metrics.RMSE(),
    cornac.metrics.FMeasure(k=TOP_K),
    cornac.metrics.AUC(),
    cornac.metrics.MRR(),
    cornac.metrics.NCRR(k=TOP_K),
    cornac.metrics.NDCG(k=TOP_K),
    cornac.metrics.Recall(k=TOP_K),
]

# Train the model and evaluate it on the split.
experiment = cornac.Experiment(
    eval_method=eval,
    models=[narre],
    metrics=metrics,
    user_based=True,
    verbose=VERBOSE,
)
experiment.run()

rating_threshold = 1.0
exclude_unknowns = True
---
Training data:
Number of users = 5065
Number of items = 36663
Number of ratings = 105949
Max rating = 5.0
Min rating = 1.0
Global mean = 3.9
---
Test data:
Number of users = 4725
Number of items = 6104
Number of ratings = 11044
Number of unknown users = 0
Number of unknown items = 0
---
Total users = 5065
Total items = 36663

[NARRE] Training started!
Number of OOV words: 404
Model: "model_7"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_user_review (InputLayer)  [(None, None, 50)]   0                                            
__________________________________________________________________________________________________
input_item_review (InputLayer)  [(None, None, 50)]   0                                            
__________________________________________________________

100%|██████████| 10/10 [7:09:40<00:00, 2578.05s/it, best_epoch=None, best_val_loss=1e+9, loss=0.463, val_loss=0] 


Learning completed!

[NARRE] Evaluation started!


Rating: 100%|██████████| 11044/11044 [00:00<00:00, 26066.96it/s]
Ranking: 100%|██████████| 4725/4725 [01:26<00:00, 54.90it/s]


AttributeError: Can't pickle local object 'make_gradient_clipnorm_fn.<locals>.<lambda>'

In [52]:
# Re-run the evaluation with the NARRE model that was already fitted above.
# `trainable` has to be False before the experiment starts, otherwise cornac
# fits the model again (roughly 7 hours in the earlier run). The notebook
# originally set this flag in a cell located further down that had been executed
# first (In [51] before In [52]), so a top-to-bottom run would have retrained.
# Setting it here makes the cell self-contained.
narre.trainable = False

cornac.Experiment(
    eval_method=eval,
    models=[narre],
    metrics=metrics,
    user_based=True,
    verbose=VERBOSE,
).run()


[NARRE] Training started!

[NARRE] Evaluation started!


Rating: 100%|██████████| 11044/11044 [00:00<00:00, 36901.14it/s]
Ranking: 100%|██████████| 4725/4725 [01:09<00:00, 68.12it/s]


TEST:
...
      |   RMSE |    AUC |  F1@20 |    MRR | NCRR@20 | NDCG@20 | Recall@20 | Train (s) | Test (s)
----- + ------ + ------ + ------ + ------ + ------- + ------- + --------- + --------- + --------
NARRE | 0.5749 | 0.5065 | 0.0001 | 0.0003 |  0.0000 |  0.0001 |    0.0003 |    0.0009 |  70.4030






In [51]:
# Flag the model as not trainable; per cornac's `trainable` option the model is
# then treated as already trained when it is passed to an experiment.
# NOTE(review): this cell was executed as In [51], i.e. before the experiment
# re-run (In [52]) that appears above it - the notebook order does not match
# the execution order.
narre.trainable = False

In [49]:
# NOTE(review): `i` is not defined in any cell visible in this notebook; this
# looks like a leftover debugging check - confirm where `i` comes from or remove.
len(i)

36663

In [50]:
# NOTE(review): `j` is not defined in any cell visible in this notebook; this
# looks like a leftover debugging check - confirm where `j` comes from or remove.
len(j)

36663

In [73]:
# NOTE(review): `rev` is not assigned on `narre` in any visible cell; presumably
# an attribute inspected while debugging - confirm its origin or remove this cell.
narre.rev

(32, 1)