In [1]:
import cornac
import pandas as pd

from cornac.data.reader import read_text

FM model is only supported on Linux.
Windows executable can be found at http://www.libfm.org.


In [2]:
# Show the installed cornac version so the results below can be tied to a release.
cornac.__version__

'1.14.2'

In [3]:
# Relative path of the ratings CSV that the reader cells below load.
TRAINING_FILE_PATH = "../../data/train_ratings_seen.csv"

In [5]:
# Load the ratings file in cornac's "UIR" layout; the first line of the file is
# skipped (presumably the CSV header -- confirm against the data file).
reader = cornac.data.Reader()
ratings = reader.read(
    TRAINING_FILE_PATH,
    fmt="UIR",
    sep=",",
    skip_lines=1,
)

In [6]:
# Number of rating records returned by the reader.
len(ratings)

119922

In [7]:
# Tabular view of the rating tuples.
train_df = pd.DataFrame(ratings, columns=["user_id", "item_id", "ratings"])

# Tag every rating with a unit weight, then total the weights per user to get
# the number of ratings each user has.
count_user = train_df.assign(count=1)
counted = count_user.groupby("user_id", as_index=False)["count"].sum()

# Users with fewer than 3 ratings are flagged for removal.
too_few_ratings = counted["count"] < 3
to_remove_users = counted.loc[too_few_ratings, "user_id"].unique()

In [8]:
# Drop every rating whose user was flagged for removal.
# `to_remove_users` is an array, so `x in to_remove_users` scans the whole array
# for each rating; hashing the ids into a set once makes each lookup O(1) while
# keeping exactly the same ratings.
users_to_drop = set(to_remove_users)
ratings = [r for r in ratings if r[0] not in users_to_drop]

# Number of ratings left after the filter.
len(ratings)

119921

In [9]:
# Settings shared by the split and the experiment.
VERBOSE = True
SEED = 42

# Stratified train/test split grouped by user, with a 0.1 test fraction.
split_options = {
    "group_by": "user",
    "fmt": "UIR",
    "test_size": 0.1,
    "rating_threshold": 1.0,
    "exclude_unknowns": False,
    "seed": SEED,
    "verbose": VERBOSE,
}
ss = cornac.eval_methods.StratifiedSplit(ratings, **split_options)

# Baseline model under evaluation.
most_pop = cornac.models.MostPop()

# Metrics reported by the experiment; the @k metrics all use k=20.
metrics = [
    cornac.metrics.RMSE(),
    cornac.metrics.FMeasure(k=20),
    cornac.metrics.AUC(),
    cornac.metrics.MRR(),
    cornac.metrics.NCRR(k=20),
    cornac.metrics.NDCG(k=20),
    cornac.metrics.Recall(k=20),
]

# Assemble the experiment, then run it; results are saved under ./run.
experiment = cornac.Experiment(
    eval_method=ss,
    models=[most_pop],
    metrics=metrics,
    user_based=True,
    verbose=VERBOSE,
    save_dir="./run",
)
experiment.run()

rating_threshold = 1.0
exclude_unknowns = False
---
Training data:
Number of users = 5065
Number of items = 36663
Number of ratings = 105949
Max rating = 5.0
Min rating = 1.0
Global mean = 3.9
---
Test data:
Number of users = 5065
Number of items = 8961
Number of ratings = 13972
Number of unknown users = 0
Number of unknown items = 2857
---
Total users = 5065
Total items = 39520

[MostPop] Training started!

[MostPop] Evaluation started!


Rating: 100%|██████████| 13972/13972 [00:00<00:00, 59851.80it/s]
Ranking: 100%|██████████| 5065/5065 [00:23<00:00, 216.67it/s]


TEST:
...
        |   RMSE |    AUC |  F1@20 |    MRR | NCRR@20 | NDCG@20 | Recall@20 | Train (s) | Test (s)
------- + ------ + ------ + ------ + ------ + ------- + ------- + --------- + --------- + --------
MostPop | 1.2645 | 0.6449 | 0.0064 | 0.0212 |  0.0098 |  0.0146 |    0.0286 |    0.0110 |  24.3425






In [10]:
# Re-read the complete ratings file. NOTE: this rebinds `ratings`, replacing the
# filtered list produced earlier in the notebook, so the model below is fitted on
# every rating in the file rather than on the train split.
reader = cornac.data.Reader()
ratings = reader.read(
    TRAINING_FILE_PATH,
    fmt="UIR",
    sep=",",
    skip_lines=1,
)

# Build a cornac dataset from the tuples and refit the baseline on it; the fitted
# model is the cell's displayed value.
full_dataset = cornac.data.Dataset.from_uir(ratings)
most_pop.fit(full_dataset)

<cornac.models.most_pop.recom_most_pop.MostPop at 0x7f9887c52cd0>

In [11]:
# Persist the fitted model under ../../data; the call's return value (shown as the
# cell output) is the path of the pickle file that was written.
most_pop.save("../../data")

'../../data/MostPop/2022-06-21_20-50-16-307424.pkl'