In [1]:
import os
import sys
import numpy as np
import pandas as pd
import tensorflow as tf
from scipy.sparse import csr_matrix
from numpy import random as npr
from sklearn.model_selection import train_test_split

# Put the directory two levels above the working directory on the import path
# (once only) so that the local `modules` package can be imported below.
module_path = os.path.abspath(os.path.join(os.pardir, os.pardir))
already_importable = module_path in sys.path
if not already_importable:
    sys.path.append(module_path)
from modules import models


In [2]:
# Notebook-wide configuration: one seed for every random number generator and
# the root folder that holds the datasets.
SEED = 2021
DATASETS_PATH = "../../datasets"

# Seed NumPy's global RNG (used when no explicit random_state is passed around).
npr.seed(SEED)
# Also seed TensorFlow: the model is trained with TensorFlow, and seeding NumPy
# alone does not make TensorFlow's own random ops (e.g. weight initialisation,
# dropout) repeatable across "Restart & Run All".
tf.compat.v1.set_random_seed(SEED)

In [3]:
# Load the "::"-delimited ratings file (column names are supplied here), then in
# a single chain:
#   * replace the raw item / user ids by their category codes, and
#   * rescale the rating by dividing by 5.
rating_df = (
    pd.read_table(
        f"{DATASETS_PATH}/ml-1m/ratings.dat",
        sep="::",
        names=["user_id", "item_id", "rating", "timestamp"],
        engine='python',
    )
    .assign(
        item_id=lambda frame: frame["item_id"].astype('category').cat.codes,
        user_id=lambda frame: frame["user_id"].astype('category').cat.codes,
        rating=lambda frame: frame["rating"] / 5,
    )
)
rating_df.head()

DeepCTR-PyTorch version 0.2.7 detected. Your version is 0.2.5.
Use `pip install -U deepctr-torch` to upgrade.Changelog: https://github.com/shenweichen/DeepCTR-Torch/releases/tag/v0.2.7


Unnamed: 0,user_id,item_id,rating,timestamp
0,0,1104,1.0,978300760
1,0,639,0.6,978302109
2,0,853,0.6,978301968
3,0,3177,0.8,978300275
4,0,2162,1.0,978824291


In [4]:
# Cast every column to categorical so describe() reports count / unique / top /
# freq for each column instead of numeric summary statistics.
rating_df.astype("category").describe()

Unnamed: 0,user_id,item_id,rating,timestamp
count,1000209,1000209,1000209.0,1000209
unique,6040,3706,5.0,458455
top,4168,2651,0.8,975528402
freq,2314,3428,348971.0,30


In [5]:
# Column-wise maxima. user_id and item_id are zero-based category codes, so each
# maximum should be one less than the corresponding number of distinct ids.
rating_df.max()

user_id      6.039000e+03
item_id      3.705000e+03
rating       1.000000e+00
timestamp    1.046455e+09
dtype: float64

In [6]:
def _ratings_to_csr(ratings, shape):
    """Build a CSR user x item matrix from a long-format ratings frame.

    Row indices come from ``user_id``, column indices from ``item_id`` and the
    stored values from ``rating``. Columns are looked up by name, so their
    position inside ``ratings`` does not matter.
    """
    values = ratings["rating"].to_numpy()
    rows = ratings["user_id"].to_numpy()
    cols = ratings["item_id"].to_numpy()
    return csr_matrix((values, (rows, cols)), shape=shape)


def transform_long_table_to_sparse_matrix(df, test_size, random_state=None):
    """Split a long ratings table into train / test sparse matrices.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``user_id``, ``item_id`` and ``rating``.
        The matrix shape is derived from the number of *distinct* ids, so the
        ids have to be zero-based consecutive integer codes; otherwise scipy
        rejects indices that fall outside the shape.
    test_size : float or int
        Passed unchanged to ``train_test_split``.
    random_state : optional
        Passed unchanged to ``train_test_split``. The default ``None`` keeps
        the previous behaviour (the split draws from NumPy's global RNG).

    Returns
    -------
    tuple
        ``(train_matrix, test_matrix, n_users, n_items)`` where both matrices
        are in DOK format with shape ``(n_users, n_items)``.
    """
    n_users = df["user_id"].unique().shape[0]
    n_items = df["item_id"].unique().shape[0]
    shape = (n_users, n_items)

    train_data, test_data = train_test_split(df, test_size=test_size, random_state=random_state)

    # Vectorised construction: no Python-level loop over the rows, and no
    # dependence on the positional order of the columns.
    train_matrix = _ratings_to_csr(train_data, shape)
    test_matrix = _ratings_to_csr(test_data, shape)

    print("Load data finished. Number of users:", n_users, "Number of items:", n_items)
    return train_matrix.todok(), test_matrix.todok(), n_users, n_items

In [7]:
def train_test_autorec(rating_df, **kwargs):
    """Split the ratings, build an ``IAutoRec`` model and train / evaluate it.

    Parameters
    ----------
    rating_df : pandas.DataFrame
        Long-format ratings table handed to
        ``transform_long_table_to_sparse_matrix`` (10% of the rows are held
        out for testing).
    **kwargs
        Extra keyword arguments forwarded to ``models.IAutoRec``. ``epoch``
        defaults to 500 when the caller does not supply it. Previously these
        arguments were accepted but silently ignored.

    Returns
    -------
    tuple
        ``(model, log)`` where ``log`` is whatever ``model.execute`` returns.

    NOTE(review): the TensorFlow session is used as a context manager, so it is
    closed before this function returns; the returned ``model`` still refers to
    that session. Confirm callers only rely on ``log`` / static attributes.
    """
    train_matrix, test_matrix, n_users, n_items = transform_long_table_to_sparse_matrix(rating_df, 0.1)

    # Caller-supplied options take precedence over the historical default.
    model_options = {"epoch": 500, **kwargs}

    config = tf.compat.v1.ConfigProto()
    # Let TensorFlow grow its GPU memory use on demand rather than reserving it all.
    config.gpu_options.allow_growth = True
    with tf.compat.v1.Session(config=config) as sess:
        model = models.IAutoRec(sess, n_users, n_items, **model_options)
        model.build_network()
        print("Network built")
        log = model.execute(train_matrix, test_matrix)
    return model, log

In [8]:
%%time
# Re-seed NumPy's global RNG right before training so that the random
# train/test split drawn inside train_test_autorec is the same no matter which
# cells ran (or were re-run) before this one.
npr.seed(SEED)

# Runs the full split -> build -> train pipeline; %%time reports how long it took.
autorec_model, autorec_log = train_test_autorec(rating_df)

Load data finished. Number of users: 6040 Number of items: 3706
IAutoRec.
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


2021-08-07 16:45:52.073064: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 AVX512F FMA
2021-08-07 16:45:52.079228: I tensorflow/core/platform/profile_utils/cpu_utils.cc:102] CPU Frequency: 2494090000 Hz
2021-08-07 16:45:52.080387: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f9fb0000b60 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-08-07 16:45:52.080411: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
2021-08-07 16:45:52.082578: W tensorflow/stream_executor/platform/default/dso_loader.cc:55] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2021-08-07 16:45:52.082601: E tensorflow/stream_executor/cuda/cuda_driver.cc:313] failed call to cuInit: UNKNOWN ERROR (303)
2021-08-07 16:45:52.082627: I tenso

Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Network built
