In [48]:
import pandas as pd
import numpy as np
from zipfile import ZipFile
import tensorflow as tf
tf.config.run_functions_eagerly(True)
from tensorflow import keras
from tensorflow.keras import layers
from pathlib import Path
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [49]:
df= pd.read_csv('data/users-score-2023.csv', usecols=["user_id","anime_id","rating"])
print("Shape of the Dataset:",df.shape)
df.head()

Shape of the Dataset: (24325191, 3)


Unnamed: 0,user_id,anime_id,rating
0,1,21,9
1,1,48,7
2,1,320,5
3,1,49,8
4,1,304,8


In [50]:
user_enc = LabelEncoder()
df['user'] = user_enc.fit_transform(df['user_id'].values)
n_users = df['user'].nunique()
item_enc = LabelEncoder()
df['anime'] = item_enc.fit_transform(df['anime_id'].values)
n_movies = df['anime'].nunique()
df['rating'] = df['rating'].values.astype(np.float32)
min_rating = min(df['rating'])
max_rating = max(df['rating'])

print(
    "Number of users: {}, Number of Movies: {}, Min rating: {}, Max rating: {}".format(
        n_users, n_movies, min_rating, max_rating
    )
)

Number of users: 270033, Number of Movies: 16500, Min rating: 1.0, Max rating: 10.0


In [51]:
df.columns

Index(['user_id', 'anime_id', 'rating', 'user', 'anime'], dtype='object')

In [52]:
X = df[['user', 'anime']].values
y = df['rating'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((21892671, 2), (2432520, 2), (21892671,), (2432520,))

In [53]:
print(X_train[0:3])

[[221051   6988]
 [163968   7314]
 [166296    166]]


In [54]:
EMBEDDING_SIZE = 32

In [55]:
# def init_normal(shape, name=None):
#     return initializations.normal(shape, scale=0.01, name=name)

In [56]:
from keras.models import Model
from keras.layers import Input, Reshape, Dot, Embedding, Flatten, Concatenate, Dense
from keras.regularizers import l2

user_input = Input(shape=(1,))
user_embed = Embedding(n_users+1,
                       EMBEDDING_SIZE)(user_input) #vocab size for users = n unique users = n_users

anime_input = Input(shape=(1, ))
anime_embed = Embedding(n_movies+1,
                       EMBEDDING_SIZE)(anime_input) #vocab size for users = n unique users = n_users(anime_input) #vocab size for anime = n unique anime = n_movies

user_latent = Flatten()(user_embed)
anime_latent = Flatten()(anime_embed)
    
# The 0-th layer is the concatenation of embedding layers
vector = Concatenate(axis=1)([user_latent, anime_latent])

dense1 = keras.layers.Dense(20, activation='relu')(vector)
dense2 = keras.layers.Dense(10, activation='relu')(dense1)
output = Dense(1, activation='sigmoid')(dense2)

model = Model(inputs=[user_input, anime_input], outputs=output)

In [57]:
from keras.optimizers import Adam
opt = Adam(learning_rate=0.001)
model.compile(loss='binary_crossentropy', optimizer=opt)

In [59]:
tf.test.gpu_device_name()

''

In [61]:
history = model.fit([X_train[:,0], X_train[:,1]], y_train, epochs=5, batch_size=128, verbose=1)

Epoch 1/5
    54/171037 [..............................] - ETA: 9:58:53 - loss: -1809.5853

KeyboardInterrupt: 