In [None]:
#import library

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [None]:
# Load the full ratings file, then keep only the first 800 records as the working set
raw_data = pd.read_csv('eco_rating.csv')
data = raw_data.head(800)
data

Unnamed: 0,user_id,place_id,user_rating
0,1,28,2
1,1,86,4
2,1,94,4
3,1,146,3
4,2,17,2
...,...,...,...
795,145,170,3
796,145,13,3
797,145,35,2
798,146,40,5


In [None]:
# Shuffle and hold out 20% of the working set for testing; the fixed seed keeps the split reproducible
train, test = train_test_split(
    data,
    test_size=0.2,
    shuffle=True,
    random_state=42,
)

In [None]:
# Training split (80% of the working set): 640 rows

train

Unnamed: 0,user_id,place_id,user_rating
264,49,147,3
615,111,118,3
329,61,105,3
342,63,93,4
394,72,155,5
...,...,...,...
71,13,54,3
106,21,90,2
270,50,162,2
435,79,75,5


In [None]:
# Test split (20% of the working set): 160 rows

test

Unnamed: 0,user_id,place_id,user_rating
696,127,3,3
667,121,76,2
63,12,47,4
533,97,27,5
66,12,156,3
...,...,...,...
589,106,62,4
798,146,40,5
744,136,129,5
513,93,64,3


In [None]:
# Count the distinct users and distinct places present in the working set

number_users, number_items = (
    data[column].nunique() for column in ('user_id', 'place_id')
)

number_users, number_items


(146, 181)

In [None]:
# Build the NCF (neural collaborative filtering) model:
# user/place ids -> 16-d embeddings -> concatenated -> regularized MLP -> one rating value.

# An Embedding layer can only look up indices in [0, input_dim), so its size must be
# derived from the LARGEST id value, not from the number of distinct ids (ids may have
# gaps, so nunique() + 10 is not a safe upper bound). The sizes are taken from raw_data
# so that rows outside the first 800 (used later for a single prediction) stay in range.
# NOTE(review): assumes user_id / place_id are non-negative integers - confirm.
user_vocab_size = int(raw_data['user_id'].max()) + 1
item_vocab_size = int(raw_data['place_id'].max()) + 1

user_input = tf.keras.layers.Input(shape=(1,))
item_input = tf.keras.layers.Input(shape=(1,))

user_embedding = tf.keras.layers.Embedding(user_vocab_size, 16)(user_input)
item_embedding = tf.keras.layers.Embedding(item_vocab_size, 16)(item_input)

# Embedding output is (batch, 1, 16); flatten to (batch, 16) before concatenating
user_flat = tf.keras.layers.Flatten()(user_embedding)
item_flat = tf.keras.layers.Flatten()(item_embedding)

concat = tf.keras.layers.Concatenate()([user_flat, item_flat])
dropout_1 = tf.keras.layers.Dropout(0.5)(concat)
dense_1 = tf.keras.layers.Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.01))(dropout_1)
dropout_2 = tf.keras.layers.Dropout(0.3)(dense_1)
dense_2 = tf.keras.layers.Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.L2(0.01))(dropout_2)
# Single linear unit: the rating is predicted as a regression target
output = tf.keras.layers.Dense(1)(dense_2)

model = tf.keras.models.Model(inputs=[user_input, item_input], outputs=output)

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 input_4 (InputLayer)        [(None, 1)]                  0         []                            
                                                                                                  
 embedding_2 (Embedding)     (None, 1, 16)                2496      ['input_3[0][0]']             
                                                                                                  
 embedding_3 (Embedding)     (None, 1, 16)                3056      ['input_4[0][0]']             
                                                                                            

In [None]:
# Configure training: minimize mean squared error with the RMSprop optimizer

model.compile(
    loss='mean_squared_error',
    optimizer='rmsprop',
)

In [None]:
# Train the model on the training split.
# 20% of the training rows are held back by Keras as a validation set.

# Validation loss flattens out after the first few epochs while the training loss keeps
# falling, so stop once val_loss has not improved for 10 epochs and roll back to the
# best weights instead of always running all 100 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Keep the History object so the loss curves can be inspected later
# (this also avoids printing its repr as the cell output).
history = model.fit(
    [train['user_id'], train['place_id']],
    train['user_rating'],
    epochs=100,
    batch_size=32,
    validation_split=0.2,
    callbacks=[early_stopping],
    verbose=2,
)

Epoch 1/100
16/16 - 2s - loss: 12.4589 - val_loss: 9.0308 - 2s/epoch - 94ms/step
Epoch 2/100
16/16 - 0s - loss: 7.0879 - val_loss: 3.9458 - 77ms/epoch - 5ms/step
Epoch 3/100
16/16 - 0s - loss: 3.0148 - val_loss: 2.0850 - 74ms/epoch - 5ms/step
Epoch 4/100
16/16 - 0s - loss: 2.0596 - val_loss: 1.9604 - 59ms/epoch - 4ms/step
Epoch 5/100
16/16 - 0s - loss: 1.8914 - val_loss: 1.8391 - 75ms/epoch - 5ms/step
Epoch 6/100
16/16 - 0s - loss: 1.6839 - val_loss: 1.8351 - 61ms/epoch - 4ms/step
Epoch 7/100
16/16 - 0s - loss: 1.6325 - val_loss: 1.7128 - 59ms/epoch - 4ms/step
Epoch 8/100
16/16 - 0s - loss: 1.4907 - val_loss: 1.6312 - 58ms/epoch - 4ms/step
Epoch 9/100
16/16 - 0s - loss: 1.3563 - val_loss: 1.6123 - 74ms/epoch - 5ms/step
Epoch 10/100
16/16 - 0s - loss: 1.3263 - val_loss: 1.6947 - 59ms/epoch - 4ms/step
Epoch 11/100
16/16 - 0s - loss: 1.2319 - val_loss: 1.5689 - 78ms/epoch - 5ms/step
Epoch 12/100
16/16 - 0s - loss: 1.1637 - val_loss: 1.6477 - 74ms/epoch - 5ms/step
Epoch 13/100
16/16 - 0s -

<keras.src.callbacks.History at 0x7cb2446351b0>

In [None]:
# Evaluate the model on the held-out test split.
#
# model.evaluate() returns the compiled loss. The model is compiled with
# loss='mean_squared_error' and no extra metrics, so that value is an MSE-based loss
# (Keras also adds the layers' L2 regularization penalties to it) - it is NOT the MAE.
# The error metrics are therefore computed explicitly from the predictions.

test_inputs = [test['user_id'], test['place_id']]

test_loss = model.evaluate(test_inputs, test['user_rating'])
test_predictions = model.predict(test_inputs).flatten()

result_mae = mean_absolute_error(test['user_rating'], test_predictions)
result_mse = mean_squared_error(test['user_rating'], test_predictions)

print(f'Test loss (MSE + regularization): {test_loss}')
print(f'MAE: {result_mae}')
print(f'MSE: {result_mse}')
print(f'RMSE: {np.sqrt(result_mse)}')

MAE: 1.67837655544281


In [None]:
# Predict the rating for the record at position 801 of the full data set
# (outside the first 800 records used as the working set)

raw_data_predict = raw_data.iloc[801]
user_id, item_id, rating = (
    raw_data_predict[field] for field in ('user_id', 'place_id', 'user_rating')
)

# The model takes two inputs, each a batch holding one id
single_user_batch = np.array([user_id])
single_item_batch = np.array([item_id])
y_predict = model.predict([single_user_batch, single_item_batch])
print(f'\nNilai prediksi rating untuk USER_ID={user_id} dan ITEM_ID={item_id} adalah: {y_predict[0][0]} \ndengan data sebenarnya yaitu {rating}')


Nilai prediksi rating untuk USER_ID=146 dan ITEM_ID=114 adalah: 2.761202096939087 
dengan data sebenarnya yaitu 3


In [None]:
# MAE (mean absolute error) and RMSE for the single prediction made above.
# With only one sample, both metrics reduce to the absolute error of that prediction.

actual_ratings = [rating]
predicted_ratings = y_predict.flatten()

mae = mean_absolute_error(actual_ratings, predicted_ratings)
rmse = np.sqrt(mean_squared_error(actual_ratings, predicted_ratings))

print(f'MAE: {mae}')
print(f'RMSE: {rmse}')

MAE: 0.23879790306091309
RMSE: 0.23879790306091309
