In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split

In [2]:
# Load the raw search log. Later cells read its `user_id` and `search` columns.
search_history = pd.read_csv(
    filepath_or_buffer='csv/search_history.csv',
)

In [3]:
# Turn every distinct search string into a small integer code so it can be
# used as an embedding index later on.
# NOTE(review): pandas assigns the code -1 to missing values; confirm the
# `search` column has no NaNs, since -1 is not a valid embedding index.
search_as_category = search_history['search'].astype('category')
search_history['item_id'] = search_as_category.cat.codes

In [4]:
# Hold out 20% of the interactions for validation; the fixed seed keeps the
# split reproducible across kernel restarts.
train_data, test_data = train_test_split(
    search_history,
    test_size=0.2,
    random_state=30,
)

# Pull the (user, item) id columns out as plain arrays: once for the full log
# and once for each side of the split.
user_ids, item_ids = (
    search_history[column].values for column in ('user_id', 'item_id')
)
train_user_ids, train_item_ids = (
    train_data[column].values for column in ('user_id', 'item_id')
)
test_user_ids, test_item_ids = (
    test_data[column].values for column in ('user_id', 'item_id')
)

In [10]:
# Number of distinct users / items present in the search log.
num_users = search_history['user_id'].nunique()
num_items = search_history['item_id'].nunique()

# The raw `user_id` values are fed straight into the embedding lookup, so the
# table must have (largest id + 1) rows. Sizing it with the number of distinct
# users is only correct when ids are exactly 0..num_users-1; with 1-based or
# sparse ids the lookup would go out of range.
# NOTE(review): if user ids are very sparse/large, re-encode them to contiguous
# codes (as is done for `item_id`) instead of growing the table.
user_vocab_size = max(num_users, int(search_history['user_id'].max()) + 1)
# `item_id` comes from categorical codes, which are contiguous from 0, so the
# distinct count is already the right table size.
item_vocab_size = num_items

# One scalar id per example for each side of the (user, item) pair.
input_users = tf.keras.layers.Input(shape=(1,), name='user_input')
input_items = tf.keras.layers.Input(shape=(1,), name='item_input')

# Learn a 16-dimensional vector per user and per item.
embed_users = tf.keras.layers.Embedding(
    input_dim=user_vocab_size, output_dim=16, name='user_embedding'
)(input_users)
embed_items = tf.keras.layers.Embedding(
    input_dim=item_vocab_size, output_dim=16, name='item_embedding'
)(input_items)

# Drop the length-1 sequence axis: (None, 1, 16) -> (None, 16).
flat_users = tf.keras.layers.Flatten()(embed_users)
flat_items = tf.keras.layers.Flatten()(embed_items)

# Small MLP on top of the concatenated embeddings; the sigmoid output is the
# predicted probability that the user interacts with the item.
concat = tf.keras.layers.Concatenate()([flat_users, flat_items])
dense = tf.keras.layers.Dense(16, activation='relu')(concat)
dense = tf.keras.layers.Dense(8, activation='relu')(dense)
output = tf.keras.layers.Dense(1, activation='sigmoid')(dense)

model = tf.keras.Model([input_users, input_items], output)

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 user_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 item_input (InputLayer)     [(None, 1)]                  0         []                            
                                                                                                  
 user_embedding (Embedding)  (None, 1, 16)                3104      ['user_input[0][0]']          
                                                                                                  
 item_embedding (Embedding)  (None, 1, 16)                320       ['item_input[0][0]']          
                                                                                            

In [11]:
def _sample_negatives(user_ids, num_items, observed_pairs, rng, per_positive=1, max_rounds=10):
    """Draw (user, item) pairs that do not appear in the search log.

    Args:
        user_ids: 1-D array of user ids; each one receives `per_positive`
            sampled items.
        num_items: number of item codes; items are drawn from [0, num_items).
        observed_pairs: set of (user_id, item_id) tuples that must not be
            returned as negatives.
        rng: `numpy.random.Generator` used for sampling.
        per_positive: how many negatives to draw per entry of `user_ids`.
        max_rounds: how many times clashing draws are re-sampled before the
            remaining clashes are dropped.

    Returns:
        Tuple `(neg_user_ids, neg_item_ids)` of equal-length arrays. It may be
        shorter than `len(user_ids) * per_positive` when a user has interacted
        with (almost) every item.
    """
    neg_users = np.repeat(user_ids, per_positive)
    neg_items = rng.integers(0, num_items, size=neg_users.shape[0])

    def _clashes():
        # True where the sampled pair is actually an observed interaction.
        return np.fromiter(
            ((u, i) in observed_pairs
             for u, i in zip(neg_users.tolist(), neg_items.tolist())),
            dtype=bool,
            count=neg_users.shape[0],
        )

    clash = _clashes()
    for _ in range(max_rounds):
        if not clash.any():
            break
        neg_items[clash] = rng.integers(0, num_items, size=int(clash.sum()))
        clash = _clashes()

    keep = ~clash
    return neg_users[keep], neg_items[keep]


# Every logged search is a positive example.
train_labels = np.ones(len(train_user_ids))
test_labels = np.ones(len(test_item_ids))

# Training on positives alone lets the model reach zero loss by always
# predicting 1, so the reported accuracy is meaningless. Add sampled
# (user, item) pairs that never occurred in the log as label-0 examples.
observed_pairs = set(
    zip(search_history['user_id'].tolist(), search_history['item_id'].tolist())
)
neg_rng = np.random.default_rng(30)  # fixed seed so a re-run is reproducible

train_neg_users, train_neg_items = _sample_negatives(
    train_user_ids, num_items, observed_pairs, neg_rng
)
test_neg_users, test_neg_items = _sample_negatives(
    test_user_ids, num_items, observed_pairs, neg_rng
)

# Positives followed by negatives; `fit` shuffles array inputs each epoch by
# default, so the concatenation order does not matter for training.
fit_user_ids = np.concatenate([train_user_ids, train_neg_users])
fit_item_ids = np.concatenate([train_item_ids, train_neg_items])
fit_labels = np.concatenate([train_labels, np.zeros(len(train_neg_users))])

val_user_ids = np.concatenate([test_user_ids, test_neg_users])
val_item_ids = np.concatenate([test_item_ids, test_neg_items])
val_labels = np.concatenate([test_labels, np.zeros(len(test_neg_users))])

model.fit(
    [fit_user_ids, fit_item_ids],
    fit_labels,
    validation_data=([val_user_ids, val_item_ids], val_labels),
    epochs=10,
    batch_size=2,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.src.callbacks.History at 0x7d4a2fd8f9a0>