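# Building a Content-Based Filtering Model

This notebook generates a synthetic user–item rating dataset, encodes the user and item IDs, and builds a small Keras model that predicts a rating from learned user and item embeddings. Note that the model below uses only user and item IDs (no item content features), so strictly speaking it is closer to embedding-based collaborative filtering than to content-based filtering.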

1. Dataset
2. Data Preprocessing
3. Building a Content-Based Filtering Model

In [2]:

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

## Dataset

In [3]:
# Size of the synthetic catalogue: user IDs are drawn from [0, num_users)
# and item IDs from [0, num_items).
num_users = 100
num_items = 50
# Number of (user, item, rating) rows to simulate.
num_interactions = 1000
# Use a seeded, local random generator so that Restart & Run All reproduces
# exactly the same dataset (the seed matches the random_state used for the
# train/test split later in the notebook).
random_seed = 42
rng = np.random.default_rng(random_seed)
# Generate random user-item interactions (upper bounds are exclusive)
user_ids = rng.integers(0, num_users, size=num_interactions)
item_ids = rng.integers(0, num_items, size=num_interactions)
# Simulate ratings on an integer scale of 1 to 5 (6 is the exclusive upper bound)
ratings = rng.integers(1, 6, size=num_interactions)
# Create a pandas DataFrame with one row per simulated interaction
dataset = pd.DataFrame({'user_id': user_ids, 'item_id': item_ids, 'rating': ratings})

## Data Preprocessing

In [4]:

# Encode user and item IDs as contiguous integer indices (0 .. n_classes - 1).
# Each column gets its own encoder: re-fitting a single shared encoder on
# item_id would overwrite the user_id mapping, making it impossible to map
# encoded users back to their original IDs with inverse_transform.
user_encoder = LabelEncoder()
item_encoder = LabelEncoder()
dataset['user_id'] = user_encoder.fit_transform(dataset['user_id'])
dataset['item_id'] = item_encoder.fit_transform(dataset['item_id'])
# Backward-compatible alias: the original cell left `label_encoder` fitted on item IDs.
label_encoder = item_encoder
# Split the dataset into training (80%) and testing (20%) sets; the fixed
# random_state keeps the split reproducible.
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)

## Building a Content-Based Filtering Model

In [5]:

# Two scalar integer inputs: one user index and one item index per example.
user_input = tf.keras.Input(
    shape=(1,),
    dtype=tf.int32,
    name='user_input',
)
item_input = tf.keras.Input(
    shape=(1,),
    dtype=tf.int32,
    name='item_input',
)

# Learn an `embedding_dim`-sized vector for every user index and item index.
# NOTE(review): the model sees only IDs, not item content features.
embedding_dim = 8
user_embedding = tf.keras.layers.Embedding(
    input_dim=num_users,
    output_dim=embedding_dim,
    name='user_embedding',
)(user_input)
item_embedding = tf.keras.layers.Embedding(
    input_dim=num_items,
    output_dim=embedding_dim,
    name='item_embedding',
)(item_input)

# Collapse the length-1 sequence axis: (None, 1, 8) -> (None, 8).
user_flatten = tf.keras.layers.Flatten()(user_embedding)
item_flatten = tf.keras.layers.Flatten()(item_embedding)

# Join the user and item vectors into a single feature vector.
concat = tf.keras.layers.Concatenate()([user_flatten, item_flatten])

# A single linear unit maps the joined vector to the predicted rating.
output = tf.keras.layers.Dense(units=1, activation='linear')(concat)

# Assemble the two-input model and train it as a regression (MSE) problem.
model = tf.keras.Model(
    inputs=[user_input, item_input],
    outputs=output,
)
model.compile(loss='mse', optimizer='adam')
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 user_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 item_input (InputLayer)        [(None, 1)]          0           []                               
                                                                                                  
 user_embedding (Embedding)     (None, 1, 8)         800         ['user_input[0][0]']             
                                                                                                  
 item_embedding (Embedding)     (None, 1, 8)         400         ['item_input[0][0]']             
                                                                                              