# Building a Collaborative Filtering Model

1. Dataset
2. Data Preprocessing
3. Building the Collaborative Filtering Model 
4. Evaluating the Model

In [2]:


import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

## Dataset

In [3]:


# Define the number of users and items
num_users = 100
num_items = 50
# Generate random user-item interactions
user_ids = np.random.randint(0, num_users, size=1000)
item_ids = np.random.randint(0, num_items, size=1000)
# Simulate ratings or preferences (e.g., on a scale of 1 to 5)
ratings = np.random.randint(1, 6, size=1000)
# Create a pandas DataFrame for the dataset
dataset = pd.DataFrame({'user_id': user_ids, 'item_id': item_ids, 'rating': ratings})

## Data Preprocessing

In [4]:


# Encoding categorical variables
label_encoder = LabelEncoder()
dataset['user_id'] = label_encoder.fit_transform(dataset['user_id'])
dataset['item_id'] = label_encoder.fit_transform(dataset['item_id'])
# Splitting the dataset into training and testing sets
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)



## Building the Collaborative Filtering Model

In [5]:
# Define the input layers
user_input = tf.keras.Input(shape=(1,), dtype=tf.int32, name='user_input')
item_input = tf.keras.Input(shape=(1,), dtype=tf.int32, name='item_input')

In [8]:
# Embedding layers for users and items
embedding_dim = 8
user_embedding = tf.keras.layers.Embedding(num_users, embedding_dim)(user_input)
item_embedding = tf.keras.layers.Embedding(num_items, embedding_dim)(item_input)

In [9]:
# Calculate similarities using dot product
user_item_sim = tf.keras.layers.Dot(axes=2, normalize=True)([user_embedding, item_embedding])

In [10]:
# Flatten the output
output = tf.keras.layers.Flatten()(user_item_sim)

## Evaluating the Model

In [11]:
# Create the model
model = tf.keras.Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer='adam', loss='mse')

In [9]:


# Train the model
model.fit([train_data['user_id'], train_data['item_id']], train_data['rating'], epochs=10, batch_size=32)
# Save the model
model.save('my_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:


# Make predictions

predictions = model.predict([test_data['user_id'], test_data['item_id']])





In [15]:
# Evaluate the model

mse = tf.keras.losses.mean_squared_error(test_data['rating'], predictions)

mse_value = mse.numpy()

print("Mean Squared Error:", mse_value)

Mean Squared Error: [12.325531  11.885969  12.436343  13.732897   8.046588  10.68853
 12.318252   9.104243   8.156378  12.613146   8.881644  11.503025
 12.866154  11.405117  10.952876   9.119992  11.183534   8.651856
 14.274805   9.283539  12.988406  13.202881   9.923576  11.062291
 10.530294   8.838253  10.9403105  8.695935  12.305611   9.067198
  9.206889  10.321973  12.547029  10.83472   11.047949   8.927082
 12.201172  13.35196   10.4475355 12.159759   7.876215  15.012119
 10.738297  12.41084   12.061172  12.055454  14.738361  13.77263
 10.451808  11.821681   9.832619  13.417811  11.995871  13.814663
 16.246521  16.07849   10.869428   7.4363346 11.979509  13.319485
 10.120524  13.413506  11.970356  12.305611  13.3474     9.436904
 11.862949  15.506072  11.814264  12.998291  12.762215   9.7165165
 13.019937  10.293037  13.956592   7.7596045 13.54307   10.776119
 15.47196   10.974573  10.228893  13.347617  10.694331   9.665447
 10.515251  12.484907  14.993466  10.693904  11.961169  1