# Building a Collaborative Filtering Model

1. Dataset
2. Data Preprocessing
3. Building the Collaborative Filtering Model 
4. Evaluating the Model

In [1]:


import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

## Dataset

In [2]:


# Define the number of users and items
num_users = 100
num_items = 50
# Generate random user-item interactions
user_ids = np.random.randint(0, num_users, size=1000)
item_ids = np.random.randint(0, num_items, size=1000)
# Simulate ratings or preferences (e.g., on a scale of 1 to 5)
ratings = np.random.randint(1, 6, size=1000)
# Create a pandas DataFrame for the dataset
dataset = pd.DataFrame({'user_id': user_ids, 'item_id': item_ids, 'rating': ratings})

## Data Preprocessing

In [3]:


# Encoding categorical variables
label_encoder = LabelEncoder()
dataset['user_id'] = label_encoder.fit_transform(dataset['user_id'])
dataset['item_id'] = label_encoder.fit_transform(dataset['item_id'])
# Splitting the dataset into training and testing sets
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)



## Building the Collaborative Filtering Model

In [4]:
# Define the input layers
user_input = tf.keras.Input(shape=(1,), dtype=tf.int32, name='user_input')
item_input = tf.keras.Input(shape=(1,), dtype=tf.int32, name='item_input')

In [5]:
# Embedding layers for users and items
embedding_dim = 8
user_embedding = tf.keras.layers.Embedding(num_users, embedding_dim)(user_input)
item_embedding = tf.keras.layers.Embedding(num_items, embedding_dim)(item_input)

In [6]:
# Calculate similarities using dot product
user_item_sim = tf.keras.layers.Dot(axes=2, normalize=True)([user_embedding, item_embedding])

In [7]:
# Flatten the output
output = tf.keras.layers.Flatten()(user_item_sim)

## Evaluating the Model

In [8]:
# Create the model
model = tf.keras.Model(inputs=[user_input, item_input], outputs=output)
model.compile(optimizer='adam', loss='mse')

In [9]:


# Train the model
model.fit([train_data['user_id'], train_data['item_id']], train_data['rating'], epochs=10, batch_size=32)
# Save the model
model.save('my_model.h5')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:


# Make predictions

predictions = model.predict([test_data['user_id'], test_data['item_id']])





In [12]:
# Evaluate the model

mse = tf.keras.losses.mean_squared_error(test_data['rating'], predictions)

mse_value = mse.numpy()

print("Mean Squared Error:", mse_value)

Mean Squared Error: [ 6.5843554  6.1087914  6.390244   6.996768   6.6836724  7.133465
  6.875077   6.507872   8.660327   7.510725   5.9567037  5.7798157
  8.576308   6.9878373  6.8295717  8.422362   6.0703998  6.8721886
  6.933267   6.809137   6.531346   6.928822   5.809286   5.896659
  5.8441143  5.832834   7.077341   6.7604604  5.9947057  7.0401626
  5.953419   7.9779177  6.30525    6.9487433  5.9557357  5.846556
  6.848395   7.333658   8.850171   6.6259     7.374183   7.016434
  7.123144   6.541188   5.8176394  6.3394213  6.2005615  5.9324365
  9.291928   7.1891327  7.427412   7.3894653  6.240869   6.055829
  6.5424104  5.6998305  6.1952863  6.52077    5.715554   8.039052
  6.4908834  7.73167    6.954766   8.047593   6.120841   9.669979
  8.301126   7.0896873  7.0932264  6.685193   6.20922    6.33753
  6.9604387  6.9492016  6.789306   7.2316456  7.4844446  6.6448035
  7.9175453  6.3948364  6.345433   8.489819   6.766022   6.601632
  7.7876415  5.931896   9.027836   5.822621   6.7338