# Data Preparation for Recommender Systems 

1. Loading and Exploring the Dataset 
2. Data Preprocessing 
3. Creating User-Item Interaction Matrix 
4. Converting the Data into TensorFlow-friendly Format 


In [5]:
# !pip install tensorflow-datasets

import tensorflow as tf
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import scipy.sparse as sp

## Loading and Exploring the Dataset 


In [None]:
# Load the dataset
dataset = tfds.load('movielens/100k-ratings', split='train')

## Data Preprocessing

In [7]:
# Convert the TensorFlow dataset to a pandas DataFrame
dataset = tfds.as_dataframe(dataset)
# Handling missing values
dataset = dataset.fillna(0)
# Encoding categorical variables
label_encoder = LabelEncoder()
dataset['user_id'] = label_encoder.fit_transform(dataset['user_id'])
dataset['item_id'] = label_encoder.fit_transform(dataset['item_id'])
# Splitting the dataset into training and testing sets
train_data, test_data = train_test_split(dataset, test_size=0.2, random_state=42)


ImportError: Missing optional dependency 'Jinja2'. DataFrame.style requires jinja2. Use pip or conda to install Jinja2.

## Creating User-Item Interaction Matrix

In [None]:
# Creating the user-item interaction matrix
num_users = dataset['user_id'].nunique()
num_items = dataset['item_id'].nunique()
user_item_matrix = sp.dok_matrix((num_users, num_items), dtype=np.float32)
for row in train_data.itertuples():
    user_item_matrix[row.user_id, row.item_id] = row.rating
# Handling sparse matrices
user_item_matrix = user_item_matrix.tocsr()


## Converting the Data into TensorFlow-friendly Format

In [None]:
# Representing the user-item matrix as a TensorFlow SparseTensor
sparse_tensor = tf.sparse.SparseTensor(indices=np.vstack([user_item_matrix.row, user_item_matrix.col]).T,
                                      values=user_item_matrix.data,
                                      dense_shape=user_item_matrix.shape)
