In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Embedding, Flatten, Dense, Concatenate
from tensorflow.keras.models import Model
import csv

# Read the raw event log from disk; the later cells all operate on `df`.
csv_path = "e-commerce-product.csv"
data = pd.read_csv(csv_path)
df = pd.DataFrame(data=data)

ModuleNotFoundError: No module named 'sklearn'

In [None]:
def drop_columns(df, columns=('event_time', 'user_session', 'category_id', 'category_code')):
  """Drop a set of columns from ``df`` in place and report what happened.

  Args:
    df: DataFrame to prune. It is modified in place.
    columns: Labels of the columns to drop. Defaults to the four columns
      this notebook removes ('event_time', 'user_session', 'category_id',
      'category_code').

  Returns:
    The same ``df`` object that was passed in. If any requested column is
    missing, a message is printed and no column is dropped.
  """
  columns = list(columns)
  try:
    df.drop(columns=columns, inplace=True)
    # Build the message from the actual list so it cannot drift from the code.
    print(f"Dropped columns: {', '.join(str(c) for c in columns)}")
  except KeyError as e:
    # drop() raises before anything is removed, so df is still intact here.
    print(f"Error: {e}. At least one requested column does not exist in the DataFrame; nothing was dropped.")
  except Exception as e:
    print(f"An unexpected error occurred: {e}")
  return df

# Prune the frame; drop_columns returns the frame it was given, which is rebound to df.
df = drop_columns(df)

The columns 'event_time' and 'user_session' are dropped


In [None]:
def check_nan_values(df):
  """Print one line per column: the column label followed by its NaN count.

  Args:
    df: DataFrame to inspect. It is not modified.

  Returns:
    None. The counts are only printed.
  """
  for column_name in df.columns:
    missing_count = df[column_name].isna().sum()
    print(column_name, missing_count)

# Print the NaN count of every column in the current frame.
check_nan_values(df)

event_type 0
product_id 0
brand 511091
price 0
user_id 1


In [None]:
# Encode the textual event type as a small integer code.
dict_event_type = {'view': 0, 'cart': 1, 'purchase': 2, 'remove_from_cart': 3}
df['event_type'] = df['event_type'].map(dict_event_type)
# map() yields NaN for any value that is not a key of the dictionary. Those
# rows fall back to code 0. The check must be done on the values: the `in`
# operator on a Series looks at the index labels, so testing for the string
# "NaN" that way never matches and the fill would be skipped.
df['event_type'] = df['event_type'].fillna(0).astype(int)

In [None]:
# Report the most common brand, then give missing brands an explicit label.
# 'brand' holds one scalar per row, so the counts are taken directly on the
# column (exploding it first would only copy the whole frame).
brand_counts = df['brand'].value_counts()
# value_counts() skips NaN; guard against a column with no brand at all.
most_frequent_brand = brand_counts.idxmax() if not brand_counts.empty else None
print("The most frequent brand is:", most_frequent_brand)
# fillna(..., inplace=True) returns None, so its result must not be assigned
# back to the column; use the returned Series of the non-inplace form instead.
df['brand'] = df['brand'].fillna('Unknown')

The most frequent brand is: runail


In [None]:
# Largest user id, taken before the rows without a user id are removed.
max_values = df['user_id'].max()
# Rows lacking a user id are removed from the frame in place.
df.dropna(subset=['user_id'], inplace=True)
# Both id columns are stored as plain integers from here on.
for id_column in ('user_id', 'product_id'):
  df[id_column] = df[id_column].astype(int)

In [None]:
# Recommendation System
# Rows whose event_type code is 2 are the ones kept as interactions.
is_purchase = df['event_type'] == 2

df_recommendation = df.loc[is_purchase, ['user_id', 'product_id', 'price']]

# One row per distinct (user, product) pair.
df_interactions = df.loc[is_purchase, ['user_id', 'product_id']].drop_duplicates()

# Map every user id to a contiguous integer code, and keep the reverse lookup.
user_ids = df_interactions['user_id'].unique().tolist()
user2user_encoded = dict(zip(user_ids, range(len(user_ids))))
userencoded2user = dict(enumerate(user_ids))

# Same encoding for product ids.
product_ids = df_interactions['product_id'].unique().tolist()
product2product_encoded = dict(zip(product_ids, range(len(product_ids))))
product_encoded2product = dict(enumerate(product_ids))

# Attach the integer codes that are later fed to the embedding layers.
df_interactions = df_interactions.assign(
    get_user=df_interactions['user_id'].map(user2user_encoded),
    get_product=df_interactions['product_id'].map(product2product_encoded),
)

# Vocabulary sizes for the two embedding tables.
num_users = len(user_ids)
num_products = len(product_ids)

# 80/20 split with a fixed seed.
train, test = train_test_split(df_interactions, random_state=42, test_size=0.2)

In [None]:
# Width of each embedding vector (shared by users and products).
embedding_size = 50

# User branch: integer user code -> embedding -> flat vector.
user_input = Input(name="User-Input", shape=(1,))
user_embedding = Embedding(input_dim=num_users,
                           output_dim=embedding_size,
                           name="User-Embedding")(user_input)
user_vec = Flatten(name="Flatten-Users")(user_embedding)

# Product branch: integer product code -> embedding -> flat vector.
product_input = Input(name="Product-Input", shape=(1,))
product_embedding = Embedding(input_dim=num_products,
                              output_dim=embedding_size,
                              name="Product-Embedding")(product_input)
product_vec = Flatten(name="Flatten-Products")(product_embedding)

# Both vectors are joined and passed through a small dense head that ends in
# a single sigmoid unit.
merged = Concatenate()([user_vec, product_vec])

dense_layer_1 = Dense(units=128, activation='relu')(merged)
output_layer = Dense(units=1, activation='sigmoid')(dense_layer_1)

model = Model(outputs=output_layer, inputs=[user_input, product_input])

In [None]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# The network ends in one sigmoid unit trained with binary cross-entropy, so
# the target has to be a 0/1 label, not the encoded product index.
# Every row of `train` is an observed purchase and becomes a positive (label 1).
positive_users = train.get_user.values
positive_products = train.get_product.values

# The loss needs both classes: pair each positive's user with one randomly
# drawn product code and use that pair as a negative (label 0).
sampling_rng = np.random.default_rng(42)
negative_users = positive_users.copy()
negative_products = sampling_rng.integers(0, num_products, size=len(positive_products))

# Throw away sampled pairs that are real purchases (train or test). A pair is
# folded into a single integer (user * num_products + product) so that
# np.isin can compare whole pairs at once.
observed_pairs = (df_interactions.get_user.values.astype(np.int64) * num_products
                  + df_interactions.get_product.values)
sampled_pairs = negative_users.astype(np.int64) * num_products + negative_products
is_true_negative = ~np.isin(sampled_pairs, observed_pairs)
negative_users = negative_users[is_true_negative]
negative_products = negative_products[is_true_negative]

train_users = np.concatenate([positive_users, negative_users])
train_products = np.concatenate([positive_products, negative_products])
train_labels = np.concatenate([np.ones(len(positive_users), dtype='float32'),
                               np.zeros(len(negative_users), dtype='float32')])

# Positives and negatives are stacked in two blocks; this relies on fit()'s
# default shuffling to mix them within each epoch.
history = model.fit([train_users, train_products],
                    train_labels,
                    epochs=5,
                    verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
def get_recommendations(user_id, model, n=10, batch_size=4096):
  """Return the ``n`` highest-scoring products the user has not interacted with.

  Relies on the notebook-level lookups ``user2user_encoded``, ``product_ids``,
  ``product2product_encoded``, ``product_encoded2product`` and on
  ``df_interactions``.

  Args:
    user_id: Raw (un-encoded) user id.
    model: Object whose ``predict([users, products], ...)`` returns one score
      per (user, product) row.
    n: Maximum number of products to return.
    batch_size: Batch size handed to ``model.predict``. Scoring every
      candidate product with a small batch is very slow, hence the large
      default.

  Returns:
    A list of raw product ids, best score first. The list is empty when
    ``n`` is not positive or the user has no unseen product. For a user id
    that is not in ``user2user_encoded`` the string ``"User not found!"`` is
    returned (kept for backward compatibility with existing callers).
  """
  user_encoded = user2user_encoded.get(user_id)
  if user_encoded is None:
    return "User not found!"
  if n <= 0:
    # scores[-0:] would select every candidate, so handle this explicitly.
    return []

  # Candidate products = all known products minus the ones this user already has.
  seen_products = df_interactions.loc[df_interactions.user_id == user_id, 'product_id'].values
  candidates = np.setdiff1d(np.array(product_ids), seen_products)
  encoded_lookup = (product2product_encoded.get(p) for p in candidates)
  candidate_encoded = np.array([e for e in encoded_lookup if e is not None], dtype=np.int64)
  if candidate_encoded.size == 0:
    # Nothing left to score; avoid calling predict on empty inputs.
    return []

  # The model scores (user, product) rows, so repeat the user code per candidate.
  user_column = np.full(len(candidate_encoded), user_encoded)
  scores = model.predict([user_column, candidate_encoded],
                         batch_size=batch_size, verbose=0)
  scores = np.asarray(scores).flatten()

  # argsort is ascending: take the last n positions and reverse them.
  top_positions = scores.argsort()[-n:][::-1]
  return [product_encoded2product[candidate_encoded[i]] for i in top_positions]

In [None]:
# Export the top-5 recommendations for a random sample of known users.
all_user_ids = list(user2user_encoded.keys())
# Sampling without replacement raises when more items are requested than
# exist, so cap the sample at the number of known users.
sample_size = min(100, len(all_user_ids))
# Fixed seed so a re-run exports the same users.
sample_rng = np.random.default_rng(42)
user_sample = sample_rng.choice(all_user_ids, size=sample_size, replace=False)

with open('recommendations.csv', 'w', newline='') as csvfile:
  writer = csv.writer(csvfile)
  writer.writerow(['user_id', 'recommendations'])

  for user_id in user_sample:  # Use the sampled user IDs
    recommendations = get_recommendations(user_id, model, n=5)
    if isinstance(recommendations, str):
      # get_recommendations reports an unknown user with a message string;
      # joining that string would write its characters separated by commas.
      recommendations = []
    # Join recommendations as a string
    recommendations_str = ','.join(str(x) for x in recommendations)
    writer.writerow([user_id, recommendations_str])

print("Recommendations exported successfully to recommendations.csv")



KeyboardInterrupt: 