<a href="https://colab.research.google.com/github/vemulaakshay12/Purchase-Recomendation-System/blob/main/fullcode.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout, Bidirectional
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score

# Toy dataset: one record per user, listing the stores they purchased from in order.
data = [
    {'user': 'user1', 'purchases': ['amazon', 'flipkart', 'myntra']},
    {'user': 'user2', 'purchases': ['amazon', 'flipkart']},
    # Add more user data as needed
]

# Keep only the ordered purchase lists; the user ids are not used for training.
purchase_sequences = [record['purchases'] for record in data]
print("Purchase Sequences:", purchase_sequences)

# Map every item name to an integer id; unseen items fall back to the '<unk>' token.
tokenizer = Tokenizer(oov_token='<unk>')
tokenizer.fit_on_texts(purchase_sequences)
sequences = tokenizer.texts_to_sequences(purchase_sequences)
print("Tokenized Sequences:", sequences)

# One extra slot on top of the tokenizer's word index (id 0 is what padding uses below).
vocab_size = 1 + len(tokenizer.word_index)
print("Vocabulary Size:", vocab_size)

# Right-pad every sequence with zeros up to the length of the longest one.
max_sequence_length = max(map(len, sequences))
sequences_padded = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')
print("Padded Sequences:", sequences_padded)

# Next-item setup: X drops the last position, y is the same row shifted left by one,
# so y[:, t] is the item that follows X[:, t].
X, y = sequences_padded[:, :-1], sequences_padded[:, 1:]

# Turn the integer targets into one-hot vectors for categorical cross-entropy.
y = tf.keras.utils.to_categorical(y, num_classes=vocab_size)
print("Shape of X:", X.shape)
print("Shape of y:", y.shape)

# Architecture: embedding -> two stacked bidirectional LSTMs (each followed by
# dropout) -> a softmax over the vocabulary at every timestep.
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=100, input_length=max_sequence_length-1),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dropout(0.2),
    Bidirectional(LSTM(100, return_sequences=True)),
    Dropout(0.2),
    Dense(vocab_size, activation='softmax'),
])

# Targets are one-hot encoded, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
for split_name, split_array in (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
):
    print(f"Shape of {split_name}:", split_array.shape)

# Stop once validation loss has not improved for 5 epochs, and keep the best
# checkpoint on disk.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
model_checkpoint = ModelCheckpoint('best_model.h5', save_best_only=True, monitor='val_loss', mode='min')

# NOTE(review): the held-out split doubles as the validation data that drives
# early stopping and checkpoint selection, so the test metrics printed below
# are not independent of model selection.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=32,
    callbacks=[early_stopping, model_checkpoint],
)

# Reload the checkpoint with the lowest validation loss before scoring.
model.load_weights('best_model.h5')

# Loss and accuracy on the held-out rows.
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Test Loss: {loss}')
print(f'Test Accuracy: {accuracy}')

# Function to generate recommendations for a given sequence
def generate_recommendations(model, tokenizer, sequence, num_recommendations=5):
    """Predict the most likely next item for each position of ``sequence``.

    The sequence is tokenized, padded with trailing zeros to
    ``max_sequence_length - 1`` (a module-level value) and passed to
    ``model.predict``. The highest-scoring vocabulary index at every position
    is then mapped back to an item name.

    Args:
        model: Object whose ``predict`` returns per-position scores for a
            batch of padded token ids.
        tokenizer: Tokenizer providing ``texts_to_sequences`` and
            ``index_word``.
        sequence: List of item names forming one purchase history.
        num_recommendations: Upper bound on the number of items returned.

    Returns:
        A list of item names, one per input position (at most
        ``num_recommendations`` of them). Indices missing from
        ``tokenizer.index_word`` are reported as ``'<unknown>'``.
    """
    window = max_sequence_length - 1
    encoded = tokenizer.texts_to_sequences([sequence])
    token_list = pad_sequences([encoded[0]], maxlen=window, padding='post')
    print("Token List for Prediction:", token_list)

    # argmax over the last axis picks the single best vocabulary id per position.
    predicted_indices = model.predict(token_list, verbose=0).argmax(axis=-1)
    print("Predicted Indices:", predicted_indices)

    # Only one sequence was submitted, so row 0 holds all of its predictions.
    id_to_item = tokenizer.index_word
    names = [id_to_item.get(token_id, '<unknown>') for token_id in predicted_indices[0]]
    return names[:num_recommendations]

# Demo on the first simulated purchase history (taken from the full dataset,
# not specifically from the held-out split); swap in any other sequence to try it.
example_sequence = purchase_sequences[0]
recommendations = generate_recommendations(
    model,
    tokenizer,
    example_sequence,
)
for label, value in (
    ("Input Sequence: ", example_sequence),
    ("Recommendations: ", recommendations),
):
    print(label, value)

# Function to calculate metrics
def evaluate_recommendations(model, tokenizer, X_test, y_test, num_recommendations=5):
    """Score per-position next-item predictions with macro precision/recall/F1.

    For every test row the non-padding input items are passed to
    ``generate_recommendations``, which returns one predicted item per input
    position. Each prediction is compared with the true item at the *same*
    position of the same row; positions whose true item is padding (index 0)
    are skipped together with their prediction, so the truth and prediction
    lists always stay aligned.

    Args:
        model: Trained model forwarded to ``generate_recommendations``.
        tokenizer: Tokenizer providing ``index_word`` for id -> name lookups.
        X_test: 2-D array of padded input token ids, one row per sample.
        y_test: One-hot encoded targets; ``argmax`` over the last axis of a
            row yields the true token id at each position.
        num_recommendations: Forwarded to ``generate_recommendations``; it caps
            how many leading positions of each row are scored.

    Returns:
        A ``(precision, recall, f1)`` tuple of macro-averaged scores. When no
        position can be scored (no rows, or only padding targets) the result
        is ``(0.0, 0.0, 0.0)``.
    """
    index_word = tokenizer.index_word
    y_true = []
    y_pred = []

    for input_seq, target_one_hot in zip(X_test, y_test):
        true_output_seq = target_one_hot.argmax(axis=-1)  # true token id per position

        # Padding is appended at the end of each row (padding='post'), so dropping
        # the zeros keeps the remaining items at their original positions.
        input_items = [index_word.get(idx, '<unknown>') for idx in input_seq if idx != 0]
        recommendations = generate_recommendations(model, tokenizer, input_items, num_recommendations)

        # Pair truth and prediction position by position. zip() stops at the
        # shorter sequence, which covers a num_recommendations smaller than
        # the row length.
        for true_idx, predicted_item in zip(true_output_seq, recommendations):
            if true_idx == 0:
                continue  # padded target: nothing to predict at this position
            y_true.append(index_word.get(true_idx, '<unknown>'))
            y_pred.append(predicted_item)

    # Nothing to score; report zeros, in line with zero_division=0 below.
    if not y_true:
        return 0.0, 0.0, 0.0

    # zero_division=0 keeps labels that never appear on one side from raising warnings.
    precision = precision_score(y_true, y_pred, average='macro', zero_division=0)
    recall = recall_score(y_true, y_pred, average='macro', zero_division=0)
    f1 = f1_score(y_true, y_pred, average='macro', zero_division=0)

    return precision, recall, f1

# Macro-averaged quality metrics on the held-out rows.
precision, recall, f1 = evaluate_recommendations(model, tokenizer, X_test, y_test)
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1-Score: {f1}')

# Print recommendations for every simulated sequence for manual inspection
# (slice purchase_sequences to look at fewer of them).
for example_sequence in purchase_sequences:
    recommendations = generate_recommendations(model, tokenizer, example_sequence)
    print(f'Input Sequence: {example_sequence}')
    print(f'Recommendations: {recommendations}\n')


Purchase Sequences: [['amazon', 'flipkart', 'myntra'], ['amazon', 'flipkart']]
Tokenized Sequences: [[2, 3, 4], [2, 3]]
Vocabulary Size: 5
Padded Sequences: [[2 3 4]
 [2 3 0]]
Shape of X: (2, 2)
Shape of y: (2, 2, 5)
Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (None, 2, 100)            500       
                                                                 
 bidirectional_6 (Bidirecti  (None, 2, 200)            160800    
 onal)                                                           
                                                                 
 dropout_6 (Dropout)         (None, 2, 200)            0         
                                                                 
 bidirectional_7 (Bidirecti  (None, 2, 200)            240800    
 onal)                                                           
                                   

  saving_api.save_model(


Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Test Loss: 1.542565107345581
Test Accuracy: 0.5
Token List for Prediction: [[3 4]]
Predicted Indices: [[3 4]]
Input Sequence:  ['amazon', 'flipkart', 'myntra']
Recommendations:  ['flipkart', 'myntra']
Token List for Prediction: [[2 3]]
Predicted Indices: [[3 4]]
Precision: 1.0
Recall: 1.0
F1-Score: 1.0
Token List for Prediction: [[3 4]]
Predicted Indices: [[3 4]]
Input Sequence: ['amazon', 'flipkart', 'myntra']
Recommendations: ['flipkart', 'myntra']

Token List for Prediction: [[2 3]]
Predicted Indices: [[3 4]]
Input Sequence: ['amazon', 'flipkart']
Recommendations: ['flipkart', 'myntra']

