In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# %matplotlib inline
plt.style.use("ggplot")

import sklearn
from sklearn.decomposition import TruncatedSVD

In [None]:
# Load the training ratings and drop every row that contains a missing value.
# pandas is already imported as `pd` in the notebook's first cell, so the
# duplicate mid-notebook import is not repeated here.
amazon_ratings = pd.read_csv('../data/train.csv').dropna()

In [None]:
# Print the column dtypes and non-null counts of the cleaned ratings frame.
amazon_ratings.info()

# Last expression of the cell: summary statistics of the numeric columns are
# rendered as the cell output.
amazon_ratings.describe()

In [None]:
# (rows, columns) of the ratings frame after dropna().
amazon_ratings.shape

In [None]:
# Part 1 (popularity): count how many ratings each item received, keeping the
# result as a one-column frame named 'rating' indexed by item_id.
popular_products = amazon_ratings.groupby('item_id')['rating'].count().to_frame()

# Most-rated items first; this ranking is reused later as the cold-start fallback.
most_popular = popular_products.sort_values(by='rating', ascending=False)
most_popular.head(10)

In [None]:
# Bar chart of the rating counts for the 30 most-rated items.
most_popular.iloc[:30].plot.bar()

# Part 2

In [8]:
# Work on the first 10,000 rating rows only; everything in Part 2 is built
# from this subset.
amazon_ratings1 = amazon_ratings.iloc[:10000]

In [None]:
# User x item utility matrix: one row per user_id, one column per item_id,
# cell = rating (duplicates are averaged by pivot_table's default aggregation),
# and 0 where the user has no rating for the item.
ratings_utility_matrix = pd.pivot_table(
    amazon_ratings1,
    values='rating',
    index='user_id',
    columns='item_id',
    fill_value=0,
)
ratings_utility_matrix.head()

In [None]:
# (number of users, number of items) in the utility matrix.
ratings_utility_matrix.shape

In [None]:
# Flip to item x user so that each row describes one item; the decomposition
# below then produces one embedding row per item.
X = ratings_utility_matrix.transpose()
X.head()

In [None]:
# (number of items, number of users) after the transpose.
X.shape

In [13]:
# NOTE: this binds X1 to the same DataFrame object as X (an alias, not a copy).
# X1 is not referenced by any other cell visible in this notebook.
X1 = X

In [None]:
# Reduce every item (row of X) to a 10-dimensional latent vector.
# TruncatedSVD's default solver is randomized, so random_state is pinned to
# make the embedding, and every correlation and recommendation derived from
# it, reproducible across kernel restarts.
SVD = TruncatedSVD(n_components=10, random_state=42)
decomposed_matrix = SVD.fit_transform(X)
decomposed_matrix.shape

In [None]:
# np.corrcoef treats each row as one variable by default, so this is the
# pairwise Pearson correlation between the rows of decomposed_matrix (one row
# per row of X, i.e. per item). The result is square: one row/column per item.
correlation_matrix = np.corrcoef(decomposed_matrix)
correlation_matrix.shape

In [None]:
# Display the label stored at position 99 of X's index (an item_id, since X is
# the transposed utility matrix).
X.index[99]

In [None]:
# Item whose neighbours we want to inspect.
i = 3703

# Row labels of X as a plain list; the position of `i` in it is also its
# row/column position in correlation_matrix.
product_names = X.index.tolist()
product_ID = product_names.index(i)
product_ID

In [None]:
# Correlation of the chosen item with every item (one value per row of X).
correlation_product_ID = correlation_matrix[product_ID, :]
correlation_product_ID.shape

In [None]:
# Items whose correlation with the chosen item exceeds 0.90, in index order.
highly_correlated = correlation_product_ID > 0.90
Recommend = X.index[highly_correlated].tolist()

# Drop the item the customer already bought (the query item itself).
Recommend.remove(i)

# Show the first nine remaining candidates.
Recommend[:9]

In [76]:
# Part 3: User-Based Recommendation Function
# Combines Part 1 (popularity ranking) and Part 2 (item-item correlations from
# the SVD embedding) to produce personalized recommendations for a user ID.

def get_user_recommendations(user_id, num_recommendations=10, verbose=True):
    """
    Recommend products for a user from their rating history.

    Each item the user has not rated is scored by summing its correlation with
    every item the user has rated (rating > 0). The highest-scoring items are
    returned; ties keep the order of ``X.index``. Users that are unknown or
    have no positive ratings get the globally most-rated items instead.

    Relies on the notebook-level objects ``ratings_utility_matrix``, ``X``,
    ``correlation_matrix`` and ``most_popular``.

    Parameters:
    user_id: Label of the user in ``ratings_utility_matrix.index`` (same type
        as the values of the ``user_id`` column, e.g. an int).
    num_recommendations (int): Number of recommendations to return (default: 10)
    verbose (bool): Print progress/fallback messages (default: True). Pass
        False when calling in a loop to avoid flooding the cell output.

    Returns:
    list: Recommended product IDs, best first.
    """

    # Unknown user: fall back to popularity-based recommendations (Part 1).
    if user_id not in ratings_utility_matrix.index:
        if verbose:
            print(f"User {user_id} not found. Returning popular products for new users...")
        return most_popular.head(num_recommendations).index.tolist()

    # Items this user has rated.
    user_ratings = ratings_utility_matrix.loc[user_id]
    purchased_products = user_ratings[user_ratings > 0].index.tolist()

    if len(purchased_products) == 0:
        if verbose:
            print(f"User {user_id} has no purchase history. Returning popular products...")
        return most_popular.head(num_recommendations).index.tolist()

    if verbose:
        print(f"User {user_id} has purchased {len(purchased_products)} products.")
        print(f"Sample purchased products: {purchased_products[:3]}")

    # Collaborative filtering (Part 2), vectorized.
    item_index = X.index
    purchased_mask = item_index.isin(purchased_products)

    top_recommendations = []
    if purchased_mask.any():
        # Sum the correlation rows of all purchased items. np.corrcoef yields
        # NaN for zero-variance rows; nansum treats those as 0 so one NaN can
        # neither poison an item's score nor corrupt the sort order.
        scores = np.nansum(correlation_matrix[purchased_mask], axis=0)

        # Only items the user has not purchased are candidates.
        candidate_positions = np.flatnonzero(~purchased_mask)
        candidate_scores = scores[candidate_positions]

        # Stable descending sort: equal scores keep X.index order.
        order = np.argsort(-candidate_scores, kind="stable")
        best_positions = candidate_positions[order][:num_recommendations]
        top_recommendations = item_index[best_positions].tolist()

    # If we don't have enough recommendations, fill with popular products.
    if len(top_recommendations) < num_recommendations:
        already_used = set(top_recommendations) | set(purchased_products)
        for product in most_popular.index.tolist():
            if product not in already_used:
                top_recommendations.append(product)
                already_used.add(product)
                if len(top_recommendations) >= num_recommendations:
                    break

    return top_recommendations[:num_recommendations]


In [None]:
# Try the recommender for user_id 3 with the default number of
# recommendations; the returned list is displayed as the cell output.
get_user_recommendations(3)

In [None]:
test_users = pd.read_csv('../data/test.csv')
test_user_ids = test_users['user_id'].tolist()

# One recommendation list per test user, with every item id coerced to a
# plain Python int so it serializes cleanly.
predictions = [
    [int(item_id) for item_id in get_user_recommendations(test_user_id)]
    for test_user_id in test_user_ids
]

# Each user's predictions are written as a single space-separated string.
submission = pd.DataFrame({
    'user_id': test_user_ids,
    'predictions': [' '.join(str(item_id) for item_id in pred) for pred in predictions],
})
submission.to_csv("submission.csv", index=False)
print(f"Submission saved: {submission.shape[0]:,} predictions")
