# Recommender System

### 1. The main objective of this project is to create a recommender system that suggests which items to recommend.
### 2. The goal is to find popular items - globally, country-wise and month-wise - and to recommend items based on estimated ratings and user ratings.

### Import the necessary libraries.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import pandas as pd
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
from surprise import SVD
from surprise import Dataset
from surprise import Reader

from mlxtend.frequent_patterns import apriori, association_rules

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subfolders, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

### Load the dataset 

In [None]:
# Read the Online Retail workbook from the Kaggle input directory into a DataFrame.
df = pd.read_excel('/kaggle/input/onlineretail/OnlineRetail.xlsx')


In [None]:
# Keep only the rows with a positive Quantity. The explicit copy makes `df` an
# independent frame, so the in-place dropna and the column assignments applied
# to it further down act on real data rather than on a slice of the loaded
# frame (which is what triggers SettingWithCopyWarning).
df = df.loc[df['Quantity'] > 0].copy()

### Displaying the dataset

In [None]:
# Preview the first 10 rows of the filtered frame.
df.head(10)

### List of Countries available in the dataset

In [None]:
# Number of rows per Country.
df.value_counts(['Country'])

In [None]:
# Number of rows per distinct InvoiceDate value.
df.value_counts(['InvoiceDate'])

### Drop the Null values

In [None]:
# Remove every row that has a missing value in any column; df is modified in place.
df.dropna(inplace=True)

### Most popular items globally

In [None]:
# Flag each Description that has already appeared earlier in the column
# (the first occurrence of every description is not flagged).
df1 = df['Description']
duplicates = df1.duplicated(keep='first')
n_duplicates = duplicates.sum()
print("Number of duplicates:", n_duplicates)


In [None]:
# Rank items by the number of rows that mention them.
# Counting only the duplicated rows leaves out the first occurrence of every
# item, so each reported count would be one too low; counting the whole
# Description column gives the true frequencies.
duplicated_rows = df1[duplicates]  # repeat occurrences only; kept for inspection
most_common_duplicates = df1.value_counts().head(10)
print(f"Most popular items Globally:\n{most_common_duplicates}")

### The above result lists the item descriptions that are most popular globally

### The next step is to find the most popular item in each country

In [None]:
def _most_frequent(descriptions):
    """Return the value occurring most often in *descriptions* (None if there is none)."""
    counts = descriptions.value_counts()
    return counts.idxmax() if not counts.empty else None


# One row per country holding the description found on the most rows there.
# (Summing a text column would merely concatenate every description of a
# country into one long string, which says nothing about popularity.)
most_popular_items = (
    df.groupby('Country')['Description']
    .agg(_most_frequent)
    .reset_index()
)


### Print the result

In [None]:
# Show the first 25 rows of the per-country result.
print(most_popular_items.head(25))

### The above result generates the most popular items for each country specifically

##  **The below step is to find the most popular items - monthly wise**

In [None]:
import warnings

# NOTE: silences every warning for the rest of the session.
warnings.filterwarnings('ignore')

df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])

# 'YYYY-MM' label used to split the transactions by month.
df['date_new'] = df.InvoiceDate.dt.strftime('%Y-%m')

# Build, for every month, the ten association-rule antecedents with the
# highest support. Every step (basket matrix -> frequent itemsets ->
# association rules -> top 10 -> append) must run inside the loop; if the
# later steps sit after the loop, only the last month is ever analysed and
# top_items_monthly ends up with a single entry.
top_items_monthly = []
for month in df.date_new.unique():
    trans_month = df.loc[df.date_new == month]

    # One row per invoice, one column per item description, cell = units bought.
    trans_month = (trans_month.groupby(['InvoiceNo', 'Description'])['Quantity']
                   .sum().unstack().reset_index().fillna(0)
                   .set_index('InvoiceNo'))

    # apriori works on a boolean "was the item in the basket" matrix.
    trans_month = trans_month >= 1

    frequent_itemsets = apriori(trans_month, min_support=0.03, use_colnames=True)

    if frequent_itemsets.empty:
        # No itemset reached the support threshold this month: record an
        # empty result instead of passing an empty frame to association_rules.
        top_k = pd.DataFrame(columns=['antecedents', 'support'])
    else:
        # Using the association rules method
        associationRules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
        top_k = (associationRules.sort_values(by=['support'], ascending=False)
                 .iloc[:10][['antecedents', 'support']].reset_index(drop=True))

    top_items_monthly.append((month, top_k))

### **Using Pivot**

In [None]:
# For each month, count how many transaction rows mention an item that belongs
# to one of that month's top antecedent itemsets.
pivot_dfs = []
for month, data in top_items_monthly:
    antecedents = list(data.antecedents)

    # Descriptions of the rows belonging to this month, compared as strings.
    month_descriptions = df.loc[df.date_new == month, 'Description'].astype(str)

    # One [month, itemset] entry per (row, matching itemset) pair. Each match
    # is recorded exactly once: splitting the description on commas only
    # repeated the same match once per comma-separated piece.
    rows = [
        [month, itemset]
        for description in month_descriptions
        for itemset in antecedents
        if description in itemset
    ]

    pivot_df = pd.DataFrame(rows, columns=['month', 'Item'])
    pivot_dfs.append(pivot_df.pivot_table(values=["Item"], index=["month"], aggfunc="count", fill_value=0))


### The code below displays the most popular items month by month

In [None]:

# Index the frame by invoice timestamp so that rows can be bucketed by month.
df['InvoiceDate'] = pd.to_datetime(df['InvoiceDate'])
df.set_index('InvoiceDate', inplace=True)

# Per month: number of rows for each description.
by_month = df.groupby(pd.Grouper(freq='M'))
monthly_df = by_month['Description'].value_counts()

# Keep only the descriptions that occur more than once within a month.
repeated_products = monthly_df[monthly_df.gt(1)]

for month, counts in repeated_products.groupby(level=0):
    header = f"Month: {month.strftime('%B %Y')}"
    print(header)
    print(counts)
    

### **The following steps will be implementing the recommendation predictor using user ratings and estimated ratings.**

#### Create a new dataframe with attribute values

In [None]:
# Columns used by the rating model and by the StockCode -> Description lookup.
# Take a copy: df1 is modified in place further down (dropna), and doing that
# on a plain column selection of df raises SettingWithCopyWarning.
df1 = df[['CustomerID', 'Description', 'StockCode', 'Quantity']].copy()

#### Create a rating matrix with the help of pivot table.

In [None]:
# Customer x item matrix. Each cell aggregates the Quantity values of that
# customer/item pair with pivot_table's default aggregation; pairs with no
# rows are filled with 0.
ratings_matrix = pd.pivot_table(
    df1,
    values='Quantity',
    index=['CustomerID'],
    columns=['StockCode'],
    fill_value=0,
)


#### The algorithm we will be using is **SVD() -> Singular Value Decomposition**

In [None]:
# SVD model from the `surprise` package, created with its default settings.
algo = SVD()


In [None]:

# The "rating" handed to Surprise is the purchased Quantity, which is not
# confined to 1-5. Surprise clips its estimates to rating_scale, so the scale
# must span the real data; with a fixed (1, 5) scale every estimate above 5
# would be cut down to 5 and the ranking of recommendations would be lost.
quantity_ratings = df1[['CustomerID', 'StockCode', 'Quantity']]
reader = Reader(rating_scale=(quantity_ratings['Quantity'].min(),
                              quantity_ratings['Quantity'].max()))
surprise_data = Dataset.load_from_df(quantity_ratings, reader)

In [None]:

# Use the whole dataset for training (no hold-out split).
trainset = surprise_data.build_full_trainset()
# Per Surprise's API, the anti-testset holds the user/item pairs that are NOT
# in the trainset, i.e. the candidates for recommendation.
testset = trainset.build_anti_testset()


#### Fit the algorithm

In [None]:
# Train the SVD model on the full trainset.
algo.fit(trainset)

#### Make predictions using the test data that we found using the algorithm.

In [None]:

# Estimate a rating for every user/item pair in the anti-testset.
predictions = algo.test(testset)

In [None]:

# Group the predictions by user: uid -> list of (item id, estimated rating).
top_n = {}
for prediction in predictions:
    uid, iid, _true_r, est, _details = prediction
    top_n.setdefault(uid, []).append((iid, est))

#### Drop rows with null values in the StockCode and Description columns, as both columns may hold items with missing values and outliers.

In [None]:


# Remove rows that lack a StockCode or a Description (df1 is changed in place).
df1.dropna(subset=["StockCode", "Description"], inplace=True)

# First description recorded for each stock code ...
descriptions = df1.groupby("StockCode")["Description"].first()

# ... as a plain StockCode -> Description dictionary for quick lookups.
desc_dict = descriptions.to_dict()


### **The below code displays the predictions based on the recommended items.**

In [None]:
from collections import Counter

# For every user keep (item id, description) of the five best-rated items.
global_top_n = {}
for uid, user_ratings in top_n.items():
    # Highest estimated rating first; sorts the list stored in top_n in place.
    user_ratings.sort(key=lambda x: x[1], reverse=True)
    global_top_n[uid] = [
        (iid, desc_dict[iid])
        for iid, _est_rating in user_ratings[:5]
        if iid in desc_dict  # items without a known description are skipped
    ]

# Tally how often each item was recommended, in a single pass over the
# recommendations (calling list.count once per distinct item is quadratic).
all_items = [iid for uid in global_top_n for iid, desc in global_top_n[uid]]
item_counts = Counter(all_items)

print("Most Recommended Items (in number of recommendations):")
# most_common() yields items by descending count; ties keep first-seen order.
# Every counted item is known to desc_dict because of the filter above.
for item, count in item_counts.most_common():
    desc = desc_dict[item]
    print("\t", "Item ID:", item, "(\"" + str(desc) + "\")", f"recommended {count} times")
