<a href="https://colab.research.google.com/github/v670/MLProjects/blob/main/Market_Basket_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Implementing market basket analysis

In [None]:
# Install/upgrade mlxtend, which supplies the encoder and frequent-pattern tools imported further down
%pip install mlxtend --upgrade



In [None]:
# Loading necessary packages
import numpy as np
import pandas as pd



In [None]:
# Read the "Online Retail" Excel workbook from the UCI archive URL into a DataFrame
df = pd.read_excel('http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx')
# Preview the first rows
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


# Data Preparation

In [None]:
# Prefix every StockCode with '_' so that all codes end up as strings
df['StockCode'] = df['StockCode'].map('_{}'.format)

In [None]:
# Invoice / stock-code pairs: the input for the recommendation system
orders = df.loc[:, ['InvoiceNo', 'StockCode']]
orders.head()

Unnamed: 0,InvoiceNo,StockCode
0,536365,_85123A
1,536365,_71053
2,536365,_84406B
3,536365,_84029G
4,536365,_84029E


In [None]:
# Independent copy of the code/description columns, used to look up product names
products = df.loc[:, ['StockCode', 'Description']].copy()
products.head()

Unnamed: 0,StockCode,Description
0,_85123A,WHITE HANGING HEART T-LIGHT HOLDER
1,_71053,WHITE METAL LANTERN
2,_84406B,CREAM CUPID HEARTS COAT HANGER
3,_84029G,KNITTED UNION FLAG HOT WATER BOTTLE
4,_84029E,RED WOOLLY HOTTIE WHITE HEART.


In [None]:
# Discard rows that repeat an earlier (StockCode, Description) pair
products = products.drop_duplicates()
products.head()

Unnamed: 0,StockCode,Description
0,_85123A,WHITE HANGING HEART T-LIGHT HOLDER
1,_71053,WHITE METAL LANTERN
2,_84406B,CREAM CUPID HEARTS COAT HANGER
3,_84029G,KNITTED UNION FLAG HOT WATER BOTTLE
4,_84029E,RED WOOLLY HOTTIE WHITE HEART.


In [None]:
# (rows, columns) left after removing exact duplicate rows
products.shape

(5752, 2)

In [None]:
# Keep only the rows whose Description is identical to its upper-cased form
products = products.loc[
    lambda frame: frame['Description'].eq(frame['Description'].str.upper())
]

In [None]:
# (rows, columns) left after keeping only all-uppercase descriptions
products.shape

(4211, 2)

In [None]:
# One row per StockCode: the first Description encountered wins
products = products.drop_duplicates(subset=['StockCode'], keep='first')
products

Unnamed: 0,StockCode,Description
0,_85123A,WHITE HANGING HEART T-LIGHT HOLDER
1,_71053,WHITE METAL LANTERN
2,_84406B,CREAM CUPID HEARTS COAT HANGER
3,_84029G,KNITTED UNION FLAG HOT WATER BOTTLE
4,_84029E,RED WOOLLY HOTTIE WHITE HEART.
...,...,...
509369,_85179a,GREEN BITTY LIGHT CHAIN
512588,_23617,SET 10 CARDS SWIRLY XMAS TREE 17104
527065,_90214U,"LETTER ""U"" BLING KEY RING"
537224,_47591b,SCOTTIES CHILDRENS APRON


In [None]:
# Index by StockCode and keep only the Description column, which yields a
# Series mapping stock code -> description for even easier lookups
products = products.set_index('StockCode')['Description']

In [None]:

# Sanity check: look up the description stored for one stock code
products['_21755']

'LOVE BUILDING BLOCK WORD'

In [None]:
def string_list(x):
    """Return the elements of ``x``, each converted with ``str``, as a list."""
    return list(map(str, x))

# Collapse the line items into one row per invoice holding its list of stock codes
orders = (
    orders
    .groupby('InvoiceNo')['StockCode']
    .apply(string_list)
    .reset_index()
)
orders.head()

Unnamed: 0,InvoiceNo,StockCode
0,536365,"[_85123A, _71053, _84406B, _84029G, _84029E, _..."
1,536366,"[_22633, _22632]"
2,536367,"[_84879, _22745, _22748, _22749, _22310, _8496..."
3,536368,"[_22960, _22913, _22912, _22914]"
4,536369,[_21756]


In [None]:
from mlxtend.preprocessing import TransactionEncoder


In [None]:
# Learn the set of distinct stock codes, then one-hot encode every invoice's basket
te = TransactionEncoder()
orders_1hot = te.fit(orders['StockCode']).transform(orders['StockCode'])

In [None]:
# Wrap the encoded array in a DataFrame whose columns are the encoder's item names
orders_1hot = pd.DataFrame(data=orders_1hot, columns=te.columns_)
# Preview the first 5 rows
orders_1hot.head(5)

Unnamed: 0,_10002,_10080,_10120,_10123C,_10123G,_10124A,_10124G,_10125,_10133,_10134,_10135,_11001,_15030,_15034,_15036,_15039,_15044A,_15044B,_15044C,_15044D,_15056BL,_15056N,_15056P,_15056bl,_15056n,_15056p,_15058A,_15058B,_15058C,_15060B,_15060b,_16008,_16010,_16011,_16012,_16014,_16015,_16016,_16020C,_16033,...,_90214S,_90214T,_90214U,_90214V,_90214W,_90214Y,_90214Z,_AMAZONFEE,_B,_BANK CHARGES,_C2,_CRUK,_D,_DCGS0003,_DCGS0004,_DCGS0055,_DCGS0057,_DCGS0066P,_DCGS0067,_DCGS0068,_DCGS0069,_DCGS0070,_DCGS0071,_DCGS0072,_DCGS0073,_DCGS0074,_DCGS0076,_DCGSSBOY,_DCGSSGIRL,_DOT,_M,_PADS,_POST,_S,_gift_0001_10,_gift_0001_20,_gift_0001_30,_gift_0001_40,_gift_0001_50,_m
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [None]:
from mlxtend.frequent_patterns import fpgrowth

In [None]:

%%timeit -n1 -r1
# Time one FP-Growth run (a single loop, a single repeat); the result itself is discarded
fpgrowth(orders_1hot, min_support=0.01, max_len=2, use_colnames=True)

1 loop, best of 1: 5.99 s per loop


In [None]:
# Mine frequent itemsets of at most two items with a minimum support of 0.01,
# labelled by stock code rather than by column position
is_fp = fpgrowth(
    orders_1hot,
    min_support=0.01,
    max_len=2,
    use_colnames=True,
)


In [None]:
# Preview the first frequent itemsets together with their support
is_fp.head()

Unnamed: 0,support,itemsets
0,0.086718,(_85123A)
1,0.017915,(_84029G)
2,0.016911,(_84029E)
3,0.014865,(_22752)
4,0.013205,(_71053)


In [None]:
from mlxtend.frequent_patterns import association_rules

In [None]:
# Display the frequent-itemset table that feeds the rule generation
is_fp

Unnamed: 0,support,itemsets
0,0.086718,(_85123A)
1,0.017915,(_84029G)
2,0.016911,(_84029E)
3,0.014865,(_22752)
4,0.013205,(_71053)
...,...,...
997,0.010077,"(_23203, _23344)"
998,0.010039,"(_22086, _23344)"
999,0.011853,"(_23295, _23293)"
1000,0.010077,"(_23296, _23293)"


In [None]:
# Derive association rules from the frequent itemsets, filtered on lift with a threshold of 10
rules = association_rules(
    is_fp,
    metric="lift",
    min_threshold=10,
)
rules.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(_21754),(_21755),0.030386,0.024363,0.011467,0.377382,15.490025,0.010727,1.566993
1,(_21755),(_21754),0.024363,0.030386,0.011467,0.470681,15.490025,0.010727,1.831815
2,(_22748),(_22745),0.016988,0.016448,0.012124,0.713636,43.387751,0.011844,3.434626
3,(_22745),(_22748),0.016448,0.016988,0.012124,0.737089,43.387751,0.011844,3.738955
4,(_22726),(_22727),0.038726,0.041737,0.024942,0.644068,15.431412,0.023326,2.692261


In [None]:

# (number of rules, number of columns describing each rule)
rules.shape

(280, 9)

In [None]:
def predict(antecedent, rules, max_results=6, sort_by=None):
    """Recommend the items that the association rules link to ``antecedent``.

    Parameters
    ----------
    antecedent : str or iterable of str
        Item (or items) already in the basket. A bare string is treated as a
        single item; any other iterable is converted to a ``frozenset``.
    rules : pandas.DataFrame
        Rules table with ``antecedents`` and ``consequents`` columns whose
        cells are sets of items.
    max_results : int, default 6
        Maximum number of items to return.
    sort_by : str, optional
        Name of a column of ``rules`` (for example ``'lift'``). When given,
        matching rules are ranked from highest to lowest on that column
        before the result is truncated. ``None`` keeps the row order of
        ``rules``.

    Returns
    -------
    pandas.Series
        Recommended items, named ``consequents``, with a fresh 0..n-1 index.
        Each item appears once. Empty when no rule matches.
    """
    # A bare string is ONE item, not an iterable of characters.
    if isinstance(antecedent, str):
        antecedent = (antecedent,)
    wanted = frozenset(antecedent)

    # Compare set-to-set row by row instead of relying on how pandas treats a
    # set/list/str on the right-hand side of ``==``.
    is_match = pd.Series(
        [frozenset(items) == wanted for items in rules['antecedents']],
        index=rules.index,
        dtype=bool,
    )
    matched = rules[is_match]

    if sort_by is not None:
        # mergesort is stable, so ties keep their original relative order
        matched = matched.sort_values(sort_by, ascending=False, kind='mergesort')

    # Flatten every consequent set (not just one arbitrary element of it);
    # elements of a multi-item set are emitted in a deterministic order.
    items = []
    seen = set()
    for consequent in matched['consequents']:
        for item in sorted(consequent, key=str):
            if item not in seen:
                seen.add(item)
                items.append(item)

    return pd.Series(items[:max_results], name='consequents', dtype=object)

In [None]:
# Recommend items for a basket containing stock code _20712
preds = predict({'_20712'}, rules)
preds

0    _22379
1    _20713
2    _21930
3    _21931
4    _22386
5    _21928
Name: consequents, dtype: object

In [None]:
# Description of the item the recommendations were requested for
print(products['_20712'])

JUMBO BAG WOODLAND ANIMALS


In [None]:
# Print the description of each recommended stock code
for stockid in preds:  
    print(products[stockid])

RECYCLING BAG RETROSPOT 
JUMBO BAG OWLS
JUMBO STORAGE BAG SKULLS
JUMBO STORAGE BAG SUKI
JUMBO BAG PINK POLKADOT
JUMBO BAG SCANDINAVIAN PAISLEY


In [None]:
# Description of the second example item, stock code _22112
print(products['_22112'])

CHOCOLATE HOT WATER BOTTLE


In [None]:
# Get the recommendations for a basket containing stock code _22112
preds = predict({'_22112'}, rules)

# Print the description of each recommended stock code
for stockid in preds:  
    print(products[stockid])

HOT WATER BOTTLE TEA AND SYMPATHY
SCOTTIE DOG HOT WATER BOTTLE
HOT WATER BOTTLE I AM SO POORLY
HOT WATER BOTTLE KEEP CALM
