### Association Model
##### A Standard Apriori algorithm

-- market-basket.basket <- Sample text data 

![BasketAnalysis](http://www.improvast.com/uploads/1/0/0/1/10017386/728383.png?1445420086 "Basket Analysis")

**Market Basket analysis aims to give answers to the following:**
- What are the purchase patterns? (Items purchased together/sequentially/seasonally)
- Which products might benefit from advertising?
- Why do customers buy certain products?
- What time of the day do they buy it?
- Who are the customers? (Students, families etc.)

### Default Customer Purchase History        				 				
- Bread, Milk, Beer
- Bread, Diapers, Eggs
- Milk, Diapers, Beer, Cola
- Bread, Milk, Diapers, Beer
- Bread, Milk, Cola

In [23]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

# Five sample shopping baskets; each inner list holds the items of one transaction.
dataset = [
    ["Bread", "Milk", "Beer"],
    ["Bread", "Diapers", "Eggs"],
    ["Milk", "Diapers", "Beer", "Cola"],
    ["Bread", "Milk", "Diapers", "Beer"],
    ["Bread", "Milk", "Cola"],
]
dataset  # echo the raw transactions as the cell output

[['Bread', 'Milk', 'Beer'],
 ['Bread', 'Diapers', 'Eggs'],
 ['Milk', 'Diapers', 'Beer', 'Cola'],
 ['Bread', 'Milk', 'Diapers', 'Beer'],
 ['Bread', 'Milk', 'Cola']]

In [24]:
# Learn the set of distinct items from the baskets, then encode every
# transaction as a boolean row with one column per item.
te = TransactionEncoder().fit(dataset)
te_ary = te.transform(dataset)

print(te.columns_)  # item labels, in the column order of te_ary
print(te_ary)


['Beer', 'Bread', 'Cola', 'Diapers', 'Eggs', 'Milk']
[[ True  True False False False  True]
 [False  True False  True  True False]
 [ True False  True  True False  True]
 [ True  True False  True False  True]
 [False  True  True False False  True]]


In [11]:
# Wrap the encoded matrix in a pandas DataFrame whose columns carry the item names.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,Beer,Bread,Cola,Diapers,Eggs,Milk
0,True,True,False,False,False,True
1,False,True,False,True,True,False
2,True,False,True,True,False,True
3,True,True,False,True,False,True
4,False,True,True,False,False,True


In [13]:
# Mine the itemsets whose support is at least 0.6.
# use_colnames=True reports the DataFrame's column names (the item names)
# in the result instead of column indices.
frequent_itemsets = apriori(
    df,
    min_support=0.6,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.6,(Beer)
1,0.8,(Bread)
2,0.6,(Diapers)
3,0.8,(Milk)
4,0.6,"(Milk, Beer)"
5,0.6,"(Milk, Bread)"


In [26]:
# Derive association rules from the frequent itemsets, keeping the rules
# whose support is at least 0.6. The result is shown as the cell output.
association_rules(frequent_itemsets, metric="support", min_threshold=0.6)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Milk),(Beer),0.8,0.6,0.6,0.75,1.25,0.12,1.6
1,(Beer),(Milk),0.6,0.8,0.6,1.0,1.25,0.12,inf
2,(Milk),(Bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
3,(Bread),(Milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8


In [16]:
# Generate the rules again, this time filtered on confidence (threshold 0.6),
# and keep them in `rules` so the following cells can query them.
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.6)
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Milk),(Beer),0.8,0.6,0.6,0.75,1.25,0.12,1.6
1,(Beer),(Milk),0.6,0.8,0.6,1.0,1.25,0.12,inf
2,(Milk),(Bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
3,(Bread),(Milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8


In [18]:
# Keep only the rules whose support equals the highest support among all rules.
rules.loc[rules["support"].eq(rules["support"].max())]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Milk),(Beer),0.8,0.6,0.6,0.75,1.25,0.12,1.6
1,(Beer),(Milk),0.6,0.8,0.6,1.0,1.25,0.12,inf
2,(Milk),(Bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8
3,(Bread),(Milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8


In [22]:
# Items currently in the shopper's cart.
CartItem = ["Milk"]

# Build the lookup set once instead of once per rule.
cart_items = set(CartItem)

# Select the rules whose antecedent contains every cart item,
# ordered by ascending support.
matches_cart = rules["antecedents"].apply(
    lambda antecedent: cart_items.issubset(set(antecedent))
)
rules[matches_cart].sort_values(["support"])

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(Milk),(Beer),0.8,0.6,0.6,0.75,1.25,0.12,1.6
2,(Milk),(Bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8


**Reference** : https://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/

## Exercise


Observe the support, confidence, and lift values for the given dataset.

In [None]:
# Exercise data: five grocery baskets, one list of items per transaction.
# NOTE(review): 'Onion' is listed twice in the last basket; kept as-is —
# presumably harmless because the encoding records presence only (confirm).
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

# Exercise data: five grocery baskets, one list of items per transaction.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# Encode the baskets as a boolean matrix and label its columns with the item names.
te = TransactionEncoder().fit(dataset)
te_ary = te.transform(dataset)
df = pd.DataFrame(data=te_ary, columns=te.columns_)
print(df)

# Frequent itemsets: those with support of at least 0.8.
frequent_itemsets = apriori(df, use_colnames=True, min_support=0.8)
print(frequent_itemsets)

# Rules filtered on support (threshold 0.6).
rules_support = association_rules(
    frequent_itemsets, metric="support", min_threshold=0.6
)
print(rules_support)

# Rules filtered on confidence (threshold 0.8).
rules_confidence = association_rules(
    frequent_itemsets, metric="confidence", min_threshold=0.8
)
print(rules_confidence)