## Import libraries and data

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Every line of the file is a basket of items, so there is no header row to consume.
df = pd.read_csv(
    'Market_Basket_Optimisation.csv',
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,


### Replace NaN with zeros

In [3]:
# Baskets shorter than the widest one leave NaN in the trailing columns;
# swap those for 0 so the empty slots can be recognised and skipped later.
df = df.fillna(0)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,chutney,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,turkey,avocado,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,mineral water,milk,energy bar,whole wheat rice,green tea,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


### Further data processing

`i` is the row index (one transaction per row).
`j` is the column index. A row holds at most 20 items in this dataset; unused slots are the zeros filled in above and are skipped.

In [4]:
#simple looping, transaction in array
transactions = []
for i in range(0, len(df)):
    transactions.append([str(df.values[i,j]) for j in range(0,20) if str(df.values[i, j])!='0'])
    

In [5]:
# Spot-check the first basket: it should list only item names, with no '0' placeholders.
transactions[0]

['shrimp',
 'almonds',
 'avocado',
 'vegetables mix',
 'green grapes',
 'whole weat flour',
 'yams',
 'cottage cheese',
 'energy drink',
 'tomato juice',
 'low fat yogurt',
 'green tea',
 'honey',
 'salad',
 'mineral water',
 'salmon',
 'antioxydant juice',
 'frozen smoothie',
 'spinach',
 'olive oil']

In [6]:
# One list is appended per row of df, so this equals the number of rows read from the CSV.
len(transactions)

7501

---

## Training the Apriori on the Dataset


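- `min_support` is 0.003, i.e. 3 out of 1,000: an itemset must appear in at least 0.3% of all transactions to be considered.
- `min_confidence` is 0.2: of the transactions that contain item A, at least 20% must also contain item B.
- `min_lift` is 3: buying item A must make item B at least 3 times more likely than B's overall purchase rate.
- `max_length` is 3: the maximum number of items that may appear together in one itemset.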

In [7]:
from apyori import apriori

# Mine association rules from the baskets using the thresholds described above.
rules = apriori(
    transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=3,
    max_length=3,
)
# Collect the rule records into a list so they can be inspected and saved.
results = list(rules)

### Saving your results

In [8]:
# Persist the parsed baskets; to_csv writes the index column and a header line as well.
df1 = pd.DataFrame(transactions)
df1.to_csv("transactions.csv")

# Persist the mined rules, one row per record in `results`.
df2 = pd.DataFrame(results)
df2.to_csv("results.csv")

# NOTE(review): from here on `results` is a DataFrame, no longer the list of rule records.
# Reading with header=None turns the CSV header line into data row 0 and leaves the
# integer column labels 0-3; the later cells (iloc[1:], sort by column 2) rely on that layout.
results = pd.read_csv('results.csv', header=None)
results.head(10)

Unnamed: 0,0,1,2,3
0,,items,support,ordered_statistics
1,0.0,"frozenset({'chicken', 'light cream'})",0.004532728969470737,[OrderedStatistic(items_base=frozenset({'light...
2,1.0,"frozenset({'mushroom cream sauce', 'escalope'})",0.005732568990801226,[OrderedStatistic(items_base=frozenset({'mushr...
3,2.0,"frozenset({'escalope', 'pasta'})",0.005865884548726837,[OrderedStatistic(items_base=frozenset({'pasta...
4,3.0,"frozenset({'honey', 'fromage blanc'})",0.003332888948140248,[OrderedStatistic(items_base=frozenset({'froma...
5,4.0,"frozenset({'ground beef', 'herb & pepper'})",0.015997866951073192,[OrderedStatistic(items_base=frozenset({'herb ...
6,5.0,"frozenset({'ground beef', 'tomato sauce'})",0.005332622317024397,[OrderedStatistic(items_base=frozenset({'tomat...
7,6.0,"frozenset({'olive oil', 'light cream'})",0.003199573390214638,[OrderedStatistic(items_base=frozenset({'light...
8,7.0,"frozenset({'whole wheat pasta', 'olive oil'})",0.007998933475536596,[OrderedStatistic(items_base=frozenset({'whole...
9,8.0,"frozenset({'shrimp', 'pasta'})",0.005065991201173177,[OrderedStatistic(items_base=frozenset({'pasta...


In [9]:
# Row count of the reloaded frame. Row 0 is the CSV header line (read with header=None),
# so the number of rules is one less than this value.
len(results)

57

In [10]:
# Row 0 holds the CSV header text (the file was read with header=None), so skip it.
resultz = results.iloc[1:, :]
# Column 2 (support) was parsed as text because the header string shares the column.
# Rank on the numeric value so the order does not depend on string comparison.
resultz.sort_values(by=[2], key=pd.to_numeric, ascending=False)

Unnamed: 0,0,1,2,3
5,4.0,"frozenset({'ground beef', 'herb & pepper'})",0.0159978669510731,[OrderedStatistic(items_base=frozenset({'herb ...
27,26.0,"frozenset({'ground beef', 'spaghetti', 'frozen...",0.0086655112651646,[OrderedStatistic(items_base=frozenset({'spagh...
8,7.0,"frozenset({'whole wheat pasta', 'olive oil'})",0.0079989334755365,[OrderedStatistic(items_base=frozenset({'whole...
31,30.0,"frozenset({'mineral water', 'shrimp', 'frozen ...",0.0071990401279829,[OrderedStatistic(items_base=frozenset({'miner...
49,48.0,"frozenset({'spaghetti', 'milk', 'olive oil'})",0.0071990401279829,[OrderedStatistic(items_base=frozenset({'spagh...
39,38.0,"frozenset({'ground beef', 'herb & pepper', 'mi...",0.0066657778962804,[OrderedStatistic(items_base=frozenset({'herb ...
35,34.0,"frozenset({'spaghetti', 'tomatoes', 'frozen ve...",0.0066657778962804,[OrderedStatistic(items_base=frozenset({'spagh...
40,39.0,"frozenset({'ground beef', 'herb & pepper', 'sp...",0.0063991467804292,[OrderedStatistic(items_base=frozenset({'herb ...
33,32.0,"frozenset({'spaghetti', 'shrimp', 'frozen vege...",0.0059992001066524,[OrderedStatistic(items_base=frozenset({'spagh...
44,43.0,"frozenset({'ground beef', 'spaghetti', 'shrimp'})",0.0059992001066524,[OrderedStatistic(items_base=frozenset({'groun...
