# Eclat
Eclat only considers support (how often items appear together in a transaction); Apriori is usually the better choice.

## Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [2]:
# header=None keeps the first CSV row as data instead of using it as column names.
dataset = pd.read_csv("Market_Basket_Optimisation.csv", header=None)

# Build one list of item strings per CSV row, whatever the number of rows
# and columns the file has.
# NOTE(review): str() turns missing cells (NaN) into the literal item 'nan' —
# presumably padding for shorter baskets; confirm whether these should be
# filtered out before mining.
transactions = [[str(item) for item in basket] for basket in dataset.values]

## Training the Eclat model on the dataset

In [3]:
from apyori import apriori

# Choosing the thresholds:
# - The dataset holds 7501 transactions, recorded over one week.
# - Say a product pair is interesting when it is bought together at least
#   3 times per day, i.e. at least 3 x 7 = 21 times over the week.
# - support = (transactions containing both products) / (all transactions),
#   so the minimum support is 21 / 7501 ~= 0.0028, rounded to 0.003 (0.3%).
# - For the minimum confidence, start at 0.8; if that returns too few rules,
#   halve it and retry until the output is useful.
# - The minimum lift should be at least 3.
# NOTE(review): apyori appears to read only max_length and silently ignore
# min_length — confirm against the installed version.
rule_thresholds = {
    "min_support": 0.003,   # pair appears together in at least 0.3% of all transactions
    "min_confidence": 0.2,  # product B is in at least 20% of the transactions containing product A
    "min_lift": 3,
    "min_length": 2,        # minimum number of items per rule
    "max_length": 2,        # maximum number of items per rule
}
rules = apriori(transactions, **rule_thresholds)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [4]:
# Materialise the rules into a list so later cells can display and reuse them.
results = [*rules]
results

[RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'pasta', 'escalope'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'fromage blanc', 'honey'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

### Putting the results well organised into a Pandas DataFrame

In [5]:
def inspect(results):
    """Flatten relation records into ``(lhs, rhs, support)`` rows.

    Only the first ordered statistic of each record is used, and only the
    first item of its base set and of its added set, so this is meant for
    two-item rules.

    Parameters
    ----------
    results : iterable
        Records indexable like tuples: ``record[1]`` is the support and
        ``record[2]`` is a sequence of ordered statistics whose first two
        fields are the base item set and the added item set.

    Returns
    -------
    list of tuple
        One ``(lhs, rhs, support)`` tuple per record, in input order.

    Raises
    ------
    IndexError
        If a record has no ordered statistics, or if the base or added item
        set of its first statistic is empty.
    """
    rows = []
    # Walk the records exactly once: three separate passes would silently
    # return an empty list for a one-shot iterator such as a generator.
    for record in results:
        first_stat = record[2][0]
        lhs = tuple(first_stat[0])[0]
        rhs = tuple(first_stat[1])[0]
        rows.append((lhs, rhs, record[1]))
    return rows
# One row per rule: the two products and the support of the pair.
resultsinDataFrame = pd.DataFrame(
    inspect(results),
    columns=['Product 1', 'Product 2', 'Support'],
)

### Displaying the results unsorted

In [6]:
# Bare last expression so the notebook renders the frame as a table.
resultsinDataFrame

Unnamed: 0,Product 1,Product 2,Support
0,light cream,chicken,0.004533
1,mushroom cream sauce,escalope,0.005733
2,pasta,escalope,0.005866
3,fromage blanc,honey,0.003333
4,herb & pepper,ground beef,0.015998
5,tomato sauce,ground beef,0.005333
6,light cream,olive oil,0.0032
7,whole wheat pasta,olive oil,0.007999
8,pasta,shrimp,0.005066


### Displaying the results sorted by descending support

In [7]:
# Ten rows with the highest support, largest first.
resultsinDataFrame.nlargest(10, 'Support')

Unnamed: 0,Product 1,Product 2,Support
4,herb & pepper,ground beef,0.015998
7,whole wheat pasta,olive oil,0.007999
2,pasta,escalope,0.005866
1,mushroom cream sauce,escalope,0.005733
5,tomato sauce,ground beef,0.005333
8,pasta,shrimp,0.005066
0,light cream,chicken,0.004533
3,fromage blanc,honey,0.003333
6,light cream,olive oil,0.0032
