In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

# Example 1: Simple use of algorithm

## Creating Dataset

In [None]:
# Five toy shopping baskets; each inner list is one transaction.
basket_lists = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

# Learn the item vocabulary first, then encode every basket against it.
trans_encoder = TransactionEncoder()
trans_encoder.fit(basket_lists)
encoded_baskets = trans_encoder.transform(basket_lists)

# One column per item seen during fitting, one row per basket.
dataset_df = pd.DataFrame(data=encoded_baskets, columns=trans_encoder.columns_)
dataset_df

## Computing support values

In [None]:
# Frequent itemsets of the toy baskets with a minimum support of 0.6,
# reported with item names (use_colnames) rather than column positions.
apriori(dataset_df, min_support= 0.6, use_colnames= True)

# Example 2: Association rule mining

## Loading dataset

In [None]:
# Load the retail workbook (path is relative to the working directory)
# and preview its first rows.
retail_workbook = 'Online Retail.xlsx'
basket_df = pd.read_excel(retail_workbook)
basket_df.head()

### Dataset description

## Basic exploratory analysis

In [None]:
# Size of the loaded data as (rows, columns).
basket_df.shape

In [None]:
# Data type of each column.
basket_df.dtypes

In [None]:
# Number of missing values in each column.
basket_df.isna().sum()

In [None]:
# Rows whose invoice number contains the letter 'C' are treated as canceled.
# `.str.contains` yields a missing value for entries that are not strings;
# `na=False` maps those straight to False, so the mask is already boolean and
# no `== True` comparison is needed to clean it up.
canceled_mask = basket_df['InvoiceNo'].str.contains('C', na=False)
canceled = basket_df.loc[canceled_mask]
canceled.shape

## Cleansing dataset

In [None]:
# Trim surrounding whitespace from the product descriptions.
basket_df['Description'] = basket_df['Description'].str.strip()
# Remove canceled records. The result is rebound instead of dropped in place,
# and errors='ignore' skips labels that a previous run of this cell already
# removed, so re-running the cell no longer raises KeyError.
basket_df = basket_df.drop(index=canceled.index, errors='ignore')
print(f'{canceled.shape[0]} has removed from dataset, new dataset size is {basket_df.shape[0]}')

## Preparing the dataset for analysis

### Selecting one region (France)

In [None]:
# Keep only the rows whose Country is 'France'.
is_france = basket_df['Country'].eq('France')
france_basket = basket_df.loc[is_france]
france_basket.shape

### Creating transaction item sets

In [None]:
# Build one product list per invoice (`basket`) and keep the invoice numbers
# in the same order (`transaction_nom`) for use as row labels later.
basket = []
transaction_nom = []
# Grouping just the 'Description' column yields each invoice's descriptions
# directly, instead of looking up every line item's full row one at a time.
for transaction, descriptions in france_basket.groupby('InvoiceNo')['Description']:
    transaction_nom.append(transaction)
    # Missing descriptions are skipped so that str() does not turn them into
    # a spurious 'nan' product; str() still normalises non-string values.
    basket.append([str(description) for description in descriptions.dropna()])

### Creating binary dataset

In [None]:
# Fit a fresh encoder on the French baskets, then encode them.
trans_encoder = TransactionEncoder()
trans_encoder.fit(basket)
encoded_baskets = trans_encoder.transform(basket)

# Rows are labelled by invoice number, columns by the items the encoder learned.
binary_df = pd.DataFrame(
    data=encoded_baskets,
    index=transaction_nom,
    columns=trans_encoder.columns_,
)
binary_df

### Association extraction (Recommendation based on antecedents)

In [None]:
# Frequent itemsets with a minimum support of 0.05.
frequencies_items = apriori(binary_df, min_support=0.05, use_colnames=True)

# Derive rules with lift >= 1, rank them by confidence and then lift
# (both descending), and renumber the rows from zero.
rules = (
    association_rules(frequencies_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
    .reset_index(drop=True)
)
rules.head()

### Filtering results

In [None]:
# condition0 / condition1: 'POSTAGE' appears on neither side of the rule.
condition0 = rules['antecedents'].apply(lambda items: 'POSTAGE' not in items)
condition1 = rules['consequents'].apply(lambda items: 'POSTAGE' not in items)
# condition2: the antecedent holds at least two items.
condition2 = rules['antecedents'].apply(lambda items: len(items) >= 2)

keep_rule = condition0 & condition1 & condition2
rules.loc[keep_rule]

# Exercise 1

### Lines 137, 170 and 171 were changed

# Exercise 2

In [None]:
# Keep only the rows whose Country is 'Switzerland'.
is_switzerland = basket_df['Country'].eq('Switzerland')
switzerland_basket = basket_df.loc[is_switzerland]

In [None]:
# Build one product list per Swiss invoice (`basket_2`) and keep the invoice
# numbers in the same order (`transaction_nom_2`) for use as row labels later.
basket_2 = []
transaction_nom_2 = []
# Grouping just the 'Description' column yields each invoice's descriptions
# directly, instead of looking up every line item's full row one at a time.
for transaction, descriptions in switzerland_basket.groupby('InvoiceNo')['Description']:
    transaction_nom_2.append(transaction)
    # Missing descriptions are skipped so that str() does not turn them into
    # a spurious 'nan' product; str() still normalises non-string values.
    basket_2.append([str(description) for description in descriptions.dropna()])

In [None]:
# Fit a fresh encoder on the Swiss baskets, then encode them.
trans_encoder = TransactionEncoder()
trans_encoder.fit(basket_2)
encoded_baskets_2 = trans_encoder.transform(basket_2)

# Rows are labelled by invoice number, columns by the items the encoder learned.
binary_df_2 = pd.DataFrame(
    data=encoded_baskets_2,
    index=transaction_nom_2,
    columns=trans_encoder.columns_,
)
binary_df_2

In [None]:
# Frequent itemsets with a minimum support of 0.06.
frequencies_items_2 = apriori(binary_df_2, min_support=0.06, use_colnames=True)

# Derive rules with lift >= 1, rank them by confidence and then lift
# (both descending), and renumber the rows from zero.
rules_2 = (
    association_rules(frequencies_items_2, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
    .reset_index(drop=True)
)
rules_2.head()

In [None]:
# condition0_2 / condition1_2: 'POSTAGE' appears on neither side of the rule.
condition0_2 = rules_2['antecedents'].apply(lambda items: 'POSTAGE' not in items)
condition1_2 = rules_2['consequents'].apply(lambda items: 'POSTAGE' not in items)
# condition2_2: the antecedent holds at least two items.
condition2_2 = rules_2['antecedents'].apply(lambda items: len(items) >= 2)

keep_rule_2 = condition0_2 & condition1_2 & condition2_2
rules_2.loc[keep_rule_2]