In [12]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory tree and print the full path of every file in it.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [13]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules


# Sample transaction data

In [14]:

# Sample market-basket data: each inner list holds the items of one transaction.
transactions = [
    ['bread', 'milk', 'eggs'],
    ['bread', 'butter', 'jam'],
    ['milk', 'butter', 'eggs'],
    ['bread', 'milk', 'butter', 'jam'],
    ['bread', 'milk', 'eggs', 'butter']
]


In [15]:
# Echo the raw transaction list so it can be checked before encoding.
transactions

[['bread', 'milk', 'eggs'],
 ['bread', 'butter', 'jam'],
 ['milk', 'butter', 'eggs'],
 ['bread', 'milk', 'butter', 'jam'],
 ['bread', 'milk', 'eggs', 'butter']]

# Transform transactions into a one-hot encoded matrix

In [16]:

# One-hot encode the baskets: one row per transaction, one boolean column per item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)  # learn the item vocabulary and encode in one call
df = pd.DataFrame(data=te_ary, columns=te.columns_)
df

Unnamed: 0,bread,butter,eggs,jam,milk
0,True,False,True,False,True
1,True,True,False,True,False
2,False,True,True,False,True
3,True,True,False,True,True
4,True,True,True,False,True


# Use the Apriori algorithm to find frequent itemsets

In [17]:

# Mine the itemsets whose support reaches the 0.2 threshold;
# use_colnames=True labels each itemset with item names instead of column indices.
frequent_itemsets = apriori(
    df,
    min_support=0.2,
    use_colnames=True,
)
frequent_itemsets

Unnamed: 0,support,itemsets
0,0.8,(bread)
1,0.8,(butter)
2,0.6,(eggs)
3,0.4,(jam)
4,0.8,(milk)
5,0.6,"(butter, bread)"
6,0.4,"(eggs, bread)"
7,0.4,"(jam, bread)"
8,0.6,"(milk, bread)"
9,0.4,"(eggs, butter)"


# Generate association rules

In [18]:

# Derive association rules from the frequent itemsets, filtered on the
# confidence metric with a minimum threshold of 0.7.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=0.7,
)

In [19]:
# Display the generated rule table.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,(butter),(bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
1,(bread),(butter),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
2,(jam),(bread),0.4,0.8,0.4,1.0,1.25,0.08,inf,0.333333
3,(milk),(bread),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
4,(bread),(milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
5,(jam),(butter),0.4,0.8,0.4,1.0,1.25,0.08,inf,0.333333
6,(butter),(milk),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
7,(milk),(butter),0.8,0.8,0.6,0.75,0.9375,-0.04,0.8,-0.25
8,(eggs),(milk),0.6,0.8,0.6,1.0,1.25,0.12,inf,0.5
9,(milk),(eggs),0.8,0.6,0.6,0.75,1.25,0.12,1.6,1.0


# Function to find the item most frequently bought with the given item

In [20]:

def find_most_frequent_item(item, rules_df=None):
    """Print the consequent most strongly associated with ``item``.

    Selects every rule whose antecedent is exactly ``{item}`` and reports the
    consequent of the rule with the highest confidence. For a fixed
    antecedent, confidence is the joint support divided by the antecedent's
    support, so the highest-confidence rule is also the most frequent
    co-occurrence. Ties are resolved in favour of the rule listed first.

    Parameters
    ----------
    item : str
        Name of the item to look up.
    rules_df : pandas.DataFrame, optional
        Rule table with ``antecedents`` and ``consequents`` columns holding
        frozensets and a numeric ``confidence`` column. Defaults to the
        notebook-level ``rules`` table.

    Returns
    -------
    None
        The result is printed rather than returned.
    """
    if rules_df is None:
        # Fall back to the rule table built earlier in the notebook.
        rules_df = rules

    item = [item]
    related_rules = rules_df[rules_df['antecedents'] == frozenset(item)]

    if not related_rules.empty:
        # Pick the strongest rule rather than whichever happens to be listed
        # first; argmax returns the first position on ties.
        best_position = related_rules['confidence'].to_numpy().argmax()
        best_rule = related_rules.iloc[best_position]
        most_frequent_item = best_rule['consequents']
        confidence = best_rule['confidence'] * 100
        print(f"The most frequent item brought with {item} is {most_frequent_item} with {confidence:.2f}% confidence.")
    else:
        # Also reached for items that never appear in any rule; no separate
        # itemset lookup is done, so unknown items cannot raise IndexError.
        print(f"No frequent association found for {item}.")

# Get user input for the item

# Find and print the item most frequently bought with the user-specified item

In [22]:

# Read the item name and trim surrounding whitespace, so that an accidental
# leading or trailing space does not stop it matching the item names.
user_input = input("Enter the name of the item: ").strip()

# Report the strongest association for the requested item.
find_most_frequent_item(user_input)

Enter the name of the item:  butter


The most frequent item brought with ['butter'] is frozenset({'bread'}) with 75.00% confidence.
