# Market Basket Analysis

In [None]:
!pip install networkx matplotlib
!pip install mlxtend
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
from pandas.plotting import parallel_coordinates
import networkx as nx
import matplotlib.pyplot as plt

# Data Preparation

In [None]:
# Load the raw transaction export; the file uses ';' as its field separator.
csv_path = "transactions_dataset.csv"
df = pd.read_csv(csv_path, sep=';')

In [None]:
# Parse both date columns into pandas datetimes so they can be compared and sorted.
for date_col in ('date_order', 'date_invoice'):
    df[date_col] = pd.to_datetime(df[date_col])

# Report the time span covered by the orders.
order_dates = df['date_order']
min_date, max_date = order_dates.min(), order_dates.max()

print("Maximum Date:", max_date)
print("Minimum Date:", min_date)

In [None]:
# Identifier columns are labels, not quantities: store them as strings so they
# are never treated numerically and can be joined into text labels later on.
for id_col in ('client_id', 'product_id', 'branch_id'):
    df[id_col] = df[id_col].astype(str)

In [None]:
# Put the rows in chronological order of the order date.
df = df.sort_values('date_order')

In [None]:
# Build one basket per client: the list of every product_id found on that
# client's rows. NOTE(review): a "transaction" here is therefore a client's
# whole purchase history, not a single order -- confirm this is intended.
transactions = [
    list(product_ids)
    for _, product_ids in df.groupby('client_id')['product_id']
]

In [None]:
# Preview the first rows of the prepared table (notebook display expression).
df.head()

# Association Rules Analysis

In [None]:
# A product is kept only if it appears on at least 0.5% of all rows of df.
min_item_count = len(df) * 0.005
item_counts = df['product_id'].value_counts()
frequent_items = item_counts.loc[item_counts >= min_item_count].index

# Drop the infrequent products from every basket (a basket may end up empty).
frequent_lookup = set(frequent_items)
filtered_transactions = [
    [item for item in basket if item in frequent_lookup]
    for basket in transactions
]


In [None]:
# Transaction Encoding
te = TransactionEncoder()
te_ary = te.fit(filtered_transactions).transform(filtered_transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)
rules.sort_values(['confidence', 'lift'], ascending=[False, False])

Insight 1: High-Value Product Combinations
Observation: Rule #83 shows a strong association between items 3000784, 1895362, 1390401, and 1366829, with a high confidence of 88.9% and a lift of 2.62. This indicates not only a strong likelihood of these items being purchased together but also that their combination is more frequent than would be expected if they were bought independently.

Conclusion: These items form a high-value combination that customers are likely to buy together. This could be due to complementary usage or a common customer need that these products collectively fulfill.

Insight 2: Strategic Product Placement
Observation: The rules show specific item combinations with high lift values, indicating that certain products are much more likely to be bought together than others. For example, Rule #80 suggests that when items 1366829, 3000784, and 1895362 are bought, item 1390401 is also likely to be purchased, with a lift of 2.81.

Conclusion: Consider strategic product placement both in physical stores and online platforms. Placing these items closer together or suggesting them as add-ons during the online checkout process could increase basket size and overall sales.

Insight 3: Targeted Promotions
Observation: Items with strong associations, as identified in the rules, provide an excellent opportunity for targeted promotions. For example, offering discounts on less frequently bought items (consequents) when customers purchase popular items (antecedents) could boost the sales of the former.

Conclusion: Use the identified item associations to craft targeted promotional campaigns. For instance, if purchasing item 3000784 has a strong association with purchasing item 1366829, offering a discount on 1366829 when 3000784 is purchased could incentivize increased sales of both.

Insight 4: Inventory Management
Observation: The support values indicate the frequency of itemsets being bought together. Higher support values, such as for the combinations in Rules #83 and #34, suggest these items are popular and frequently purchased together.

Conclusion: Inventory management can be optimized based on these insights. Ensure that high-demand items identified in these rules are well-stocked to meet customer demand, especially when promotional activities are planned.

Insight 5: Understanding Customer Preferences
Observation: The combination of items in these rules suggests that certain products are consistently preferred by customers when purchased together. This pattern could reflect underlying customer preferences or the complementary nature of the products.

Conclusion: Analyze the characteristics or features of these frequently bought-together items to understand customer preferences better. This understanding can guide new product development, adjustments in product offerings, and personalized marketing messages.

Presenting to the Client:
When presenting these results to your client, focus on how these insights can drive actionable strategies:

Promotions: Highlight how targeted promotions based on these rules can increase cross-selling opportunities.
Product Placement: Discuss the potential for increased sales through strategic product placement both in-store and online.
Inventory Optimization: Suggest inventory adjustments based on the popularity and association strength of certain item combinations.
Customer Insights: Emphasize how understanding product associations can reveal deeper insights into customer preferences, guiding future product and marketing strategies.

# Market Basket Graph

In [None]:
# Build a directed graph of the association rules: one node per itemset and
# one edge antecedent -> consequent per rule, with edge width scaled by lift.
G = nx.DiGraph()
for antecedent_set, consequent_set, lift in zip(
    rules['antecedents'], rules['consequents'], rules['lift']
):
    # Itemsets are unordered sets, so tuple(itemset) has an arbitrary order.
    # Sorting gives every itemset one canonical tuple, so the same itemset
    # always maps to the same node and the labels are stable between runs.
    antecedents = tuple(sorted(antecedent_set))
    consequents = tuple(sorted(consequent_set))

    # Scale lift down so the drawn edge widths stay readable.
    edge_width = lift * 0.1

    # Add nodes and edges to the graph
    G.add_node(antecedents, label=antecedents)
    G.add_node(consequents, label=consequents)
    G.add_edge(antecedents, consequents, weight=edge_width)

plt.figure(figsize=(12, 12))
# Fixed seed: the force-directed layout starts from random positions, so
# without it the figure would look different on every execution.
pos = nx.spring_layout(G, k=0.5, iterations=20, seed=42)
edge_widths = [width for _, _, width in G.edges(data='weight')]

nx.draw_networkx_nodes(G, pos, node_size=7000, node_color='skyblue', alpha=0.6)
nx.draw_networkx_edges(G, pos, width=edge_widths, alpha=0.5, edge_color='gray')
nx.draw_networkx_labels(G, pos, labels={node: node for node in G.nodes()}, font_size=10)

plt.title("Item Association Rules Graph")
plt.axis('off')
plt.show()


In our Market Basket Analysis, we've constructed an Item Association Rules Graph that vividly illustrates the purchasing patterns and relationships between different products. Each node in the graph represents a set of items commonly bought together, and the edges denote the strength of the association between these item sets. The width of the edges is proportional to the 'lift' metric from our analysis – a measure of how much more often the item sets are purchased together than we would expect if they were statistically independent. In practical terms, this graph helps us identify 'cross-selling' opportunities.

In our Market Basket Analysis graph, each node represents a cluster of items that customers often purchase together. For instance, the node labeled ('1366829', '2512504') could represent customers who frequently buy both item 1366829 (let’s say, 'Organic Bananas') and item 2512504 ('Almond Milk'). Note that baskets in this analysis are built per client, so "together" means within the same client's purchase history rather than necessarily in a single order. The proximity of nodes in the graph's center, such as ('3000784', '1895362', '1390401'), suggests a strong and central interconnection in the purchasing patterns. This indicates that these items, perhaps 'Coffee', 'Whole Wheat Bread', and 'Butter', are not only commonly purchased together, but they may also be pivotal items that connect to many other purchase combinations within our store.

Edges between nodes, depicted with varying widths, represent the 'lift' of the association rule. A wider edge means a higher lift value, which implies a stronger-than-expected likelihood that the connected items are bought together. This can be seen clearly in the dense center of the graph, where a web of strong connections suggests a core group of products that are highly interrelated in customer purchases.

The strategic placement of these core items could potentially influence shopping behavior and increase basket sizes. For example, placing 'Coffee' near 'Whole Wheat Bread' might encourage customers who came for bread to remember they also need coffee, increasing the chance of a combined purchase.

Additionally, the graph provides insights for promotional strategies. Items on the periphery with thinner connecting edges might benefit from targeted promotions to increase their association strength with central items, driving their sales up.

# Lift Correlation Analysis

In [None]:
# Lift Correlation Analysis
# Render every itemset as a text label. The items are sorted first because an
# itemset has no inherent order: without sorting, the same itemset could be
# spelled in different orders and be split over several heatmap rows/columns.
rules['antecedents_str'] = rules['antecedents'].apply(
    lambda items: ', '.join(sorted(map(str, items)))
)
rules['consequents_str'] = rules['consequents'].apply(
    lambda items: ', '.join(sorted(map(str, items)))
)

# Antecedents as rows, consequents as columns, lift as the cell value.
# The arguments are passed by keyword: DataFrame.pivot no longer accepts them
# positionally in pandas >= 2.0.
pivot = rules.pivot(index="antecedents_str", columns="consequents_str", values="lift")
# Antecedent/consequent pairs without a rule have no lift; plot them as 0.
pivot = pivot.fillna(0)

plt.figure(figsize=(10, 8))
sns.heatmap(pivot, annot=True, cmap="YlGnBu")
plt.title("Heatmap of Lift Values for Item Associations")
plt.show()


# Support vs Confidence Analysis

In [None]:
# Build a readable "antecedents -> consequents" label for every rule.
# Items are sorted before joining because an itemset has no inherent order;
# this keeps each label identical from one run to the next and guarantees
# that equal itemsets are always spelled the same way.
rules['antecedents_str'] = rules['antecedents'].apply(
    lambda items: ', '.join(sorted(map(str, items)))
)
rules['consequents_str'] = rules['consequents'].apply(
    lambda items: ', '.join(sorted(map(str, items)))
)
rules['rule'] = rules['antecedents_str'] + " -> " + rules['consequents_str']

In [None]:
# Bubble chart of the rules: support on x, confidence on y, with lift encoded
# twice (marker size and colour) so that strong rules stand out.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=rules,
    x='support',
    y='confidence',
    hue='lift',
    size='lift',
    sizes=(20, 200),
    palette="viridis",
    alpha=0.6,
    ax=ax,
)
ax.set_title('Association Rules: Support vs. Confidence (Bubble Size = Lift)')
# Anchor the legend outside the axes so it does not cover any points.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
plt.show()
