In [1]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
def load_poi_data(filename):
    """Read the POI spreadsheet at ``filename`` into a DataFrame.

    Parameters
    ----------
    filename
        Anything ``pandas.read_excel`` accepts as its first argument.

    Returns
    -------
    pandas.DataFrame
        The sheet as parsed by ``pandas.read_excel`` with default arguments.
    """
    poi_frame = pd.read_excel(filename)
    return poi_frame

In [3]:
def prepare_basket(data):
    """Collect the POI categories observed at each (x, y) location.

    Rows that share the same ``x`` and ``y`` values form one group, and the
    ``category`` values of that group are gathered into a Python list.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``x``, ``y`` and ``category``.

    Returns
    -------
    pandas.DataFrame
        One row per distinct (x, y) pair with columns ``x``, ``y`` and
        ``POI_List`` (the list of categories for that pair).
    """
    categories_by_location = data.groupby(['x', 'y'])['category']
    poi_lists = categories_by_location.apply(list)
    return poi_lists.reset_index(name='POI_List')


In [4]:
def encode_transactions(basket):
    """One-hot encode the ``POI_List`` column for use with Apriori.

    A ``TransactionEncoder`` is fitted on the transaction lists and then used
    to transform those same lists.

    Parameters
    ----------
    basket : pandas.DataFrame
        Must contain a ``POI_List`` column holding one list of items per row.

    Returns
    -------
    pandas.DataFrame
        The encoded array wrapped in a DataFrame whose columns are the
        encoder's ``columns_`` (one per item learned during fitting). The
        result gets a fresh default index.
    """
    transactions = basket['POI_List']
    encoder = TransactionEncoder()
    encoder.fit(transactions)
    item_matrix = encoder.transform(transactions)
    return pd.DataFrame(item_matrix, columns=encoder.columns_)

In [5]:
def find_frequent_itemsets(encoded_df, min_support=0.1):
    """Mine frequent itemsets from a one-hot encoded transaction table.

    Parameters
    ----------
    encoded_df : pandas.DataFrame
        One-hot encoded transactions, passed straight to ``apriori``.
    min_support : float, default 0.1
        Minimum support forwarded to ``apriori``.

    Returns
    -------
    pandas.DataFrame
        The result of ``apriori``; ``use_colnames=True`` makes itemsets refer
        to the column names of ``encoded_df`` rather than column positions.
    """
    itemsets = apriori(
        encoded_df,
        min_support=min_support,
        use_colnames=True,
    )
    return itemsets

In [6]:
def generate_association_rules(frequent_itemsets, metric="lift", min_threshold=1):
    """Derive association rules from a table of frequent itemsets.

    Parameters
    ----------
    frequent_itemsets : pandas.DataFrame
        Frequent itemsets, passed straight to ``association_rules``.
    metric : str, default "lift"
        Rule metric that ``min_threshold`` is applied to.
    min_threshold : float, default 1
        Minimum value of ``metric`` forwarded to ``association_rules``.

    Returns
    -------
    pandas.DataFrame
        The rules returned by ``association_rules``.
    """
    return association_rules(
        frequent_itemsets,
        metric=metric,
        min_threshold=min_threshold,
    )

In [7]:
def sort_rules(rules, top=20):
    """Return the ``top`` rules with the highest confidence.

    The rules are ordered by ``confidence`` in descending order and reduced to
    the four reporting columns. No filtering on support (or any other metric)
    happens here.

    Parameters
    ----------
    rules : pandas.DataFrame
        Must contain ``antecedents``, ``consequents``, ``support`` and
        ``confidence`` columns.
    top : int, default 20
        Number of leading rows to keep after sorting.

    Returns
    -------
    pandas.DataFrame
        At most ``top`` rows, keeping the original index labels of ``rules``.
    """
    report_columns = ['antecedents', 'consequents', 'support', 'confidence']
    by_confidence = rules.sort_values(by='confidence', ascending=False)
    report = by_confidence[report_columns]
    return report.head(top)

In [8]:
def apriori_analysis(filename, min_support=0.1, min_threshold=1, top_ranked=20):
    """Run the full Apriori pipeline on one POI spreadsheet.

    The steps are: load the file, group POI categories per (x, y) location,
    one-hot encode the groups, mine frequent itemsets, derive association
    rules using lift as the metric, and keep the highest-confidence rules.
    Intermediate results are printed along the way.

    Parameters
    ----------
    filename
        Path of the Excel file handed to ``load_poi_data``.
    min_support : float, default 0.1
        Minimum support for frequent itemsets.
    min_threshold : float, default 1
        Minimum lift a rule must reach to be generated.
    top_ranked : int, default 20
        Maximum number of rules in the returned table.

    Returns
    -------
    pandas.DataFrame
        Columns ``antecedents``, ``consequents``, ``support`` and
        ``confidence``, sorted by confidence (descending). The frame is empty
        when no itemset reaches ``min_support``.
    """
    report_columns = ['antecedents', 'consequents', 'support', 'confidence']

    # Step 1: Load data
    data = load_poi_data(filename)

    # Step 2: Prepare basket format
    basket = prepare_basket(data)

    # Step 3: Encode transactions
    encoded_df = encode_transactions(basket)
    print("Encoded DataFrame:\n", encoded_df.head())

    # Step 4: Find frequent itemsets
    frequent_itemsets = find_frequent_itemsets(encoded_df, min_support)
    print("Frequent Itemsets:\n", frequent_itemsets)

    # mlxtend's association_rules raises ValueError when given an empty
    # itemset table, so a too-strict min_support would abort the whole run.
    # Report the situation and hand back an empty result of the usual shape.
    if frequent_itemsets.empty:
        print(f"No itemsets reach min_support={min_support}; no rules generated.")
        return pd.DataFrame(columns=report_columns)

    # Step 5: Generate association rules
    rules = generate_association_rules(frequent_itemsets, metric="lift", min_threshold=min_threshold)
    print("Association Rules:\n", rules)

    # Step 6: Sort rules by confidence and keep the top ones
    final_rules = sort_rules(rules, top=top_ranked)
    print("Filtered Rules:\n", final_rules)

    return final_rules

# Final Association Rules

In [9]:
# City B: itemsets need support >= 0.1, rules need lift >= 1, keep up to 20 rules by confidence.
apriori_analysis("POIdata_cityB.xlsx", min_support=0.1, min_threshold=1, top_ranked=20)


Encoded DataFrame:
       1      2      3      4      5      6      7      8      9      10  ...  \
0  False  False  False  False  False  False  False  False  False  False  ...   
1  False  False  False  False  False  False  False  False  False  False  ...   
2  False  False  False  False  False  False  False  False  False  False  ...   
3  False  False  False  False  False  False  False  False  False  False  ...   
4  False  False  False  False  False  False  False  False  False  False  ...   

      76     77     78     79     80     81     82     83     84     85  
0  False  False  False   True  False  False  False  False   True  False  
1  False  False  False  False  False  False   True  False  False  False  
2  False  False  False  False  False  False  False  False  False  False  
3  False  False  False  False  False  False  False  False  False  False  
4  False  False  False  False  False  False  False  False  False  False  

[5 rows x 84 columns]
Frequent Itemsets:
      support

Unnamed: 0,antecedents,consequents,support,confidence
102,"(69, 79)",(48),0.100395,0.812777
17,(66),(48),0.105984,0.79654
13,(62),(48),0.121219,0.747297
36,(59),(79),0.121328,0.740964
112,"(81, 60)",(79),0.107628,0.737237
100,"(48, 69)",(79),0.100395,0.734563
9,(59),(48),0.118698,0.7249
90,"(60, 79)",(48),0.110259,0.721664
19,(69),(48),0.136673,0.721226
88,"(48, 60)",(79),0.110259,0.720115


In [10]:
# City C: itemsets need support >= 0.1, rules need lift >= 1, keep up to 20 rules by confidence.
apriori_analysis("POIdata_cityC.xlsx", min_support=0.1, min_threshold=1, top_ranked=20)


Encoded DataFrame:
       1      2      3      4      5      6      7      8      9      10  ...  \
0  False  False  False  False  False  False  False  False  False  False  ...   
1  False  False  False  False  False  False  False  False  False  False  ...   
2  False  False  False  False  False  False  False  False  False  False  ...   
3  False  False  False  False  False  False  False  False  False  False  ...   
4  False  False  False  False  False  False  False  False  False  False  ...   

      76     77     78     79     80     81     82     83     84     85  
0  False  False  False  False  False  False  False  False  False  False  
1  False  False  False  False  False  False  False  False  False  False  
2  False  False  False  False  False  False  False  False  False  False  
3  False  False  False  False  False  False  False  False  False  False  
4  False  False  False  False  False  False  False  False  False  False  

[5 rows x 85 columns]
Frequent Itemsets:
         supp

Unnamed: 0,antecedents,consequents,support,confidence
1627718,"(66, 76, 47, 51, 54, 60, 62)",(59),0.101815,0.996988
1220708,"(66, 76, 47, 54, 60, 62)",(59),0.114119,0.994638
1593174,"(66, 76, 47, 48, 54, 60, 62)",(59),0.111043,0.99449
1337888,"(66, 76, 51, 54, 60, 62)",(59),0.11012,0.994444
1545929,"(66, 76, 47, 48, 51, 54, 60)",(59),0.109812,0.994429
1662008,"(66, 76, 48, 51, 54, 60, 62)",(59),0.108274,0.99435
1646514,"(66, 76, 47, 79, 54, 60, 62)",(59),0.107044,0.994286
1645752,"(66, 69, 76, 47, 54, 60, 62)",(59),0.106121,0.994236
1630257,"(66, 76, 47, 79, 51, 54, 60)",(59),0.104891,0.994169
1710522,"(66, 69, 76, 51, 54, 60, 62)",(59),0.104583,0.994152


In [11]:
# City D: itemsets need support >= 0.1, rules need lift >= 1, keep up to 20 rules by confidence.
apriori_analysis("POIdata_cityD.xlsx", min_support=0.1, min_threshold=1, top_ranked=20)


Encoded DataFrame:
       1      2      3      4      5      6      7      8      9      10  ...  \
0  False  False  False  False  False  False  False  False  False  False  ...   
1  False  False  False  False  False  False  False  False  False  False  ...   
2  False  False   True  False  False  False  False  False  False  False  ...   
3  False  False  False  False  False  False  False  False  False  False  ...   
4  False  False  False  False  False  False  False  False  False  False  ...   

      76     77     78     79     80     81     82     83     84     85  
0  False  False  False  False  False  False  False  False  False  False  
1  False  False  False  False  False  False  False  False  False  False  
2  False  False  False  False  False  False   True  False  False  False  
3   True  False  False  False  False   True  False  False  False  False  
4  False  False  False   True  False  False  False  False  False  False  

[5 rows x 84 columns]
Frequent Itemsets:
      support

Unnamed: 0,antecedents,consequents,support,confidence
34,(51),(69),0.105934,0.735777
11,(51),(48),0.105752,0.734513
136,"(81, 69)",(79),0.102111,0.729993
118,"(48, 69)",(79),0.101656,0.715567
52,(59),(79),0.125046,0.715252
39,(51),(81),0.102657,0.713021
36,(51),(79),0.102566,0.712389
138,"(69, 79)",(81),0.102111,0.709677
89,(66),(81),0.101383,0.707751
120,"(69, 79)",(48),0.101656,0.706515
