In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from scipy.spatial import distance

# STEP 1: Candidate Formation (Object + Profiling Parameters + Spatial Values)
# One row per user: binary interest flags (1 = interested, 0 = not) plus a
# Lat/Lon pair used as the user's spatial value.
data = {
    'User_ID': ['Object-1', 'Object-2', 'Object-3'],
    'Culture_A': [1, 1, 0],
    'Tech_Innovation': [1, 1, 1],
    'Anthropology': [0, 1, 0],
    'Lat': [12.97, 12.98, 13.00],  # Spatial Value (Latitude)
    'Lon': [77.59, 77.60, 77.62]   # Spatial Value (Longitude)
}
df = pd.DataFrame(data)

# STEP 2: Interests Gathering & Pruning (Lattice/Support calculation)
# interests_only keeps the 0/1 integer flags; the distance function reads it as-is.
interests_only = df[['Culture_A', 'Tech_Innovation', 'Anthropology']]
# apriori expects a boolean one-hot frame (mlxtend warns on non-bool dtypes),
# so a boolean copy is passed; the resulting supports are the same as for 0/1 ints.
# NOTE(review): frequent_interests is not consumed by the later steps shown in
# this notebook, so no interest column is actually pruned before the distance step.
frequent_interests = apriori(
    interests_only.astype(bool),
    min_support=0.5,
    use_colnames=True,
)

# STEP 3: Demographic Distance Calculation
def calculate_demographic_distance(user1_idx, user2_idx):
    """Return the combined demographic distance between two users.

    Both arguments are positional row indices into the module-level frames
    ``interests_only`` (interest flags) and ``df`` (``Lat``/``Lon`` columns).

    The result is the plain, unweighted sum of:
      * the Jaccard distance between the two interest-flag rows, and
      * the Euclidean distance between the two (Lat, Lon) pairs, taken on the
        raw column values.
    """
    pair = (user1_idx, user2_idx)

    # Profiling parameters: one flag vector per user.
    flags_a, flags_b = (interests_only.iloc[idx].values for idx in pair)

    # Spatial values: one [Lat, Lon] pair per user.
    rows = [df.iloc[idx] for idx in pair]
    spot_a, spot_b = ([row['Lat'], row['Lon']] for row in rows)

    interest_gap = distance.jaccard(flags_a, flags_b)
    spatial_gap = distance.euclidean(spot_a, spot_b)
    return interest_gap + spatial_gap

# Worked example: combined distance between the first two rows (Object-1, Object-2)
dist = calculate_demographic_distance(user1_idx=0, user2_idx=1)
print(f"Demographic Distance between Object-1 and Object-2: {dist:.4f}")

Demographic Distance between Object-1 and Object-2: 0.3475




In [2]:
import pandas as pd
from mlxtend.frequent_patterns import apriori
from scipy.spatial import distance

# STEP 1: Candidate Formation (10 Objects + Profiling Parameters + Spatial Values)
# Objects 1-9 form three groups of three users; each group shares one interest
# and sits close together. Object-10 has no interest flag set and a far-away location.
data = {
    'User_ID': [f'Object-{i+1}' for i in range(10)],
    # Interests: [Tech, Art, Science]
    'Tech':    [1, 1, 1, 0, 0, 0, 0, 0, 0, 0], # Cluster 1
    'Art':     [0, 0, 0, 1, 1, 1, 0, 0, 0, 0], # Cluster 2
    'Science': [0, 0, 0, 0, 0, 0, 1, 1, 1, 0], # Cluster 3
    # Lat/Lon: Users in the same cluster are physically close
    'Lat': [10.1, 10.1, 10.2, 20.1, 20.1, 20.2, 30.1, 30.1, 30.2, 99.9],
    'Lon': [10.1, 10.2, 10.1, 20.1, 20.2, 20.1, 30.1, 30.2, 30.1, 99.9]
}
df = pd.DataFrame(data)

# STEP 2: Interests Gathering & Pruning
# interests_only keeps the 0/1 integer flags; the distance function reads it as-is.
interests_only = df[['Tech', 'Art', 'Science']]
# Frequent interests are those meeting the min_support threshold.
# apriori expects a boolean one-hot frame (mlxtend warns on non-bool dtypes),
# so a boolean copy is passed; the resulting supports are the same as for 0/1 ints.
# NOTE(review): frequent_interests is not consumed by the later steps shown in
# this notebook, so no interest column is actually pruned before the distance step.
frequent_interests = apriori(
    interests_only.astype(bool),
    min_support=0.2,
    use_colnames=True,
)

# STEP 3: Demographic Distance Calculation 
def calculate_demographic_distance(user1_idx, user2_idx, profiles=None, locations=None):
    """Return the combined demographic distance between two users.

    Parameters
    ----------
    user1_idx, user2_idx : int
        Positional row indices of the two users to compare.
    profiles : pandas.DataFrame, optional
        Frame of interest flags, one row per user. Defaults to the
        module-level ``interests_only`` frame.
    locations : pandas.DataFrame, optional
        Frame of numeric coordinates, one row per user. Defaults to the
        ``Lat`` and ``Lon`` columns of the module-level ``df`` frame.

    Returns
    -------
    float
        Jaccard distance between the two interest rows plus the Euclidean
        distance between the two coordinate rows.

    Notes
    -----
    The two terms are summed without weighting, and the spatial term is a
    plain Euclidean distance over the raw coordinate values.
    """
    # Fall back to the notebook-level frames only when the caller did not
    # supply explicit ones; this keeps existing two-argument calls unchanged
    # while letting the function be used (and tested) on any pair of frames.
    if profiles is None:
        profiles = interests_only
    if locations is None:
        locations = df[['Lat', 'Lon']]

    pp1 = profiles.iloc[user1_idx].to_numpy()
    pp2 = profiles.iloc[user2_idx].to_numpy()

    loc1 = locations.iloc[user1_idx].to_numpy(dtype=float)
    loc2 = locations.iloc[user2_idx].to_numpy(dtype=float)

    # intellectual_diff: how much the two interest profiles mismatch
    intellectual_diff = distance.jaccard(pp1, pp2)
    # physical_dist: straight-line distance between the coordinate pairs
    physical_dist = distance.euclidean(loc1, loc2)

    return intellectual_diff + physical_dist

# NEW STEP: Pooling into Category Buckets 
def form_category_buckets(threshold):
    """Greedily pool users from the module-level ``df`` into named buckets.

    Users are visited in row order. Each user not yet placed starts a new
    bucket; every later unplaced user whose demographic distance to that
    first member is at most ``threshold`` joins the same bucket. Candidates
    are compared with the bucket's first member only, not with every member.

    Returns a dict mapping ``"Bucket_<n>"`` (numbered from 1 in creation
    order) to the list of ``User_ID`` values in that bucket.
    """
    user_ids = df['User_ID'].tolist()
    total = len(user_ids)
    placed = set()
    buckets = {}

    for seed, seed_id in enumerate(user_ids):
        if seed in placed:
            continue

        placed.add(seed)
        members = [seed_id]

        for candidate in range(seed + 1, total):
            # Skip users already pooled; otherwise test them against the seed.
            if candidate not in placed and calculate_demographic_distance(seed, candidate) <= threshold:
                members.append(user_ids[candidate])
                placed.add(candidate)

        buckets[f"Bucket_{len(buckets)+1}"] = members

    return buckets

# Simulation: a user joins a bucket when the combined demographic distance to
# the bucket's first member is at most THRESHOLD_VALUE.
THRESHOLD_VALUE = 0.5
all_buckets = form_category_buckets(THRESHOLD_VALUE)

print("Final Social Clustering Outcome[cite: 55, 61]:")
for bucket_name in all_buckets:
    users = all_buckets[bucket_name]
    # A bucket holding exactly one user is reported as an outlier.
    if len(users) == 1:
        status = "OUTLIER"
    else:
        status = "CLUSTER"
    print(f"{bucket_name} ({status}): {users}")

Final Social Clustering Outcome[cite: 55, 61]:
Bucket_1 (CLUSTER): ['Object-1', 'Object-2', 'Object-3']
Bucket_2 (CLUSTER): ['Object-4', 'Object-5', 'Object-6']
Bucket_3 (CLUSTER): ['Object-7', 'Object-8', 'Object-9']
Bucket_4 (OUTLIER): ['Object-10']




This simulation organizes a population of 10 people into distinct "Category Buckets" by evaluating their intellectual and physical compatibility. It first runs the Apriori algorithm to identify the interests that meet a minimum-support threshold (the pruning step; in the code shown, this result is computed but not yet fed into the distance calculation). It then computes a Demographic Distance between each unassigned user and the first member of the current bucket by adding their ideological mismatch (Jaccard distance over the interest flags) to their geographical separation (Euclidean distance over latitude and longitude). With a threshold value of 0.5, the system pools like-minded, nearby users into three clusters of three people each and identifies the tenth user—who has none of the listed interests and a distant location—as an outlier, which is intended to prevent "intellectual congestion."