## **Rule Filtering and Shapley Value Analysis**

In [None]:

import numpy as np
import itertools
import pandas as pd
from collections import defaultdict

In [None]:
def calc_support(df, itemset, weight_col):
	"""
	Return the weighted support of an itemset in ``df``.

	The support is the sum of ``weight_col`` over the rows that satisfy every
	constraint of the itemset, divided by the sum of ``weight_col`` over all
	rows.

	Args:
		df: pandas DataFrame holding the attribute columns and the weights.
		itemset: dict ``{column: (min_val, max_val)}``. A row matches when
			``min_val <= row[column] < max_val`` holds for every entry
			(lower bound inclusive, upper bound exclusive). An empty dict
			matches every row.
		weight_col: name of the column holding the row weights.

	Returns:
		The matching weight divided by the total weight, or 0 when the total
		weight is not positive (e.g. an empty DataFrame).
	"""
	weights = df[weight_col].to_numpy()
	total_weight = weights.sum()  # sum of all the weights

	# Evaluate the constraints column by column on whole arrays instead of
	# walking the rows with iterrows(); the test is the same half-open
	# interval check, and a row is kept only if it passes every constraint.
	mask = np.ones(len(df), dtype=bool)
	for col, (low, high) in itemset.items():
		values = df[col].to_numpy()
		mask &= (values >= low) & (values < high)

	matched_weight = weights[mask].sum()
	return matched_weight / total_weight if total_weight > 0 else 0
	

def closure_Ant(Ant, df, weight_col='weight'):
	"""
	Keep, for every attribute, the antecedent with the highest support.

	Each element of ``Ant`` is a dict with a single key,
	``{attribute: (min_val, max_val)}``. Its support is computed with
	``calc_support``; when several antecedents share an attribute only the
	one with the strictly largest support survives (on a tie the first one
	encountered is kept).

	Example of the internal table just before returning:
	``{'A1': ({'A1': (0, 3)}, 0.8), 'A2': ({'A2': (5, 7)}, 1.0)}``;
	only the antecedent dict of each entry is returned.

	Args:
		Ant: iterable of single-key antecedent dicts.
		df: DataFrame passed through to ``calc_support``.
		weight_col: name of the weight column passed to ``calc_support``.

	Returns:
		List of the retained antecedent dicts, one per attribute, in order
		of first appearance of the attribute.
	"""
	winners = {}  # attribute -> (antecedent dict, its support)
	for candidate in Ant:
		# every antecedent is expected to carry exactly one key
		attribute = list(candidate)[0]
		candidate_support = calc_support(
			df, {attribute: candidate[attribute]}, weight_col=weight_col
		)

		current = winners.get(attribute)
		if current is None or candidate_support > current[1]:
			winners[attribute] = (candidate, candidate_support)

	# drop the supports, keep only the winning antecedent dicts
	return [entry[0] for entry in winners.values()]

def compute_Antj(Ant, Cons):
	"""
	Group the antecedents usable for each consequent attribute.

	For every consequent in ``Cons`` (a single-key dict) the attribute j is
	its key, and Ant_j is the list of antecedents from ``Ant`` whose own key
	differs from j.

	Args:
		Ant: list of single-key antecedent dicts.
		Cons: list of single-key consequent dicts.

	Returns:
		Dict mapping each consequent attribute to its list of antecedents.
		If an attribute appears in several consequents it gets one entry.
	"""
	def antecedents_excluding(target_attr):
		# antecedents defined on any attribute other than the target one
		return [ant for ant in Ant if list(ant)[0] != target_attr]

	consequent_attrs = (list(cons)[0] for cons in Cons)
	return {attr: antecedents_excluding(attr) for attr in consequent_attrs}

def all_subsets(lst):
	"""Return every non-empty subset of ``lst``, each as a list.

	Subsets come from ``itertools.combinations`` and are ordered by size
	(singletons first, the full list last); the empty subset is left out.
	"""
	sizes = range(1, len(lst) + 1)  # size 0 would only yield the empty subset
	combos = itertools.chain.from_iterable(
		itertools.combinations(lst, size) for size in sizes
	)
	return [list(combo) for combo in combos]

def get_consj_for_j_measure(key, Cons):
	"""Return the first consequent dict in ``Cons`` that has ``key`` as one of its keys.

	Consequents are expected to carry a single key. ``None`` is returned
	when no consequent is defined on ``key``.
	"""
	matching = (
		cons for cons in Cons
		if any(cons_key == key for cons_key in cons.keys())
	)
	return next(matching, None)
		
def compute_j_measure(antecedent, consequent, df, weight_col='weight', eps=1e-10):
	"""
	Compute the J-measure of the rule ``antecedent -> consequent``.

	The J-measure quantifies how much knowing X (the antecedent) reduces the
	uncertainty about Y (the consequent):

		J = p(x) * [ p(y|x) * log2(p(y|x) / p(y))
		             + (1 - p(y|x)) * log2((1 - p(y|x)) / (1 - p(y))) ]

	where p(x), p(y) and p(x, y) are weighted supports from ``calc_support``.

	Args:
		antecedent: dict ``{column: (min_val, max_val)}``; may constrain one
			or several attributes.
		consequent: dict ``{column: (min_val, max_val)}``. If it shares a
			column with ``antecedent``, the consequent's interval is the one
			used for the joint support.
		df: DataFrame passed through to ``calc_support``.
		weight_col: name of the weight column.
		eps: small constant added to denominators and log arguments to avoid
			division by zero and ``log2(0)``.

	Returns:
		The J-measure as a float; 0.0 when the antecedent or the consequent
		has zero support.
	"""
	# Supports are taken over the WHOLE dicts. The antecedent can constrain
	# several attributes at once, and the joint support below honours all of
	# them, so p(x) must do the same for the confidence to be p(x, y) / p(x).
	support_antecedent = calc_support(df, antecedent, weight_col)
	support_consequent = calc_support(df, consequent, weight_col)
	support_both = calc_support(df, {**antecedent, **consequent}, weight_col)

	if support_antecedent == 0 or support_consequent == 0:
		return 0.0

	confidence = support_both / (support_antecedent + eps)  # ~ p(y|x)
	lift = confidence / (support_consequent + eps)  # ~ p(y|x) / p(y)

	# First term: p(y|x) * log2(p(y|x) / p(y)); skipped when p(y|x) is 0
	term1 = 0.0
	if confidence > 0:
		term1 = confidence * np.log2(lift + eps)

	# Second term: (1 - p(y|x)) * log2((1 - p(y|x)) / (1 - p(y)));
	# skipped when p(y|x) is 1 to avoid log(0)
	term2 = 0.0
	if confidence < 1:
		ratio = (1 - confidence) / (1 - support_consequent + eps)
		term2 = (1 - confidence) * np.log2(ratio + eps)

	return support_antecedent * (term1 + term2)

def compute_shapley_values(ants_j, Cons, df, weight_col='weight', num_samples=100, eps=1e-10):
	"""
	Approximate Shapley values for each antecedent relative to each consequent.

	A Shapley value measures how much each "player" (here an antecedent
	interval) contributes to the value of a coalition. The value of a
	coalition is the J-measure of the rule whose antecedent is the closure
	of the coalition (``closure_Ant``) and whose consequent is the one
	defined on the attribute at hand. The values are estimated by sampling
	``num_samples`` random orderings of the antecedents and averaging each
	antecedent's marginal contribution.

	Args:
		ants_j: dict mapping a consequent attribute to its list of
			antecedent dicts.
		Cons: list of consequent dicts; for each attribute the first
			consequent containing it is used.
		df: DataFrame passed through to the support computations.
		weight_col: name of the weight column.
		num_samples: number of random permutations to sample; must be
			positive.
		eps: smoothing constant forwarded to ``compute_j_measure``.

	Returns:
		Dict ``{cons_attr: {str(antecedent): value}}`` with values rounded to
		3 decimals. Attributes whose antecedent list is empty are omitted.
		Antecedents with the same string form share one entry.
	"""
	shapley_values = {}

	for cons_attr, ant_list in ants_j.items():
		if not ant_list:
			continue

		# first consequent defined on this attribute (None if there is none)
		cons_dict = next((cons for cons in Cons if cons_attr in cons), None)

		# accumulated marginal contributions, keyed by the antecedent's repr
		phi = {str(ant): 0.0 for ant in ant_list}

		for _ in range(num_samples):
			# sample a random ordering; permuting indices keeps the
			# antecedents as plain dicts instead of a NumPy object array
			order = np.random.permutation(len(ant_list))

			coalition = []
			j_prev = 0.0  # value of the empty coalition

			for idx in order:
				ant = ant_list[idx]

				# value of the coalition once the current antecedent joins
				coalition.append(ant)
				closed = closure_Ant(coalition, df, weight_col)
				merged = {k: v for d in closed for k, v in d.items()}
				j_new = compute_j_measure(merged, cons_dict, df, weight_col, eps)

				# marginal contribution of the current antecedent
				phi[str(ant)] += j_new - j_prev

				# the coalition just evaluated is the next step's baseline,
				# so its value is reused rather than recomputed
				j_prev = j_new

		# average over num_samples
		shapley_values[cons_attr] = {k: round(v / num_samples, 3) for k, v in phi.items()}

	return shapley_values


# ----------------


# MAIN 
def main():
	"""Run rule filtering and Shapley-value estimation on a small toy dataset.

	Builds a five-row DataFrame and a hard-coded list of association rules,
	keeps the rules passing the p-value and lift thresholds, derives the
	antecedent/consequent lists, and prints the consequents, the per-attribute
	antecedent lists and the estimated Shapley values.
	"""
	df = pd.DataFrame({
		'A1': [1.2, 2.5, 3.1, 2.9, 1.5],
		'A2': [5.0, 5.5, 6.0, 5.2, 5.1],
		'weight': [1, 1, 1, 1, 1]
	})

	final_association_rules = [
		{'antecedent': {'A1': (0, 3)}, 'consequent': {'A2': (0, 6)}, 'support': 0.8, 'confidence': 1.0, 'lift': 1.25, 'p_value': 0.2},
		{'antecedent': {'A2': (0, 6)}, 'consequent': {'A1': (0, 3)}, 'support': 0.8, 'confidence': 1.0, 'lift': 1.25, 'p_value': 0.2},
		{'antecedent': {'A1': (0, 2)}, 'consequent': {'A2': (0, 6)}, 'support': 0.4, 'confidence': 1.0, 'lift': 1.25, 'p_value': 0.6},
		{'antecedent': {'A1': (0, 4)}, 'consequent': {'A2': (0, 6)}, 'support': 0.8, 'confidence': 0.8, 'lift': 1.0, 'p_value': 1.0},
		{'antecedent': {'A2': (0, 6)}, 'consequent': {'A1': (0, 4)}, 'support': 0.8, 'confidence': 1.0, 'lift': 1.0, 'p_value': 1.0}
	]

	p_threshold = 0.5
	lift_threshold = 1.2
	weight_col = 'weight'
	eps = 1e-10
	num_samples = 100

	# keep only rules with p-value < p_threshold and lift > lift_threshold
	final_rules = [
		rule for rule in final_association_rules
		if rule['p_value'] < p_threshold and rule['lift'] > lift_threshold
	]

	# build the Ant and Cons lists; both hold single-key dicts,
	# e.g. Ant = [{'A1': (0, 3)}, {'A2': (0, 6)}]
	Ant = [rule['antecedent'] for rule in final_rules]
	Cons = [rule['consequent'] for rule in final_rules]
	print("Cons: " + str(Cons))

	# compute Antj: for each consequent attribute, the antecedents on other attributes
	ants_j = compute_Antj(Ant, Cons)
	print("Antjs:", ants_j)

	# compute CPO
	# cpos = compute_cpo(ants_j, Cons, df, weight_col=weight_col, eps=eps)
	# print("CPOs:", cpos)

	# compute shapley values
	shapley_values = compute_shapley_values(ants_j, Cons, df=df, num_samples=num_samples, weight_col=weight_col, eps=eps)
	print("Shapley values:", shapley_values)


if __name__ == "__main__":
	main()