In [18]:
import random
import pandas as pd
import numpy as np
from multiprocessing import Manager, Process
from IPython.display import display

# ======================
# Utility Functions
# ======================
def point_in_interval(point, interval):
	"""Return whether ``point`` lies strictly inside ``interval``.

	``interval`` is indexed as ``interval[0]`` (lower bound) and
	``interval[1]`` (upper bound). Both endpoints are excluded, i.e. the
	interval is treated as open.
	"""
	above_lower = interval[0] < point
	# Same short-circuit as a chained comparison: the upper bound is only
	# consulted when the lower-bound test succeeded.
	return above_lower and point < interval[1]

def satisfies(itemset, row):
	"""Return True when ``row`` falls inside every interval of ``itemset``.

	``itemset`` maps attribute names to intervals; ``row`` is indexed by the
	same attribute names. An empty itemset is satisfied by any row.
	"""
	for attr in itemset:
		# Stop at the first attribute whose value is outside its interval.
		if not point_in_interval(row[attr], itemset[attr]):
			return False
	return True

def support(itemset, data, class_col):
	"""Return the fraction of the class-column total covered by ``itemset``.

	The result is ``sum(class_col over matching rows) / sum(class_col over
	all rows)``, where a row matches when, for every attribute in
	``itemset``, its value lies strictly inside the attribute's interval
	(both bounds excluded).

	Parameters:
		itemset: mapping ``attribute -> (lower, upper)``.
		data: pandas DataFrame containing the attributes and ``class_col``.
		class_col: name of the column whose values are summed.

	Returns:
		The ratio described above, or ``0`` when the class-column total is
		not positive (this also covers an empty DataFrame).
	"""
	total_c = data[class_col].sum()
	# Nothing to normalise by: bail out before doing any filtering work.
	if not total_c > 0:
		return 0

	# Build the row mask column by column instead of calling a Python
	# function once per row; a plain NumPy mask avoids index alignment.
	mask = np.ones(len(data), dtype=bool)
	for attr in itemset:
		bounds = itemset[attr]
		column = data[attr]
		mask &= ((column > bounds[0]) & (column < bounds[1])).to_numpy()

	return data[class_col].to_numpy()[mask].sum() / total_c

def shrink_difference(inner, outer):
	"""Return how much ``inner`` is shrunk relative to ``outer``.

	Both arguments are indexed as ``[0]`` (lower bound) and ``[1]`` (upper
	bound). The result is the gap between the lower bounds plus the gap
	between the upper bounds.
	"""
	lower_gap = inner[0] - outer[0]
	upper_gap = outer[1] - inner[1]
	return lower_gap + upper_gap

def delta(itemset, max_vals):
	"""Return the total shrinkage of ``itemset`` from the full ranges.

	For every attribute in ``itemset`` the interval is compared against
	``(0, max_vals[attr])`` with ``shrink_difference`` and the per-attribute
	results are added up.
	"""
	total = 0
	for attr in itemset:
		full_range = (0, max_vals[attr])
		total = total + shrink_difference(itemset[attr], full_range)
	return total

# ======================
# Standard Apriori
# ======================
def apriori_standard(data, epsilon, class_col):
	"""Level-wise search for interval itemsets with support >= ``epsilon``.

	Starting from the bottom itemset ``{attr: (0, ceil(max(attr)))}``, each
	level is built by narrowing one bound of one attribute by 1 in every
	itemset that was supported at the previous level. Candidates whose
	support (see ``support``) reaches ``epsilon`` are recorded and expanded
	further.

	Parameters:
		data: pandas DataFrame with the attribute columns and ``class_col``.
		epsilon: minimum support for an itemset to be kept.
		class_col: name of the class column (excluded from the attributes).

	Returns:
		dict mapping ``str(itemset)`` to its support. The bottom itemset
		itself is never evaluated and therefore never appears in the result.
	"""
	# Every column except the class column is an attribute.
	attributes = [col for col in data.columns if col != class_col]
	# Upper bounds used to build the bottom itemset.
	# NOTE(review): intervals are open, so a value equal to an integer
	# maximum (or to 0) is not matched even by the bottom itemset -- confirm
	# this is intended.
	max_vals = {a: int(np.ceil(data[a].max())) for a in attributes}
	I0 = {a: (0, max_vals[a]) for a in attributes}
	# R: supported itemsets found so far; SWk: supported itemsets of the
	# current level, used to generate the next level.
	R, SWk = {}, [I0]

	while SWk:
		Wk = []
		# The same candidate can be reached from several parents. Without
		# de-duplication the copies multiply from level to level and each
		# copy triggers another support computation.
		seen = set()
		for I in SWk:
			for a in attributes:
				b, e = I[a]
				# An interval can only be narrowed while it is wider than 1.
				if not b + 1 < e:
					continue
				# Raise the lower bound by 1, then lower the upper bound by 1.
				for bounds in ((b + 1, e), (b, e - 1)):
					new_I = I.copy()
					new_I[a] = bounds
					key = tuple(new_I[x] for x in attributes)
					if key not in seen:
						seen.add(key)
						Wk.append(new_I)

		SWk = []
		for I in Wk:
			s = support(I, data, class_col)
			if s >= epsilon:
				R[str(I)] = s
				SWk.append(I)
	return R

# ======================
# Randomic Apriori
# ======================
def is_predecessor(Ip, I):
	"""Return True when every interval of ``Ip`` contains that of ``I``.

	The check runs over the attributes of ``I``; for each one the interval
	in ``Ip`` must start at or before, and end at or after, the interval in
	``I``. Equal itemsets count as predecessors of each other.
	"""
	def contains(outer, inner):
		# Closed containment of one (begin, end) pair in another.
		outer_begin, outer_end = outer
		inner_begin, inner_end = inner
		return outer_begin <= inner_begin and inner_end <= outer_end

	return all(contains(Ip[attr], I[attr]) for attr in I)

def is_successor(In, I):
	"""Return True when every interval of ``In`` lies inside that of ``I``.

	This is the mirror image of ``is_predecessor``: ``In`` is a successor of
	``I`` when it can be obtained from ``I`` by narrowing intervals. The
	previous implementation tested the opposite containment (``In``
	containing ``I``), which made it indistinguishable from
	``is_predecessor``. Equal itemsets count as successors of each other.
	"""
	for a in I:
		b1, e1 = I[a]
		b2, e2 = In[a]
		# In's interval must start no earlier and end no later than I's.
		if not (b1 <= b2 and e2 <= e1):
			return False
	return True

def apriori_randomic(data, epsilon, class_col):
	"""Search for supported interval itemsets, visiting candidates randomly.

	Candidates are kept in a pending list and popped in random order. A
	popped candidate is discarded without computing its support when it is
	contained in an itemset already found to be non-supported, since
	narrowing intervals can only remove matching rows. Otherwise its support
	is computed (see ``support``); supported candidates are recorded and
	their children (one bound of one attribute narrowed by 1) are queued.

	The previous version skipped every candidate contained in a *supported*
	itemset. Each child is contained in its supported parent, so nothing
	beyond the bottom itemset was ever evaluated. It also rebuilt itemsets
	with ``eval`` on stringified dict keys; itemsets are now kept as dicts.

	Parameters:
		data: pandas DataFrame with the attribute columns and ``class_col``.
		epsilon: minimum support for an itemset to be kept.
		class_col: name of the class column (excluded from the attributes).

	Returns:
		dict mapping ``str(itemset)`` to its support. The bottom itemset is
		evaluated like any other candidate and is included when supported.
	"""
	attributes = [c for c in data.columns if c != class_col]
	max_vals = {a: int(np.ceil(data[a].max())) for a in attributes}
	I0 = {a: (0, max_vals[a]) for a in attributes}

	def key_of(itemset):
		# Hashable identity of an itemset, stable across dict copies.
		return tuple(itemset[a] for a in attributes)

	R = {}
	LP = [I0]  # pending candidates
	LNS = []  # itemsets found to be non-supported
	queued = {key_of(I0)}  # everything ever queued, to avoid duplicates

	while LP:
		# Take a random pending candidate.
		I = LP.pop(random.randint(0, len(LP) - 1))

		# A candidate inside a non-supported itemset cannot be supported.
		if any(is_predecessor(rejected, I) for rejected in LNS):
			continue

		s = support(I, data, class_col)

		if s >= epsilon:
			R[str(I)] = s
			# Queue the children of a supported itemset.
			for a in attributes:
				b, e = I[a]
				# An interval can only be narrowed while it is wider than 1.
				if not b + 1 < e:
					continue
				# Raise the lower bound by 1, then lower the upper bound by 1.
				for bounds in ((b + 1, e), (b, e - 1)):
					new_I = I.copy()
					new_I[a] = bounds
					child_key = key_of(new_I)
					if child_key not in queued:
						queued.add(child_key)
						LP.append(new_I)
		else:
			LNS.append(I)

	return R


# ======================
# Run All Algorithms
# ======================
# Banner for the run (three lines, preceded by a blank line).
print(
	"\n======================",
	"RUN APRIORI ALGORITHMS",
	"======================",
	sep="\n",
)

# Three small demo datasets: two numeric attributes plus a class column 'C'
# that is 1 on every row.
data1 = pd.DataFrame({
	'A1': [1.2, 2.5, 3.1, 2.9, 1.5],
	'A2': [3.0, 3.5, 2.0, 2.2, 3.1],
	'C': [1, 1, 1, 1, 1],
})

data2 = pd.DataFrame({
	'A1': [1.2, 1.3, 1.1, 1.9, 1.5],
	'A2': [1.6, 1.5, 1.0, 1.2, 1.1],
	'C': [1, 1, 1, 1, 1],
})

data3 = pd.DataFrame({
	'A1': [1.2, 2.5, 3.1, 2.9, 1.5],
	'A2': [5.0, 5.5, 6.0, 5.2, 5.1],
	'C': [1, 1, 1, 1, 1],
})

datasets = {"data1": data1, "data2": data2, "data3": data3}
epsilon = 0.4  # minimum support threshold passed to both algorithms

# ======================
# Print Results
# ======================

for name, data in datasets.items():
	# Run both algorithms first, then report them one after the other.
	std_result = apriori_standard(data, epsilon, 'C')
	rnd_result = apriori_randomic(data, epsilon, 'C')

	print("\n=== Dataset:", name, "===")

	reports = (
		("Standard Apriori:", std_result),
		("Randomic Apriori:", rnd_result),
	)
	for title, result in reports:
		print(title)
		for k, v in result.items():
			print("  itemset:", k, " support:", v)


RUN APRIORI ALGORITHMS

=== Dataset: data1 ===
Standard Apriori:
  itemset: {'A1': (1, 4), 'A2': (0, 4)}  support: 1.0
  itemset: {'A1': (0, 3), 'A2': (0, 4)}  support: 0.8
  itemset: {'A1': (0, 4), 'A2': (1, 4)}  support: 1.0
  itemset: {'A1': (0, 4), 'A2': (0, 3)}  support: 0.4
  itemset: {'A1': (2, 4), 'A2': (0, 4)}  support: 0.6
  itemset: {'A1': (1, 3), 'A2': (0, 4)}  support: 0.8
  itemset: {'A1': (1, 4), 'A2': (1, 4)}  support: 1.0
  itemset: {'A1': (1, 4), 'A2': (0, 3)}  support: 0.4
  itemset: {'A1': (0, 2), 'A2': (0, 4)}  support: 0.4
  itemset: {'A1': (0, 3), 'A2': (1, 4)}  support: 0.8
  itemset: {'A1': (0, 4), 'A2': (2, 4)}  support: 0.8
  itemset: {'A1': (0, 4), 'A2': (1, 3)}  support: 0.4
  itemset: {'A1': (2, 3), 'A2': (0, 4)}  support: 0.4
  itemset: {'A1': (2, 4), 'A2': (1, 4)}  support: 0.6
  itemset: {'A1': (2, 4), 'A2': (0, 3)}  support: 0.4
  itemset: {'A1': (1, 2), 'A2': (0, 4)}  support: 0.4
  itemset: {'A1': (1, 3), 'A2': (1, 4)}  support: 0.8
  itemset: {'A1'