In [1]:
import sys
sys.path.append('../..')

In [2]:
import pandas as pd
from efficient_apriori import apriori

In [3]:
def appendPrefix(attribute,prefix):
    """Split an attribute value on whitespace and prefix every token.

    Parameters
    ----------
    attribute : str or scalar
        Raw attribute value, e.g. ``'$ 1.29'``. Non-string scalars are
        converted with ``str`` before splitting. Missing values
        (``None``, NaN, ``pd.NA``) produce no tokens.
    prefix : str
        Text prepended to each token, e.g. ``'L_'`` or ``'R_'``.

    Returns
    -------
    list of str
        One prefixed string per whitespace-separated token, in order.
    """
    # An empty CSV field is read by pandas as float NaN, which has no
    # .split(); contribute no items instead of raising AttributeError.
    if pd.isna(attribute):
        return []
    return [prefix + token for token in str(attribute).split()]

In [4]:
def preProcessNeighbors(nn,opposite_label_data,attribute):
    """Turn joined neighbor rows into transactions for rule mining.

    ``nn`` is merged with ``opposite_label_data`` by matching the
    ``attribute`` column of ``nn`` against the ``'id'`` column of
    ``opposite_label_data``. For every merged row, the values of the
    ``'ltable_<attribute>'`` and ``'rtable_<attribute>'`` columns are
    tokenized with ``appendPrefix`` (prefixes ``'L_'`` and ``'R_'``).

    Returns
    -------
    list of tuple of str
        One tuple per merged row: the ``L_``-prefixed tokens followed by
        the ``R_``-prefixed tokens.
    """
    joined = pd.merge(nn, opposite_label_data, left_on=attribute, right_on='id')
    left_column = joined['ltable_' + attribute]
    right_column = joined['rtable_' + attribute]

    transactions = []
    for left_value, right_value in zip(left_column, right_column):
        left_items = tuple(appendPrefix(left_value, 'L_'))
        right_items = tuple(appendPrefix(right_value, 'R_'))
        transactions.append(left_items + right_items)
    return transactions

In [5]:
# Load the neighbor results for the positive pairs and the negative training
# pairs (roles inferred from the file names -- TODO confirm), then build one
# transaction per joined row from the 'Time' attribute.
pos_nn = pd.read_csv('experiments/results/positivesNNHybridWithoutfilter.csv')
train_neg = pd.read_csv('../../Structured/itunes-amazon/train_negatives.csv')
transactionsOnTime = preProcessNeighbors(pos_nn,train_neg,'Time')

In [6]:
# Mine association rules from the 'Time' transactions
# (minimum support 0.05, minimum confidence 0.8).
itemset,rules = apriori(transactionsOnTime,min_support=0.05,min_confidence=0.8)
# Keep only one-to-one rules: exactly one item on each side.
rules_rhs = filter(lambda rule: len(rule.lhs) == 1 and len(rule.rhs)==1, rules)
# Sorted by ascending lift, so the highest-lift rules are printed last.
for rule in sorted(rules_rhs, key=lambda rule: rule.lift):
  print(rule) # Prints the rule and its confidence, support, lift, ...

{R_3:16} -> {L_3:20} (conf: 0.980, supp: 0.371, lift: 2.488, conv: 30.303)
{L_3:20} -> {R_3:16} (conf: 0.942, supp: 0.371, lift: 2.488, conv: 10.768)
{R_5:17} -> {L_2:09:26} (conf: 1.000, supp: 0.303, lift: 3.300, conv: 696969696.970)
{L_2:09:26} -> {R_5:17} (conf: 1.000, supp: 0.303, lift: 3.300, conv: 696969696.970)


In [7]:
# Same analysis as for 'Time', now on the 'Price' attribute of the
# positives' neighbors (reuses pos_nn and train_neg loaded earlier).
transactionsOnPrice = preProcessNeighbors(pos_nn,train_neg,'Price')
itemset,rules = apriori(transactionsOnPrice,min_support=0.05,min_confidence=0.8)
# Keep only rules with exactly two items on each side.
# NOTE(review): presumably because a price splits into two tokens
# (e.g. '$' and '1.29') per table side -- confirm.
rules_rhs = filter(lambda rule: len(rule.lhs) == 2 and len(rule.rhs)==2, rules)
# Sorted by ascending lift, so the highest-lift rules are printed last.
for rule in sorted(rules_rhs, key=lambda rule: rule.lift):
  print(rule) # Prints the rule and its confidence, support, lift, ...

{R_$, R_1.29} -> {L_Album, L_Only} (conf: 1.000, supp: 0.992, lift: 1.008, conv: 7575757.576)
{L_Only, R_1.29} -> {L_Album, R_$} (conf: 1.000, supp: 0.992, lift: 1.008, conv: 7575757.576)
{L_Only, R_$} -> {L_Album, R_1.29} (conf: 1.000, supp: 0.992, lift: 1.008, conv: 7575757.576)
{L_Album, R_1.29} -> {L_Only, R_$} (conf: 1.000, supp: 0.992, lift: 1.008, conv: 7575757.576)
{L_Album, R_$} -> {L_Only, R_1.29} (conf: 1.000, supp: 0.992, lift: 1.008, conv: 7575757.576)
{L_Album, L_Only} -> {R_$, R_1.29} (conf: 1.000, supp: 0.992, lift: 1.008, conv: 7575757.576)


## Inspect nearest neighbors (NN) on critical attributes for negatives

In [35]:
# Load the neighbor results for the negative pairs and the positive training
# pairs (roles inferred from the file names -- TODO confirm), then build one
# transaction per joined row from the 'Time' attribute.
negNN = pd.read_csv('experiments/results/negativesNNhybridWithoutfilter.csv')
trainPos = pd.read_csv('../../Structured/itunes-amazon/train_positives.csv')
transactions = preProcessNeighbors(negNN,trainPos,'Time')

In [36]:
# Mine association rules from the current `transactions`
# (minimum support 0.05, minimum confidence 0.8).
itemsets, rules = apriori(transactions, min_support=0.05,  min_confidence=0.8)
# Keep only one-to-one rules: exactly one item on each side.
rules_rhs = filter(lambda rule: len(rule.lhs) == 1 and len(rule.rhs)==1, rules)
# Sorted by ascending lift, so the highest-lift rules are printed last.
for rule in sorted(rules_rhs, key=lambda rule: rule.lift):
  print(rule) # Prints the rule and its confidence, support, lift, ...

{R_3:20} -> {L_3:35} (conf: 1.000, supp: 0.381, lift: 2.626, conv: 619164619.165)
{L_3:35} -> {R_3:20} (conf: 1.000, supp: 0.381, lift: 2.626, conv: 619164619.165)
{R_2:59} -> {L_2:59} (conf: 1.000, supp: 0.184, lift: 5.427, conv: 815724815.725)
{L_2:59} -> {R_2:59} (conf: 1.000, supp: 0.184, lift: 5.427, conv: 815724815.725)
{R_3:44} -> {L_3:41} (conf: 1.000, supp: 0.069, lift: 14.536, conv: 931203931.204)
{L_3:41} -> {R_3:44} (conf: 1.000, supp: 0.069, lift: 14.536, conv: 931203931.204)
{R_3:15} -> {L_3:13} (conf: 1.000, supp: 0.054, lift: 15.074, conv: 933660933.661)
{L_3:13} -> {R_3:15} (conf: 0.815, supp: 0.054, lift: 15.074, conv: 5.108)
{R_4:49} -> {L_5:20} (conf: 1.000, supp: 0.052, lift: 19.381, conv: 948402948.403)
{L_5:20} -> {R_4:49} (conf: 1.000, supp: 0.052, lift: 19.381, conv: 948402948.403)


In [37]:
# Rebuild `transactions` from the 'Price' attribute. This overwrites the
# 'Time' transactions held under the same name, so cell order matters.
transactions = preProcessNeighbors(negNN,trainPos,'Price')

In [39]:
# Mine association rules from the current `transactions`
# (minimum support 0.05, minimum confidence 0.8).
itemsets, rules = apriori(transactions, min_support=0.05,  min_confidence=0.8)
# Keep only rules with exactly two items on each side.
# NOTE(review): presumably because a price splits into two tokens
# (e.g. '$' and '0.99') per table side -- confirm.
rules_rhs = filter(lambda rule: len(rule.lhs) == 2 and len(rule.rhs)==2, rules)
# Sorted by ascending lift, so the highest-lift rules are printed last.
for rule in sorted(rules_rhs, key=lambda rule: rule.lift):
  print(rule) # Prints the rule and its confidence, support, lift, ...

{L_0.99, R_0.99} -> {L_$, R_$} (conf: 1.000, supp: 0.827, lift: 1.021, conv: 20408163.265)
{L_$, R_$} -> {L_0.99, R_0.99} (conf: 0.844, supp: 0.827, lift: 1.021, conv: 1.110)
{L_1.29, R_1.29} -> {L_$, R_$} (conf: 1.000, supp: 0.112, lift: 1.021, conv: 20408163.265)
{R_$, R_0.99} -> {L_$, L_0.99} (conf: 0.976, supp: 0.827, lift: 1.181, conv: 7.199)
{L_$, L_0.99} -> {R_$, R_0.99} (conf: 1.000, supp: 0.827, lift: 1.181, conv: 153061224.490)
{L_0.99, R_$} -> {L_$, R_0.99} (conf: 1.000, supp: 0.827, lift: 1.210, conv: 173469387.755)
{L_$, R_0.99} -> {L_0.99, R_$} (conf: 1.000, supp: 0.827, lift: 1.210, conv: 173469387.755)
{R_$, R_1.29} -> {L_$, L_1.29} (conf: 0.846, supp: 0.112, lift: 6.379, conv: 5.638)
{L_1.29, R_$} -> {L_$, R_1.29} (conf: 0.846, supp: 0.112, lift: 6.379, conv: 5.638)
{L_$, R_1.29} -> {L_1.29, R_$} (conf: 0.846, supp: 0.112, lift: 6.379, conv: 5.638)
{L_$, L_1.29} -> {R_$, R_1.29} (conf: 0.846, supp: 0.112, lift: 6.379, conv: 5.638)
