In [None]:
from itertools import combinations, product

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

## data load

In [None]:
# Read the comma-separated transaction table, then keep its contents as a NumPy
# array so the helper functions can index it with array[:, columns].
store_data = pd.read_csv('onigiri.csv', sep=',')
store_array = store_data.to_numpy()

In [None]:
# Display the loaded transaction table for a visual check.
store_data

In [4]:
def GTI(df, indexes): # Get True Indexes
    '''
    Return, for each selected item column, which transactions contain the item.

    Input
        df: 2-D numpy array (row: transactions, col: items).
            It is indexed as df[:, indexes], so a pandas DataFrame will not work.
        indexes: a single column index (Python int or any numpy integer type)
                 or a sequence/array of column indexes
    Output
        2-D boolean array with one column per selected item;
        True where the cell equals 1
    '''
    # Wrap every scalar integer (int, np.int32, np.int64, ...) in a list so the
    # result always keeps a column axis. bool is excluded on purpose: it is a
    # subclass of int but would be treated as a mask, not as a column number.
    if isinstance(indexes, (int, np.integer)) and not isinstance(indexes, bool):
        indexes = [indexes]

    return df[:, indexes] == 1

In [None]:
def support(array_2d,  indexes, m='num'):
    '''
    Compute the support of the itemset given by `indexes`.

    Input
        array_2d: 2-D array (row: transactions, col: items)
        indexes: column index(es) of the itemset, passed straight to GTI
        m: output mode
            'num'   -> number of transactions in which every selected column is 1
            'ratio' -> that number divided by the number of transactions
            'bool'  -> the per-transaction boolean array itself
    Output
        Count, ratio or boolean array depending on `m`.
        0 is returned (whatever the mode) when no column is selected.
    Raises
        ValueError: if `m` is not one of 'num', 'ratio', 'bool'
    '''
    gti_b = GTI(array_2d, indexes)
    # No column selected (e.g. an all-False mask): report zero support.
    if gti_b.shape[1] == 0:
        return 0
    # A transaction supports the itemset only if all selected items are present.
    b = np.all(gti_b, axis=1)
    if m == 'num':
        return np.sum(b)
    if m == 'ratio':
        return np.sum(b)/len(b)
    if m == 'bool':
        return b
    raise ValueError("m must be 'num', 'ratio' or 'bool', got %r" % (m,))

In [None]:
def confidence(array_2d, X_indexes, Y_indexes):
    '''
    Compute the confidence of the rule X -> Y: support(X and Y) / support(X).

    Input
        array_2d: 2-D array (row: transactions, col: items)
        X_indexes: integer column index(es) of the antecedent
        Y_indexes: integer column index(es) of the consequent
    Output
        Confidence as a float. 0.0 is returned when the antecedent has zero
        support, because the ratio is undefined and the rule never applies.
    '''
    sup_X = support(array_2d, X_indexes)
    # Guard the division: a zero-support antecedent would otherwise yield
    # inf/nan, and inf would pass any ">= minconf" filter.
    if sup_X == 0:
        return 0.0
    # hstack accepts scalars as well as sequences, so single items work too.
    X_Y_indexes = np.hstack([X_indexes, Y_indexes])
    return support(array_2d, X_Y_indexes)/sup_X
    

In [None]:
# Sanity check: per-transaction flags for item columns 1 and 2 (True where the cell equals 1).
GTI(store_array,[1,2])

In [None]:
# Sanity check: fraction of transactions in which columns 1 and 2 are both 1.
support(store_array, np.array([1,2]), m='ratio')

In [None]:
# Brute-force baseline: try every subset of item columns and print those whose
# support ratio reaches the threshold.
const = [True, False]
threshold = 0.2
# `args` is a boolean mask with one flag per item column, so its length must
# match the number of columns. This enumerates 2**n_columns masks and is only
# practical for a handful of items.
for args in product(const, repeat=store_array.shape[1]):
    sup = support(store_array, args, m='ratio')
    if sup >= threshold:
        print("args")
        print(args)
        print("items")
        print(store_array[:,args])
        print("support")
        print(sup)
        print()

In [None]:
def getF1(array_2D, minsup):
    '''
    Description
        Build the frequent 1-itemsets (k=1) directly from the transaction table.
    Input
        array_2D -> row: transactions, col: items
        minsup -> minimum support ratio an item must reach to be kept
    Output
        numpy array with one single-element row [col] per frequent column index
    '''
    frequent_columns = []
    for col in range(array_2D.shape[1]):
        if support(array_2D, col, m='ratio') >= minsup:
            frequent_columns.append([col])
    return np.array(frequent_columns)

In [None]:
def getFkPlusOne(array_2D, indexes, minsup):
    '''
    Description
        Keep the candidate itemsets whose support ratio reaches minsup.
    Input
        array_2D -> row: transactions, col: items
        indexes -> candidate itemsets, list of tuples [(),(),...()]
        minsup -> minimum support ratio
    Output
        numpy array holding the candidates that passed the support test
    '''
    frequent_itemsets = []
    for itemset in indexes:
        if support(array_2D, itemset, m='ratio') >= minsup:
            frequent_itemsets.append(itemset)
    return np.array(frequent_itemsets)

In [None]:
# Frequent 1-itemsets at a minimum support ratio of 0.4.
F1 = getF1(store_array, minsup=0.4)

In [None]:
# Show the frequent 1-itemsets.
F1

In [None]:
def getCkPlusOne(prevCandidate, k):
    '''
    Description
        Generate the candidate k-itemsets from the frequent (k-1)-itemsets:
        take every k-combination of the items that occur in prevCandidate and
        keep only those whose (k-1)-subsets are all present in prevCandidate.
    Input
        prevCandidate: frequent (k-1)-itemsets, [(),(),...,()] (list of tuples)
                       or a 2-D array with one itemset per row
        k: length of next Candidate (must be > 1)
    Output
        nextCandidate: numpy array with one sorted k-itemset per row
                       (an empty array when there is no candidate)
    '''
    assert k > 1
    assert np.all(np.array([len(x) for x in prevCandidate]) == k-1)
    # tolist() gives plain Python scalars, so the tuples built below hash and
    # compare reliably in the set lookup.
    items = np.unique(np.asarray(prevCandidate)).tolist()
    tmp_candidates = list(combinations(items, k))
    if k == 2:
        print("k=2, all combinations are output without filtering")
        return np.array(tmp_candidates)

    # Membership must be tested on whole itemsets. `tuple in ndarray` would
    # compare element-wise and succeed as soon as any single item matches,
    # which defeats the pruning.
    prev_itemsets = {tuple(sorted(np.asarray(x).tolist())) for x in prevCandidate}
    candidates = [
        candidate for candidate in tmp_candidates
        if all(
            subset in prev_itemsets
            for subset in combinations(candidate, k - 1))
    ]

    return np.array(candidates)

In [None]:
# Candidate 2-itemsets built from the frequent 1-itemsets.
C2 = getCkPlusOne(F1, 2)

In [None]:
# Show the candidate 2-itemsets.
C2

In [None]:
# Keep only the candidate 2-itemsets whose support ratio reaches 0.4.
getFkPlusOne(store_array, C2, minsup=0.4)

In [None]:
def isEmpty(F):
    '''Return True when F holds no element (len(F) < 1), otherwise False.'''
    return len(F) < 1

## extraction of Frequent Items

- k =1
- データベースを数え上げて$F_1$を生成
  - ループ先頭
  - $F_k$から$C_{k+1}$を生成
  - $C_{k+1}$中の集合が実際に頻出かどうかを、データベースを数え上げて $F_{k+1}$を生成
  - $F_{k+1}$が空ならばループを終了
  - $k= k+1;$ ループ先頭に戻る
- 出力: $F_1, F_2, \dots, F_k$(空になった $F_{k+1}$ は含めない)

In [None]:
# Level-wise Apriori search: start from the frequent 1-itemsets and grow the
# itemset length by one per pass until no frequent itemset is left.
db = store_array
minsum = 0.4  # minimum support ratio
k = 1
F_now = getF1(db, minsum)
F_list = [F_now]  # F_list[i] holds the frequent (i+1)-itemsets
while True:
    print(k)
    print("F_now")
    print(F_now)
    print("calc C_k+1: k is set to %d" % (k + 1))
    C_next = getCkPlusOne(F_now, k + 1)
    print("C_next")
    print(C_next)
    F_next = getFkPlusOne(db, C_next, minsum)
    # Stop as soon as a level yields no frequent itemset.
    if isEmpty(F_next):
        break
    k, F_now = k + 1, F_next
    F_list.append(F_now)

# k was only a loop counter; drop it from the notebook namespace.
del k

In [None]:
# Show the frequent itemsets collected for each itemset length.
F_list

## 相関ルールの抽出

In [None]:
# Extract association rules from the frequent itemsets.
# conf_list gets one entry per itemset length k >= 2; each entry is a flat list
# of (antecedent, consequent) pairs whose confidence is at least minconf.
conf_list = []
minconf = 0.7

for F in F_list:
    # Skip empty levels and 1-itemsets: a rule needs items on both sides.
    if len(F) == 0 or len(F[0]) < 2:
        continue
    k = len(F[0])
    conf_list_inner = []
    if k == 2:
        for f_2 in F:
            A = f_2[0]
            B = f_2[1]
            # Test both directions, A -> B and then B -> A.
            for ant, con in ((A, B), (B, A)):
                if confidence(store_array, ant, con) >= minconf:
                    conf_list_inner.append((np.array(ant), np.array(con)))
    else:
        for f_k in F:
            set_all = set(f_k)
            # Only rules with a (k-1)-item antecedent and a single-item
            # consequent are generated; smaller antecedents are not explored.
            array_antecedent = np.array(list(combinations(f_k, k - 1)))
            array_consequent = np.array([tuple(set_all - set(c)) for c in array_antecedent])
            conf = np.array([confidence(store_array, ant, con)
                             for ant, con in zip(array_antecedent, array_consequent)])

            # Same ">=" test as the k == 2 case.
            isHigher = conf >= minconf
            array_antecedent_filtered_by_conf = array_antecedent[isHigher]
            array_consequent_filtered_by_conf = array_consequent[isHigher]

            conf_list_inner.extend(
                zip(array_antecedent_filtered_by_conf, array_consequent_filtered_by_conf))
    # Record this level's rules for every k >= 2.
    conf_list.append(conf_list_inner)
            
            

In [None]:
# Show the extracted association rules.
conf_list

In [None]:
# Inspect the rule list left in the namespace by the rule-extraction loop.
conf_list_inner

In [None]:
# Scratch check using variables left over from the rule-extraction loop; they
# are only assigned when F_list contains itemsets of length >= 3.
# NOTE(review): the antecedent uses index 0 but the consequent index 1 - confirm this pairing is intended.
tuple((array_antecedent_filtered_by_conf[0], array_consequent_filtered_by_conf[1]))

In [None]:
# Scratch check: pair each surviving antecedent with its consequent. Relies on
# variables left over from the rule-extraction loop (assigned only for itemsets of length >= 3).
[(a,c) for a,c in zip(array_antecedent_filtered_by_conf, array_consequent_filtered_by_conf)]

In [None]:
# NOTE(review): `list_antecedent` is not assigned in any cell visible here, so this
# cell raises NameError on a fresh kernel - define it or remove the cell.
list_antecedent

In [None]:
# NOTE(review): `list_antecedent_filtered` is not assigned in any cell visible here, so this
# cell raises NameError on a fresh kernel - define it or remove the cell.
list_antecedent_filtered

In [None]:
# NOTE(review): `list_consequent` is not assigned in any cell visible here, so this
# cell raises NameError on a fresh kernel - define it or remove the cell.
list_consequent

In [None]:
# NOTE(review): `list_consequent_filtered` is not assigned in any cell visible here, so this
# cell raises NameError on a fresh kernel - define it or remove the cell.
list_consequent_filtered

In [None]:
# Scratch check: for each antecedent, is it a subset of every reference set ({1,2} and {2,3})?
# NOTE(review): `list_antecedent` is not assigned in any cell visible here - this raises NameError on a fresh kernel.
[np.all([set(i) <= s for s in  [{1,2},{2,3}]]) for i in list_antecedent]

In [None]:
# Scratch check: a set built from a NumPy array can be compared with `<=` (subset test).
set(np.array((1,2,))) <= {1,2,3}

In [None]:
# Scratch check: element-wise membership of a's values in b.
# np.array takes ONE array-like as its first argument (the second positional
# argument is dtype), so the two rows must be wrapped in a list.
a = np.array([(1,2,3),(2,3,4)]); b = np.array((1,2,3))
# np.isin replaces the deprecated np.in1d; ravel() keeps in1d's flattened 1-D result.
np.isin(a, b).ravel()

In [None]:
# Scratch check: build a 1-D array from a tuple.
np.array((1,2,3))