In [2]:
import pandas as pd
import numpy as np
import seaborn as sns

dt = sns.load_dataset('titanic')
dt = dt[["class", "sex", "age", "alive"]]
dt = dt.dropna(axis=0)
dt.head()

Unnamed: 0,class,sex,age,alive
0,Third,male,22.0,no
1,First,female,38.0,yes
2,Third,female,26.0,yes
3,First,female,35.0,yes
4,Third,male,35.0,no


In [3]:
# 나이 범주화
child_idx = dt.age < 20
adult_idx = (dt.age >= 20) & (dt.age < 60)
old_idx = dt.age >= 60

dt.loc[child_idx, "age"] = "child"
dt.loc[adult_idx, "age"] = "adult"
dt.loc[old_idx, "age"] = "old"

In [5]:
dt.head(10)

Unnamed: 0,class,sex,age,alive
0,Third,male,adult,no
1,First,female,adult,yes
2,Third,female,adult,yes
3,First,female,adult,yes
4,Third,male,adult,no
6,First,male,adult,no
7,Third,male,child,no
8,Third,female,adult,yes
9,Second,female,child,yes
10,Third,female,child,yes


In [None]:
#pip install apyori

In [6]:
from apyori import apriori

records = []
for i in range(len(dt)):
    records.append([str(dt.values[i,j]) for j in range(0, len(dt.columns))])
    
records

[['Third', 'male', 'adult', 'no'],
 ['First', 'female', 'adult', 'yes'],
 ['Third', 'female', 'adult', 'yes'],
 ['First', 'female', 'adult', 'yes'],
 ['Third', 'male', 'adult', 'no'],
 ['First', 'male', 'adult', 'no'],
 ['Third', 'male', 'child', 'no'],
 ['Third', 'female', 'adult', 'yes'],
 ['Second', 'female', 'child', 'yes'],
 ['Third', 'female', 'child', 'yes'],
 ['First', 'female', 'adult', 'yes'],
 ['Third', 'male', 'adult', 'no'],
 ['Third', 'male', 'adult', 'no'],
 ['Third', 'female', 'child', 'no'],
 ['Second', 'female', 'adult', 'yes'],
 ['Third', 'male', 'child', 'no'],
 ['Third', 'female', 'adult', 'no'],
 ['Second', 'male', 'adult', 'no'],
 ['Second', 'male', 'adult', 'yes'],
 ['Third', 'female', 'child', 'yes'],
 ['First', 'male', 'adult', 'yes'],
 ['Third', 'female', 'child', 'no'],
 ['Third', 'female', 'adult', 'yes'],
 ['First', 'male', 'child', 'no'],
 ['First', 'male', 'adult', 'no'],
 ['Second', 'male', 'old', 'no'],
 ['First', 'male', 'adult', 'no'],
 ['First', 'ma

In [None]:
association_rules = apriori(records, min_support=0.005, min_confidence=0.8)
association_results = list(association_rules)

association_results[1]

# RelationRecord(
# items=frozenset({'male', 'old'}), 
# support=0.03081232492997199, 
# ordered_statistics=[OrderedStatistic(items_base=frozenset({'old'}), 
# items_add=frozenset({'male'}), 
# confidence=0.8461538461538461, 
# lift=1.3336729495669895)])

In [4]:
rules = []
for results in association_results:
    supp = results.support
    for orders in results.ordered_statistics:
        if(orders.items_add in [{'yes'},{'no'}]):
            conf = orders.confidence
            lift = orders.lift
            hypo = orders.items_base
            conc = orders.items_add
            rules.append([hypo,conc,supp,conf,lift])

rules[:5]

[[frozenset({'First', 'child'}),
  frozenset({'yes'}),
  0.023809523809523808,
  0.8095238095238095,
  1.993103448275862],
 [frozenset({'First', 'female'}),
  frozenset({'yes'}),
  0.11484593837535013,
  0.9647058823529412,
  2.3751724137931034],
 [frozenset({'Second', 'female'}),
  frozenset({'yes'}),
  0.09523809523809523,
  0.9189189189189189,
  2.262441752096924],
 [frozenset({'Second', 'male'}),
  frozenset({'no'}),
  0.11764705882352941,
  0.8484848484848484,
  1.4288164665523155],
 [frozenset({'Third', 'adult'}),
  frozenset({'no'}),
  0.27170868347338933,
  0.8016528925619834,
  1.3499532200218305]]

In [5]:
labels=["hypothesis","conclusion","support","confidence","lift"]
rules_dataframe = pd.DataFrame.from_records(rules, columns=labels)

rules_dataframe_sort = rules_dataframe.sort_values(["lift"], ascending=[False])
rules_dataframe_sort = rules_dataframe_sort.reset_index(drop = True)
rules_dataframe_sort

Unnamed: 0,hypothesis,conclusion,support,confidence,lift
0,"(old, female)",(yes),0.005602,1.0,2.462069
1,"(child, Second, female)",(yes),0.022409,1.0,2.462069
2,"(adult, First, female)",(yes),0.092437,0.970588,2.389655
3,"(First, female)",(yes),0.114846,0.964706,2.375172
4,"(child, First, female)",(yes),0.018207,0.928571,2.286207
5,"(Second, female)",(yes),0.095238,0.918919,2.262442
6,"(adult, Second, female)",(yes),0.072829,0.896552,2.207372
7,"(child, First)",(yes),0.02381,0.809524,1.993103
8,"(male, Third, old)",(no),0.005602,1.0,1.683962
9,"(male, adult, Second)",(no),0.10084,0.947368,1.595333
