## Association Rule Learning

### Import libraries

In [1]:
from apyori import apriori

# import numpy as np
import pandas as pd

### Load CSV

In [2]:
# Read the Titanic table from the CSV file in the working directory.
dataset = pd.read_csv('titanic.csv')
dataset.head()  # NOTE(review): not the cell's final expression, so Jupyter presumably shows no preview here

# DataFrame.shape is (number of rows, number of columns).
rowCount, colCount = dataset.shape

### Add CSV values to list and add column tags

In [3]:
# One inner list per DataFrame row, with every cell converted to its string
# form so that a column tag can be prepended to it afterwards.
# `dataset.values` is evaluated exactly once here instead of once per cell,
# because each access may have to materialize the frame as a fresh array.
records = [[str(cell) for cell in rowValues] for rowValues in dataset.values]
    
def add_tags(tuple_list=None, tags=("Class", "Sex", "Age", "Survived")):
    """Prefix the leading cells of every row with ``"<tag>="``, in place.

    The cell at position ``i`` of each row is replaced by ``"<tags[i]>=<cell>"``,
    so identical values from different columns stay distinguishable. Cells
    beyond ``len(tags)`` are left untouched.

    The rows are modified in place and nothing is returned. The function is
    not idempotent: calling it twice on the same rows prepends the tag twice.

    Args:
        tuple_list: List of mutable rows (lists) to tag. When omitted, the
            module-level ``records`` list is used.
        tags: Column tags, in column order. Defaults to the four Titanic
            columns used in this notebook.

    Raises:
        IndexError: If a row has fewer cells than there are tags.
    """
    if tuple_list is None:
        # Resolved at call time, so the current ``records`` list is tagged
        # rather than whichever list existed when the function was defined.
        tuple_list = records
    for row in tuple_list:
        for position, tag in enumerate(tags):
            row[position] = "{}={}".format(tag, row[position])
        
# Tag the freshly built rows in place, e.g. "1st" in the first column becomes "Class=1st".
add_tags(records)

### Run apriori algorithm

In [4]:
# Mine association rules from the tagged rows and store the result as a list,
# since several later cells iterate over it.
# NOTE(review): the recorded outputs further down include rules with lift
# below 1.4 (e.g. 1.10), so they appear to come from a run with different
# thresholds -- re-run the notebook to confirm.
associationRules = list(
    apriori(
        records,
        min_support=0.00045,
        min_confidence=0.02,
        min_lift=1.4,
        max_length=6,
    )
)

### Display association rules

In [5]:
def print_rules(aprioriRules):
    """Yield one formatted text block per rule contained in ``aprioriRules``.

    Every record is read positionally: index 1 holds the support and index 2
    holds a sequence of rule statistics, each laid out as
    (antecedent items, consequent items, confidence, lift).

    Despite its name this generator prints nothing itself; the caller decides
    what to do with the yielded strings.
    """
    template = (
        "Rule: {} -> {}\n"
        "Support: {}\n"
        "Confidence: {}\n"
        "Lift: {}\n"
        "======================================\n"
    )
    for record in aprioriRules:
        for stat in record[2]:
            antecedentText = str(list(stat[0]))
            consequentText = str(list(stat[1]))
            yield template.format(
                antecedentText,
                consequentText,
                str(record[1]),
                str(stat[2]),
                str(stat[3]),
            )

#### Print association rules

In [6]:
# Print every mined rule; each text block already ends with a separator line.
formattedRules = print_rules(associationRules)
for result in formattedRules:
    print(result)

Rule: ['Age=Child'] -> ['Class=2nd']
Support: 0.010904134484325307
Confidence: 0.22018348623853212
Lift: 1.7004345726702075

Rule: ['Class=2nd'] -> ['Age=Child']
Support: 0.010904134484325307
Confidence: 0.08421052631578947
Lift: 1.7004345726702077

Rule: ['Age=Child'] -> ['Class=3rd']
Support: 0.03589277601090413
Confidence: 0.7247706422018348
Lift: 2.2595186734932557

Rule: ['Class=3rd'] -> ['Age=Child']
Support: 0.03589277601090413
Confidence: 0.11189801699716714
Lift: 2.2595186734932557

Rule: ['Age=Child'] -> ['Sex=Female']
Support: 0.02044525215810995
Confidence: 0.41284403669724773
Lift: 1.9333398399375368

Rule: ['Sex=Female'] -> ['Age=Child']
Support: 0.02044525215810995
Confidence: 0.09574468085106383
Lift: 1.9333398399375368

Rule: ['Age=Child'] -> ['Survived=Yes']
Support: 0.025897319400272602
Confidence: 0.5229357798165137
Lift: 1.6188208880114583

Rule: ['Survived=Yes'] -> ['Age=Child']
Support: 0.025897319400272602
Confidence: 0.08016877637130802
Lift: 1.6188208880114583

### Prune rules by consequents

In [7]:
def prune_consequence(aprioriRules, support_minval, confidence_minval, consequence):
    """Yield formatted rules whose consequent mentions ``consequence``.

    A rule is kept only when all of the following hold:

    * ``consequence`` occurs as a substring of the printed consequent list,
    * its support is strictly greater than ``support_minval``,
    * its confidence is strictly greater than ``confidence_minval``.

    Records are read positionally: index 1 is the support and index 2 is a
    sequence of (antecedent items, consequent items, confidence, lift).
    """
    template = (
        "Rule: {} -> {}\n"
        "Support: {}\n"
        "Confidence: {}\n"
        "Lift: {}\n"
        "======================================\n"
    )
    for record in aprioriRules:
        for stat in record[2]:
            consequentText = str(list(stat[1]))
            supportText = str(record[1])
            confidenceText = str(stat[2])
            liftText = str(stat[3])

            # The match is on the list's text form, so "Sex" selects both
            # 'Sex=Male' and 'Sex=Female'.
            if consequence not in consequentText:
                continue
            if not (float(supportText) > support_minval):
                continue
            if not (float(confidenceText) > confidence_minval):
                continue

            yield template.format(
                str(list(stat[0])),
                consequentText,
                supportText,
                confidenceText,
                liftText,
            )

#### Print rules pruned by consequent

In [8]:
# Keep only rules whose consequent mentions "Sex" and that exceed the given
# support and confidence floors, then print them.
sexRules = prune_consequence(associationRules, 0.0045, 0.002, "Sex")
for val in sexRules:
    print(val)

Rule: ['Age=Child'] -> ['Sex=Female']
Support: 0.02044525215810995
Confidence: 0.41284403669724773
Lift: 1.9333398399375368

Rule: ['Class=1st'] -> ['Sex=Female']
Support: 0.06587914584279873
Confidence: 0.4461538461538462
Lift: 2.089328968903437

Rule: ['Class=2nd'] -> ['Sex=Female']
Support: 0.0481599273057701
Confidence: 0.37192982456140344
Lift: 1.7417394550205298

Rule: ['Class=3rd'] -> ['Sex=Female']
Support: 0.08905043162199
Confidence: 0.2776203966005666
Lift: 1.3000904104635045

Rule: ['Class=Crew'] -> ['Sex=Male']
Support: 0.39164016356201725
Confidence: 0.9740112994350282
Lift: 1.2384742172481207

Rule: ['Survived=Yes'] -> ['Sex=Female']
Support: 0.1562925942753294
Confidence: 0.48382559774964845
Lift: 2.2657449800999494

Rule: ['Survived=No'] -> ['Sex=Male']
Support: 0.6197183098591549
Confidence: 0.9154362416107382
Lift: 1.163994897622897

Rule: ['Class=1st', 'Age=Adult'] -> ['Sex=Female']
Support: 0.06542480690595184
Confidence: 0.4514106583072101
Lift: 2.1139465083705735

### Prune redundant rules

In [9]:
def prune_redundant(aprioriRules, support_minval, confidence_minval):
    """Yield the first qualifying rule for each distinct consequent item set.

    A rule qualifies when its support is strictly greater than
    ``support_minval`` and its confidence is strictly greater than
    ``confidence_minval``. Among qualifying rules, only the first one seen for
    a given consequent is yielded; later rules with the same consequent items
    are dropped, regardless of the order in which those items are listed.

    Records are read positionally: index 1 is the support and index 2 is a
    sequence of (antecedent items, consequent items, confidence, lift).

    Args:
        aprioriRules: Iterable of rule records as described above.
        support_minval: Exclusive lower bound on the support.
        confidence_minval: Exclusive lower bound on the confidence.

    Yields:
        One formatted, multi-line text block per retained rule.
    """
    template = (
        "Rule: {} -> {}\n"
        "Support: {}\n"
        "Confidence: {}\n"
        "Lift: {}\n"
        "======================================\n"
    )
    # A set of frozensets gives O(1) membership tests and an identity for each
    # consequent that does not depend on the order its items iterate in.
    seenConsequents = set()
    for record in aprioriRules:
        support = record[1]
        # Support is shared by every rule of the record, so test it only once.
        if not (float(support) > support_minval):
            continue
        for stat in record[2]:
            antecedent, consequent, confidence, lift = stat[0], stat[1], stat[2], stat[3]
            if not (float(confidence) > confidence_minval):
                continue
            consequentKey = frozenset(consequent)
            if consequentKey in seenConsequents:
                continue
            # Only rules that pass the thresholds mark a consequent as seen.
            seenConsequents.add(consequentKey)
            yield template.format(
                str(list(antecedent)),
                str(list(consequent)),
                str(support),
                str(confidence),
                str(lift),
            )

#### Print rules with redundant consequents pruned

In [10]:
# Print one rule per distinct consequent among those exceeding the given
# support and confidence floors.
uniqueRules = prune_redundant(associationRules, 0.0045, 0.2)
for val in uniqueRules:
    print(val)

Rule: ['Age=Child'] -> ['Class=2nd']
Support: 0.010904134484325307
Confidence: 0.22018348623853212
Lift: 1.7004345726702075

Rule: ['Age=Child'] -> ['Class=3rd']
Support: 0.03589277601090413
Confidence: 0.7247706422018348
Lift: 2.2595186734932557

Rule: ['Age=Child'] -> ['Sex=Female']
Support: 0.02044525215810995
Confidence: 0.41284403669724773
Lift: 1.9333398399375368

Rule: ['Age=Child'] -> ['Survived=Yes']
Support: 0.025897319400272602
Confidence: 0.5229357798165137
Lift: 1.6188208880114583

Rule: ['Sex=Female'] -> ['Class=1st']
Support: 0.06587914584279873
Confidence: 0.3085106382978724
Lift: 2.089328968903437

Rule: ['Class=3rd'] -> ['Survived=No']
Support: 0.23989095865515675
Confidence: 0.7478753541076488
Lift: 1.104747419054319

Rule: ['Class=Crew'] -> ['Sex=Male']
Support: 0.39164016356201725
Confidence: 0.9740112994350282
Lift: 1.2384742172481207

Rule: ['Sex=Male'] -> ['Class=Crew']
Support: 0.39164016356201725
Confidence: 0.49797804737146156
Lift: 1.2384742172481207

Rule: 