In [1]:
import sys
import os

from itertools import chain, combinations
from collections import defaultdict
from optparse import OptionParser

def subsets(arr):
    """ Returns non empty subsets of arr"""
    return chain(*[combinations(arr, i + 1) for i, a in enumerate(arr)])

def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
        """calculates the support for items in the itemSet and returns a subset
       of the itemSet each of whose elements satisfies the minimum support"""
        _itemSet = set()
        localSet = defaultdict(int)

        for item in itemSet:
                for transaction in transactionList:
                        if item.issubset(transaction):
                                freqSet[item] += 1
                                localSet[item] += 1

        for item, count in localSet.items():
                support = float(count)/len(transactionList)

                if support >= minSupport:
                        _itemSet.add(item)

        return _itemSet

def joinSet(itemSet, length):
        """Join a set with itself and returns the n-element itemsets"""
        return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])

def getItemSetTransactionList(data_iterator):
    transactionList = list()
    itemSet = set()
    for record in data_iterator:
        transaction = frozenset(record)
        transactionList.append(transaction)
        for item in transaction:
            itemSet.add(frozenset([item]))              # Generate 1-itemSets
    return itemSet, transactionList

def runApriori(data_iter, minSupport, minConfidence):
    """
    run the apriori algorithm. data_iter is a record iterator
    Return both:
     - items (tuple, support)
     - rules ((pretuple, posttuple), confidence)
    """
    itemSet, transactionList = getItemSetTransactionList(data_iter)

    freqSet = defaultdict(int)
    largeSet = dict()
    # Global dictionary which stores (key=n-itemSets,value=support)
    # which satisfy minSupport

    assocRules = dict()
    # Dictionary which stores Association Rules

    oneCSet = returnItemsWithMinSupport(itemSet,
                                        transactionList,
                                        minSupport,
                                        freqSet)
    currentLSet = oneCSet
    k = 2
    while(currentLSet != set([])):
        largeSet[k-1] = currentLSet
        currentLSet = joinSet(currentLSet, k)
        currentCSet = returnItemsWithMinSupport(currentLSet,
                                                transactionList,
                                                minSupport,
                                                freqSet)
        currentLSet = currentCSet
        k = k + 1
        
        def getSupport(item):
            """local function which Returns the support of an item"""
            return float(freqSet[item])/len(transactionList)

    toRetItems = []
    for key, value in largeSet.items():
        toRetItems.extend([(tuple(item), getSupport(item))
                           for item in value])

    toRetRules = []
    for key, value in largeSet.items()[1:]:
        for item in value:
            _subsets = map(frozenset, [x for x in subsets(item)])
            for element in _subsets:
                remain = item.difference(element)
                if len(remain) > 0:
                    confidence = getSupport(item)/getSupport(element)
                    if confidence >= minConfidence:
                        toRetRules.append(((tuple(element), tuple(remain)),
                                           confidence))
    return toRetItems, toRetRules



def printResults(items, rules):
    
    """prints the generated itemsets sorted by support and the confidence rules sorted by confidence"""
    for item, support in sorted(items, key=lambda support:support[1]):#lambda (item, support): support):  #lambda support:support[1]):
        print ("item: %s , %.3f" % (str(item), support))
    print ("\n------------------------ RULES:")
    for rule, confidence in sorted(rules, key=lambda  confidence: confidence[1]):
        pre, post = rule
        print ("Rule: %s ==> %s , %.3f" % (str(pre), str(post), confidence))
        
def dataFromFile(fname):
        """Function which reads from the file and yields a generator"""
        
        file_iter = open(fname, 'rU')
        for line in file_iter:
                line = line.strip().rstrip(',')                         # Remove trailing comma
                record = frozenset(line.split(','))
                yield record
                print(record)

if __name__ == "__main__":
    optparser = OptionParser()
    optparser.add_option('-f', '--inputFile',
                         dest='input',
                         help='filename containing csv',
                         default=None)
    optparser.add_option('-s', '--minSupport',
                         dest='minS',
                         help='minimum support value',
                         default=0.15,
                         type='float')
    optparser.add_option('-c', '--minConfidence',
                         dest='minC',
                         help='minimum confidence value',
                         default=0.6,
                         type='float')

    (options, args) = optparser.parse_args()

    file_location = os.path.join(os.path.expanduser('./dataset'),'INTEGRATED-DATASET.csv')
    inFile = dataFromFile(file_location)
    minSupport = options.minS
    minConfidence = options.minC

    items, rules = runApriori(inFile, minSupport, minConfidence)

    printResults(items, rules)
    
    

# optparser = OptionParser()
#    optparser.add_option('-f', '--inputFile',
#                         dest='input',
#                         help='filename containing csv',
#                         default=None)
#    optparser.add_option('-s', '--minSupport',
#                         dest='minS',
#                         help='minimum support value',
#                         default=0.15,
#                         type='float')
#    optparser.add_option('-c', '--minConfidence',
#                         dest='minC',
#                         help='minimum confidence value',
#                         default=0.6,
#                         type='float')
#
#    (options, args) = optparser.parse_args()
#
#    inFile = None
#    if options.input is None:
#            inFile = sys.stdin
#    elif options.input is not None:
#   file_location = os.path.join(os.path.expanduser('~'),'Documents','Apriori-master','data2.csv')
#            inFile = dataFromFile(options.input)
#    else:
#            print ('No dataset filename specified, system with exit\n')
#            sys.exit('System will exit')
#
#        minSupport = options.minS
#    minConfidence = options.minC
#
#    items, rules = runApriori(inFile, minSupport, minConfidence)

#    printResults(items, rules)

    



frozenset({'11204', 'Brooklyn', 'LBE'})
frozenset({'MBE', '11411', 'Cambria Heights', 'BLACK', 'WBE'})
frozenset({'MBE', 'Yorktown Heights', '10598', 'BLACK'})
frozenset({'MBE', 'Long Beach', 'BLACK', '11561'})
frozenset({'MBE', 'Brooklyn', 'ASIAN', '11235'})
frozenset({'10010', 'MBE', 'ASIAN', 'New York', 'WBE'})
frozenset({'10026', 'MBE', 'ASIAN', 'New York'})
frozenset({'10026', 'MBE', 'New York', 'BLACK'})
frozenset({'MBE', '10034', 'HISPANIC', 'New York'})
frozenset({'MBE', 'Staten Island', '10303', 'BLACK', 'WBE'})
frozenset({'MBE', '10018', 'ASIAN', 'New York'})
frozenset({'MBE', '10034', 'HISPANIC', 'New York', 'WBE'})
frozenset({'10013', 'MBE', 'ASIAN', 'New York', 'WBE'})
frozenset({'MBE', 'Jamaica', 'BLACK', '11434'})
frozenset({'10022', 'New York', 'NON-MINORITY', 'WBE'})
frozenset({'MBE', 'Staten Island', '10304', 'BLACK'})
frozenset({'MBE', '10454', 'Bronx', 'BLACK'})
frozenset({'10801', 'NON-MINORITY', 'New Rochelle', 'WBE'})
frozenset({'NON-MINORITY', 'Staten Island', '

TypeError: 'dict_items' object is not subscriptable