In [1]:
import sys
import os.path

from itertools import chain, combinations
from collections import defaultdict
from optparse import OptionParser


In [2]:
def subsets(arr):
    """ Returns non empty subsets of arr"""
    return chain(*[combinations(arr, i + 1) for i, a in enumerate(arr)])

In [3]:
def returnItemsWithMinSupport(itemSet, transactionList, minSupport, freqSet):
        """calculates the support for items in the itemSet and returns a subset
       of the itemSet each of whose elements satisfies the minimum support"""
        _itemSet = set()
        localSet = defaultdict(int)

        for item in itemSet:
                for transaction in transactionList:
                        if item.issubset(transaction):
                                freqSet[item] += 1
                                localSet[item] += 1

        for item, count in localSet.items():
                support = float(count)/len(transactionList)

                if support >= minSupport:
                        _itemSet.add(item)

        return _itemSet

In [4]:
def joinSet(itemSet, length):
        """Join a set with itself and returns the n-element itemsets"""
        return set([i.union(j) for i in itemSet for j in itemSet if len(i.union(j)) == length])


In [5]:
def getItemSetTransactionList(data_iterator):
    transactionList = list()
    itemSet = set()
    for record in data_iterator:
        transaction = frozenset(record)
        transactionList.append(transaction)
        for item in transaction:
            itemSet.add(frozenset([item]))              # Generate 1-itemSets
    return itemSet, transactionList


In [44]:
def runApriori(data_iter, minSupport, minConfidence):
    """
    run the apriori algorithm. data_iter is a record iterator
    Return both:
     - items (tuple, support)
     - rules ((pretuple, posttuple), confidence)
    """
    itemSet, transactionList = getItemSetTransactionList(data_iter)

    freqSet = defaultdict(int)
    largeSet = dict()
    # Global dictionary which stores (key=n-itemSets,value=support)
    # which satisfy minSupport

    assocRules = dict()
    # Dictionary which stores Association Rules

    oneCSet = returnItemsWithMinSupport(itemSet,
                                        transactionList,
                                        minSupport,
                                        freqSet)

    currentLSet = oneCSet
    k = 2
    while(currentLSet != set([])):
        largeSet[k-1] = currentLSet
        currentLSet = joinSet(currentLSet, k)
        currentCSet = returnItemsWithMinSupport(currentLSet, transactionList, minSupport, freqSet)
        currentLSet = currentCSet
        k = k + 1

In [76]:
 def getSupport(item):
            """local function which Returns the support of an item"""
            return float(freqSet[item])/len(transactionList)

    toRetItems = []
    for key, value in largeSet.items():
        toRetItems.extend([(tuple(item), getSupport(item))
                        for item in value])

    toRetRules = []
    for key, value in largeSet.items()[1:]:
        for item in value:
            _subsets = map(frozenset, [x for x in subsets(item)])
            for element in _subsets:
                remain = item.difference(element)
                if len(remain) > 0:
                    confidence = getSupport(item)/getSupport(element)
                    if confidence >= minConfidence:
                        toRetRules.append(((tuple(element), tuple(remain)),confidence))
    return toRetItems, toRetRules


IndentationError: unindent does not match any outer indentation level (<tokenize>, line 5)

In [13]:
def printResults(items, rules):
    """prints the generated itemsets sorted by support and the confidence rules sorted by confidence"""
    for item, support in sorted(items, key=lambda (item, support): support):
        print "item: %s , %.3f" % (str(item), support)
    print "\n------------------------ RULES:"
    for rule, confidence in sorted(rules, key=lambda (rule, confidence): confidence):
        pre, post = rule
        print "Rule: %s ==> %s , %.3f" % (str(pre), str(post), confidence)

SyntaxError: invalid syntax (3703769297.py, line 3)

In [74]:
pip install threatspec python-magic-bin 

Collecting threatspec
  Downloading threatspec-0.5.0.tar.gz (1.6 MB)
Collecting python-magic-bin
  Downloading python_magic_bin-0.4.14-py2.py3-none-win_amd64.whl (409 kB)
Collecting graphviz
  Downloading graphviz-0.20-py3-none-any.whl (46 kB)
Collecting comment_parser
  Downloading comment_parser-1.2.4.tar.gz (8.3 kB)
Collecting python-magic==0.4.24
  Downloading python_magic-0.4.24-py2.py3-none-any.whl (12 kB)
Building wheels for collected packages: threatspec, comment-parser
  Building wheel for threatspec (setup.py): started
  Building wheel for threatspec (setup.py): finished with status 'done'
  Created wheel for threatspec: filename=threatspec-0.5.0-py3-none-any.whl size=22393 sha256=56a281b1f382ceb001973a064cbfec86e88679e9e831891b71ca145d6e4917bf
  Stored in directory: c:\users\aku\appdata\local\pip\cache\wheels\3c\4e\e7\0f39aedb8f7c5a675714bc3cc3fe1d06800185e46b18165b06
  Building wheel for comment-parser (setup.py): started
  Building wheel for comment-parser (setup.py): fini