In [128]:
from concepts import Context, Definition
import pandas as pd
import sys
import itertools as itt

def transform_nominal(dataframe, col_name, col_values):
    """Replace a nominal column by one cross-marker column per listed value.

    For every entry ``v`` of ``col_values`` a column named
    ``col_name + '_' + v`` is added to ``dataframe``; a row holds ``'X'`` when
    its original cell equals ``v`` and ``None`` otherwise.  The original
    column is then removed.  ``dataframe`` is modified in place and nothing
    is returned.
    """
    source = dataframe[col_name]
    for wanted in col_values:
        def mark(cell):
            # Applied immediately below, so closing over the loop variable is safe.
            if cell == wanted:
                return 'X'
            return None

        dataframe[col_name + '_' + wanted] = source.apply(mark)
    del dataframe[col_name]
    
def transform_counternominal(dataframe, col_name, col_values):
    """Replace a column by one "is not this value" marker column per listed value.

    For every entry ``v`` of ``col_values`` a column named
    ``col_name + '_' + v`` is added to ``dataframe``; a row holds ``None``
    when its original cell equals ``v`` and ``'X'`` otherwise (the inverse of
    the nominal encoding).  The original column is then removed.
    ``dataframe`` is modified in place and nothing is returned.
    """
    cells = dataframe[col_name]
    for excluded in col_values:
        def mark(cell):
            # Applied immediately below, so closing over the loop variable is safe.
            return None if cell == excluded else 'X'

        new_name = col_name + '_' + excluded
        dataframe[new_name] = cells.apply(mark)
    del dataframe[col_name]
    
def transform_ordinal(dataframe, col_name, col_values):
    """Replace an ordinal column by cumulative ``<=`` marker columns.

    ``col_values`` lists the admissible cell values in ascending order.  For
    each entry ``v`` a column named ``col_name + '_<=' + v`` is added; a row
    holds ``'X'`` when the position of its original cell in ``col_values`` is
    at most the position of ``v``, and ``None`` otherwise.  The original
    column is then removed.  ``dataframe`` is modified in place and nothing
    is returned.

    Raises:
        ValueError: if a cell value does not occur in ``col_values``
            (raised by ``list.index``).
    """
    # Position of every cell in the ordering; kept in a local Series so the
    # source column is not overwritten with ranks before it is dropped.
    ranks = dataframe[col_name].apply(lambda cell: col_values.index(cell))
    # enumerate() instead of xrange(): works on both Python 2 and Python 3.
    for bound, label in enumerate(col_values):
        dataframe[col_name + '_<=' + label] = ranks.apply(
            lambda rank: 'X' if rank <= bound else None)
    del dataframe[col_name]
    
def transform_interval(dataframe, col_name, col_values):
    """Replace an ordered column by cumulative ``<=`` and ``>=`` marker columns.

    ``col_values`` lists the admissible cell values in ascending order.  For
    each entry ``v`` two columns are added, ``col_name + '_<=' + v`` and
    ``col_name + '_>=' + v``; a row holds ``'X'`` when the position of its
    original cell in ``col_values`` satisfies the comparison against the
    position of ``v``, and ``None`` otherwise.  All ``<=`` columns are added
    before any ``>=`` column.  The original column is then removed.
    ``dataframe`` is modified in place and nothing is returned.

    Raises:
        ValueError: if a cell value does not occur in ``col_values``
            (raised by ``list.index``).
    """
    # Position of every cell in the ordering; kept in a local Series so the
    # source column is not overwritten with ranks before it is dropped.
    ranks = dataframe[col_name].apply(lambda cell: col_values.index(cell))
    comparisons = (
        ('_<=', lambda rank, bound: rank <= bound),
        ('_>=', lambda rank, bound: rank >= bound),
    )
    for marker, holds in comparisons:
        # enumerate() instead of xrange(): works on both Python 2 and Python 3.
        for bound, label in enumerate(col_values):
            dataframe[col_name + marker + label] = ranks.apply(
                lambda rank: 'X' if holds(rank, bound) else None)
    del dataframe[col_name]
    
def transform_column(dataframe, col_name, col_type):
    """Dispatch one column to the scaling routine named by its type.

    ``col_type[0]`` selects the routine ('Nominal', 'Counternominal',
    'Ordinal' or 'Interval'); ``col_type[1]`` is a ';'-separated string of
    the column's values, which is split and handed to that routine.  Any
    other type name leaves ``dataframe`` untouched, and ``col_type[1]`` is
    not read in that case.
    """
    kind = col_type[0]
    if kind not in ('Nominal', 'Counternominal', 'Ordinal', 'Interval'):
        return

    values = col_type[1].split(';')
    if kind == 'Nominal':
        transform_nominal(dataframe, col_name, values)
    elif kind == 'Counternominal':
        transform_counternominal(dataframe, col_name, values)
    elif kind == 'Ordinal':
        transform_ordinal(dataframe, col_name, values)
    else:
        transform_interval(dataframe, col_name, values)

def transform_columns(dataframe, col_defs):
    """Scale every column of ``dataframe`` except the first one, in place.

    The first column is renamed to the empty string and otherwise left
    alone.  Each remaining column ``c`` is passed to ``transform_column``
    together with ``col_defs[c]`` as its type description.
    """
    names = list(dataframe.columns)
    data_columns = names[1:]
    dataframe.columns = [''] + data_columns
    # Iterate over the snapshot taken above: transform_column adds and
    # removes columns while this loop runs.
    for col in data_columns:
        transform_column(dataframe, col, col_defs[col])

def build_iceberg_lattice(filename, lattice, threshold):
    """Filter ``lattice`` and write the surviving pairs as a CSV context.

    Every element of ``lattice`` is unpacked into a pair.  The first
    component is what the size test and the coverage test operate on; the
    second is carried along unchanged.
    NOTE(review): the original code names the components (intent, extent),
    while ``print_lattice`` in this notebook unpacks the same kind of
    iterable as (extent, intent) -- confirm which one the first component is.

    A pair is kept when its first component has at least ``threshold``
    members and those members are NOT completely covered by the union of the
    first components of other pairs that (a) also reach the threshold,
    (b) are strictly smaller, and (c) are subsets of it.

    Each kept pair becomes one object of a new ``Definition``: the object
    name is the ';'-joined first component and its properties are the second
    component.  The resulting ``Context`` is written to
    ``'iceberg.' + filename`` in csv format.  Nothing is returned.
    """
    pairs = [(members, payload) for members, payload in lattice]

    kept = []
    for pos, (members, payload) in enumerate(pairs):
        if len(members) < threshold:
            continue
        member_set = set(members)
        uncovered = set(members)
        covered = False
        for other_pos, (other_members, _unused) in enumerate(pairs):
            if other_pos == pos:
                continue
            if len(other_members) < threshold:
                continue
            if len(other_members) >= len(members):
                continue
            if not member_set.issuperset(other_members):
                continue
            uncovered.difference_update(other_members)
            if not uncovered:
                covered = True
                break
        if not covered:
            kept.append((members, payload))

    definition = Definition()
    for members, payload in kept:
        definition.add_object(';'.join(members), list(payload))
    Context(*definition).tofile(filename='iceberg.' + filename, frmat='csv')
    
def print_lattice(cont):
    """Print one line per element of ``cont.lattice``.

    Each element is unpacked as ``(extent, intent)`` and the two reprs are
    printed separated by a single space.
    """
    for concept in cont.lattice:
        extent, intent = concept
        line = '%r %r' % (extent, intent)
        print(line)
        
def is_sublist(parent, child):
    """Return True when every element of ``child`` also occurs in ``parent``.

    Order and multiplicity are ignored; an empty ``child`` always yields
    True.
    """
    return all(item in parent for item in child)
        
def find_implication_basis(cont):
    """Print the (subset, closure) pairs found by a pseudo-closed-set search.

    Subsets of ``cont.objects`` are enumerated by increasing size.  For each
    subset the closure is ``cont.extension(cont.intension(subset))``.  A
    subset is recorded, together with its closure, when

    1. it is not closed, i.e. its closure is strictly larger than itself, and
    2. for every pair ``(premise, closure)`` recorded so far, the subset
       either does not contain ``premise`` or contains all of ``closure``.

    The recorded pairs are printed one per line; nothing is returned.

    Only subset sizes ``1 .. len(cont.objects) - 1`` are examined, so the
    empty subset and the full object set are never considered.
    NOTE(review): if the closure of the empty subset can be non-empty for
    your data, the empty subset would also qualify -- confirm whether it
    should be included.

    The objects must be hashable (set membership is used for the containment
    tests).
    """
    recorded = []
    objs = list(cont.objects)
    # range() instead of xrange(): works on both Python 2 and Python 3.
    for size in range(1, len(objs)):
        for combo in itt.combinations(objs, size):
            subset = list(combo)
            intension = list(cont.intension(subset))
            closure = list(cont.extension(intension))

            # First condition: skip subsets that are already closed.
            if len(subset) >= len(closure):
                continue

            # Second condition: every recorded premise contained in the
            # subset must have its closure contained as well.  any() stops
            # at the first violation.
            members = set(subset)
            violated = any(
                members.issuperset(premise) and not members.issuperset(premise_closure)
                for premise, premise_closure in recorded)
            if not violated:
                recorded.append((subset, closure))

    for pair in recorded:
        # Call form prints the same text on Python 2 and Python 3.
        print(pair)
            

In [129]:
# Name of the input CSV; the 'cols.', 'transformed.' and 'iceberg.' file names
# used elsewhere in this notebook are derived from it by prefixing.
filename = 'lect.csv'
# Threshold argument for build_iceberg_lattice (the call is currently
# commented out in the next cell).
iceberg_threshold = 1
# NOTE(review): not read by any code visible in this notebook -- confirm
# whether it is still needed.
draw_iceberg = True
# Extra column names to keep besides the first column; an empty list keeps
# every column of the input file.
cols_to_use= []

In [130]:
# Load the raw table; optionally keep only the first column plus the
# columns listed in cols_to_use.
dataframe = pd.read_csv(filename)
if (len(cols_to_use) > 0):
    # .copy() gives transform_columns an independent frame to mutate, so the
    # in-place column assignments do not hit a slice of the original frame.
    dataframe = dataframe[[dataframe.columns[0]] + cols_to_use].copy()

# Per-column scaling definitions: for each data column, row 0 holds the scale
# type and row 1 the ';'-separated list of values (see transform_column).
col_info = pd.read_csv('cols.' + filename)
transform_columns(dataframe, col_info)
dataframe.to_csv('transformed.' + filename, index_label=False, index=False)

# Build the formal context from the scaled table and dump its lattice as DOT.
context = Context.fromfile('transformed.' + filename, frmat='csv')
lattice_str = str(context.lattice.graphviz())
# Context manager closes the file even if the write fails.
with open('lattice.dot', 'w') as f:
    f.write(lattice_str)
#context.lattice.graphviz(view=True)

find_implication_basis(context)

#build_iceberg_lattice(filename, context.lattice, iceberg_threshold)
#iceberg_context = Context.fromfile(filename='iceberg.' + filename, frmat='csv')
#iceberg_context.lattice.graphviz(view=True)

#lattice_str = str(iceberg_context.lattice.graphviz())
#f = open('iceberg.dot', 'w')
#f.write(lattice_str)
#f.close()

(['e'], ['d', 'e'])
(['a', 'd'], ['a', 'b', 'c', 'd', 'e'])
(['b', 'c'], ['b', 'c', 'd'])
(['b', 'd'], ['b', 'c', 'd'])
(['c', 'd'], ['b', 'c', 'd'])
(['b', 'c', 'd', 'e'], ['a', 'b', 'c', 'd', 'e'])


In [96]:
# Scratch check: choosing all 3 of 3 items yields exactly one combination.
# Call form of print gives the same output on Python 2 and Python 3.
print(list(itt.combinations([1,2,3], 3)))

[(1, 2, 3)]


In [56]:
# Scratch check of the closure used by find_implication_basis: the properties
# shared by objects 'b' and 'c', then every object having those properties.
# Call form of print gives the same output on Python 2 and Python 3.
print(context.objects)
abc = list(context.intension(['b', 'c']))
print(abc)
print(list(context.extension(abc)))

('a', 'b', 'c', 'd', 'e')
['f2']
['b', 'c', 'd']


In [97]:
# Scratch check of is_sublist and of the combined condition used when
# rejecting candidates in find_implication_basis.
# Call form of print gives the same output on Python 2 and Python 3.
print(is_sublist(['a', 'b', 'c'],['b', 'c']))
print(is_sublist(['a', 'b', 'c'],['b', 'c', 'd']))
print(is_sublist(['a', 'b', 'c'],['b', 'c']) and not(is_sublist(['a', 'b', 'c'],['b', 'c', 'd'])))

True
False
True
