In this file we transform data into a case base and run an experiment using the model, which we call AF-CBA, introduced in H. Prakken, A top-level model of case-based argumentation for explanation. In Proceedings of the ECAI 2020 Workshop on Dialogue, Explanation and Argumentation for Human-Agent Interaction (DEXA HAI 2020), to appear. URL: http://www.staff.science.uu.nl/~prakk101/papers/xarg20ws.pdf

In [1]:
import import_ipynb
import pandas as pd

# the data sets are prepared in another file
import Data_preparation as d

# the objects Case, Comparison, Feature and CB are defined in another file
from Classes import Case, Comparison, Feature, CB

importing Jupyter notebook from Data_preparation.ipynb
importing Jupyter notebook from Classes.ipynb


In [2]:
# Receives a dataset, the name of the outcome variable, a list of binary features and
# a list of feature correlations with the outcome variable.
# Returns a case base
def data_to_cb(data, y_name, binary, cor):
    """Convert a dataset into a case base.

    Args:
        data: DataFrame holding the feature columns and the outcome column.
        y_name: Name of the outcome column; no feature is created for it.
        binary: Collection of column names that are treated as binary factors.
        cor: Correlations with the outcome, paired positionally with
            ``data.columns``.

    Returns:
        A ``CB`` wrapping one ``Case`` per row of ``data``; each case is
        named after the row index and maps every feature to the row's value.
    """
    correlation_of = dict(zip(data.columns, cor))

    features = []
    for column in data.columns:
        if column == y_name:
            continue

        is_factor = column in binary
        # The tendency follows the sign of the correlation with the outcome:
        # 1 for a positive correlation, 0 otherwise.
        tendency = 1 if correlation_of[column] > 0 else 0

        if is_factor:
            # Factors get special treatment: value 0 is left undefined (None)
            # and only value 1 carries the tendency.
            features.append(Feature(column, {0: None, 1: tendency}, True))
        else:
            features.append(Feature(column, tendency, False))

    cases = []
    for index, row in data.iterrows():
        outcome = row[y_name]
        fact_situation = {feature: row[feature.name] for feature in features}
        cases.append(Case(index, fact_situation, outcome))

    return CB(cases)
                
            

In [3]:
# Models to which the predictions can be compared 
# (Comparisons are not included)
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier

def log_regression(test, train, y_name):
    """Fit a logistic regression on ``train`` and predict the outcome for ``test``.

    Args:
        test: DataFrame with the rows to predict; must contain the column
            ``y_name``, which is dropped before predicting.
        train: DataFrame used for fitting; its ``y_name`` column is cast to
            int and used as the target.
        y_name: Name of the outcome column.

    Returns:
        The feature columns of ``test`` (the frame produced by ``drop``)
        with an added ``prediction`` column holding the predicted labels.
    """
    # Only the features of the test set are needed: the test labels play no
    # role in prediction, so they are not extracted or cast here.
    X_test = test.drop([y_name], axis=1)
    X_train = train.drop([y_name], axis=1)
    y_train = train[y_name].astype('int')

    model = LogisticRegression(solver='lbfgs')
    model.fit(X_train, y_train)

    # Predict before adding the column so the model only sees the features.
    prediction_test = model.predict(X_test)
    X_test['prediction'] = prediction_test

    return X_test


def ada_predictions(test, train, y_name):
    """Fit an AdaBoost classifier on ``train`` and predict the outcome for ``test``.

    Args:
        test: DataFrame with the rows to predict; must contain the column
            ``y_name``, which is dropped before predicting.
        train: DataFrame used for fitting; its ``y_name`` column is cast to
            int and used as the target.
        y_name: Name of the outcome column.

    Returns:
        The feature columns of ``test`` (the frame produced by ``drop``)
        with an added ``prediction`` column holding the predicted labels.
    """
    X_train = train.drop([y_name], axis=1)
    y_train = train[y_name].astype('int')
    # Only the features of the test set are needed: the test labels play no
    # role in prediction, so they are not extracted or cast here.
    X_test = test.drop([y_name], axis=1)

    model = AdaBoostClassifier()
    model.fit(X_train, y_train)

    # Predict before adding the column so the model only sees the features.
    prediction_test = model.predict(X_test)
    X_test['prediction'] = prediction_test

    return X_test

  from numpy.core.umath_tests import inner1d


In [4]:
# Given a case base, returns a dataframe with results about the argument games tested on 20% of the case base
def experiment(cb):
    """Collect precedent statistics for the focus cases in a test split of ``cb``.

    The case base is split with ``cb.split(0.2)`` into a test and a train
    part. For every focus case in the test part, the precedents returned by
    ``find_precedents(train)`` are counted, once for the case as given and
    once for a copy of the case with the opposite outcome.

    Args:
        cb: Case base providing ``split(fraction)``, which returns
            ``(test, train)``; ``test.cases`` is iterated.

    Returns:
        A DataFrame with one row per focus case and the columns
        ``focus_name``, ``focus_outcome``, ``n_pre``, ``n_forced``,
        ``forced``, ``s_outcome``, ``s_n_pre``, ``s_n_forced`` and
        ``s_forced`` (the ``s_`` columns describe the switched copy).
        The frame is empty when the test part has no cases.
    """
    test, train = cb.split(0.2)

    # Rows are gathered in a list and turned into a frame once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []

    for focus in test.cases:
        best_precedents = focus.find_precedents(train)
        # n_forced counts the precedents that have no differences.
        n_forced = sum(1 for c in best_precedents if not c.differences)

        # make a copy of the case which has the opposite outcome
        switch_focus = Case(focus.name, focus.fs, focus.opposite_outcome)
        s_best_precedents = switch_focus.find_precedents(train)
        s_n_forced = sum(1 for c in s_best_precedents if not c.differences)

        rows.append({
            'focus_name': focus.name,
            'focus_outcome': focus.outcome,
            'n_pre': len(best_precedents),
            'n_forced': n_forced,
            'forced': n_forced > 0,
            's_outcome': switch_focus.outcome,
            's_n_pre': len(s_best_precedents),
            's_n_forced': s_n_forced,
            's_forced': s_n_forced > 0,
        })

    return pd.DataFrame(rows)


In [5]:
# Example of how precedents and focus cases can be compared
# NewPro, MissingCon and Better are not terms defined by Prakken, but they are the differences
# that could be used for substitution, cancellation and compensation


def example_comparison(cb):
    """Print the comparison with a precedent for the first three focus cases.

    The case base is split with ``cb.split(0.2)``; for each of the first
    three cases of the test part, the first precedent returned by
    ``find_precedents(train)`` prints its comparison via ``print_com()``.

    Args:
        cb: Case base providing ``split(fraction)``, which returns
            ``(test, train)``.

    Returns:
        None. Output is produced by ``print_com()``.
    """
    test, train = cb.split(0.2)

    for focus in test.cases[0:3]:
        best_precedents = focus.find_precedents(train)

        # A focus case without any precedent has nothing to compare against;
        # skip it instead of failing with an IndexError on [0].
        if len(best_precedents) == 0:
            continue

        precedent = best_precedents[0]
        precedent.print_com()
                

In [6]:
# Run the experiment on the different data sets

def run_admission():
    """Run the experiment on the admission data set.

    Loads the data via ``d.get_admission()``, builds the case base, runs
    ``experiment`` on it and prints example comparisons.

    Returns:
        The DataFrame produced by ``experiment`` for the admission case base.
    """
    dataset, outcome_name, binary_features, correlations = d.get_admission()
    case_base = data_to_cb(dataset, outcome_name, binary_features, correlations)

    results = experiment(case_base)

    # Print the example comparisons after the experiment has been run.
    example_comparison(case_base)

    return results

    
def run_churn(n_rows=100):
    """Run the experiment on the churn data set.

    Loads the data via ``d.get_churn()``, keeps the first ``n_rows`` rows,
    builds the case base, runs ``experiment`` on it and prints example
    comparisons.

    Args:
        n_rows: Number of leading rows of the data set to use. Defaults to
            100; pass ``None`` to use every row.

    Returns:
        The DataFrame produced by ``experiment`` for the churn case base.
    """
    data, y_name, binary, cor = d.get_churn()
    # A slice with stop None keeps all rows, so n_rows=None disables the cut.
    data = data.iloc[0:n_rows, :]
    churn_cb = data_to_cb(data, y_name, binary, cor)
    churn_df = experiment(churn_cb)

    example_comparison(churn_cb)

    return churn_df

def run_mushroom(n_rows=100):
    """Run the experiment on the mushroom data set.

    Loads the data via ``d.get_mushroom()``, keeps the first ``n_rows``
    rows, builds the case base, runs ``experiment`` on it and prints example
    comparisons.

    Args:
        n_rows: Number of leading rows of the data set to use. Defaults to
            100; pass ``None`` to use every row.

    Returns:
        The DataFrame produced by ``experiment`` for the mushroom case base.
    """
    data, y_name, binary, cor = d.get_mushroom()
    # A slice with stop None keeps all rows, so n_rows=None disables the cut.
    data = data.iloc[0:n_rows, :]
    mush_cb = data_to_cb(data, y_name, binary, cor)
    mush_df = experiment(mush_cb)

    example_comparison(mush_cb)

    return mush_df
