In [7]:
from sklearn.metrics import (accuracy_score, f1_score, precision_score,
                             recall_score)
from sklearn.model_selection import KFold

from CBA import CBA
from CBAWL import CBAWL
from DataHandler import DataHandler

In [8]:
# Evaluate the CBAWL classifier with shuffled 2-fold cross-validation and
# report the fold-averaged macro precision / recall / F1 and accuracy.
dataset_id = 19

cbawl = CBAWL(dataset_id, min_support=0.1, min_confidence=0.5, min_lift=1)
data = cbawl.dataHandler.loadData()

# NOTE(review): this first `features` value is replaced two statements below
# and never read. The call is kept in case oneHotEncoding has side effects on
# the handler -- confirm, then drop it if it is pure.
features = cbawl.dataHandler.oneHotEncoding(data)
# NOTE(review): importance is computed from the full frame, i.e. before the
# train/test split below -- confirm this is intended.
features_importance = cbawl.dataHandler.getFeaturesImportance(data)
features = cbawl.dataHandler.delLowImportanceFeatures(data, features_importance)

X = features
y = data.iloc[:, -1]  # labels are read from the last column of the loaded frame
kf = KFold(n_splits=2, shuffle=True, random_state=42)

# One score per fold; averaged after the loop.
precision, recall, f1, accuracy = [], [], [], []

# Keyword arguments shared by the three macro-averaged metrics.
macro_kwargs = {'average': 'macro', 'zero_division': 1}

for train_rows, test_rows in kf.split(X):
    X_train = X.iloc[train_rows]
    X_test = X.iloc[test_rows]
    # NOTE(review): y_train is not passed to any call in this loop; only
    # X_train reaches cbawl.model -- verify how the model obtains its labels.
    y_train = y.iloc[train_rows]
    y_test = y.iloc[test_rows]

    # Mine frequent itemsets from the encoded training fold, build the rule
    # sets, then classify the held-out fold.
    X_train_encoded = cbawl.dataHandler.oneHotEncoding(X_train)
    frequent_itemsets = cbawl.ruleGenerator.getFrequentItemsets(X_train_encoded)
    strong_rules, weak_rules, default_class = cbawl.model(frequent_itemsets, X_train)
    y_pred = cbawl.predict(strong_rules, weak_rules, default_class, X_test)

    precision.append(precision_score(y_test, y_pred, **macro_kwargs))
    recall.append(recall_score(y_test, y_pred, **macro_kwargs))
    f1.append(f1_score(y_test, y_pred, **macro_kwargs))
    accuracy.append(accuracy_score(y_test, y_pred))

# Print the mean of each metric over the folds.
for label, fold_scores in (('Precision:', precision),
                           ('Recall:', recall),
                           ('F1:', f1),
                           ('Accuracy:', accuracy)):
    print(label, sum(fold_scores)/len(fold_scores))

Precision: 0.706479521873672
Recall: 0.42436461388074287
F1: 0.43104502710452386
Accuracy: 0.8252314814814814


In [9]:
# Evaluate the CBA classifier with shuffled 2-fold cross-validation and
# report the fold-averaged macro precision / recall / F1 and accuracy.
# Relies on `dataset_id` having been set by an earlier cell.

# Thresholds passed positionally to cba.apriori below.
# NOTE(review): presumably (minimum support, minimum confidence) -- confirm
# against the CBA.apriori signature.
MIN_SUPPORT = 0.1
MIN_CONFIDENCE = 0.5

dataHandler = DataHandler(dataset_id)
data = dataHandler.loadData()

# Not read anywhere in this cell; kept because they are notebook-level names
# that other cells may rely on.
X = data.drop('class', axis=1)
y = data['class']

kf = KFold(n_splits=2, shuffle=True, random_state=42)

# One score per fold; averaged after the loop.
precision = []
recall = []
f1 = []
accuracy = []

for train_index, test_index in kf.split(data):
    # Each fold gets a fresh 0..n-1 index.
    train = data.iloc[train_index].reset_index(drop=True)
    test = data.iloc[test_index].reset_index(drop=True)
    cba = CBA(train)

    ruleitemset = cba.apriori(train, MIN_SUPPORT, MIN_CONFIDENCE)
    # Order rules by confidence, then support, then condition length, all
    # descending (reverse=True), before pruning.
    sorted_ruleitemset = sorted(
        ruleitemset,
        key=lambda x: (x['confidence'], x['support'], len(x['condition'])),
        reverse=True,
    )

    rules, default_class = cba.prune(train, sorted_ruleitemset)

    predict_y = cba.predict(test, rules, default_class)
    test_y = test['class']

    accuracy.append(accuracy_score(test_y, predict_y))
    precision.append(precision_score(test_y, predict_y, average='macro', zero_division=1))
    recall.append(recall_score(test_y, predict_y, average='macro', zero_division=1))
    f1.append(f1_score(test_y, predict_y, average='macro', zero_division=1))

# Print the mean of each metric over the folds.
for label, fold_scores in (('Precision:', precision),
                           ('Recall:', recall),
                           ('F1:', f1),
                           ('Accuracy:', accuracy)):
    print(label, sum(fold_scores) / len(fold_scores))

Precision: 0.9161285614104634
Recall: 0.4774472838988968
F1: 0.44196048128740517
Accuracy: 0.8900462962962963
