In [73]:
from DataProcessing import DataProcessing
from Car import Car
from Cba import Cba
from Validation import Validation
import time
MIN_SUPPORT = 0.1
MIN_CONFIDENCE = 0.5
MIN_LIFT = 1.4

In [74]:
def compute_runtime(start_time, end_time):
    return end_time - start_time

## 原版 CBA


In [75]:
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  1.338712453842163
Macro F1 Score: 0.35839441136051303
Accuracy: 0.6676300578034682


## WCBA 隨機森林


In [76]:
data_procesing.get_feature_importances()
feature_importances = data_procesing.feature_importances
start_time = time.time()
car = Car(data=train_data, min_support=MIN_SUPPORT,
          min_confidence=MIN_CONFIDENCE, weights=feature_importances)
car.generate_frequent()
car.sort_rule(3, True)

cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'group')

Runtime:  0.9820315837860107
Macro F1 Score: 0.3654265873015873
Accuracy: 0.684971098265896


## APR 演算法


In [77]:
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.apr_cover()
rules = cba.strong_rules
spare_rules = cba.spare_rules
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'group', spare_rules)

Runtime:  0.9430956840515137
Macro F1 Score: 0.18198529411764705
Accuracy: 0.5722543352601156


## CBA 加上 LIFT(排序使用 confidence+support+lift 做 sort)


In [78]:
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
car.sort_rule(2)
cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  0.21889042854309082
Macro F1 Score: 0.29084892743039703
Accuracy: 0.5144508670520231


## CBA 加上隨機森林並拿掉重要性低的屬性


In [79]:
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.delete_low_importance()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  0.7103350162506104
Macro F1 Score: 0.3758941344778255
Accuracy: 0.6763005780346821


## CBA 加上隨機森林並拿掉重要性低的屬性加上 lift


In [80]:
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.delete_low_importance()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
car.sort_rule(2)
cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  0.22856497764587402
Macro F1 Score: 0.29084892743039703
Accuracy: 0.5144508670520231


## CBA 加上 APR 的剪枝方法


In [85]:
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.apr_cover()
rules = cba.strong_rules
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  0.9344675540924072
Macro F1 Score: 0.18198529411764705
Accuracy: 0.5722543352601156


## CBA 加上隨機森林並拿掉重要性低的屬性加上 lift 加上 APR 剪枝方法


In [86]:
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.delete_low_importance()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
car.sort_rule(2)
cba = Cba(train_data, car.rule)
cba.apr_cover()
rules = cba.strong_rules
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  0.03382706642150879
Macro F1 Score: 0.18198529411764705
Accuracy: 0.5722543352601156


In [106]:
from apyori import apriori
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
train_data, test_data = data_procesing.split_train_test_data()

print("apyori")
start_time = time.time()
association_rules = apriori(train_data, min_support=MIN_SUPPORT,
                            min_confidence=MIN_CONFIDENCE, min_lift=MIN_LIFT)
association_results = list(association_rules)
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))

apyori


TypeError: 'int' object is not iterable