In [6]:
from DataProcessing import DataProcessing
from Car import Car
from Cba import Cba
from Validation import Validation
import time
# Thresholds handed to Car(...) by the experiment cells in this notebook.
# Passed as Car's `min_support` argument.
MIN_SUPPORT = 0.1
# Passed as Car's `min_confidence` argument.
MIN_CONFIDENCE = 0.5
# Passed as an extra positional argument to Car by the lift-based cells
# (presumably a minimum-lift cutoff — TODO confirm against Car.__init__).
MIN_LIFT = 1

In [3]:
def compute_runtime(start_time, end_time):
    """Return the interval elapsed between two timestamps.

    Args:
        start_time: Timestamp captured before the measured work.
        end_time: Timestamp captured after the measured work.

    Returns:
        ``end_time - start_time``, in the same unit as the inputs.
    """
    elapsed = end_time - start_time
    return elapsed

## 原版 CBA


In [8]:
# Baseline experiment: load "car.data", split it, mine rules with Car,
# run Cba.cover(), and hand the result to Validation with the test split.
# The names bound here (data_procesing, train_data, test_data) are notebook
# globals and are reused by later cells that do not reload the data.
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
# NOTE(review): `data` is not read again anywhere in this cell.
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
# The timed interval starts here: loading and splitting above are excluded.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
# Sort mode 1 — ordering criteria are not visible here; TODO confirm against
# Car.sort_rule.
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.cover()
# Read the results left on the Cba object after cover() has run.
rules = cba.final_rules_
default = cba.default
# The timed interval ends here: validation below is excluded.
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# 'single' selects a Validation mode; the classification report in the cell
# output is presumably printed by Validation itself — TODO confirm.
test = Validation(test_data, rules, default, 'single')

Runtime:  1.36702299118042
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00        46
           3       0.79      1.00      0.88       198
           4       0.00      0.00      0.00        39

    accuracy                           0.57       346
   macro avg       0.16      0.20      0.18       346
weighted avg       0.45      0.57      0.51       346



## WCBA 隨機森林


In [11]:
# Weighted variant: Car is given per-feature weights in addition to the
# support/confidence thresholds.
# This cell relies on `data_procesing`, `train_data` and `test_data` already
# existing in the kernel; nothing is reloaded or re-split here.
data_procesing.get_feature_importances()
feature_importances = data_procesing.feature_importances
# The feature-importance computation above is outside the timed interval.
start_time = time.time()
car = Car(data=train_data, min_support=MIN_SUPPORT,
          min_confidence=MIN_CONFIDENCE, weights=feature_importances)
car.generate_frequent()
# Sort mode 3 with a second positional argument of True — the meaning of both
# is not visible here; TODO confirm against Car.sort_rule.
car.sort_rule(3, True)

cba = Cba(train_data, car.rule)
cba.cover()
# Read the results left on the Cba object after cover() has run.
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# Validation is constructed with the test split in 'single' mode.
test = Validation(test_data, rules, default, 'single')

Runtime:  1.1494834423065186
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00        46
           3       0.67      1.00      0.80       198
           4       0.00      0.00      0.00        39

    accuracy                           0.57       346
   macro avg       0.13      0.20      0.16       346
weighted avg       0.39      0.57      0.46       346



## APR 演算法


In [12]:
# APR variant: same rule mining as the baseline, but rule selection uses
# Cba.apr_cover() instead of Cba.cover().
# This cell relies on `train_data` and `test_data` already existing in the
# kernel; it does not load or split the data itself.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.apr_cover()
# After apr_cover() the cell reads two rule collections plus the default.
rules = cba.strong_rules
spare_rules = cba.spare_rules
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# Unlike the other cells, Validation runs in 'group' mode here and also
# receives the spare rules — how they are combined is not visible in this
# notebook; TODO confirm against Validation.
test = Validation(test_data, rules, default, 'group', spare_rules)

Runtime:  0.5499815940856934
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           2       0.00      0.00      0.00        46
           3       0.57      1.00      0.73       198
           4       0.00      0.00      0.00        39

    accuracy                           0.57       346
   macro avg       0.14      0.25      0.18       346
weighted avg       0.33      0.57      0.42       346



## CBA 加上 LIFT(排序使用 confidence+support+lift 做 sort)


In [13]:
# CBA with lift: same flow as the baseline cell, but Car also receives
# MIN_LIFT and rules are ordered with sort mode 2 (confidence + support +
# lift, per this section's heading).
# MIN_LIFT is taken from the configuration cell at the top of the notebook.
# It is intentionally not re-declared here, so the threshold has a single
# definition shared with the other lift-based cells.
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
# NOTE(review): `data` is not read again anywhere in this cell.
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
# The timed interval starts here: loading and splitting above are excluded.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
car.sort_rule(2)
cba = Cba(train_data, car.rule)
cba.cover()
# Read the results left on the Cba object after cover() has run.
rules = cba.final_rules_
default = cba.default
# The timed interval ends here: validation below is excluded.
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# Validation is constructed with the test split in 'single' mode.
test = Validation(test_data, rules, default, 'single')

Runtime:  0.965203046798706
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           2       0.00      0.00      0.00        46
           3       0.72      1.00      0.84       198
           4       0.46      0.85      0.59        39

    accuracy                           0.67       346
   macro avg       0.30      0.46      0.36       346
weighted avg       0.47      0.67      0.55       346



## CBA 加上隨機森林並拿掉重要性低的屬性


In [14]:
# Baseline CBA flow on a reduced data set: delete_low_importance() is called
# after process() and before the encoded data is read and split (per this
# section's heading, it removes the low-importance attributes — TODO confirm
# the exact criterion in DataProcessing).
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.delete_low_importance()
# NOTE(review): `data` is not read again anywhere in this cell.
data = data_procesing.encoded_data
# Rebinds the notebook-global train/test split; later cells that do not
# re-split will see this reduced data.
train_data, test_data = data_procesing.split_train_test_data()
# The timed interval starts here: loading and attribute removal are excluded.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.cover()
# Read the results left on the Cba object after cover() has run.
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# Validation is constructed with the test split in 'single' mode.
test = Validation(test_data, rules, default, 'single')

Runtime:  0.8581068515777588
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           1       0.00      0.00      0.00         0
           2       0.00      0.00      0.00        46
           3       0.73      0.98      0.84       198
           4       0.00      0.00      0.00        39

    accuracy                           0.56       346
   macro avg       0.15      0.20      0.17       346
weighted avg       0.42      0.56      0.48       346



## CBA 加上隨機森林並拿掉重要性低的屬性加上 lift


In [15]:
# Reduced data set plus lift: delete_low_importance() is applied before the
# split, Car additionally receives MIN_LIFT, and rules use sort mode 2.
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.delete_low_importance()
# NOTE(review): `data` is not read again anywhere in this cell.
data = data_procesing.encoded_data
# Rebinds the notebook-global train/test split; later cells that do not
# re-split will see this reduced data.
train_data, test_data = data_procesing.split_train_test_data()
# The timed interval starts here: loading and attribute removal are excluded.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
# Sort mode 2 — the same mode used by the other lift-based cell; exact
# ordering criteria should be confirmed against Car.sort_rule.
car.sort_rule(2)
cba = Cba(train_data, car.rule)
cba.cover()
# Read the results left on the Cba object after cover() has run.
rules = cba.final_rules_
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# Validation is constructed with the test split in 'single' mode.
test = Validation(test_data, rules, default, 'single')

Runtime:  0.4817042350769043
              precision    recall  f1-score   support

           0       0.40      1.00      0.58        63
           2       0.00      0.00      0.00        46
           3       1.00      0.96      0.98       198
           4       0.00      0.00      0.00        39

    accuracy                           0.73       346
   macro avg       0.35      0.49      0.39       346
weighted avg       0.65      0.73      0.67       346



## CBA 加上 APR 的剪枝方法


In [16]:
# CBA with APR pruning on the full attribute set.
# The data is loaded and split inside this cell so the experiment does not
# depend on which cell ran before it. Without this, `train_data`/`test_data`
# would be whatever the previously executed cell left in the kernel (in
# notebook order, the attribute-reduced split), which does not match this
# section's description.
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
# The timed interval starts here: loading and splitting above are excluded.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule(1)
cba = Cba(train_data, car.rule)
cba.apr_cover()
# Only the strong rules are used here; Validation runs in 'single' mode and
# is not given the spare rules.
rules = cba.strong_rules
default = cba.default
# The timed interval ends here: validation below is excluded.
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
test = Validation(test_data, rules, default, 'single')

Runtime:  0.18986725807189941
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           2       0.00      0.00      0.00        46
           3       0.57      1.00      0.73       198
           4       0.00      0.00      0.00        39

    accuracy                           0.57       346
   macro avg       0.14      0.25      0.18       346
weighted avg       0.33      0.57      0.42       346



## CBA 加上隨機森林並拿掉重要性低的屬性加上 lift 加上 APR 剪枝方法


In [17]:
# Full combination: reduced data set (delete_low_importance), MIN_LIFT passed
# to Car, sort mode 2, and rule selection via Cba.apr_cover().
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.delete_low_importance()
# NOTE(review): `data` is not read again anywhere in this cell.
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
# The timed interval starts here: loading and attribute removal are excluded.
start_time = time.time()
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
car.sort_rule(2)
cba = Cba(train_data, car.rule)
cba.apr_cover()
# Only the strong rules are used; cba.spare_rules is not read in this cell.
rules = cba.strong_rules
default = cba.default
end_time = time.time()
print("Runtime: ", compute_runtime(start_time, end_time))
# Validation is constructed with the test split in 'single' mode.
test = Validation(test_data, rules, default, 'single')

Runtime:  0.06847476959228516
              precision    recall  f1-score   support

           0       0.00      0.00      0.00        63
           2       0.00      0.00      0.00        46
           3       0.57      1.00      0.73       198
           4       0.00      0.00      0.00        39

    accuracy                           0.57       346
   macro avg       0.14      0.25      0.18       346
weighted avg       0.33      0.57      0.42       346

