In [1]:
from DataProcessing import DataProcessing
from Car import Car
from Cba import Cba
from Validation import Validation

## 原版 CBA


In [2]:
# Baseline CBA run: load and preprocess "car.data", mine rules on the training
# split, build the classifier, and validate it on the test split.
# The name `data_procesing` (sic) is reused by later cells, so it is kept as-is.
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data = data_procesing.encoded_data
train_data, test_data = data_procesing.split_train_test_data()
# Rule-mining thresholds for this run; no lift threshold is passed to Car here.
MIN_SUPPORT = 0.1
MIN_CONFIDENCE = 0.8
# Generate candidate rules from the training split, then sort them.
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE)
car.generate_frequent()
car.sort_rule()
# Build the classifier from the sorted rules; cover() must run before
# final_rules_ and default are read.
cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
# NOTE(review): the classification report shown in this cell's output is
# presumably printed by the Validation constructor — confirm in Validation.py.
test = Validation(test_data, rules, default)

              precision    recall  f1-score   support

           0       0.37      0.84      0.51        63
           1       0.00      0.00      0.00        46
           2       0.95      0.97      0.96       198
           3       0.00      0.00      0.00        39

    accuracy                           0.71       346
   macro avg       0.33      0.45      0.37       346
weighted avg       0.61      0.71      0.64       346



## CBA 加入 LIFT


In [17]:
# CBA run with an additional minimum-lift threshold on the mined rules.
# Relies on `data_procesing` created in an earlier cell (hidden-state
# dependency: that cell must have been executed first).
# NOTE(review): this calls split_train_test_data() again; if the split is
# randomised without a fixed seed, this run and the baseline are evaluated on
# different splits — confirm in DataProcessing.
train_data, test_data = data_procesing.split_train_test_data()
MIN_SUPPORT = 0.1
MIN_CONFIDENCE = 0.8
MIN_LIFT = 1.4
# Same pipeline as the baseline, except MIN_LIFT is passed as a fourth argument.
car = Car(train_data, MIN_SUPPORT, MIN_CONFIDENCE, MIN_LIFT)
car.generate_frequent()
car.sort_rule()
# Build the classifier from the sorted rules; cover() must run before
# final_rules_ and default are read.
cba = Cba(train_data, car.rule)
cba.cover()
rules = cba.final_rules_
default = cba.default
# NOTE(review): the classification report shown in this cell's output is
# presumably printed by the Validation constructor — confirm in Validation.py.
test = Validation(test_data, rules, default)

              precision    recall  f1-score   support

           0       0.27      1.00      0.42        63
           1       0.00      0.00      0.00        46
           2       1.00      0.57      0.72       198
           3       0.00      0.00      0.00        39

    accuracy                           0.51       346
   macro avg       0.32      0.39      0.29       346
weighted avg       0.62      0.51      0.49       346



In [22]:
import numpy as np

# Turn the feature importances computed by DataProcessing into small integer
# weights (presumably random-forest importances, per the original note in this
# cell — confirm in DataProcessing.get_feature_importances).
#
# Scaling: log(importance + 1) * 10, rounded to the nearest integer.
# Adding 1 keeps the logarithm defined when an importance is exactly 0
# (log(1) == 0), so an unimportant feature maps to weight 0.

# A fresh DataProcessing instance is created so this cell does not depend on
# the state left behind by earlier cells.
data_procesing = DataProcessing("car.data")
data_procesing.read_data()
data_procesing.process()
data_procesing.get_feature_importances()
feature_importances = np.array(data_procesing.feature_importances)
scaled_importances = np.round(np.log(feature_importances + 1) * 10)
print(scaled_importances)

[2. 1. 1. 2. 1. 2.]


In [None]:
import pandas as pd


class RuleItem_Weight():
    """A candidate class-association rule scored with per-feature weights.

    A rule has the form ``conditions => class``. Its support is multiplied by
    the mean weight of the features used in ``conditions``; confidence and
    lift are then derived from that weighted support (see ``set_ruleitem``).
    """

    def __init__(self):
        # Antecedent of the rule: column label -> required value.
        self.conditions: dict = {}
        # Consequent of the rule: value of the "class" column.
        self.class_: str = ""
        # Scores filled in by a successful set_ruleitem() call.
        self.support: float = 0.0
        self.confidence: float = 0.0
        self.lift: float = 0.0
        # True only after set_ruleitem() accepted a rule.
        self.is_ruleitem = False

    def set_ruleitem(self, item: dict, data: pd.DataFrame, min_support: float = 0.01, min_confidence: float = 0.1, min_lift: float = 0.0, weights: "list | None" = None):
        """Score ``item`` against ``data`` and store it if it passes all thresholds.

        Args:
            item: dict with keys ``"conditions"`` (column label -> value) and
                ``"class"`` (target value of the ``"class"`` column).
            data: frame containing every condition column and a ``"class"``
                column.
            min_support: minimum weighted support required.
            min_confidence: minimum confidence required.
            min_lift: minimum lift required; ``0.0`` disables the lift filter.
            weights: per-feature weights, indexed by the same keys as
                ``conditions`` (e.g. a list indexed by integer column labels,
                or a dict). ``None`` or an empty container means every
                feature has weight 1.0.

        Returns:
            ``True`` if the rule passed every threshold and was stored on the
            instance, ``False`` otherwise. On ``False`` the instance is left
            unchanged. A rule with no conditions, an empty ``data`` frame, an
            antecedent matching no rows, or a class absent from ``data`` is
            rejected with ``False`` instead of raising ``ZeroDivisionError``.

        NOTE(review): confidence is computed as *weighted* support divided by
        the *unweighted* support of the antecedent, so it equals the plain
        confidence times the mean feature weight and can exceed 1 when the
        weights do. This is kept as-is because the mined rule set depends on
        it — confirm it is intended.
        """
        conditions = item["conditions"]
        class_ = item["class"]
        total_rows = data.shape[0]

        # Without conditions or rows every ratio below is undefined.
        if not conditions or total_rows == 0:
            return False

        uniform_weights = weights is None or len(weights) == 0

        importance_sum = 0.0
        condition_mask = None
        for key in conditions:
            importance_sum += 1.0 if uniform_weights else weights[key]
            matches = data[key] == conditions[key]
            condition_mask = matches if condition_mask is None else condition_mask & matches

        # Mean weight of the features that appear in the antecedent.
        rule_weight = importance_sum / len(conditions)

        class_mask = data["class"] == class_
        # Summing a boolean mask counts matching rows without copying the frame.
        condition_rows = int(condition_mask.sum())
        rule_rows = int((condition_mask & class_mask).sum())
        class_rows = int(class_mask.sum())

        # An antecedent that never occurs, or a class that never occurs,
        # cannot yield a meaningful confidence or lift.
        if condition_rows == 0 or class_rows == 0:
            return False

        support = rule_rows / total_rows * rule_weight
        # Written as a negated ">=" so that a NaN score is rejected.
        if not (support >= min_support):
            return False

        condition_support = condition_rows / total_rows
        confidence = support / condition_support
        if not (confidence >= min_confidence):
            return False

        lift = confidence / (class_rows / total_rows)
        if min_lift != 0.0 and lift < min_lift:
            return False

        self.conditions = conditions
        self.class_ = class_
        self.support = support
        self.confidence = confidence
        self.lift = lift
        self.is_ruleitem = True
        return True

    def print_ruleitem(self):
        """Print the stored rule and its scores, or a notice if none is stored."""
        if not self.is_ruleitem:
            print("This is not a ruleitem")
            return
        antecedent = "".join(
            f"{key}={value} " for key, value in self.conditions.items())
        print(
            f"{antecedent}=> class={self.class_} "
            f"(support={self.support}, confidence={self.confidence}, lift={self.lift})"
        )