In [34]:
from ucimlrepo import fetch_ucirepo
import pandas as pd

# Download the UCI "Car Evaluation" dataset (repository id 19).
car_evaluation = fetch_ucirepo(id=19)

# Split the payload into the feature table and the class-label table.
features = car_evaluation.data.features
target = car_evaluation.data.targets

# `features` is already a DataFrame with its own column names, so copy it
# directly. The previous `columns=car_evaluation.feature_names` relied on an
# attribute the fetched object does not document; copying also keeps the
# fetched table untouched when the label column is appended below.
df = features.copy()
df['target'] = target

# Show the first five rows to inspect column names and sample values.
print(df.head())

# List every column name (the six features plus the appended 'target').
print("Features:", df.columns.tolist())

  buying  maint doors persons lug_boot safety target
0  vhigh  vhigh     2       2    small    low  unacc
1  vhigh  vhigh     2       2    small    med  unacc
2  vhigh  vhigh     2       2    small   high  unacc
3  vhigh  vhigh     2       2      med    low  unacc
4  vhigh  vhigh     2       2      med    med  unacc
Features: ['buying', 'maint', 'doors', 'persons', 'lug_boot', 'safety', 'target']


In [35]:
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori

# Turn every row of feature values into one transaction (a plain list of items).
# NOTE(review): the items are bare cell values without a column prefix, so the
# same string coming from different columns (e.g. 'med', 'high', '2') is treated
# as a single item by the encoder -- confirm this is intended.
features_list = df.drop(columns='target').to_numpy().tolist()

# One-hot encode the transactions: one boolean column per distinct item.
te = TransactionEncoder()
te.fit(features_list)
te_ary = te.transform(features_list)

# Wrap the encoded array in a DataFrame labelled with the item names.
te_df = pd.DataFrame(data=te_ary, columns=te.columns_)

# Run apriori, keeping itemsets whose support is at least 0.3.
frequent_itemsets = apriori(te_df, min_support=0.3, use_colnames=True)

# Show the mined itemsets together with their support.
print(frequent_itemsets)

     support     itemsets
0   0.500000          (2)
1   0.500000          (4)
2   0.333333        (big)
3   0.625000       (high)
4   0.625000        (low)
5   0.750000        (med)
6   0.333333       (more)
7   0.333333      (small)
8   0.437500      (vhigh)
9   0.312500    (2, high)
10  0.312500     (2, low)
11  0.375000     (2, med)
12  0.312500    (4, high)
13  0.312500     (low, 4)
14  0.375000     (med, 4)
15  0.333333  (low, high)
16  0.430556  (med, high)
17  0.430556   (med, low)


In [37]:
# Build a lookup from each distinct cell value to a column that contains it.
# NOTE(review): the lookup is keyed by value only, so when the same value occurs
# in several columns the column visited last overwrites the earlier ones.
feature_map = {
    value: col
    for col in df.columns
    for value in df[col].unique()
}

print("Feature Map:", feature_map)

Feature Map: {'vhigh': 'maint', 'high': 'safety', 'med': 'safety', 'low': 'safety', '2': 'persons', '3': 'doors', '4': 'persons', '5more': 'doors', 'more': 'persons', 'small': 'lug_boot', 'big': 'lug_boot', 'unacc': 'target', 'acc': 'target', 'vgood': 'target', 'good': 'target'}


In [57]:
def format_itemset_to_dict(itemset, feature_map):
    """Express every item of an itemset as a single-entry ``{column: value}`` dict.

    Args:
        itemset: Iterable of item values.
        feature_map: Mapping from an item value to its column name.

    Returns:
        A list with one ``{feature_map[value]: value}`` dict per item, in the
        iteration order of ``itemset``.

    Raises:
        KeyError: If an item is missing from ``feature_map``.
    """
    formatted = []
    for value in itemset:
        formatted.append({feature_map[value]: value})
    return formatted


# Convert every frequent itemset into its list of {column: value} dicts and
# store the result as an extra column on the itemset table.
formatted_itemsets = frequent_itemsets['itemsets'].apply(
    format_itemset_to_dict, args=(feature_map,))
frequent_itemsets['formatted_itemsets'] = formatted_itemsets

# Keep just the support value alongside the formatted itemsets.
formatted_frequent_itemsets = frequent_itemsets.loc[
    :, ['support', 'formatted_itemsets']]

print(formatted_frequent_itemsets)

     support                       formatted_itemsets
0   0.500000                       [{'persons': '2'}]
1   0.500000                       [{'persons': '4'}]
2   0.333333                    [{'lug_boot': 'big'}]
3   0.625000                     [{'safety': 'high'}]
4   0.625000                      [{'safety': 'low'}]
5   0.750000                      [{'safety': 'med'}]
6   0.333333                    [{'persons': 'more'}]
7   0.333333                  [{'lug_boot': 'small'}]
8   0.437500                     [{'maint': 'vhigh'}]
9   0.312500   [{'persons': '2'}, {'safety': 'high'}]
10  0.312500    [{'persons': '2'}, {'safety': 'low'}]
11  0.375000    [{'persons': '2'}, {'safety': 'med'}]
12  0.312500   [{'persons': '4'}, {'safety': 'high'}]
13  0.312500    [{'safety': 'low'}, {'persons': '4'}]
14  0.375000    [{'safety': 'med'}, {'persons': '4'}]
15  0.333333  [{'safety': 'low'}, {'safety': 'high'}]
16  0.430556  [{'safety': 'med'}, {'safety': 'high'}]
17  0.430556   [{'safety': '

In [61]:
# 1. 特徵的頻繁項集與原始資料集做配對
# 2. 將包含該項集的樣本取其類別與項集做配對
# 3. 形成特徵->類別的形式的規則 {feature: (itemset), target: (class), support: (support), confidence: (confidence)}


def generate_rules(dataset, itemset):
    """Build a ``features -> class`` rule for one formatted itemset.

    Every row of ``dataset`` is checked (the earlier version returned after
    looking at the first row only and read the global ``df`` instead of the
    ``dataset`` argument). Among the rows that contain the itemset, the most
    frequent value of the ``'target'`` column becomes the rule's class.

    Args:
        dataset: DataFrame with the feature columns and a ``'target'`` column.
            Any index is accepted.
        itemset: Iterable of ``{column: value}`` dicts, all of which must hold
            for a row to match.

    Returns:
        ``None`` when no row matches; otherwise a dict with
        ``'feature'`` (the itemset as passed in),
        ``'target'`` (most frequent class among matching rows; ties follow the
        ordering of ``Series.value_counts``),
        ``'support_count'`` (rows matching the itemset and that class),
        ``'support'`` (``support_count`` divided by the number of rows) and
        ``'confidence'`` (``support_count`` divided by the rows matching the
        itemset).

    Raises:
        KeyError: If ``dataset`` is non-empty and has no ``'target'`` column.
    """
    total_rows = len(dataset)
    if total_rows == 0:
        return None

    # Start from "every row matches" and narrow it down one condition at a time.
    matches = pd.Series(True, index=dataset.index)
    for item in itemset:
        for column, value in item.items():
            # 'target' is the rule's consequent, not a feature, so it never
            # counts as a match; the same goes for unknown columns.
            if column == 'target' or column not in dataset.columns:
                return None
            matches = matches & (dataset[column] == value)

    class_counts = dataset.loc[matches, 'target'].value_counts()
    if class_counts.empty:
        return None

    support_count = int(class_counts.max())
    covered_rows = int(matches.sum())

    return {
        'feature': itemset,
        'target': class_counts.idxmax(),
        'support_count': support_count,
        'support': support_count / total_rows,
        'confidence': support_count / covered_rows,
    }


def check_itemset_in_instance(itemset, instance):
    """Tell whether ``instance`` satisfies every condition of ``itemset``.

    Args:
        itemset: Iterable of dicts; only the first ``key: value`` entry of each
            dict is used as a condition.
        instance: Mapping from feature name to the instance's value.

    Returns:
        ``True`` when every condition's key is present in ``instance`` with an
        equal value (also for an empty ``itemset``), ``False`` otherwise.

    Raises:
        IndexError: If any dict in ``itemset`` is empty.
    """
    # Collect all conditions up front so a malformed (empty) dict is reported
    # before any comparison takes place.
    conditions = [list(entry.items())[0] for entry in itemset]

    return not any(
        name not in instance or instance[name] != expected
        for name, expected in conditions
    )


# Container for generated rule dicts; nothing in the visible code appends to it.
rules = []

# Disabled draft: would call generate_rules once per formatted frequent itemset.
# Note that the returned rule is only bound to `new_rule`, not stored in `rules`.
# for i in range(len(formatted_frequent_itemsets)):
#     new_rule = generate_rules(
#         df, formatted_frequent_itemsets.loc[i, 'formatted_itemsets'])

1210
