In [None]:
import pandas as pd

# First look at the raw file: read it with pandas' default delimiter to see
# how the columns come out. A later cell re-reads the same path with
# delimiter=';'.
file_path = 'sales_data2.csv'
sales_data = pd.read_csv(file_path)

# Show the first 10 rows to inspect the parsed structure
sales_data.head(10)


In [None]:
# Re-read the same CSV, this time splitting fields on semicolons
sales_data = pd.read_csv(file_path, sep=';')

# Preview the first five rows of the re-parsed table
sales_data.head(5)


In [None]:
def _split_items(raw_items):
    """Turn a comma-separated item string into a list of trimmed items.

    Values that are already lists are returned unchanged, so re-running this
    cell after ITEMS has been converted does not raise AttributeError.
    """
    if isinstance(raw_items, list):
        return raw_items
    return [item.strip() for item in raw_items.split(',')]


# Split ITEMS column into lists of items for each transaction
sales_data['ITEMS'] = sales_data['ITEMS'].apply(_split_items)

# Display the formatted dataset
sales_data.head()


In [None]:
from itertools import combinations
from collections import defaultdict

# Prepare data for Apriori: one list of items per transaction
transactions = sales_data['ITEMS'].tolist()

# Define parameters for Apriori (both thresholds are fractions in [0, 1])
min_support = 0.1  # Minimum support threshold: 0.1 = 10% of transactions
min_confidence = 0.9  # Minimum confidence threshold: 0.9 = 90%
n_transactions = len(transactions)  # Total number of transactions

# Step 1: Generate frequent itemsets
def get_frequent_itemsets(transactions, min_support):
    """Return every itemset whose support is at least ``min_support``.

    Support is the fraction of transactions that contain the itemset. Each
    transaction contributes at most once to any itemset, even when an item is
    listed several times in it.

    Args:
        transactions: Sequence of transactions, each a sequence of hashable
            items.
        min_support: Minimum fraction of transactions (0-1) an itemset must
            appear in to be kept.

    Returns:
        dict mapping ``frozenset`` itemsets to their support (float).

    Note:
        Every non-empty subset of every transaction is enumerated, so the
        cost grows exponentially with the length of a transaction.
    """
    # Use the size of the argument itself rather than a notebook-level global,
    # so the result is correct for whatever list is passed in.
    total = len(transactions)
    item_support = defaultdict(int)

    for transaction in transactions:
        # Drop repeated items while keeping first-seen order; otherwise a
        # duplicated item would be counted more than once per transaction
        # and support could exceed 1.
        unique_items = list(dict.fromkeys(transaction))
        for itemset_size in range(1, len(unique_items) + 1):
            for itemset in combinations(unique_items, itemset_size):
                item_support[frozenset(itemset)] += 1

    # Keep only the itemsets that reach the support threshold.
    frequent_itemsets = {}
    for itemset, count in item_support.items():
        support = count / total
        if support >= min_support:
            frequent_itemsets[itemset] = support

    return frequent_itemsets

# Step 2: Generate association rules
def generate_rules(frequent_itemsets, transactions, min_confidence):
    """Derive single-consequent association rules from frequent itemsets.

    For every itemset with at least two items, each way of removing exactly
    one item yields a candidate rule ``antecedent -> {removed item}``. The
    rule's confidence is the itemset's support divided by the antecedent's
    support, where the antecedent's support is counted over ``transactions``.

    Args:
        frequent_itemsets: dict mapping ``frozenset`` itemsets to support.
        transactions: Sequence of transactions, each an iterable of items.
        min_confidence: Minimum confidence (0-1) a rule needs to be kept.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples, with the
        first two elements being frozensets. Empty when there are no
        transactions.
    """
    # Build the set form of every transaction once instead of once per rule.
    transaction_sets = [set(t) for t in transactions]
    total = len(transaction_sets)
    if total == 0:
        # No data to measure antecedent support against; avoids dividing by 0.
        return []

    support_cache = {}

    def antecedent_support_of(items):
        """Fraction of transactions containing ``items`` (memoized)."""
        if items not in support_cache:
            hits = sum(1 for t in transaction_sets if items <= t)
            support_cache[items] = hits / total
        return support_cache[items]

    rules = []
    for itemset, rule_support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue
        for antecedent in combinations(itemset, len(itemset) - 1):
            antecedent = frozenset(antecedent)
            antecedent_support = antecedent_support_of(antecedent)
            if antecedent_support == 0:
                # Antecedent never occurs in these transactions, so the
                # confidence is undefined; skip instead of dividing by zero.
                continue
            confidence = rule_support / antecedent_support
            if confidence >= min_confidence:
                rules.append((antecedent, itemset - antecedent, confidence))
    return rules

# Run both stages: mine the itemsets, then derive rules from them
frequent_itemsets = get_frequent_itemsets(transactions, min_support)
rules = generate_rules(frequent_itemsets, transactions, min_confidence)

# Convert the frozensets to plain sets so the cell output is easier to read
frequent_itemsets_output = [
    (set(items), frequent_itemsets[items]) for items in frequent_itemsets
]
association_rules_output = [
    (set(rule[0]), set(rule[1]), rule[2]) for rule in rules
]

frequent_itemsets_output, association_rules_output


In [None]:
# Read the sales file again (semicolon-delimited) so this cell starts from
# freshly loaded data
file_path = 'sales_data2.csv'
sales_data = pd.read_csv(file_path, delimiter=';')

# Turn each comma-separated ITEMS string into a list of whitespace-trimmed items
sales_data['ITEMS'] = sales_data['ITEMS'].apply(lambda x: [item.strip() for item in x.split(',')])

# Prepare transactions list for Apriori: one list of items per row
transactions = sales_data['ITEMS'].tolist()

# Parameters for Apriori (both thresholds are fractions in [0, 1])
min_support = 0.1  # Minimum support threshold: 0.1 = 10% of transactions
min_confidence = 0.9  # Minimum confidence threshold: 0.9 = 90%
n_transactions = len(transactions)  # Total number of transactions

# Step 1: Generate frequent itemsets
def get_frequent_itemsets(transactions, min_support):
    """Return every itemset whose support is at least ``min_support``.

    Support is the fraction of transactions that contain the itemset. Each
    transaction contributes at most once to any itemset, even when an item is
    listed several times in it.

    Args:
        transactions: Sequence of transactions, each a sequence of hashable
            items.
        min_support: Minimum fraction of transactions (0-1) an itemset must
            appear in to be kept.

    Returns:
        dict mapping ``frozenset`` itemsets to their support (float).

    Note:
        Every non-empty subset of every transaction is enumerated, so the
        cost grows exponentially with the length of a transaction.
    """
    # Use the size of the argument itself rather than a notebook-level global,
    # so the result is correct for whatever list is passed in.
    total = len(transactions)
    item_support = defaultdict(int)

    for transaction in transactions:
        # Drop repeated items while keeping first-seen order; otherwise a
        # duplicated item would be counted more than once per transaction
        # and support could exceed 1.
        unique_items = list(dict.fromkeys(transaction))
        for itemset_size in range(1, len(unique_items) + 1):
            for itemset in combinations(unique_items, itemset_size):
                item_support[frozenset(itemset)] += 1

    # Keep only the itemsets that reach the support threshold.
    frequent_itemsets = {}
    for itemset, count in item_support.items():
        support = count / total
        if support >= min_support:
            frequent_itemsets[itemset] = support

    return frequent_itemsets

# Step 2: Generate association rules
def generate_rules(frequent_itemsets, transactions, min_confidence):
    """Derive single-consequent association rules from frequent itemsets.

    For every itemset with at least two items, each way of removing exactly
    one item yields a candidate rule ``antecedent -> {removed item}``. The
    rule's confidence is the itemset's support divided by the antecedent's
    support, where the antecedent's support is counted over ``transactions``.

    Args:
        frequent_itemsets: dict mapping ``frozenset`` itemsets to support.
        transactions: Sequence of transactions, each an iterable of items.
        min_confidence: Minimum confidence (0-1) a rule needs to be kept.

    Returns:
        List of ``(antecedent, consequent, confidence)`` tuples, with the
        first two elements being frozensets. Empty when there are no
        transactions.
    """
    # Build the set form of every transaction once instead of once per rule.
    transaction_sets = [set(t) for t in transactions]
    total = len(transaction_sets)
    if total == 0:
        # No data to measure antecedent support against; avoids dividing by 0.
        return []

    support_cache = {}

    def antecedent_support_of(items):
        """Fraction of transactions containing ``items`` (memoized)."""
        if items not in support_cache:
            hits = sum(1 for t in transaction_sets if items <= t)
            support_cache[items] = hits / total
        return support_cache[items]

    rules = []
    for itemset, rule_support in frequent_itemsets.items():
        if len(itemset) < 2:
            continue
        for antecedent in combinations(itemset, len(itemset) - 1):
            antecedent = frozenset(antecedent)
            antecedent_support = antecedent_support_of(antecedent)
            if antecedent_support == 0:
                # Antecedent never occurs in these transactions, so the
                # confidence is undefined; skip instead of dividing by zero.
                continue
            confidence = rule_support / antecedent_support
            if confidence >= min_confidence:
                rules.append((antecedent, itemset - antecedent, confidence))
    return rules

# Mine the frequent itemsets, then build the rules on top of them
frequent_itemsets = get_frequent_itemsets(transactions, min_support)
rules = generate_rules(frequent_itemsets, transactions, min_confidence)

# Swap frozensets for plain sets so the displayed result is easier to read
frequent_itemsets_output = [
    (set(key), value)
    for key, value in frequent_itemsets.items()
]
association_rules_output = [
    (set(lhs), set(rhs), conf)
    for lhs, rhs, conf in rules
]

frequent_itemsets_output, association_rules_output


In [None]:
from itertools import combinations
from collections import defaultdict

# Read the semicolon-delimited sales file
file_path = 'sales_data2.csv'
sales_data = pd.read_csv(file_path, delimiter=';')

# Turn each comma-separated ITEMS string into a list of whitespace-trimmed items
sales_data['ITEMS'] = sales_data['ITEMS'].apply(lambda x: [item.strip() for item in x.split(',')])

# Prepare transactions list for Apriori: one list of items per row
transactions = sales_data['ITEMS'].tolist()

# Parameters
min_support = 0.1  # Minimum support: 0.1 = 10% of transactions
n_transactions = len(transactions)  # Total number of transactions

# Function that computes the frequent itemsets (pairs or larger only)
def get_frequent_itemsets(transactions, min_support):
    """Return itemsets of two or more items with support >= ``min_support``.

    Support is the fraction of transactions that contain the itemset. Each
    transaction contributes at most once to any itemset, even when an item is
    listed several times in it. Single-item sets are not reported.

    Args:
        transactions: Sequence of transactions, each a sequence of hashable
            items.
        min_support: Minimum fraction of transactions (0-1) an itemset must
            appear in to be kept.

    Returns:
        dict mapping ``frozenset`` itemsets to their support (float).

    Note:
        Every subset of size >= 2 of every transaction is enumerated, so the
        cost grows exponentially with the length of a transaction.
    """
    # Use the size of the argument itself rather than a notebook-level global,
    # so the result is correct for whatever list is passed in.
    total = len(transactions)
    item_support = defaultdict(int)

    # Count how many transactions contain each itemset
    for transaction in transactions:
        # Drop repeated items while keeping first-seen order; otherwise a
        # duplicated item would be counted more than once per transaction
        # and could even produce a bogus one-item "pair".
        unique_items = list(dict.fromkeys(transaction))
        for size in range(2, len(unique_items) + 1):  # pairs or larger only
            for itemset in combinations(unique_items, size):
                item_support[frozenset(itemset)] += 1

    # Keep only the itemsets that reach min_support
    frequent_itemsets = {}
    for itemset, count in item_support.items():
        support = count / total
        if support >= min_support:
            frequent_itemsets[itemset] = support

    return frequent_itemsets

# Run the frequent-itemset search
frequent_itemsets = get_frequent_itemsets(transactions, min_support)

# Print each itemset with its support, formatted to two decimals
print("Frequent Itemsets (Item yang sering dibeli bersama):")
for itemset in frequent_itemsets:
    support = frequent_itemsets[itemset]
    print(f"{set(itemset)}: {support:.2f}")
