In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
def _non_missing_cells(row):
    """Return the non-missing values of one row as a plain Python list."""
    return row.dropna().tolist()


# header=None: every line of the file, including the first, is read as a data row.
data = pd.read_csv('supermarket.csv', header=None)

# One list per CSV row, holding only the cells that are not missing.
transactions = data.apply(_non_missing_cells, axis=1).tolist()

print(f"Liczba transakcji: {len(transactions)}")
print(f"Przykładowa transakcja: {transactions[0]}")

In [None]:
# Encode the transaction lists into an array, then wrap it in a DataFrame
# whose column labels are taken from the fitted encoder's `columns_`.
te = TransactionEncoder()
te.fit(transactions)
te_data = te.transform(transactions)
df = pd.DataFrame(data=te_data, columns=te.columns_)
print(df.head())

### Zadanie 1: Najliczniejszy zbiór częsty (min support 20%)

In [None]:
# Mine the itemsets with support of at least 20%.
frequent_itemsets = apriori(df, min_support=0.20, use_colnames=True)
# Number of products in each itemset.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)

if frequent_itemsets.empty:
    # Nothing reached the threshold. Without this guard max() is NaN, the
    # filter below is empty and .iloc[0] raises IndexError.
    max_length = 0
    largest_itemsets = frequent_itemsets
    result = None
    print("Brak zbiorów częstych dla min_support=0.20")
else:
    max_length = frequent_itemsets['length'].max()
    largest_itemsets = frequent_itemsets[frequent_itemsets['length'] == max_length]
    # Several itemsets can share the maximum size; report the best-supported one.
    result = largest_itemsets.sort_values('support', ascending=False).iloc[0]
    # sorted() gives a stable product order. Printing the frozenset directly
    # depends on string hashing, which changes between kernel sessions.
    print(f"Produkty: {sorted(result['itemsets'])}")
    print(f"Wsparcie: {result['support']:.2f}")

### Zadanie 2: Histogram 10 najczęstszych produktów

In [None]:
# Sum every column of df and keep the ten largest totals, biggest first.
product_counts = df.sum().sort_values(ascending=False).iloc[:10]

# Explicit figure/axes pair so every call targets a known Axes object.
fig, ax = plt.subplots(figsize=(10, 6))
product_counts.plot.bar(ax=ax)
ax.set_xlabel('Produkty')
ax.set_ylabel('Liczba wystąpień')
ax.set_title('10 najczęściej występujących produktów')
# Tilt the x tick labels to 45 degrees.
plt.setp(ax.get_xticklabels(), rotation=45)
fig.tight_layout()
plt.show()

### Zadanie 3: Reguły asocjacyjne (support 25%, confidence 0.70)

In [None]:
# Frequent itemsets at a 25% support threshold feed the rule generator.
frequent_itemsets_25 = apriori(df, min_support=0.25, use_colnames=True)

# Derive rules, filtered on confidence with a threshold of 0.70.
rules = association_rules(
    frequent_itemsets_25,
    metric="confidence",
    min_threshold=0.70,
)

# Show only the columns relevant to the report.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print(rules.loc[:, rule_columns])

### Zadanie 4: Reguły prowadzące do high (support 16%, confidence 0.50)

In [None]:
# Frequent itemsets at a 16% support threshold.
frequent_itemsets_16 = apriori(df, min_support=0.16, use_colnames=True)

# Rules filtered on confidence with a threshold of 0.50.
rules_high = association_rules(
    frequent_itemsets_16,
    metric="confidence",
    min_threshold=0.50,
)

# Boolean mask: True where the rule's consequent is exactly the single item 'high'.
consequent_is_high = rules_high['consequents'] == {'high'}
rules_high_only = rules_high[consequent_is_high]

high_rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']
print(rules_high_only.loc[:, high_rule_columns])

### Interpretacja wyników:
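- TODO: opisać największy zbiór częsty z Zadania 1 (produkty i ich wsparcie).
- TODO: skomentować ranking 10 najczęściej występujących produktów z Zadania 2.
- TODO: wnioski z reguł asocjacyjnych z Zadań 3 i 4 (w tym reguły prowadzące do `high`).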