In [None]:
# Find association patterns between body style and the following attributes: dealer region, gender, transmission, engine
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

In [29]:
# Load the car sales records from disk
csv_path = "car_data.csv"
data = pd.read_csv(csv_path)
# Discard, in place, every row that has a missing value in any column
data.dropna(axis=0, how="any", inplace=True)

In [30]:
# Attributes whose relationship with the body style is being mined, and the
# target column itself.
feature_columns = ['Dealer_Region', 'Gender', 'Transmission', 'Engine']
target_column = 'Body Style'

# Ensure that all feature columns used for the transaction encoder are strings
for column in feature_columns:
    data[column] = data[column].astype(str)

# Build one transaction (a list of strings) per row, restricted to the
# attributes of interest plus the body style. The body style must be part of
# the transaction: if it is dropped, no itemset or rule can ever contain a
# body style and the filter at the end always yields an empty frame.
# NOTE(review): items are raw cell values, so this assumes no value is shared
# between two different columns -- confirm against the data.
transactions = (
    data[feature_columns + [target_column]].astype(str).values.tolist()
)

# One-hot encode the transactions into a boolean item matrix
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df = pd.DataFrame(te_ary, columns=te.columns_)

# Apply the apriori algorithm to get frequent itemsets.
# min_support=0.1 keeps itemsets with a support of at least 0.1
# (you can choose a different threshold).
frequent_itemsets = apriori(df, min_support=0.1, use_colnames=True)

# Generate association rules with a confidence of at least 0.5
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

# Keep only the rules whose consequent is a single body style.
# Consequents are frozensets of string items, so each body style is converted
# to a one-element frozenset of its string form for the comparison.
body_styles = data[target_column].unique().tolist()
body_styles_frozenset = [frozenset([str(bs)]) for bs in body_styles]

# Now filter rules that have a body style as consequent
rules_with_body_style = rules[rules['consequents'].isin(body_styles_frozenset)]

rules_with_body_style