In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Load dataset
df = pd.read_csv('customer_data.csv')

# Drop 'id' column 
df = df.drop(['id'], axis=1)

# Use encoders for each column
le_product = LabelEncoder()
df['product_category'] = le_product.fit_transform(df['product_category'])

le_gender = LabelEncoder()
df['gender'] = le_gender.fit_transform(df['gender'])

le_education = LabelEncoder()
df['education'] = le_education.fit_transform(df['education'])

le_region = LabelEncoder()
df['region'] = le_region.fit_transform(df['region'])

le_loyalty = LabelEncoder()
df['loyalty_status'] = le_loyalty.fit_transform(df['loyalty_status'])

# Map purchase_frequency values
freq_map = {'rare': 1, 'occasional': 2, 'frequent': 3}
df['purchase_frequency'] = df['purchase_frequency'].map(freq_map)
df['purchase_frequency'] = pd.to_numeric(df['purchase_frequency'], errors='coerce')

# Fill the missing values
df['purchase_frequency'].fillna(df['purchase_frequency'].median(), inplace=True)
df.fillna(df.median(numeric_only=True), inplace=True)

# Scale numerical features
scaler = StandardScaler()
df[['age', 'income', 'purchase_frequency', 'purchase_amount']] = scaler.fit_transform(
    df[['age', 'income', 'purchase_frequency', 'purchase_amount']]
)

# Features and target
X = df.drop('product_category', axis=1)
y = df['product_category']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred, target_names=le_product.classes_))


Accuracy: 0.25
              precision    recall  f1-score   support

      Beauty       0.07      0.01      0.02      1032
       Books       0.14      0.09      0.11      2973
    Clothing       0.20      0.20      0.20      4037
 Electronics       0.30      0.57      0.40      6100
        Food       0.15      0.09      0.11      2896
      Health       0.09      0.03      0.05      1970
        Home       0.04      0.01      0.01       992

    accuracy                           0.25     20000
   macro avg       0.14      0.14      0.13     20000
weighted avg       0.19      0.25      0.20     20000

