In [91]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


In [105]:
bank_data = pd.read_csv("user_activity.csv")

# Add synthetic features
bank_data['Date'] = pd.to_datetime(bank_data['Date'])
bank_data = bank_data.sort_values(by='Date')
bank_data['days_since_last_purchase'] = (bank_data['Date'] - bank_data['Date'].shift(1)).dt.days.fillna(0)

# Encode categorical features
label_encoder = LabelEncoder()
bank_data['Category'] = label_encoder.fit_transform(bank_data['Category'])
bank_data['FavoriteCategory'] = label_encoder.transform(bank_data['FavoriteCategory'])
bank_data

Unnamed: 0,Category,Date,Cost,BalanceBefore,FavoriteCategory,days_since_last_purchase
0,3,2023-01-15,30,530,3,0.0
126,3,2023-01-15,30,530,3,0.0
127,1,2023-01-20,50,480,1,5.0
1,1,2023-01-20,50,480,1,0.0
2,6,2023-01-25,200,430,6,5.0
...,...,...,...,...,...,...
289,6,2026-11-20,80,104,1,10.0
290,5,2026-12-01,5,99,1,11.0
291,0,2026-12-10,15,84,1,9.0
292,2,2026-12-15,8,76,1,5.0


In [93]:
X = bank_data[['Category', 'Cost', 'BalanceBefore', 'days_since_last_purchase']]
y = bank_data['FavoriteCategory']

In [94]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [95]:
rf_model = RandomForestClassifier(n_estimators=100, random_state=1)
rf_model.fit(X_train_scaled, y_train)

y_pred_rf = rf_model.predict(X_test_scaled)

accuracy_rf = accuracy_score(y_test, y_pred_rf)
print(f'Random Forest Model Accuracy without balance_change: {accuracy_rf}')

Random Forest Model Accuracy without balance_change: 0.6610169491525424


In [110]:
new_data = pd.read_csv("test.csv")

new_data['Date'] = pd.to_datetime(bank_data['Date'])
new_data = new_data.sort_values(by='Date')
new_data['days_since_last_purchase'] = (new_data['Date'] - new_data['Date'].shift(1)).dt.days.fillna(0)
new_data = new_data.drop('Date', axis=1)

new_data['Category'] = label_encoder.transform(new_data['Category'])
new_data_scaled = scaler.transform(new_data)

# Make predictions
predictions = rf_model.predict(new_data_scaled)

# Decode the predictions to get category names
predicted_categories = label_encoder.inverse_transform(predictions)

# Display the predictions
print(f'Predicted Categories: {type(predicted_categories)}')

Predicted Categories: <class 'numpy.ndarray'>


In [None]:
# Standardize features
new_data_scaled = scaler.transform(new_data)

# Make predictions
predictions = rf_model.predict(new_data_scaled)

# Decode the predictions to get category names
predicted_categories = label_encoder.inverse_transform(predictions)

# Display the predictions
print(f'Predicted Categories: {predicted_categories}')