In [1]:
# Copyright 2022 NVIDIA Corporation. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ======================================================================

# Each user is responsible for checking the content of datasets and the
# applicable licenses and determining if suitable for the intended use.


<img src="https://developer.download.nvidia.com/notebooks/dlsw-notebooks/merlin_transformers4rec_getting-started-session-based-01-etl-with-nvtabular/nvidia_logo.png" style="width: 90px; float: right;">

# FSI Benchmark Models for Financial Product Recommendation


This notebook creates a simple benchmark model for financial product recommendation using the FSI synthetic demo dataset. We implement a rule-based baseline model to compare against the transformer-based models in subsequent notebooks:

1. **Rule-based Model**: Simple heuristic model using business rules and domain knowledge

The model predicts `converts_for_a_topup` (binary classification) and is evaluated using **log-loss** and **AUROC** metrics.

## Goal
Create baseline performance benchmarks to compare against the transformer-based recommendation models implemented in notebooks 01 and 02.


## Import Required Libraries


In [2]:
import os
import pandas as pd
import numpy as np
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Machine learning libraries
from sklearn.model_selection import train_test_split
from sklearn.metrics import log_loss, roc_auc_score, classification_report

# Set random seed for reproducibility
np.random.seed(42)

# Display settings
pd.set_option('display.max_columns', None)

print("✅ Libraries imported successfully")


✅ Libraries imported successfully


## Define Input/Output Paths


In [3]:
INPUT_DATA_DIR = os.environ.get("INPUT_DATA_DIR", "./data/")
OUTPUT_DIR = os.environ.get("OUTPUT_DIR", "./benchmark_results/")

# Create output directory if it doesn't exist
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"Input data directory: {INPUT_DATA_DIR}")
print(f"Output directory: {OUTPUT_DIR}")


Input data directory: ./data/
Output directory: ./benchmark_results/


## Load and Explore FSI Synthetic Demo Data


In [4]:
# Load the synthetic FSI demo data
data_path = os.path.join(INPUT_DATA_DIR, "synthetic_fsi", "synthetic_demo_data.parquet")
print(f"Loading data from: {data_path}")

df = pd.read_parquet(data_path)
print(f"✅ Loaded {len(df):,} rows and {len(df.columns)} columns")
print(f"📊 Data shape: {df.shape}")
print(f"📋 Columns: {list(df.columns)}")

# Display basic information about the dataset
print("\n" + "=" * 50)
print("DATASET OVERVIEW")
print("=" * 50)
print(f"Dataset shape: {df.shape}")
print(f"Memory usage: {df.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
print(f"Date range: {df['session_date'].min()} to {df['session_date'].max()}")
print(f"Unique loans: {df['loan_id'].nunique():,}")

# Target distribution
print("\n" + "=" * 50)
print("TARGET VARIABLE DISTRIBUTION")
print("=" * 50)
target_counts = df['converts_for_a_topup'].value_counts()
target_pct = df['converts_for_a_topup'].value_counts(normalize=True) * 100
print(f"Target variable: converts_for_a_topup")
print(f"  - No conversion (0): {target_counts[0]:,} ({target_pct[0]:.2f}%)")
print(f"  - Conversion (1): {target_counts[1]:,} ({target_pct[1]:.2f}%)")
print(f"  - Conversion rate: {target_pct[1]:.2f}%")

# Check for missing values
print("\n" + "=" * 50)
print("MISSING VALUES")
print("=" * 50)
missing_values = df.isnull().sum()
if missing_values.sum() > 0:
    print(missing_values[missing_values > 0])
else:
    print("✅ No missing values found")

# Display sample data
print("\n" + "=" * 50)
print("SAMPLE DATA (First 5 rows)")
print("=" * 50)
print(df.head())


Loading data from: ./data/synthetic_fsi/synthetic_demo_data.parquet
✅ Loaded 440,787 rows and 18 columns
📊 Data shape: (440787, 18)
📋 Columns: ['session_date', 'loan_id', 'has_mobile_app', 'debtiq_enrolled', 'pa_eligible', 'topup_eligible', 'ita_eligible', 'email_sent_in_last_90_days', 'dm_sent_in_last_90_days', 'fico', 'income_', 'existing_loan_size_', 'current_loan_mob', 'offer___carousel', 'servicing___carousel', 'feature_sheet', 'bottom_sheet', 'converts_for_a_topup']

DATASET OVERVIEW
Dataset shape: (440787, 18)
Memory usage: 148.76 MB
Date range: 2025-05-01 00:00:00 to 2025-05-30 00:00:00
Unique loans: 148,570

TARGET VARIABLE DISTRIBUTION
Target variable: converts_for_a_topup
  - No conversion (0): 437,366 (99.22%)
  - Conversion (1): 3,421 (0.78%)
  - Conversion rate: 0.78%

MISSING VALUES
✅ No missing values found

SAMPLE DATA (First 5 rows)
  session_date  loan_id  has_mobile_app  debtiq_enrolled  pa_eligible  \
0   2025-05-22  4954838               1                1        

## Feature Engineering and Data Preparation


In [5]:
# Create additional features for modeling
print("🔧 Creating engineered features...")

# Convert session_date to datetime
df['session_date'] = pd.to_datetime(df['session_date'])

# Extract temporal features
df['day_of_week'] = df['session_date'].dt.dayofweek
df['month'] = df['session_date'].dt.month
df['day_of_month'] = df['session_date'].dt.day

# Create loan-to-income ratio
df['loan_to_income_ratio'] = df['existing_loan_size_'] / df['income_']

# Create marketing engagement score
df['marketing_engagement'] = df['email_sent_in_last_90_days'] + df['dm_sent_in_last_90_days']

# Create eligibility score (sum of all eligibility flags)
eligibility_cols = ['pa_eligible', 'topup_eligible', 'ita_eligible']
df['eligibility_score'] = df[eligibility_cols].sum(axis=1)

# Create FICO score categories
df['fico_category'] = pd.cut(df['fico'], 
                            bins=[0, 580, 669, 739, 799, 850], 
                            labels=['Poor', 'Fair', 'Good', 'Very Good', 'Excellent'])

# Create income categories
df['income_category'] = pd.cut(df['income_'], 
                              bins=[0, 30000, 50000, 75000, 100000, np.inf], 
                              labels=['Low', 'Lower-Mid', 'Mid', 'Upper-Mid', 'High'])

print(f"✅ Feature engineering completed. New shape: {df.shape}")

# Define feature sets for modeling
# Categorical features
categorical_features = [
    'offer___carousel', 'servicing___carousel', 'feature_sheet', 'bottom_sheet',
    'fico_category', 'income_category'
]

# Binary features
binary_features = [
    'has_mobile_app', 'debtiq_enrolled', 'pa_eligible', 'topup_eligible', 'ita_eligible'
]

# Numerical features
numerical_features = [
    'fico', 'income_', 'existing_loan_size_', 'current_loan_mob',
    'email_sent_in_last_90_days', 'dm_sent_in_last_90_days',
    'loan_to_income_ratio', 'marketing_engagement', 'eligibility_score',
    'day_of_week', 'month', 'day_of_month'
]

# All features for modeling
all_features = categorical_features + binary_features + numerical_features
target = 'converts_for_a_topup'

print(f"📋 Feature sets defined:")
print(f"  - Categorical features: {len(categorical_features)}")
print(f"  - Binary features: {len(binary_features)}")
print(f"  - Numerical features: {len(numerical_features)}")
print(f"  - Total features: {len(all_features)}")
print(f"  - Target: {target}")


🔧 Creating engineered features...
✅ Feature engineering completed. New shape: (440787, 26)
📋 Feature sets defined:
  - Categorical features: 6
  - Binary features: 5
  - Numerical features: 12
  - Total features: 23
  - Target: converts_for_a_topup


## Split Data into Train/Test Sets


In [6]:
# Prepare features and target
X = df[all_features].copy()
y = df[target].copy()

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print(f"📊 Data split completed:")
print(f"  - Training set: {X_train.shape[0]:,} samples")
print(f"  - Test set: {X_test.shape[0]:,} samples")
print(f"  - Features: {X_train.shape[1]}")
print(f"  - Train conversion rate: {y_train.mean():.4f}")
print(f"  - Test conversion rate: {y_test.mean():.4f}")


📊 Data split completed:
  - Training set: 352,629 samples
  - Test set: 88,158 samples
  - Features: 23
  - Train conversion rate: 0.0078
  - Test conversion rate: 0.0078


## Rule-Based Model

### Model Implementation

This model uses business logic and domain knowledge to make predictions. The rules are based on:
- Customer financial profile (FICO, income, loan-to-income ratio)
- Eligibility flags
- Marketing engagement
- Historical interaction patterns


In [7]:
class RuleBasedModel:
    """Rule-based model for financial product recommendation"""
    
    def __init__(self):
        self.name = "Rule-Based Model"
        self.rules = {
            'high_fico_high_income': {'weight': 0.3, 'threshold': {'fico': 740, 'income_': 75000}},
            'high_eligibility': {'weight': 0.25, 'threshold': {'eligibility_score': 2}},
            'high_engagement': {'weight': 0.2, 'threshold': {'marketing_engagement': 15}},
            'low_loan_to_income': {'weight': 0.15, 'threshold': {'loan_to_income_ratio': 0.3}},
            'mobile_app_user': {'weight': 0.1, 'threshold': {'has_mobile_app': 1}}
        }
    
    def predict_proba(self, X):
        """Predict probabilities based on business rules"""
        probabilities = np.zeros(len(X))
        
        for rule_name, rule_config in self.rules.items():
            weight = rule_config['weight']
            
            if rule_name == 'high_fico_high_income':
                condition = ((X['fico'] >= rule_config['threshold']['fico']) & 
                           (X['income_'] >= rule_config['threshold']['income_']))
            elif rule_name == 'high_eligibility':
                condition = X['eligibility_score'] >= rule_config['threshold']['eligibility_score']
            elif rule_name == 'high_engagement':
                condition = X['marketing_engagement'] >= rule_config['threshold']['marketing_engagement']
            elif rule_name == 'low_loan_to_income':
                condition = X['loan_to_income_ratio'] <= rule_config['threshold']['loan_to_income_ratio']
            elif rule_name == 'mobile_app_user':
                condition = X['has_mobile_app'] == rule_config['threshold']['has_mobile_app']
            
            probabilities += condition.astype(float) * weight
        
        # Apply sigmoid to normalize probabilities
        probabilities = 1 / (1 + np.exp(-probabilities))
        
        # Return probabilities for both classes
        return np.column_stack([1 - probabilities, probabilities])
    
    def predict(self, X, threshold=0.5):
        """Make binary predictions"""
        probas = self.predict_proba(X)
        return (probas[:, 1] >= threshold).astype(int)

# Initialize and train rule-based model
print("🔧 Initializing Rule-Based Model...")
rule_model = RuleBasedModel()

# Make predictions
print("📊 Making predictions with Rule-Based Model...")
rule_train_probas = rule_model.predict_proba(X_train)
rule_test_probas = rule_model.predict_proba(X_test)

rule_train_preds = rule_model.predict(X_train)
rule_test_preds = rule_model.predict(X_test)

print("✅ Rule-Based Model predictions completed")


🔧 Initializing Rule-Based Model...
📊 Making predictions with Rule-Based Model...
✅ Rule-Based Model predictions completed


### Model Evaluation


In [8]:
# Evaluate Rule-Based Model
print("=" * 60)
print("RULE-BASED MODEL EVALUATION")
print("=" * 60)

# Calculate metrics
train_logloss = log_loss(y_train, rule_train_probas)
test_logloss = log_loss(y_test, rule_test_probas)

train_auc = roc_auc_score(y_train, rule_train_probas[:, 1])
test_auc = roc_auc_score(y_test, rule_test_probas[:, 1])

print(f"Training Metrics:")
print(f"  - Log-Loss: {train_logloss:.4f}")
print(f"  - AUROC: {train_auc:.4f}")

print(f"\nTest Metrics:")
print(f"  - Log-Loss: {test_logloss:.4f}")
print(f"  - AUROC: {test_auc:.4f}")

# Classification report
print(f"\nClassification Report (Test Set):")
print(classification_report(y_test, rule_test_preds))


RULE-BASED MODEL EVALUATION
Training Metrics:
  - Log-Loss: 1.0202
  - AUROC: 0.7480

Test Metrics:
  - Log-Loss: 1.0195
  - AUROC: 0.7520

Classification Report (Test Set):
              precision    recall  f1-score   support

           0       0.00      0.00      0.00     87474
           1       0.01      1.00      0.02       684

    accuracy                           0.01     88158
   macro avg       0.00      0.50      0.01     88158
weighted avg       0.00      0.01      0.00     88158



## Results Summary and Comparison


In [9]:
# Create summary of results
print("=" * 80)
print("BENCHMARK SUMMARY")
print("=" * 80)

print(f"📊 Dataset: FSI Synthetic Demo Data")
print(f"   - Total samples: {len(df):,}")
print(f"   - Features: {len(all_features)}")
print(f"   - Target: {target}")
print(f"   - Conversion rate: {df[target].mean():.2%}")

print(f"\n🎯 Rule-Based Model Performance:")
print(f"   - Training AUROC: {train_auc:.4f}")
print(f"   - Test AUROC: {test_auc:.4f}")
print(f"   - Training Log-Loss: {train_logloss:.4f}")
print(f"   - Test Log-Loss: {test_logloss:.4f}")

print(f"\n📋 Rule-Based Model Logic:")
print(f"   - High FICO + High Income (weight: 0.3)")
print(f"   - High Eligibility Score (weight: 0.25)")
print(f"   - High Marketing Engagement (weight: 0.2)")
print(f"   - Low Loan-to-Income Ratio (weight: 0.15)")
print(f"   - Mobile App Usage (weight: 0.1)")

print(f"\n🔮 Next Steps:")
print(f"   1. Compare this baseline with transformer models in notebooks 01-02")
print(f"   2. Analyze which business rules are most effective")
print(f"   3. Consider additional rule refinements based on domain expertise")
print(f"   4. Evaluate business impact and deployment considerations")


BENCHMARK SUMMARY
📊 Dataset: FSI Synthetic Demo Data
   - Total samples: 440,787
   - Features: 23
   - Target: converts_for_a_topup
   - Conversion rate: 0.78%

🎯 Rule-Based Model Performance:
   - Training AUROC: 0.7480
   - Test AUROC: 0.7520
   - Training Log-Loss: 1.0202
   - Test Log-Loss: 1.0195

📋 Rule-Based Model Logic:
   - High FICO + High Income (weight: 0.3)
   - High Eligibility Score (weight: 0.25)
   - High Marketing Engagement (weight: 0.2)
   - Low Loan-to-Income Ratio (weight: 0.15)
   - Mobile App Usage (weight: 0.1)

🔮 Next Steps:
   1. Compare this baseline with transformer models in notebooks 01-02
   2. Analyze which business rules are most effective
   3. Consider additional rule refinements based on domain expertise
   4. Evaluate business impact and deployment considerations


In [10]:
# Save benchmark results
print("\n💾 Saving benchmark results...")

# Create benchmark results structure
benchmark_results = {
    'dataset_info': {
        'total_samples': len(df),
        'features': len(all_features),
        'conversion_rate': float(df[target].mean()),
        'train_samples': len(X_train),
        'test_samples': len(X_test)
    },
    'rule_based_model': {
        'train_log_loss': float(train_logloss),
        'test_log_loss': float(test_logloss),
        'train_auroc': float(train_auc),
        'test_auroc': float(test_auc),
        'model_type': 'Rule-Based',
        'rules': {
            'high_fico_high_income': 0.3,
            'high_eligibility': 0.25,
            'high_engagement': 0.2,
            'low_loan_to_income': 0.15,
            'mobile_app_user': 0.1
        }
    },
    'timestamp': datetime.now().isoformat()
}

# Save to JSON file
import json
results_file = os.path.join(OUTPUT_DIR, 'benchmark_results.json')
with open(results_file, 'w') as f:
    json.dump(benchmark_results, f, indent=2)

print(f"✅ Benchmark results saved to: {results_file}")

# Display final metrics summary
print("\n" + "=" * 60)
print("FINAL BENCHMARK METRICS")
print("=" * 60)
print(f"Rule-Based Model Performance:")
print(f"  - Test AUROC: {test_auc:.4f}")
print(f"  - Test Log-Loss: {test_logloss:.4f}")
print(f"  - Training AUROC: {train_auc:.4f}")
print(f"  - Training Log-Loss: {train_logloss:.4f}")
print("=" * 60)



💾 Saving benchmark results...
✅ Benchmark results saved to: ./benchmark_results/benchmark_results.json

FINAL BENCHMARK METRICS
Rule-Based Model Performance:
  - Test AUROC: 0.7520
  - Test Log-Loss: 1.0195
  - Training AUROC: 0.7480
  - Training Log-Loss: 1.0202


## Conclusion


In [11]:
print("=" * 80)
print("BENCHMARK NOTEBOOK COMPLETED SUCCESSFULLY! 🎉")
print("=" * 80)

print("\n📊 This rule-based baseline model provides a simple, interpretable benchmark")
print("for comparing against more complex transformer-based models.")
print("\n🔍 Key advantages of the rule-based approach:")
print("  - Highly interpretable business logic")
print("  - Fast training and inference")
print("  - Easy to modify and tune rules")
print("  - No complex feature preprocessing required")
print("  - Transparent decision-making process")

print("\n📈 Use these baseline metrics to evaluate whether more complex models")
print("provide sufficient performance improvements to justify their complexity.")

print(f"\n🎯 Baseline Performance to Beat:")
print(f"  - Test AUROC: {test_auc:.4f}")
print(f"  - Test Log-Loss: {test_logloss:.4f}")

print(f"\n📂 Results have been saved to: {OUTPUT_DIR}")
print("=" * 80)


BENCHMARK NOTEBOOK COMPLETED SUCCESSFULLY! 🎉

📊 This rule-based baseline model provides a simple, interpretable benchmark
for comparing against more complex transformer-based models.

🔍 Key advantages of the rule-based approach:
  - Highly interpretable business logic
  - Fast training and inference
  - Easy to modify and tune rules
  - No complex feature preprocessing required
  - Transparent decision-making process

📈 Use these baseline metrics to evaluate whether more complex models
provide sufficient performance improvements to justify their complexity.

🎯 Baseline Performance to Beat:
  - Test AUROC: 0.7520
  - Test Log-Loss: 1.0195

📂 Results have been saved to: ./benchmark_results/
