# 03 - Train Risk Model

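This notebook trains and evaluates models for identity risk scoring, spot-checks the resulting scorer on two hand-built login events, and verifies that the saved model can be reloaded from disk.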

## Models Trained
- **Isolation Forest**: Unsupervised anomaly detection
- **Random Forest**: Supervised classification (best performer)
- **Logistic Regression**: Interpretable baseline

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
from src.models.risk_model import train_and_evaluate, RiskScorer, FEATURE_COLUMNS

## Load Feature Data

In [None]:
# Read the feature table and report its size and the share of rows
# flagged in the `is_fraudulent` column.
features_path = '../data/features.parquet'
df = pd.read_parquet(features_path)

n_samples = len(df)
fraud_pct = df['is_fraudulent'].mean() * 100

print(f"Loaded {n_samples} samples")
print(f"Fraud rate: {fraud_pct:.1f}%")

## Train and Evaluate Models

In [None]:
# Run the project's training/evaluation routine on the feature table.
# It returns a scorer object plus evaluation metrics; the output path is
# presumably where the trained model is written (the same path is loaded
# again at the end of this notebook) -- confirm against train_and_evaluate.
model_output_path = '../models/risk_model.pkl'
scorer, metrics = train_and_evaluate(df, output_path=model_output_path)

## Test the Scorer

In [None]:
# Normal login: a hand-built feature record with no failed attempts,
# an existing device, MFA used and no VPN. Values are illustrative.
normal_login = dict(
    failed_logins_24h=0,
    login_count_7d=10,
    device_age_days=30,
    is_new_device=0,
    ip_reputation_score=0.0,
    hour_of_day=10,
    is_unusual_hour=0,
    location_changed=0,
    mfa_used=1,
    vpn_detected=0,
    success=1,
)

# Score the record with the freshly trained scorer and show the result.
normal_result = scorer.score(normal_login)
print("Normal login:", normal_result)

In [None]:
# Suspicious login: a hand-built feature record with repeated failures,
# a brand-new device, a changed location, no MFA and a VPN. Values are
# illustrative.
suspicious_login = dict(
    failed_logins_24h=3,
    login_count_7d=1,
    device_age_days=0,
    is_new_device=1,
    ip_reputation_score=0.9,
    hour_of_day=2,
    is_unusual_hour=1,
    location_changed=1,
    mfa_used=0,
    vpn_detected=1,
    success=0,
)

# Score the record with the freshly trained scorer and show the result.
suspicious_result = scorer.score(suspicious_login)
print("Suspicious login:", suspicious_result)

## Load and Test Saved Model

In [None]:
# Reload the scorer from the model file on disk and score the
# suspicious-login record (defined in an earlier cell) with it.
# NOTE(review): the .pkl extension suggests pickle -- only load model
# files from a trusted source.
saved_model_path = '../models/risk_model.pkl'
loaded_scorer = RiskScorer.load(saved_model_path)
print("Loaded model successfully")

reloaded_result = loaded_scorer.score(suspicious_login)
print("Test score:", reloaded_result)