# 02 - Feature Engineering

This notebook computes features for the identity risk scoring model.

## Features Computed
- **failed_logins_24h**: Count of failed logins in past 24 hours
- **login_count_7d**: Running count of user logins
- **device_age_days**: Days since device first seen
- **is_new_device**: Whether device is new for this user
- **ip_reputation_score**: Mock IP risk score (0-1)
- **hour_of_day**: Hour of login event
- **is_unusual_hour**: Whether the login occurred outside normal hours (normal hours are 6am-10pm)
- **location_changed**: Whether the location differs from the previous login

In [None]:
import sys
sys.path.insert(0, '..')

import pandas as pd
from src.features.feature_engineering import engineer_features

## Load Raw Data

In [None]:
# Read the raw login events from the parquet file, relative to this notebook.
logins_path = '../data/logins.parquet'
df = pd.read_parquet(logins_path)
print(f"Loaded {len(df)} events")
# Last expression of the cell: rendered as a preview of the first rows.
df.head()

## Compute Features

In [None]:
# Run the project's feature pipeline on the raw events.
# NOTE(review): presumably this also writes the result to `output_path` --
# the verification cell at the end reads that same file back; confirm in
# src/features/feature_engineering.py.
df_features = engineer_features(
    df,
    output_path='../data/features.parquet',
)

## Analyze Feature Correlations

In [None]:
# Columns treated as model inputs. This list is reused by the
# feature-distribution cell below, so keep the name stable.
feature_cols = [
    "failed_logins_24h",
    "login_count_7d",
    "device_age_days",
    "is_new_device",
    "ip_reputation_score",
    "hour_of_day",
    "is_unusual_hour",
    "location_changed",
    "mfa_used",
    "vpn_detected",
    "success",
]
target_col = "is_fraudulent"

print("Feature Correlation with Fraud:")
# Pairwise correlation matrix over features + label; only the label's
# column is of interest, minus its trivial self-correlation entry.
corr_matrix = df_features[feature_cols + [target_col]].corr()
correlations = corr_matrix[target_col].drop(target_col)
# Show the most positively correlated features first.
ranked = correlations.sort_values(ascending=False)
print(ranked.round(3))

## Feature Distributions

In [None]:
# Compare feature distributions between fraud and normal:
# per-class mean of every feature, one row per feature.
print("\nFeature Means by Fraud Status:")
class_means = df_features.groupby('is_fraudulent')[feature_cols].mean()
# Label each class column from its group key instead of by position, so a
# column can never be mislabelled and a dataset that contains only one class
# does not fail with a length-mismatch error on column assignment.
# The bool keys also match 0/1 labels (0 == False, 1 == True in Python).
# NOTE(review): assumes `is_fraudulent` is boolean or 0/1 -- confirm.
comparison = (
    class_means.T
    .rename(columns={False: 'Normal', True: 'Fraud'})
    .rename_axis(None, axis='columns')     # drop the 'is_fraudulent' header label
    .reindex(columns=['Normal', 'Fraud'])  # an absent class becomes a NaN column
)
# Positive Diff: the feature's mean is higher among fraudulent events.
comparison['Diff'] = comparison['Fraud'] - comparison['Normal']
print(comparison.round(3))

In [None]:
# Verify output: re-read the features file from disk and summarise it.
df_check = pd.read_parquet('../data/features.parquet')
n_rows, n_cols = df_check.shape
print(f"\nSaved {n_rows} rows with {n_cols} columns")

# Identifier, raw-context and label columns; everything else is reported
# as a feature column.
non_feature_cols = [
    'event_id', 'user_id', 'tenant_id', 'timestamp', 'ip', 'device_id',
    'location_country', 'location_city', 'is_fraudulent',
]
feature_names = [col for col in df_check.columns if col not in non_feature_cols]
print(f"Feature columns: {feature_names}")