In [18]:
"""
BantAI Travel-Aware Risk-Based Authentication System
Separate implementation for intelligent travel vs threat detection

Distinguishes between:
- Legitimate Filipino travelers (OFWs, tourists, business)
- Account compromise/cyber attacks

Features:
- Travel plausibility analysis
- Behavioral consistency scoring
- Impossible travel detection
- OFW-friendly risk assessment
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import math
from geopy.distance import geodesic
import warnings
import pickle
import json
warnings.filterwarnings('ignore')

class BantAI_TravelAware:
    """
    Travel-aware risk-based authentication for Filipino banking (streamlined version).

    Bundles a city-coordinate table, a set of travel risk zones and an optional
    pickled ML model. The intent is to tell apart cases such as:
    - Juan traveling to Dubai for work (legitimate)
    - Hacker accessing from Moscow (threat)
    """

    def __init__(self, cache_file=None, ml_model_path=None, geocode_delay=1.0):
        """
        Store the configuration and build the coordinate table and risk zones.

        Args:
            cache_file: Optional path of a JSON file holding cached coordinates.
            ml_model_path: Optional path of a pickled model read by load_model().
            geocode_delay: Stored on the instance; no method of this class reads it.
        """
        # Configuration, kept exactly as passed in.
        self.cache_file = cache_file
        self.ml_model_path = ml_model_path
        self.geocode_delay = geocode_delay

        self.user_profiles = {}  # user_id -> behavioral baseline
        self.model = None

        # A non-empty cache wins; otherwise use the built-in table and persist it.
        cached_coordinates = self._load_cached_locations()
        if cached_coordinates:
            self.location_coordinates = cached_coordinates
        else:
            self.location_coordinates = self._load_location_data()
            self._cache_locations()

        self.travel_risk_zones = self._define_travel_zones()
        self.max_travel_speed_kmh = 900  # Commercial aircraft speed

    def _load_cached_locations(self):
        """Return the decoded JSON cache, or None if no cache is configured or readable."""
        if not self.cache_file:
            return None
        try:
            with open(self.cache_file, 'r') as handle:
                cached = json.load(handle)
        except (FileNotFoundError, json.JSONDecodeError):
            return None
        return cached

    def _cache_locations(self):
        """Write the coordinate table to the cache file when both are set and non-empty."""
        if not (self.cache_file and self.location_coordinates):
            return
        with open(self.cache_file, 'w') as handle:
            json.dump(self.location_coordinates, handle)

    def _load_location_data(self):
        """
        Return the built-in mapping of city name -> (latitude, longitude)
        used for travel distance calculations.
        """
        philippine_cities = {
            'Manila': (14.5995, 120.9842),
            'Quezon City': (14.6760, 121.0437),
            'Makati': (14.5547, 121.0244),
            'Cebu City': (10.3157, 123.8854),
            'Davao City': (7.1907, 125.4553),
        }
        ofw_destinations = {
            'Dubai': (25.2048, 55.2708),
            'Abu Dhabi': (24.4539, 54.3773),
            'Riyadh': (24.7136, 46.6753),
            'Singapore': (1.3521, 103.8198),
            'Hong Kong': (22.3193, 114.1694),
        }
        high_risk_areas = {
            'Moscow': (55.7558, 37.6176),
            'Beijing': (39.9042, 116.4074),
            'Tehran': (35.6892, 51.3890),
        }
        # Merge in the same order the groups are declared above.
        return {**philippine_cities, **ofw_destinations, **high_risk_areas}

    def load_model(self):
        """Unpickle the ML model from ml_model_path into self.model; does nothing without a path."""
        if not self.ml_model_path:
            return
        try:
            with open(self.ml_model_path, 'rb') as handle:
                self.model = pickle.load(handle)
            print("✅ ML model loaded successfully")
        except Exception as e:
            # Any failure leaves the instance without a model.
            print(f"❌ Error loading ML model: {str(e)}")
            self.model = None

    def _define_travel_zones(self):
        """
        Return the risk zones: zone name -> locations, base risk and description.
        """
        def make_zone(locations, base_risk, description):
            return {
                'locations': locations,
                'base_risk': base_risk,
                'description': description,
            }

        return {
            'ofw_hubs': make_zone(
                ['Dubai', 'Abu Dhabi', 'Riyadh'], 0.2, 'Major OFW employment hubs'),
            'business_hubs': make_zone(
                ['Singapore', 'Hong Kong'], 0.3, 'Major business centers'),
            'high_risk': make_zone(
                ['Moscow', 'Beijing', 'Tehran'], 0.8, 'Known cybercrime hubs'),
            'philippines': make_zone(
                ['Manila', 'Quezon City', 'Makati', 'Cebu City', 'Davao City'],
                0.1, 'Domestic locations'),
        }

In [None]:
"""
Complete BantAI Travel-Aware Risk-Based Authentication System
Integrates the streamlined BantAI_TravelAware class with full ML functionality
"""

import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import math
from geopy.distance import geodesic
import warnings
import pickle
import json
import os
warnings.filterwarnings('ignore')

class BantAI_TravelAware:
    """
    Travel-Aware Risk-Based Authentication for Filipino Banking
    
    Smart enough to distinguish between:
    - Juan traveling to Dubai for work (legitimate)
    - Hacker accessing from Moscow (threat)
    """
    
    def __init__(self, cache_file=None, ml_model_path=None, geocode_delay=1.0):
        self.user_profiles = {}  # Store user behavioral baselines
        self.cache_file = cache_file
        self.ml_model_path = ml_model_path
        self.geocode_delay = geocode_delay
        
        # ML model components
        self.model = None
        self.scaler = None
        self.feature_columns = None
        self.model_info = {}
        
        # Try to load cached location data first
        self.location_coordinates = self._load_cached_locations()
        if not self.location_coordinates:
            self.location_coordinates = self._load_location_data()
            self._cache_locations()
            
        self.travel_risk_zones = self._define_travel_zones()
        self.max_travel_speed_kmh = 900  # Commercial aircraft speed
        
    def _load_cached_locations(self):
        """Try to load location data from cache file"""
        if self.cache_file:
            try:
                with open(self.cache_file, 'r') as f:
                    return json.load(f)
            except (FileNotFoundError, json.JSONDecodeError):
                return None
        return None
        
    def _cache_locations(self):
        """Save location data to cache file"""
        if self.cache_file and self.location_coordinates:
            with open(self.cache_file, 'w') as f:
                json.dump(self.location_coordinates, f)
    
    def _load_location_data(self):
        """Load comprehensive geographic coordinates for travel distance calculations"""
        return {
            # PHILIPPINES - Major Cities
            'Manila': (14.5995, 120.9842),
            'Quezon City': (14.6760, 121.0437),
            'Makati': (14.5547, 121.0244),
            'Taguig': (14.5176, 121.0509),
            'Pasig': (14.5764, 121.0851),
            'Cebu City': (10.3157, 123.8854),
            'Davao City': (7.1907, 125.4553),
            'Iloilo City': (10.7202, 122.5621),
            'Bacolod': (10.6740, 122.9540),
            
            # MIDDLE EAST - Major OFW Destinations
            'Dubai': (25.2048, 55.2708),
            'Abu Dhabi': (24.4539, 54.3773),
            'Sharjah': (25.3573, 55.4033),
            'Riyadh': (24.7136, 46.6753),
            'Jeddah': (21.4858, 39.1925),
            'Doha': (25.2854, 51.5310),
            'Kuwait City': (29.3117, 47.4818),
            'Manama': (26.2285, 50.5860),
            'Muscat': (23.5880, 58.3829),
            
            # ASIA PACIFIC - Business and Tourism
            'Singapore': (1.3521, 103.8198),
            'Hong Kong': (22.3193, 114.1694),
            'Tokyo': (35.6762, 139.6503),
            'Seoul': (37.5665, 126.9780),
            'Bangkok': (13.7563, 100.5018),
            'Kuala Lumpur': (3.1390, 101.6869),
            'Jakarta': (6.2088, 106.8456),
            
            # HIGH RISK LOCATIONS
            'Moscow': (55.7558, 37.6176),
            'St. Petersburg': (59.9311, 30.3609),
            'Beijing': (39.9042, 116.4074),
            'Shanghai': (31.2304, 121.4737),
            'Tehran': (35.6892, 51.3890),
            'Pyongyang': (39.0392, 125.7625),
            
            # NORTH AMERICA - Filipino Communities
            'Los Angeles': (34.0522, -118.2437),
            'San Francisco': (37.7749, -122.4194),
            'New York': (40.7128, -74.0060),
            'Toronto': (43.6532, -79.3832),
            'Vancouver': (49.2827, -123.1207),
            
            # OCEANIA
            'Sydney': (-33.8688, 151.2093),
            'Melbourne': (-37.8136, 144.9631),
        }
        
    def load_model(self):
        """Load the ML model from the specified path"""
        if not self.ml_model_path:
            print("No ML model path specified")
            return False
            
        if not os.path.exists(self.ml_model_path):
            print(f"ML model file not found: {self.ml_model_path}")
            return False
            
        try:
            with open(self.ml_model_path, 'rb') as f:
                model_data = pickle.load(f)
            
            # Extract components from the loaded data
            if isinstance(model_data, dict):
                self.model = model_data.get('model')
                self.scaler = model_data.get('scaler')
                self.feature_columns = model_data.get('feature_columns')
                self.model_info = model_data.get('model_info', {})
                
                print("✅ ML model loaded successfully")
                print(f"   Model type: {type(self.model)}")
                print(f"   Features: {self.feature_columns}")
                return True
            else:
                print("❌ Invalid model file format")
                return False
                
        except Exception as e:
            print(f"❌ Error loading ML model: {str(e)}")
            self.model = None
            return False


    def _define_travel_zones(self):
        """
        Define comprehensive risk zones for different types of travel destinations
        Based on Filipino travel patterns, OFW destinations, and threat intelligence
        """
        return {
            'ofw_hubs': {
                'locations': [
                    # Middle East - Major OFW employment destinations
                    'Dubai', 'Abu Dhabi', 'Sharjah', 'Ajman', 'Al Ain',
                    'Riyadh', 'Jeddah', 'Dammam', 'Mecca', 'Medina',
                    'Doha', 'Kuwait City', 'Manama', 'Muscat',
                    'Amman', 'Beirut', 'Baghdad', 'Tehran', 'Isfahan', 'Mashhad'
                ],
                'base_risk': 0.2,
                'description': 'Major OFW employment hubs in Middle East'
            },
            'business_hubs': {
                'locations': [
                    # Asia Pacific business centers
                    'Singapore', 'Hong Kong', 'Macau',
                    'Tokyo', 'Osaka', 'Nagoya', 'Kyoto', 'Yokohama',
                    'Seoul', 'Busan', 'Incheon',
                    'Bangkok', 'Phuket', 'Pattaya', 
                    'Kuala Lumpur', 'Johor Bahru', 'Penang',
                    'Jakarta', 'Bali', 'Surabaya',
                    'Ho Chi Minh City', 'Hanoi', 'Da Nang',
                    # Global financial centers
                    'London', 'Manchester', 'Birmingham', 'Edinburgh', 'Glasgow',
                    'Frankfurt', 'Zurich', 'Geneva', 'Amsterdam', 'Brussels'
                ],
                'base_risk': 0.25,
                'description': 'Regional and global business centers'
            },
            'diaspora_hubs': {
                'locations': [
                    # United States - Large Filipino communities
                    'Los Angeles', 'San Francisco', 'San Diego', 'San Jose',
                    'Las Vegas', 'Phoenix', 'Seattle', 'Portland', 'Sacramento', 'Fresno',
                    'New York', 'Jersey City', 'Philadelphia', 'Washington DC', 'Boston',
                    'Chicago', 'Detroit', 'Miami', 'Orlando', 'Tampa',
                    'Houston', 'Dallas', 'Austin', 'San Antonio', 'Denver', 'Atlanta',
                    'Honolulu', 'Anchorage',
                    # Canada - Filipino diaspora
                    'Toronto', 'Vancouver', 'Montreal', 'Calgary', 'Edmonton',
                    'Ottawa', 'Winnipeg', 'Quebec City', 'Hamilton',
                    # Australia/New Zealand - Filipino communities
                    'Sydney', 'Melbourne', 'Brisbane', 'Perth', 'Adelaide',
                    'Canberra', 'Gold Coast', 'Newcastle',
                    'Auckland', 'Wellington', 'Christchurch'
                ],
                'base_risk': 0.3,
                'description': 'Major Filipino diaspora communities'
            },
            'tourism_destinations': {
                'locations': [
                    # Europe - Common tourist destinations
                    'Paris', 'Lyon', 'Marseille', 'Rome', 'Milan', 'Naples', 'Venice',
                    'Madrid', 'Barcelona', 'Berlin', 'Munich', 'Vienna',
                    'Stockholm', 'Oslo', 'Copenhagen', 'Helsinki',
                    'Prague', 'Budapest', 'Warsaw', 'Athens', 'Istanbul', 'Ankara',
                    'Dublin',
                    # Other popular destinations
                    'Phnom Penh', 'Vientiane', 'Yangon', 'Colombo',
                    'Mumbai', 'Delhi', 'Bangalore', 'Chennai', 'Hyderabad', 'Kolkata', 'Pune', 'Ahmedabad'
                ],
                'base_risk': 0.35,
                'description': 'Popular tourist and cultural destinations'
            },
            'developing_markets': {
                'locations': [
                    # South/Southeast Asia
                    'Dhaka', 'Kathmandu', 'Karachi', 'Lahore', 'Islamabad', 'Kabul',
                    # Central Asia
                    'Tashkent', 'Almaty', 'Bishkek', 'Dushanbe',
                    # Africa
                    'Cairo', 'Alexandria', 'Cape Town', 'Johannesburg', 'Durban',
                    'Nairobi', 'Addis Ababa', 'Casablanca', 'Tunis', 'Algiers',
                    # South America
                    'São Paulo', 'Rio de Janeiro', 'Brasília', 'Salvador',
                    'Buenos Aires', 'Córdoba', 'Lima', 'Santiago', 'Quito', 'Montevideo'
                ],
                'base_risk': 0.45,
                'description': 'Developing markets with moderate risk'
            },
            'high_risk_regions': {
                'locations': [
                    # Africa - Higher risk areas
                    'Lagos', 'Abuja', 'Kano', 'Ibadan',
                    # South America - Crime hotspots
                    'Bogotá', 'Medellín', 'Caracas', 'La Paz'
                ],
                'base_risk': 0.65,
                'description': 'Higher risk regions with security concerns'
            },
            'cybercrime_hubs': {
                'locations': [
                    # Russia - Major cybercrime source
                    'Moscow', 'St. Petersburg', 'Novosibirsk', 'Yekaterinburg',
                    # China - State-sponsored threats
                    'Beijing', 'Shanghai', 'Shenzhen', 'Guangzhou', 
                    'Hangzhou', 'Chengdu',
                    # North Korea - State actors
                    'Pyongyang', 'Hamhung', 'Chongjin',
                    # Eastern Europe - Cybercrime centers
                    'Bucharest', 'Minsk', 'Kiev', 'Kharkiv',
                    'Chisinau', 'Tirana', 'Skopje', 'Sarajevo'
                ],
                'base_risk': 0.8,
                'description': 'Known cybercrime and state-sponsored threat locations'
            },
            'philippines_domestic': {
                'locations': [
                    # Metro Manila
                    'Manila', 'Quezon City', 'Makati', 'Taguig', 'Pasig',
                    'Mandaluyong', 'Marikina', 'Pasay', 'Parañaque',
                    'Las Piñas', 'Muntinlupa', 'Caloocan', 'Valenzuela',
                    'Malabon', 'Navotas',
                    # Luzon
                    'Baguio', 'Angeles', 'San Fernando', 'Dagupan',
                    'Cabanatuan', 'Olongapo', 'Batangas', 'Lipa',
                    'Lucena', 'Naga', 'Legazpi', 'Iloilo City',
                    'Vigan', 'Tuguegarao', 'Laoag',
                    # Visayas
                    'Cebu City', 'Mandaue', 'Lapu-Lapu', 
                    'Bacolod', 'Dumaguete', 'Tacloban', 'Ormoc',
                    'Tagbilaran', 'Roxas', 'Kalibo',
                    # Mindanao
                    'Davao City', 'Cagayan de Oro', 'Zamboanga', 'Butuan',
                    'Iligan', 'Cotabato', 'General Santos', 'Koronadal',
                    'Kidapawan', 'Dipolog', 'Pagadian', 'Marawi'
                ],
                'base_risk': 0.05,
                'description': 'Philippine domestic locations'
            }
        }
    
    def analyze_user_baseline(self, user_login_history):
        """Analyze user's normal behavior patterns from login history"""
        user_id = user_login_history[0]['user_id']
        
        # Extract behavioral patterns
        locations = [login['location'] for login in user_login_history]
        times = [datetime.strptime(login['timestamp'], '%Y-%m-%d %H:%M:%S') for login in user_login_history]
        devices = [login['device_type'] for login in user_login_history]
        countries = [login['country'] for login in user_login_history]
        
        # Calculate baseline patterns
        baseline = {
            'user_id': user_id,
            'home_locations': list(set(loc for loc, country in zip(locations, countries) if country == 'PH')),
            'common_devices': list(set(devices)),
            'typical_hours': [t.hour for t in times],
            'login_frequency': len(user_login_history),
            'countries_visited': list(set(countries)),
            'last_known_location': locations[-1],
            'last_login_time': times[-1],
            'travel_history': self._extract_travel_history(user_login_history)
        }
        
        # Store user profile
        self.user_profiles[user_id] = baseline
        
        print(f"Baseline established for User {user_id}")
        print(f"   Home locations: {baseline['home_locations']}")
        print(f"   Countries visited: {baseline['countries_visited']}")
        print(f"   Common devices: {baseline['common_devices']}")
        
        return baseline
    
    def _extract_travel_history(self, login_history):
        """Extract travel patterns from login history"""
        travels = []
        
        for i in range(1, len(login_history)):
            prev_login = login_history[i-1]
            curr_login = login_history[i]
            
            if prev_login['country'] != curr_login['country']:
                travel = {
                    'from_location': prev_login['location'],
                    'to_location': curr_login['location'],
                    'from_country': prev_login['country'],
                    'to_country': curr_login['country'],
                    'time_gap': (datetime.strptime(curr_login['timestamp'], '%Y-%m-%d %H:%M:%S') - 
                               datetime.strptime(prev_login['timestamp'], '%Y-%m-%d %H:%M:%S')).total_seconds() / 3600,
                    'distance_km': self._calculate_distance(prev_login['location'], curr_login['location'])
                }
                travels.append(travel)
        
        return travels
    
    def _calculate_distance(self, location1, location2):
        """Calculate distance between two locations"""
        if location1 in self.location_coordinates and location2 in self.location_coordinates:
            coord1 = self.location_coordinates[location1]
            coord2 = self.location_coordinates[location2]
            return geodesic(coord1, coord2).kilometers
        return 0
    
    def _get_location_zone(self, location):
        """Determine which risk zone a location belongs to"""
        for zone_name, zone_data in self.travel_risk_zones.items():
            if location in zone_data['locations']:
                return zone_name, zone_data['base_risk']
        return 'unknown', 0.5
    
    def analyze_travel_plausibility(self, user_id, new_login):
        """Analyze if travel to new location is physically plausible"""
        if user_id not in self.user_profiles:
            return {
                'plausible': False,
                'reason': 'No user baseline established',
                'risk_modifier': 0.5
            }
        
        profile = self.user_profiles[user_id]
        last_location = profile['last_known_location']
        last_time = profile['last_login_time']
        
        new_location = new_login['location']
        new_time = datetime.strptime(new_login['timestamp'], '%Y-%m-%d %H:%M:%S')
        
        # Calculate travel requirements
        distance_km = self._calculate_distance(last_location, new_location)
        time_gap_hours = (new_time - last_time).total_seconds() / 3600
        
        if distance_km == 0:  # Same location or unknown coordinates
            return {
                'plausible': True,
                'reason': 'Same location or local area',
                'risk_modifier': 0.0,
                'distance_km': distance_km,
                'time_gap_hours': time_gap_hours
            }
        
        # Calculate minimum travel time
        min_travel_time_hours = distance_km / self.max_travel_speed_kmh
        buffer_time_hours = 4  # Airport procedures, layovers, etc.
        required_time_hours = min_travel_time_hours + buffer_time_hours
        
        # Check plausibility
        if time_gap_hours >= required_time_hours:
            return {
                'plausible': True,
                'reason': f'Sufficient time for travel ({time_gap_hours:.1f}h vs {required_time_hours:.1f}h required)',
                'risk_modifier': 0.0,
                'distance_km': distance_km,
                'time_gap_hours': time_gap_hours,
                'required_time_hours': required_time_hours
            }
        else:
            return {
                'plausible': False,
                'reason': f'Impossible travel: {distance_km:.0f}km in {time_gap_hours:.1f}h (need {required_time_hours:.1f}h)',
                'risk_modifier': 0.8,
                'distance_km': distance_km,
                'time_gap_hours': time_gap_hours,
                'required_time_hours': required_time_hours
            }
    
    def analyze_behavioral_consistency(self, user_id, new_login):
        """Check if user behavior remains consistent despite location change"""
        if user_id not in self.user_profiles:
            return {'consistency_score': 0.5, 'factors': ['No baseline']}
        
        profile = self.user_profiles[user_id]
        consistency_factors = []
        consistency_score = 1.0
        
        # Device consistency
        if new_login['device_type'] in profile['common_devices']:
            consistency_factors.append('Known device type')
        else:
            consistency_score -= 0.3
            consistency_factors.append('New device type')
        
        # Time pattern consistency
        new_hour = datetime.strptime(new_login['timestamp'], '%Y-%m-%d %H:%M:%S').hour
        if new_hour in profile['typical_hours'] or abs(new_hour - np.mean(profile['typical_hours'])) <= 3:
            consistency_factors.append('Consistent login time')
        else:
            consistency_score -= 0.2
            consistency_factors.append('Unusual login time')
        
        # Previous travel history
        if new_login['country'] in profile['countries_visited']:
            consistency_factors.append('Previously visited country')
            consistency_score += 0.1
        else:
            consistency_factors.append('First visit to country')
        
        return {
            'consistency_score': max(0, min(1, consistency_score)),
            'factors': consistency_factors
        }
    
    def ml_risk_prediction(self, last_login, new_login):
        """Calculate ML-based risk score for login pair"""
        if self.model is None or self.scaler is None:
            return 0.5  # Default risk if no model
        
        try:
            # Create feature vector
            features = self._create_feature_vector(last_login, new_login)
            
            # Scale features
            features_scaled = self.scaler.transform([features])
            
            # Get prediction
            risk_prob = self.model.predict_proba(features_scaled)[0][1]
            return risk_prob
            
        except Exception as e:
            print(f"ML prediction error: {e}")
            return 0.5
    
    def _create_feature_vector(self, last_login, new_login):
        """Create feature vector for ML model"""
        features = []
        
        # Time difference
        if isinstance(last_login.get('timestamp'), str):
            last_time = datetime.strptime(last_login['timestamp'], '%Y-%m-%d %H:%M:%S')
        else:
            last_time = last_login.get('timestamp', datetime.now())
        
        new_time = datetime.strptime(new_login['timestamp'], '%Y-%m-%d %H:%M:%S')
        time_diff = (new_time - last_time).total_seconds() / 3600
        features.append(time_diff)
        
        # Distance
        distance = self._calculate_distance(
            last_login.get('location', 'Manila'),
            new_login.get('location', 'Manila')
        )
        features.append(distance)
        
        # Device type (encoded)
        device_encoding = {'mobile': 0, 'desktop': 1, 'tablet': 2}
        features.append(device_encoding.get(new_login.get('device_type', 'mobile'), 0))
        
        # Technical indicators
        features.append(1 if new_login.get('is_attack_ip', False) else 0)
        features.append(1 if new_login.get('login_successful', True) else 0)
        features.append(new_login.get('latency', 100))
        
        return features
    
    def calculate_travel_aware_risk(self, user_id, new_login):
        """Calculate comprehensive travel-aware risk score"""
        risk_components = {}
        
        # 1. Location zone risk
        zone, base_location_risk = self._get_location_zone(new_login['location'])
        risk_components['location_zone'] = base_location_risk
        
        # 2. Travel plausibility
        travel_analysis = self.analyze_travel_plausibility(user_id, new_login)
        risk_components['travel_plausibility'] = travel_analysis['risk_modifier']
        
        # 3. Behavioral consistency
        behavior_analysis = self.analyze_behavioral_consistency(user_id, new_login)
        behavior_risk = 1 - behavior_analysis['consistency_score']
        risk_components['behavioral_inconsistency'] = behavior_risk
        
        # 4. Technical indicators
        technical_risk = 0.0
        technical_factors = []
        
        if new_login.get('is_attack_ip', False):
            technical_risk += 0.4
            technical_factors.append('Known attack IP')
        
        if new_login.get('high_latency', False):
            technical_risk += 0.2
            technical_factors.append('High network latency')
        
        if not new_login.get('login_successful', True):
            technical_risk += 0.3
            technical_factors.append('Failed login attempt')
        
        risk_components['technical_indicators'] = technical_risk
        
        # 5. ML Model prediction
        ml_risk = 0.5
        if self.model is not None and user_id in self.user_profiles:
            try:
                last_login_data = {
                    'timestamp': self.user_profiles[user_id]['last_login_time'].strftime('%Y-%m-%d %H:%M:%S'),
                    'location': self.user_profiles[user_id]['last_known_location'],
                }
                ml_risk = self.ml_risk_prediction(last_login_data, new_login)
            except Exception as e:
                print(f"ML prediction failed: {e}")
        
        risk_components['ml_prediction'] = ml_risk
        
        # Calculate weighted final risk score
        weights = {
            'location_zone': 0.2,
            'travel_plausibility': 0.25,
            'behavioral_inconsistency': 0.15,
            'technical_indicators': 0.15,
            'ml_prediction': 0.25
        }
        
        final_risk = sum(risk_components[component] * weights[component] 
                        for component in risk_components)
        final_risk = min(1.0, final_risk)
        
        return {
            'final_risk_score': final_risk,
            'risk_components': risk_components,
            'travel_analysis': travel_analysis,
            'behavior_analysis': behavior_analysis,
            'location_zone': zone,
            'technical_factors': technical_factors
        }
    
    def generate_travel_aware_explanation(self, risk_analysis):
        """Generate human-readable explanation for the risk decision"""
        risk_score = risk_analysis['final_risk_score']
        travel_info = risk_analysis['travel_analysis']
        behavior_info = risk_analysis['behavior_analysis']
        
        # Determine risk level
        if risk_score < 0.3:
            risk_level = "LOW"
            action = "ALLOW"
        elif risk_score < 0.6:
            risk_level = "MEDIUM"
            action = "ALLOW_WITH_OTP"
        else:
            risk_level = "HIGH"
            action = "BLOCK" if not travel_info['plausible'] else "STRICT_VERIFICATION"
        
        # Build explanation factors
        explanation_factors = []
        
        # Travel plausibility
        if travel_info['plausible']:
            explanation_factors.append(f"Travel is plausible ({travel_info['reason']})")
        else:
            explanation_factors.append(f"⚠ {travel_info['reason']}")
        
        # Behavioral consistency
        consistency_pct = behavior_info['consistency_score'] * 100
        explanation_factors.append(f"Behavior consistency: {consistency_pct:.0f}%")
        
        # Location zone
        zone = risk_analysis['location_zone']
        zone_info = self.travel_risk_zones.get(zone, {})
        explanation_factors.append(f"Location: {zone_info.get('description', 'Unknown zone')}")
        
        # Technical factors
        if risk_analysis['technical_factors']:
            explanation_factors.extend([f"⚠ {factor}" for factor in risk_analysis['technical_factors']])
        
        return {
            'risk_score': risk_score,
            'risk_level': risk_level,
            'action': action,
            'explanation_factors': explanation_factors,
            'travel_plausible': travel_info['plausible'],
            'behavior_consistent': behavior_info['consistency_score'] > 0.7,
            'recommendation': self._get_recommendation(risk_score, travel_info, behavior_info)
        }
    
    def _get_recommendation(self, risk_score, travel_info, behavior_info):
        """Generate specific recommendations based on analysis"""
        if not travel_info['plausible']:
            return "BLOCK: Impossible travel detected. Manual review required."
        
        if risk_score < 0.3 and behavior_info['consistency_score'] > 0.8:
            return "ALLOW: Legitimate travel with consistent behavior."
        
        if risk_score < 0.6 and travel_info['plausible']:
            return "ALLOW with SMS OTP: Possible legitimate travel, verify with additional authentication."
        
        return "STRICT VERIFICATION: High-risk login requiring manual review and multiple authentication factors."



# Entry point for this cell: runs the end-to-end demo and keeps the returned
# object in `system`.
# NOTE(review): test_complete_system() is defined in a later cell of this
# notebook, so on a fresh kernel this cell presumably raises NameError unless
# that cell has been executed first - consider moving the definition above.
if __name__ == "__main__":
    system = test_complete_system()

Testing Complete BantAI Travel-Aware System
✅ ML model loaded successfully
   Model type: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
   Features: ['time_diff', 'distance', 'device_type', 'is_attack_ip', 'login_successful', 'latency']

Establishing user baseline...
Baseline established for User juan_dela_cruz_123
   Home locations: ['Makati', 'Manila']
   Countries visited: ['PH']
   Common devices: ['mobile']

Analyzing test scenarios...

OFW Travel to Dubai
------------------------------
Risk Score: 0.059 (LOW)
Action: ALLOW
Recommendation: ALLOW: Legitimate travel with consistent behavior.
Analysis factors:
  - Travel is plausible (Same location or local area)
  - Behavior consistency: 100%
  - Location: Major OFW employment hubs in Middle East

Impossible Travel Attack
------------------------------
Risk Score: 0.510 (MEDIUM)
Action: ALLOW_WITH_OTP
Recommendation: ALLOW with SMS OTP: Possible legitimate travel, verify with additional authentication.
Analysis factors:


In [20]:
# Test the complete system
def test_complete_system():
    """
    Run the end-to-end BantAI demo and print a report for each scenario.

    Builds a BantAI_TravelAware instance, calls load_model(), passes two
    Philippine logins to analyze_user_baseline(), then scores two follow-up
    logins (Dubai and Moscow) and prints the generated explanation for each.

    Returns:
        The BantAI_TravelAware instance that was exercised.
    """
    print("Testing Complete BantAI Travel-Aware System")
    print("=" * 50)

    engine = BantAI_TravelAware(
        cache_file="geocache.json",
        ml_model_path="bantai_model.pkl",
        geocode_delay=1.0,
    )
    # The return value of load_model() is not used by this demo.
    engine.load_model()

    user_id = "juan_dela_cruz_123"

    def make_login(timestamp, location, country, device_type, login_successful, **extra):
        """Build one login record for `user_id`; extra keys are appended in call order."""
        record = {
            'user_id': user_id,
            'timestamp': timestamp,
            'location': location,
            'country': country,
            'device_type': device_type,
            'login_successful': login_successful,
        }
        record.update(extra)
        return record

    # Two domestic logins that form the behavioural baseline.
    juan_history = [
        make_login('2024-01-01 09:00:00', 'Manila', 'PH', 'mobile', True),
        make_login('2024-01-03 14:30:00', 'Makati', 'PH', 'mobile', True),
    ]

    print("\nEstablishing user baseline...")
    engine.analyze_user_baseline(juan_history)

    # Follow-up logins to score against the baseline.
    test_scenarios = [
        {
            'name': 'OFW Travel to Dubai',
            'login': make_login(
                '2024-01-15 10:00:00', 'Dubai', 'AE', 'mobile', True,
                is_attack_ip=False, latency=120,
            ),
        },
        {
            'name': 'Impossible Travel Attack',
            'login': make_login(
                '2024-01-15 11:00:00', 'Moscow', 'RU', 'desktop', False,
                is_attack_ip=True, latency=300,
            ),
        },
    ]

    print("\nAnalyzing test scenarios...")
    for scenario in test_scenarios:
        print(f"\n{scenario['name']}")
        print("-" * 30)

        # Score the login, then turn the raw analysis into a readable report.
        explanation = engine.generate_travel_aware_explanation(
            engine.calculate_travel_aware_risk(user_id, scenario['login'])
        )

        print(f"Risk Score: {explanation['risk_score']:.3f} ({explanation['risk_level']})")
        print(f"Action: {explanation['action']}")
        print(f"Recommendation: {explanation['recommendation']}")

        print("Analysis factors:")
        for factor in explanation['explanation_factors']:
            print(f"  - {factor}")

    return engine


In [None]:
# Create a system instance at notebook scope, using the same constructor
# arguments that test_complete_system passes.
bantai_system = BantAI_TravelAware(
    cache_file="geocache.json",
    ml_model_path="bantai_model.pkl",
    geocode_delay=1.0
)

bantai_system.load_model() # result is shown as the cell output; presumably True when the model file exists and loads (True in the recorded run)

✅ ML model loaded successfully
   Model type: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
   Features: ['time_diff', 'distance', 'device_type', 'is_attack_ip', 'login_successful', 'latency']


True

In [23]:
# Run the end-to-end demo. The call is the cell's last expression, so the returned
# BantAI_TravelAware instance is echoed after the printed report.
test_complete_system()

Testing Complete BantAI Travel-Aware System
✅ ML model loaded successfully
   Model type: <class 'sklearn.ensemble._forest.RandomForestClassifier'>
   Features: ['time_diff', 'distance', 'device_type', 'is_attack_ip', 'login_successful', 'latency']

Establishing user baseline...
Baseline established for User juan_dela_cruz_123
   Home locations: ['Makati', 'Manila']
   Countries visited: ['PH']
   Common devices: ['mobile']

Analyzing test scenarios...

OFW Travel to Dubai
------------------------------
Risk Score: 0.059 (LOW)
Action: ALLOW
Recommendation: ALLOW: Legitimate travel with consistent behavior.
Analysis factors:
  - Travel is plausible (Same location or local area)
  - Behavior consistency: 100%
  - Location: Major OFW employment hubs in Middle East

Impossible Travel Attack
------------------------------
Risk Score: 0.510 (MEDIUM)
Action: ALLOW_WITH_OTP
Recommendation: ALLOW with SMS OTP: Possible legitimate travel, verify with additional authentication.
Analysis factors:


<__main__.BantAI_TravelAware at 0x1cc5a85e020>

# TODO: add a behavior-analysis feature that runs when the user logs in