# In [None]:
# ===================================================================
# PHASE 4: SMART JOB PRIORITIZATION ENGINE
# Production-Level Code with ML-based Scoring and User Customization
# ===================================================================
# Purpose: Automatically prioritize job postings based on user profile
#          and preferences with optional user customization
# ===================================================================

from __future__ import annotations
import os
import json
import logging
import sys
from datetime import datetime, timedelta
from typing import List, Dict, Optional, Tuple, Set
from dataclasses import dataclass, field, asdict
from collections import defaultdict
import pandas as pd
import numpy as np
import re

# Import config loader
# config_loader.py is optional. Start from the assumption that it is present
# and clear the flag if the import fails, so the rest of the module can fall
# back to its built-in defaults.
CONFIG_LOADER_AVAILABLE = True
try:
    from config_loader import (
        load_config,
        create_user_profile_from_config,
        create_weights_from_config,
        get_input_output_paths,
        get_incremental_processing_config,
        get_company_reputation_config,
        get_skills_scoring_config,
        get_location_scoring_config,
        get_salary_scoring_config,
        get_deadline_urgency_config,
        get_logging_config,
    )
except ImportError:
    CONFIG_LOADER_AVAILABLE = False
    print("Warning: config_loader.py not found. Using defaults.")

# ===================================================================
# GLOBAL CONFIG INITIALIZATION
# ===================================================================

# Read every configuration section once, at import time. CONFIG_LOADED only
# becomes True when the loader is importable AND all sections were read
# without raising; otherwise the defaults defined below take over.
CONFIG_LOADED = False
if CONFIG_LOADER_AVAILABLE:
    try:
        config = load_config("config.json")
        user_profile_data = create_user_profile_from_config(config)
        weights_data = create_weights_from_config(config)
        paths_config = get_input_output_paths(config)
        incremental_config = get_incremental_processing_config(config)
        company_rep_config = get_company_reputation_config(config)
        skills_config = get_skills_scoring_config(config)
        location_config = get_location_scoring_config(config)
        salary_config = get_salary_scoring_config(config)
        deadline_config = get_deadline_urgency_config(config)
        logging_config = get_logging_config(config)
    except Exception as load_error:
        # Any failure (missing file, bad JSON, missing section) is reported
        # and leaves CONFIG_LOADED False.
        print(f"Warning: Could not load config.json: {load_error}")
    else:
        CONFIG_LOADED = True

# Default configuration if config not loaded
# Built-in fallback configuration, used whenever config.json could not be
# loaded. Every name bound on the successful-load path is bound here too, so
# code further down can rely on all of them existing.
if not CONFIG_LOADED:
    config = {}
    # Sample candidate used when no real profile is configured.
    user_profile_data = {
        'user_id': 'USER_001',
        'name': 'Sample User',
        'skills': ['python', 'machine learning', 'sql'],
        'primary_skills': ['python', 'machine learning'],
        'education': ['B.Tech'],
        'experience_years': 0.0,
        'preferred_locations': ['Bangalore', 'Remote'],
        'preferred_companies': [],
        'preferred_work_mode': 'Remote',
        'min_expected_salary': 300000,
        'max_expected_salary': 800000,
        'must_have_skills': ['python'],
        'avoid_keywords': []
    }
    # Per-component weights; keys match the PrioritizationWeights fields.
    # NOTE: these values add up to 1.10, not 1.0.
    weights_data = {
        'skills_match_weight': 0.30,
        'experience_match_weight': 0.05,
        'education_match_weight': 0.05,
        'location_match_weight': 0.15,
        'completeness_weight': 0.05,
        'salary_competitiveness_weight': 0.15,
        'company_reputation_weight': 0.10,
        'deadline_urgency_weight': 0.10,
        'posting_freshness_weight': 0.05,
        'preference_bonus_weight': 0.10
    }
    # Input/output locations; relative paths are resolved against the current
    # working directory by main_with_config().
    paths_config = {
        'input_file': '../Phase 3/structured_job_postings.json',
        'output_csv': 'prioritized_jobs.csv',
        'top_recommendations_csv': 'top_recommendations.csv',
        'top_n': 20
    }
    # Incremental processing is off by default.
    incremental_config = {
        'enabled': False,
        'state_directory': 'state',
        'state_file': 'prioritized_job_ids.txt',
        'force_full_reprocess': False
    }
    # Company tiers and the score awarded for each tier.
    company_rep_config = {
        'tier_scores': {'faang': 1.0, 'unicorn': 0.95, 'mnc': 0.85, 'unknown': 0.5},
        'faang_companies': {'google', 'microsoft', 'amazon', 'apple', 'meta', 'facebook', 'netflix'},
        'unicorn_companies': {'flipkart', 'swiggy', 'zomato', 'paytm'},
        'mnc_companies': {'ibm', 'oracle', 'sap', 'cisco', 'intel', 'nvidia', 'adobe'}
    }
    # No built-in skills-scoring defaults exist; an empty dict keeps the name
    # bound on the fallback path, as it is when config.json loads.
    skills_config = {}
    # Location scores; city names are stored lower-case.
    location_config = {
        'exact_match_score': 1.0,
        'remote_score': 1.0,
        'tier1_cities_score': 0.8,
        'tier2_cities_score': 0.6,
        'other_cities_score': 0.4,
        'tier1_cities': {'bangalore', 'mumbai', 'delhi', 'hyderabad', 'pune', 'chennai'},
        'tier2_cities': {'kolkata', 'ahmedabad', 'gurgaon', 'noida'}
    }
    # Salary thresholds, expressed in LPA.
    salary_config = {
        'ideal_salary_lpa': 8.0,
        'min_acceptable_lpa': 3.0,
        'max_expected_lpa': 15.0,
        'below_min_penalty': 0.5,
        'above_max_bonus': 0.2,
        'missing_salary_score': 0.5
    }
    # Deadline buckets (in days) and the score for each bucket.
    deadline_config = {
        'days_thresholds': {'urgent': 3, 'soon': 7, 'moderate': 14, 'relaxed': 30},
        'urgency_scores': {'urgent': 1.0, 'soon': 0.9, 'moderate': 0.7, 'relaxed': 0.5, 'no_deadline': 0.3},
        'expired_penalty': 0.0
    }
    logging_config = {'level': 'INFO', 'file': 'job_prioritization.log'}

# ===================================================================
# LOGGING CONFIGURATION
# ===================================================================

# Resolve the configured level name defensively. A lower-case name such as
# 'info' would otherwise resolve to the function logging.info, and an unknown
# name would raise AttributeError; both now fall back to INFO.
_log_level_name = str(logging_config['level']).upper()
_log_level = getattr(logging, _log_level_name, None)
if not isinstance(_log_level, int):
    _log_level = logging.INFO

# Log to the configured file and to the console with one shared format.
logging.basicConfig(
    level=_log_level,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler(logging_config['file'], encoding='utf-8'),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger("JobPrioritization")

# ===================================================================
# USER PROFILE DATA CLASS
# ===================================================================

@dataclass
class UserProfile:
    """Candidate description consumed by the job scorers.

    Only ``user_id`` and ``name`` are required. List-valued fields default to
    fresh empty lists, and the salary bounds default to "no constraint"
    (``0.0`` up to infinity).
    """

    # Identity
    user_id: str
    name: str
    # Background
    skills: List[str] = field(default_factory=list)
    primary_skills: List[str] = field(default_factory=list)
    education: List[str] = field(default_factory=list)
    experience_years: float = 0.0
    # Preferences
    preferred_locations: List[str] = field(default_factory=list)
    preferred_companies: List[str] = field(default_factory=list)
    preferred_work_mode: str = "Any"
    min_expected_salary: float = 0.0
    max_expected_salary: float = float('inf')
    # Filters
    must_have_skills: List[str] = field(default_factory=list)
    avoid_keywords: List[str] = field(default_factory=list)

    @classmethod
    def from_config(cls, config: Dict) -> 'UserProfile':
        """Build a profile from ``config``.

        When the config loader is importable, its
        ``create_user_profile_from_config`` helper supplies the field values;
        otherwise the module-level ``user_profile_data`` is used.
        """
        source = (
            create_user_profile_from_config(config)
            if CONFIG_LOADER_AVAILABLE
            else user_profile_data
        )
        return cls(**source)

    @classmethod
    def create_sample_profile(cls) -> 'UserProfile':
        """Build a profile from the module-level ``user_profile_data``."""
        sample_fields = user_profile_data
        return cls(**sample_fields)

# ===================================================================
# PRIORITIZATION WEIGHTS CONFIGURATION
# ===================================================================

@dataclass
class PrioritizationWeights:
    """Weights applied to each scoring component when ranking a job.

    Each field multiplies the matching 0-1 component score. The defaults add
    up to 1.10 rather than 1.0, so a job that scores 1.0 on every component
    ends up above 100 once the weighted total is scaled by 100.
    """

    skills_match_weight: float = 0.30
    experience_match_weight: float = 0.05
    education_match_weight: float = 0.05
    location_match_weight: float = 0.15
    completeness_weight: float = 0.05
    salary_competitiveness_weight: float = 0.15
    company_reputation_weight: float = 0.10
    deadline_urgency_weight: float = 0.10
    posting_freshness_weight: float = 0.05
    preference_bonus_weight: float = 0.10

    @classmethod
    def from_config(cls, config: Dict) -> 'PrioritizationWeights':
        """Create weights from a config dictionary.

        Mirrors ``UserProfile.from_config``: when the config loader is
        importable and ``config`` is non-empty, the weights are derived from
        the ``config`` that was passed in. Otherwise the module-level
        ``weights_data`` is used, which was the only source before.

        Args:
            config: Parsed configuration dictionary; may be empty.

        Returns:
            A new ``PrioritizationWeights`` instance.
        """
        if CONFIG_LOADER_AVAILABLE and config:
            return cls(**create_weights_from_config(config))
        return cls(**weights_data)

    @classmethod
    def get_default(cls) -> 'PrioritizationWeights':
        """Return an instance carrying the field defaults declared above."""
        return cls()

    def to_dict(self) -> Dict[str, float]:
        """Return the weights as a ``{field_name: value}`` dictionary."""
        return asdict(self)

# ===================================================================
# INCREMENTAL PROCESSING UTILITIES
# ===================================================================

def load_processed_job_ids(state_file: str) -> set:
    """Return the set of job IDs recorded in ``state_file``.

    The file holds one ID per line. Surrounding whitespace is stripped and
    blank lines are skipped. A path that does not exist yields an empty set.
    """
    if os.path.exists(state_file):
        with open(state_file, 'r', encoding='utf-8') as handle:
            stripped = (raw.strip() for raw in handle)
            return {job_id for job_id in stripped if job_id}
    return set()

def save_processed_job_ids(state_file: str, job_ids: set):
    """Write the processed job IDs to ``state_file``, one per line.

    IDs are converted to text, de-duplicated and written in sorted order. The
    file is overwritten if it already exists.

    Args:
        state_file: Destination path. Missing parent directories are created;
            a bare filename (no directory part) is written to the current
            working directory.
        job_ids: IDs to persist. Values of mixed types are accepted because
            each one is stringified before sorting.
    """
    parent_dir = os.path.dirname(state_file)
    # os.makedirs('') raises, so only create directories when there is one.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Stringify first: sorting a mix of ints and strs would raise TypeError.
    ordered_ids = sorted({str(job_id) for job_id in job_ids})

    with open(state_file, 'w', encoding='utf-8') as f:
        f.writelines(f"{job_id}\n" for job_id in ordered_ids)

def get_new_jobs_to_process(jobs_df: pd.DataFrame, state_file: str,
                            force_reprocess: bool = False) -> Tuple[pd.DataFrame, set]:
    """
    Filter jobs down to the ones not yet recorded in the state file.

    Args:
        jobs_df: DataFrame with all jobs
        state_file: Path to the state file holding processed job IDs
        force_reprocess: If True, process all jobs

    Returns:
        Tuple of (new_jobs_df, processed_ids). ``processed_ids`` is an empty
        set whenever all jobs are returned (forced reprocess, no previous
        state, or no ``job_id`` column).
    """
    if force_reprocess:
        logger.info("Force reprocess enabled - processing all jobs")
        return jobs_df, set()

    processed_ids = load_processed_job_ids(state_file)

    if not processed_ids:
        logger.info("No previous state found - processing all jobs")
        return jobs_df, set()

    # Without an ID column there is nothing to compare against.
    if 'job_id' not in jobs_df.columns:
        logger.warning("No job_id column found - processing all jobs")
        return jobs_df, set()

    # IDs read back from the state file are always strings. Compare on the
    # string form so numeric job_id columns are matched too; otherwise every
    # job would look new on each run.
    already_processed = jobs_df['job_id'].astype(str).isin(processed_ids)
    new_jobs = jobs_df[~already_processed]

    logger.info(f"Total jobs: {len(jobs_df)}")
    logger.info(f"Already processed: {len(processed_ids)}")
    logger.info(f"New jobs to process: {len(new_jobs)}")

    return new_jobs, processed_ids

# ===================================================================
# JOB SCORER BASE CLASS
# ===================================================================

class JobScorer:
    """Base class for calculating individual scoring components.

    Every ``calculate_*`` method takes one job row (a ``pd.Series``) and
    returns a float between 0.0 and 1.0. Scoring is best-effort: if a
    component cannot be computed, the failure is logged and a neutral
    fallback score is returned instead of raising.
    """

    # Text forms that stand for "no value" once a cell has been stringified.
    _BLANK_TEXT = frozenset({'', 'nan', 'none'})

    def __init__(self, user_profile: UserProfile):
        """Initialize scorer with user profile."""
        self.user = user_profile
        self.logger = logging.getLogger("JobScorer")

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True for None, NaN/NaT and empty containers."""
        if value is None:
            return True
        if isinstance(value, (list, tuple, set, dict)):
            return len(value) == 0
        try:
            return bool(pd.isna(value))
        except (TypeError, ValueError):
            # Array-like values have no single truth value; treat as present.
            return False

    @classmethod
    def _text(cls, job: pd.Series, key: str) -> str:
        """Return the field as stripped text, or '' when it holds no value."""
        value = job.get(key, '')
        if cls._is_missing(value):
            return ''
        text = str(value).strip()
        return '' if text.lower() in cls._BLANK_TEXT else text

    def calculate_skills_match(self, job: pd.Series) -> float:
        """Return the fraction of the user's primary skills found in the job.

        Matching is a case-insensitive substring search over the job's
        ``skills`` field. Returns 0.0 when the job lists no skills and 0.5
        when the user has no primary skills.
        """
        try:
            job_skills = self._text(job, 'skills').lower()
            if not job_skills:
                return 0.0

            wanted = self.user.primary_skills
            if not wanted:
                return 0.5

            matches = sum(1 for skill in wanted if skill.lower() in job_skills)
            return min(matches / len(wanted), 1.0)
        except Exception as exc:
            self.logger.warning("Skills match failed, scoring 0.0: %s", exc)
            return 0.0

    def calculate_experience_match(self, job: pd.Series) -> float:
        """Score the user's experience against ``experience_required``.

        Fresher/entry roles score 1.0 for users with at most 2 years of
        experience and 0.8 otherwise. Otherwise the first number in the text
        is taken as the required years: 1.0 if met, 0.8 if the user has at
        least 70% of it, else 0.5. Returns 0.7 when no requirement is found.
        """
        try:
            exp_text = self._text(job, 'experience_required').lower()

            if 'fresher' in exp_text or 'entry' in exp_text:
                return 1.0 if self.user.experience_years <= 2 else 0.8

            # Accept decimals too ("1.5 years"), not just whole numbers.
            numbers = re.findall(r'\d+(?:\.\d+)?', exp_text)
            if not numbers:
                return 0.7

            required_years = float(numbers[0])
            user_years = self.user.experience_years
            if user_years >= required_years:
                return 1.0
            if user_years >= required_years * 0.7:
                return 0.8
            return 0.5
        except Exception as exc:
            self.logger.warning("Experience match failed, scoring 0.7: %s", exc)
            return 0.7

    def calculate_education_match(self, job: pd.Series) -> float:
        """Score the user's education against ``education_required``.

        Returns 1.0 when any of the user's qualifications appears in the
        requirement text (case-insensitive), 0.6 when none does, and 0.8 when
        the job states no requirement.
        """
        try:
            edu_text = self._text(job, 'education_required').lower()
            if not edu_text:
                return 0.8

            if any(user_edu.lower() in edu_text for user_edu in self.user.education):
                return 1.0
            return 0.6
        except Exception as exc:
            self.logger.warning("Education match failed, scoring 0.8: %s", exc)
            return 0.8

    def calculate_completeness_score(self, job: pd.Series) -> float:
        """Score how many key fields the posting fills in.

        Required fields contribute 60% and important fields 40% of the score.
        List-valued fields (e.g. a list of skills) count as present when they
        are non-empty.
        """
        try:
            required_fields = ['company_name', 'position', 'skills']
            important_fields = ['location_city', 'salary_range', 'experience_required']

            def filled_ratio(fields) -> float:
                present = sum(1 for name in fields if not self._is_missing(job.get(name)))
                return present / len(fields)

            return 0.6 * filled_ratio(required_fields) + 0.4 * filled_ratio(important_fields)
        except Exception as exc:
            self.logger.warning("Completeness score failed, scoring 0.5: %s", exc)
            return 0.5

    def calculate_posting_freshness(self, job: pd.Series) -> float:
        """Score how recent ``extraction_timestamp`` is.

        The timestamp is parsed as ISO-8601 (a trailing ``Z`` is accepted).
        Scores step down from 1.0 (up to 3 days old) to 0.2 (over 30 days).
        Returns 0.6 when the timestamp is missing or cannot be parsed.
        """
        try:
            stamp = self._text(job, 'extraction_timestamp')
            if not stamp:
                return 0.6

            posted_at = datetime.fromisoformat(stamp.replace('Z', '+00:00'))
            # Take "now" in the timestamp's own zone (naive local time when it
            # has none) so the offset is honoured instead of discarded.
            days_old = (datetime.now(posted_at.tzinfo) - posted_at).days

            for max_days, score in ((3, 1.0), (7, 0.8), (14, 0.6), (30, 0.4)):
                if days_old <= max_days:
                    return score
            return 0.2
        except Exception as exc:
            self.logger.warning("Posting freshness failed, scoring 0.6: %s", exc)
            return 0.6

    def calculate_preference_bonus(self, job: pd.Series) -> float:
        """Score how well the job lines up with the user's stated preferences.

        Adds 0.4 for a preferred company, 0.3 for a preferred location and
        0.3 for a matching work mode (always granted when the user's
        preference is "Any"). Comparisons ignore case and surrounding
        whitespace. The result is capped at 1.0.
        """
        try:
            def normalized(value) -> str:
                return str(value).strip().casefold()

            bonus = 0.0

            company = normalized(self._text(job, 'company_name'))
            if company and company in {normalized(c) for c in self.user.preferred_companies}:
                bonus += 0.4

            location = normalized(self._text(job, 'location_city'))
            if location and location in {normalized(loc) for loc in self.user.preferred_locations}:
                bonus += 0.3

            work_mode = normalized(self._text(job, 'work_mode'))
            preferred_mode = normalized(self.user.preferred_work_mode)
            if preferred_mode == 'any' or work_mode == preferred_mode:
                bonus += 0.3

            return min(bonus, 1.0)
        except Exception as exc:
            self.logger.warning("Preference bonus failed, scoring 0.0: %s", exc)
            return 0.0

# The config-driven scorer that extends JobScorer follows.

# ===================================================================
# CONFIGURABLE JOB SCORER (ENHANCED WITH CONFIG)
# ===================================================================

class ConfigurableJobScorer(JobScorer):
    """JobScorer extended with scores driven by the configuration sections.

    Adds company reputation, location, salary and deadline scoring. Each
    method returns a float score, logs any unexpected error and falls back to
    0.5 rather than raising.
    """

    # Text forms that stand for "no value" once a cell has been stringified.
    _MISSING_TEXT = frozenset({'', 'nan', 'none'})

    def __init__(self, user_profile: UserProfile, config_dict: Dict):
        """Store the profile and bind the scoring configuration sections.

        ``config_dict`` is kept on ``self.config``. The per-topic sections
        come from the module-level ``company_rep_config``, ``location_config``,
        ``salary_config`` and ``deadline_config``.
        """
        super().__init__(user_profile)
        self.config = config_dict
        self.company_rep_config = company_rep_config
        self.location_config = location_config
        self.salary_config = salary_config
        self.deadline_config = deadline_config

    @staticmethod
    def _mentions_any(text: str, names) -> bool:
        """Return True if any of ``names`` occurs in ``text`` as a whole word."""
        for name in names:
            needle = str(name).strip().lower()
            # Lookarounds rather than a plain substring test, so that "meta"
            # does not match "metabase" and "sap" does not match "whatsapp".
            if needle and re.search(rf"(?<!\w){re.escape(needle)}(?!\w)", text):
                return True
        return False

    def calculate_company_reputation(self, job: pd.Series) -> float:
        """Return the tier score for the job's company.

        The company name is checked against the FAANG, unicorn and MNC lists
        in that order; the first tier whose list contains a whole-word match
        wins. Missing or unlisted companies get the 'unknown' score.
        """
        try:
            tier_scores = self.company_rep_config['tier_scores']
            unknown_score = tier_scores.get('unknown', 0.5)

            company_name = str(job.get('company_name', '')).lower().strip()
            if company_name in self._MISSING_TEXT:
                return unknown_score

            tiers = (
                ('faang', 'faang_companies', 1.0),
                ('unicorn', 'unicorn_companies', 0.95),
                ('mnc', 'mnc_companies', 0.85),
            )
            for tier, list_key, fallback in tiers:
                if self._mentions_any(company_name, self.company_rep_config[list_key]):
                    return tier_scores.get(tier, fallback)

            return unknown_score

        except Exception as e:
            self.logger.error(f"Error calculating company reputation: {e}")
            return 0.5

    def calculate_location_match(self, job: pd.Series) -> float:
        """Score the job's city against user preferences and city tiers.

        Checked in order: a preferred location, a remote/WFH posting, a
        tier-1 city, a tier-2 city, then any other city. Comparison is
        case-insensitive. Returns 0.5 when no location is given.
        """
        try:
            location = str(job.get('location_city', '')).lower().strip()
            if location in self._MISSING_TEXT:
                return 0.5

            preferred = {str(loc).lower().strip() for loc in self.user.preferred_locations}
            if location in preferred:
                return self.location_config['exact_match_score']

            if 'remote' in location or 'wfh' in location:
                return self.location_config['remote_score']

            # Lower-case the configured names so mixed-case entries still match.
            tier1 = {str(city).lower().strip() for city in self.location_config['tier1_cities']}
            if location in tier1:
                return self.location_config['tier1_cities_score']

            tier2 = {str(city).lower().strip() for city in self.location_config['tier2_cities']}
            if location in tier2:
                return self.location_config['tier2_cities_score']

            return self.location_config['other_cities_score']

        except Exception as e:
            self.logger.error(f"Error calculating location match: {e}")
            return 0.5

    def calculate_salary_competitiveness(self, job: pd.Series) -> float:
        """Score the job's salary against the configured thresholds.

        The first number found in ``salary_range`` is read as the salary
        (assumed to be in LPA - TODO confirm against the input data). Below
        the minimum gives the configured penalty score; between minimum and
        ideal the score rises linearly from 0.5 to 1.0; at or above ideal it
        is 1.0. A missing or unparseable salary gives the configured
        missing-salary score.
        """
        try:
            missing_score = self.salary_config['missing_salary_score']

            salary_str = str(job.get('salary_range', '')).strip()
            if salary_str.lower() in self._MISSING_TEXT:
                return missing_score

            salary_match = re.search(r'(\d+(?:\.\d+)?)', salary_str)
            if not salary_match:
                return missing_score

            salary_lpa = float(salary_match.group(1))

            ideal = self.salary_config['ideal_salary_lpa']
            min_acceptable = self.salary_config['min_acceptable_lpa']
            max_expected = self.salary_config['max_expected_lpa']

            if salary_lpa < min_acceptable:
                return self.salary_config['below_min_penalty']
            if salary_lpa > max_expected:
                # The cap keeps the score within 0-1, so the configured bonus
                # currently has no effect on the result.
                return min(1.0 + self.salary_config['above_max_bonus'], 1.0)
            if salary_lpa >= ideal:
                return 1.0

            span = ideal - min_acceptable
            if span <= 0:
                # Degenerate thresholds: avoid dividing by zero.
                return 1.0
            return 0.5 + 0.5 * (salary_lpa - min_acceptable) / span

        except Exception as e:
            self.logger.error(f"Error calculating salary competitiveness: {e}")
            return 0.5

    def calculate_deadline_urgency(self, job: pd.Series) -> float:
        """Score how soon the application deadline falls.

        ``application_deadline`` must start with a ``YYYY-MM-DD`` date. Days
        remaining are counted between calendar dates, so a deadline that
        falls today counts as 0 days left rather than as expired. Past
        deadlines get the expired penalty. Missing or unparseable deadlines,
        and deadlines beyond the 'relaxed' threshold, get the 'no_deadline'
        score.
        """
        try:
            scores = self.deadline_config['urgency_scores']
            no_deadline_score = scores.get('no_deadline', 0.3)

            deadline_str = str(job.get('application_deadline', '')).strip()
            if deadline_str.lower() in self._MISSING_TEXT:
                return no_deadline_score

            try:
                # Only the date part matters; any time-of-day suffix is ignored.
                deadline_date = datetime.strptime(deadline_str[:10], '%Y-%m-%d').date()
            except ValueError:
                return no_deadline_score

            days_remaining = (deadline_date - datetime.now().date()).days
            if days_remaining < 0:
                return self.deadline_config['expired_penalty']

            thresholds = self.deadline_config['days_thresholds']
            buckets = (
                ('urgent', 3, 1.0),
                ('soon', 7, 0.9),
                ('moderate', 14, 0.7),
                ('relaxed', 30, 0.5),
            )
            for bucket, default_days, default_score in buckets:
                if days_remaining <= thresholds.get(bucket, default_days):
                    return scores.get(bucket, default_score)

            return no_deadline_score

        except Exception as e:
            self.logger.error(f"Error calculating deadline urgency: {e}")
            return 0.5

# ===================================================================
# SMART PRIORITIZATION ENGINE
# ===================================================================

class SmartPrioritizationEngine:
    """Main prioritization engine that orchestrates scoring and ranking.

    Combines the component scores produced by ``ConfigurableJobScorer`` into
    a weighted priority score per job, assigns a tier label and returns the
    jobs sorted by that score.
    """

    # Columns appended to every job row by prioritize_jobs().
    _SUMMARY_COLUMNS = ('final_priority_score', 'priority_tier')

    def __init__(self, user_profile: UserProfile, weights: Optional[PrioritizationWeights] = None):
        """Initialize prioritization engine.

        Args:
            user_profile: The candidate the jobs are ranked for.
            weights: Component weights; the defaults are used when omitted.
        """
        self.user_profile = user_profile
        self.weights = weights or PrioritizationWeights.get_default()
        # The scorer receives the module-level ``config`` dictionary.
        self.scorer = ConfigurableJobScorer(user_profile, config)
        self.logger = logging.getLogger("PrioritizationEngine")

        self.logger.info("="*70)
        self.logger.info("SMART PRIORITIZATION ENGINE INITIALIZED")
        self.logger.info("="*70)
        self.logger.info(f"User: {user_profile.name} ({user_profile.user_id})")
        self.logger.info(f"Skills: {', '.join(user_profile.primary_skills[:5])}")
        self.logger.info(f"Experience: {user_profile.experience_years} years")
        self.logger.info("="*70 + "\n")

    def calculate_job_priority(self, job: pd.Series) -> Dict[str, float]:
        """Calculate comprehensive priority score for a job.

        Returns:
            A dict holding each component score plus ``final_priority_score``
            (the weighted sum scaled by 100) and ``priority_tier``. If
            scoring fails, only those last two keys are returned, with a
            score of 0.0 and the tier ``'Error'``.
        """
        try:
            scorer = self.scorer
            scores = {
                'skills_match': scorer.calculate_skills_match(job),
                'experience_match': scorer.calculate_experience_match(job),
                'education_match': scorer.calculate_education_match(job),
                'location_match': scorer.calculate_location_match(job),
                'completeness': scorer.calculate_completeness_score(job),
                'salary_competitiveness': scorer.calculate_salary_competitiveness(job),
                'company_reputation': scorer.calculate_company_reputation(job),
                'deadline_urgency': scorer.calculate_deadline_urgency(job),
                'posting_freshness': scorer.calculate_posting_freshness(job),
                'preference_bonus': scorer.calculate_preference_bonus(job)
            }

            # Each component key pairs with the weight field "<key>_weight".
            weighted_total = sum(
                value * getattr(self.weights, f"{component}_weight")
                for component, value in scores.items()
            )

            # Scale to a 0-100 style score. The result can exceed 100 when
            # the weights add up to more than 1.0.
            scores['final_priority_score'] = weighted_total * 100
            scores['priority_tier'] = self._get_priority_tier(scores['final_priority_score'])

            return scores

        except Exception as e:
            self.logger.error(f"Error calculating job priority: {e}")
            return {'final_priority_score': 0.0, 'priority_tier': 'Error'}

    def _get_priority_tier(self, score: float) -> str:
        """Map a priority score to its tier label (cut-offs at 80, 65, 50)."""
        if score >= 80:
            return "Highly Recommended"
        if score >= 65:
            return "Recommended"
        if score >= 50:
            return "Consider"
        return "Not Recommended"

    def prioritize_jobs(self, jobs_df: pd.DataFrame, save_output: bool = True,
                       output_path: str = "prioritized_jobs.csv") -> pd.DataFrame:
        """Prioritize all jobs in the DataFrame.

        Args:
            jobs_df: One row per job posting; any index is accepted.
            save_output: When True, write the result to ``output_path``.
            output_path: CSV destination used when ``save_output`` is True.

        Returns:
            The input rows with the score columns added, sorted by
            ``final_priority_score`` in descending order. An empty input
            yields an empty frame that still has the score columns.
        """
        total = len(jobs_df)
        self.logger.info(f"Prioritizing {total} job postings...")

        results = []
        # Count rows ourselves: the index label may be a string, or have gaps
        # after filtering, so it cannot be used for progress reporting.
        for processed, (_, job) in enumerate(jobs_df.iterrows(), start=1):
            result = job.to_dict()
            result.update(self.calculate_job_priority(job))
            results.append(result)

            if processed % 50 == 0:
                self.logger.info(f"   Processed {processed}/{total} jobs")

        if results:
            result_df = pd.DataFrame(results)
            result_df = result_df.sort_values('final_priority_score', ascending=False)
        else:
            # With nothing to score there is no score column to sort on, so
            # build the expected columns explicitly.
            columns = list(jobs_df.columns)
            columns += [name for name in self._SUMMARY_COLUMNS if name not in columns]
            result_df = pd.DataFrame(columns=columns)

        if save_output:
            result_df.to_csv(output_path, index=False)
            self.logger.info(f"Saved prioritized jobs to: {output_path}")

        return result_df

    def get_recommended_jobs(self, df: pd.DataFrame, min_score: float = 65.0,
                            max_results: int = 20) -> pd.DataFrame:
        """Get top recommended jobs.

        Returns at most ``max_results`` rows whose ``final_priority_score``
        is at least ``min_score``, best first. The rows are re-sorted with a
        stable sort, so the input need not be sorted and ties keep their
        input order.
        """
        recommended = df[df['final_priority_score'] >= min_score]
        recommended = recommended.sort_values(
            'final_priority_score', ascending=False, kind='mergesort'
        )
        return recommended.head(max_results)

# ===================================================================
# MAIN EXECUTION WITH CONFIG
# ===================================================================

def main_with_config():
    """Run the full prioritization pipeline using the module-level settings.

    Steps: load the job postings (JSON or CSV), optionally keep only jobs not
    processed before, score and rank them, merge with earlier results when
    running incrementally, write the ranked CSV, update the state file and
    write the top-N recommendations CSV.

    Returns:
        ``(prioritized_df, engine)`` on success, or ``(None, None)`` when
        incremental processing finds no new jobs.

    Raises:
        Exception: Any failure is logged with its traceback and re-raised.
    """
    logger.info("\n" + "="*70)
    logger.info("STARTING JOB PRIORITIZATION WITH CONFIG")
    logger.info("="*70 + "\n")

    try:
        # Relative input paths are resolved against the working directory.
        input_file = paths_config['input_file']
        if not os.path.isabs(input_file):
            input_file = os.path.join(os.getcwd(), input_file)

        logger.info(f"Loading jobs from: {input_file}")

        # Load jobs (support both CSV and JSON)
        if input_file.endswith('.json'):
            with open(input_file, 'r', encoding='utf-8') as f:
                jobs_data = json.load(f)
            jobs_df = pd.DataFrame(jobs_data)
        else:
            jobs_df = pd.read_csv(input_file)

        logger.info(f"Loaded {len(jobs_df)} job postings\n")

        incremental = incremental_config['enabled']
        output_csv = paths_config['output_csv']

        # Incremental processing: keep only jobs not recorded in the state file.
        if incremental:
            state_file = os.path.join(
                incremental_config['state_directory'],
                incremental_config['state_file']
            )

            jobs_to_process, processed_ids = get_new_jobs_to_process(
                jobs_df,
                state_file,
                incremental_config['force_full_reprocess']
            )

            if len(jobs_to_process) == 0:
                logger.info("No new jobs to process!")
                return None, None
        else:
            jobs_to_process = jobs_df
            processed_ids = set()

        # Read earlier results BEFORE anything is written. Reading the file
        # after the new results were saved would lose the old rows and
        # duplicate the new ones.
        existing_df = None
        if incremental and len(processed_ids) > 0 and os.path.exists(output_csv):
            existing_df = pd.read_csv(output_csv)

        user_profile = UserProfile(**user_profile_data)
        weights = PrioritizationWeights(**weights_data)

        engine = SmartPrioritizationEngine(
            user_profile=user_profile,
            weights=weights
        )

        # Score without saving; the file is written once below, after the
        # optional merge.
        prioritized_df = engine.prioritize_jobs(
            jobs_df=jobs_to_process,
            save_output=False,
            output_path=output_csv
        )

        if existing_df is not None:
            merged = pd.concat([existing_df, prioritized_df], ignore_index=True)
            if 'job_id' in merged.columns:
                # If a job appears in both sets, keep the freshly scored row.
                is_stale = merged['job_id'].astype(str).duplicated(keep='last')
                merged = merged[~is_stale]
            prioritized_df = merged.sort_values('final_priority_score', ascending=False)
            logger.info(f"Merged with existing results")

        prioritized_df.to_csv(output_csv, index=False)
        logger.info(f"Saved prioritized jobs to: {output_csv}")

        # Update state file
        if incremental and 'job_id' in prioritized_df.columns:
            new_processed_ids = set(prioritized_df['job_id'].astype(str))
            all_processed_ids = processed_ids.union(new_processed_ids)
            save_processed_job_ids(state_file, all_processed_ids)
            logger.info(f"Updated state file: {len(all_processed_ids)} total processed jobs")

        # Top recommendations: no score floor, just the best top_n rows.
        top_n = paths_config['top_n']
        recommendations = engine.get_recommended_jobs(
            df=prioritized_df,
            min_score=0.0,
            max_results=top_n
        )

        recommendations.to_csv(paths_config['top_recommendations_csv'], index=False)
        logger.info(f"Saved top {len(recommendations)} recommendations\n")

        logger.info("="*70)
        logger.info("PRIORITIZATION COMPLETED")
        logger.info("="*70)

        return prioritized_df, engine

    except Exception as e:
        # logger.exception records the traceback through the log handlers.
        logger.exception(f"PIPELINE FAILED: {e}")
        raise

# ===================================================================
# ENTRY POINT
# ===================================================================

# Script entry point: run the pipeline, then print a short summary of the ten
# highest-scoring jobs.
if __name__ == "__main__":
    rule = "=" * 70

    print("\n" + rule)
    print("PHASE 4: JOB PRIORITIZATION ENGINE")
    print(rule)
    print(f"Config loaded: {CONFIG_LOADED}")
    print(f"Config loader available: {CONFIG_LOADER_AVAILABLE}")
    print(rule + "\n")

    # main_with_config() returns (None, None) when there was nothing new to
    # process; in that case no summary is printed.
    prioritized_df, engine = main_with_config()

    if prioritized_df is not None:
        print("\n" + rule)
        print("TOP 10 RECOMMENDATIONS")
        print(rule + "\n")

        top_10 = prioritized_df.head(10)

        for rank, (_, job) in enumerate(top_10.iterrows(), start=1):
            score = job['final_priority_score']
            tier = job.get('priority_tier', 'N/A')
            print(f"#{rank} | Score: {score:.1f}/100 | {tier}")
            print(f"   Position: {job.get('position', 'N/A')}")
            print(f"   Company: {job.get('company_name', 'N/A')}")
            print(f"   Location: {job.get('location_city', 'N/A')}")

            # Keep the skills line short: at most 60 characters plus an ellipsis.
            skills_text = str(job.get('skills', 'N/A'))
            shown_skills = skills_text if len(skills_text) <= 60 else skills_text[:60] + "..."
            print(f"   Skills: {shown_skills}")

            # Salary is printed only when the posting provides one.
            has_salary = 'salary_range' in job and pd.notna(job['salary_range'])
            if has_salary:
                print(f"   Salary: {job['salary_range']}")

            print()

        print(rule)
        print(f"Total jobs processed: {len(prioritized_df)}")
        print(f"Output saved to: {paths_config['output_csv']}")
        print(f"Top recommendations saved to: {paths_config['top_recommendations_csv']}")
        print(rule)
