In [26]:
import pandas as pd
import random

# Set random seed for reproducibility
random.seed(42)

# --- Simulation parameters ---------------------------------------------------
CANDIDATES_PER_ROLE = 250   # applicants generated for each role
FATIGUE_THRESHOLD = 20      # a panelist is flagged once their interview count exceeds this
NO_SHOW_REASON = 'No show/interview abandoned'
DATE_FORMAT = '%Y-%m-%d'

panelists = [f'Panelist {i}' for i in range(1, 26)]
roles = ['Software Engineer', 'Data Scientist']
progress_stages = ['Applied', 'Screening', 'Online Assessment', 'Technical Interview', 'Managerial Interview', 'Offer']
interview_stages = ['Technical Interview', 'Managerial Interview']

sources_list = ['LinkedIn', 'Naukri', 'Referral', 'Internal', 'Other']

# Realistic stage rejection rates by source
stage_reject_rates = {
    'Screening':           {'Referral': 0.05, 'Internal': 0.05, 'LinkedIn': 0.07, 'Naukri': 0.08, 'Other': 0.10},
    'Online Assessment':   {'Referral': 0.09, 'Internal': 0.08, 'LinkedIn': 0.11, 'Naukri': 0.12, 'Other': 0.17},
    'Technical Interview': {'Referral': 0.16, 'Internal': 0.14, 'LinkedIn': 0.21, 'Naukri': 0.22, 'Other': 0.27},
    'Managerial Interview':{'Referral': 0.12, 'Internal': 0.13, 'LinkedIn': 0.19, 'Naukri': 0.18, 'Other': 0.25}
}

# Offer acceptance rates by source
offer_accept_rates = {'Referral': 0.80, 'Internal': 0.80, 'LinkedIn': 0.70, 'Naukri': 0.70, 'Other': 0.65}

reasons_for_rejection = [
    'Poor skill match', 'Insufficient experience', 'Code test failed', 'Cultural misfit',
    NO_SHOW_REASON, 'Role withdrawn'
]
offer_rejection_reasons = [
    'Compensation mismatch', 'Offer declined - better opportunity', 'Delays in process', 'Cultural misfit'
]

# A no-show only makes sense when an interview was scheduled, so stages
# without an interview draw their rejection reason from this reduced list.
non_interview_rejection_reasons = [r for r in reasons_for_rejection if r != NO_SHOW_REASON]

# --- Output accumulators (filled by the loop below) --------------------------
hiring_rows = []      # one row per candidate: final outcome
feedback_rows = []    # one row per interview / declined offer
funnel_log = []       # one row per candidate per stage reached
candidate_id = 1
panelist_load = {name: 0 for name in panelists}   # running interview count per panelist


def _funnel_row(candidate_name, role, stage, status, date, resume_score,
                panelist='', fatigue_flag='No', feedback_text='', rejection_reason=''):
    """Build one stage-level record for the funnel log.

    ``StageOrder`` is the stage's position in ``progress_stages`` so that every
    row for a given stage carries the same order value.
    """
    return {
        'CandidateName': candidate_name,
        'AppliedRole': role,
        'FunnelStage': stage,
        'StageOrder': progress_stages.index(stage),
        'Status': status,
        'InterviewDate': date,
        'Panelist': panelist,
        'PanelistFatigueFlag': fatigue_flag,
        'FeedbackText': feedback_text,
        'RejectionReason': rejection_reason,
        'ResumeScore': int(resume_score),
    }


# --- Simulate every candidate's journey through the funnel -------------------
for role in roles:
    for _ in range(CANDIDATES_PER_ROLE):
        candidate_name = f'Candidate {candidate_id}'
        source = random.choice(sources_list)
        source_score_bias = 3 if source in ['Referral', 'Internal'] else 0
        salary_expectation = random.randint(1200000, 3500000)
        resume_score = min(10, max(2, random.gauss(7 + source_score_bias, 2)))
        application_date = pd.Timestamp('2025-09-01') + pd.Timedelta(days=random.randint(0, 70))
        current_date = application_date
        stage_history = []
        current_stage = 'Applied'
        status = 'In Process'
        rejection_reason = ''
        final_interviewer = ''
        final_feedback = ''

        for next_stage in progress_stages[1:]:
            if next_stage == 'Offer':
                # The offer is resolved once, after this loop, using the
                # acceptance rates; it is never a pipeline rejection and must
                # not produce a second "Offer" row in the funnel.
                continue

            # The candidate has reached this stage whatever its outcome, so the
            # tracker, the funnel row and the no-show rule all refer to it.
            current_stage = next_stage
            # One date per stage, shared by the feedback row and the funnel row.
            stage_date = current_date.strftime(DATE_FORMAT)
            is_interview = next_stage in interview_stages
            interviewer = ''
            panelist_fatigue_flag = 'No'
            feedback_text = ''

            # Assign interviewer and check load for interview stages
            if is_interview:
                interviewer = random.choice(panelists)
                panelist_load[interviewer] += 1
                panelist_fatigue_flag = 'Yes' if panelist_load[interviewer] > FATIGUE_THRESHOLD else 'No'
                comm_score = min(10, max(3, random.gauss(resume_score, 2)))
                tech_score = min(10, max(3, random.gauss(resume_score, 2)))
                feedback_text = random.choice([
                    f"Strong problem-solving, explained {random.choice(['DP', 'system design', 'API design'])} well.",
                    f"Needs improvement in {random.choice(['communication', 'code clarity', 'time complexity'])}.",
                    "Great cultural fit, asked thoughtful questions.",
                    "Struggled with edge cases in coding round.",
                    "Excellent domain knowledge in ML pipelines."
                ])
                feedback_rows.append({
                    'CandidateName': candidate_name,
                    'AppliedRole': role,
                    'InterviewStage': next_stage,
                    'Interviewer': interviewer,
                    'InterviewDate': stage_date,
                    'CommunicationScore': int(comm_score),
                    'TechnicalScore': int(tech_score),
                    'FeedbackText': feedback_text,
                    'Recommended': random.choice(['Yes', 'No', 'Maybe']),
                    'PanelistFatigueFlag': panelist_fatigue_flag
                })
                final_interviewer = interviewer
                final_feedback = feedback_text

            # Progress through stages with realistic reject probabilities
            reject_prob = stage_reject_rates.get(next_stage, {}).get(source, 0.10) + random.uniform(-0.02, 0.02)
            reject_prob = max(0.03, min(0.35, reject_prob))

            if random.random() < reject_prob:
                status = 'Rejected'
                # Choose the reason before logging so the funnel log and the
                # hiring tracker always report the same one.
                rejection_reason = random.choice(
                    reasons_for_rejection if is_interview else non_interview_rejection_reasons
                )
                stage_history.append(_funnel_row(
                    candidate_name, role, next_stage, 'Rejected', stage_date, resume_score,
                    panelist=interviewer,
                    fatigue_flag=panelist_fatigue_flag,
                    feedback_text=feedback_text,
                    rejection_reason=rejection_reason,
                ))
                break

            stage_history.append(_funnel_row(
                candidate_name, role, next_stage, 'Passed', stage_date, resume_score,
                panelist=interviewer,
                fatigue_flag=panelist_fatigue_flag,
                feedback_text=feedback_text,
            ))
            # Time taken to move on to the next stage.
            current_date += pd.Timedelta(days=random.randint(1, 7))

        # Offer stage: reached only by candidates who cleared every gated stage.
        if status != 'Rejected':
            current_stage = 'Offer'
            # Keep a 1-7 day gap between reaching the offer step and its outcome.
            current_date += pd.Timedelta(days=random.randint(1, 7))
            offer_date = current_date.strftime(DATE_FORMAT)

            if random.random() < offer_accept_rates[source]:
                status = 'Accepted'
                stage_history.append(_funnel_row(
                    candidate_name, role, 'Offer', 'Accepted', offer_date, resume_score,
                    panelist='HR Team',
                    feedback_text='Offer accepted.',
                ))
            else:
                status = 'Rejected'
                rejection_reason = random.choice(offer_rejection_reasons)
                decline_text = f"Declined due to {rejection_reason.lower()}."
                feedback_rows.append({
                    'CandidateName': candidate_name,
                    'AppliedRole': role,
                    'InterviewStage': 'Offer',
                    'Interviewer': 'HR Team',
                    'InterviewDate': offer_date,
                    'CommunicationScore': None,
                    'TechnicalScore': None,
                    'FeedbackText': decline_text,
                    'Recommended': 'N/A',
                    'PanelistFatigueFlag': 'No'
                })
                stage_history.append(_funnel_row(
                    candidate_name, role, 'Offer', 'Rejected', offer_date, resume_score,
                    panelist='HR Team',
                    feedback_text=decline_text,
                    rejection_reason=rejection_reason,
                ))

        days_in_pipeline = (current_date - application_date).days
        hiring_rows.append({
            'CandidateName': candidate_name,
            'AppliedRole': role,
            'InterviewStage': current_stage,
            'Status': status,
            'Feedback': final_feedback,
            'Interviewer': final_interviewer,
            'RejectionReason': rejection_reason,
            'InterviewDate': current_date.strftime(DATE_FORMAT),
            'ApplicationDate': application_date.strftime(DATE_FORMAT),
            'DaysInPipeline': days_in_pipeline,
            'Source': source,
            'SalaryExpectation': salary_expectation,
            'ResumeScore': int(resume_score)
        })
        candidate_id += 1

        funnel_log.extend(stage_history)

# Turn the three record lists into DataFrames and export each one to CSV
hiring_df, feedback_df, funnel_df = (
    pd.DataFrame(records) for records in (hiring_rows, feedback_rows, funnel_log)
)

# Each frame is written next to the notebook, without the index column.
for frame, csv_name in (
    (hiring_df, 'hiring_tracker_sample.csv'),
    (feedback_df, 'feedback_logs_sample.csv'),
    (funnel_df, 'funnel_logs_sample.csv'),
):
    frame.to_csv(csv_name, index=False)

# Summary of what was written
print("\n".join([
    "Generated realistic hiring funnel data:",
    "  → hiring_tracker_sample.csv  (per candidate outcome)",
    "  → feedback_logs_sample.csv   (interview feedback)",
    "  → funnel_logs_sample.csv     (stage-by-stage, fatigue, rejection, scores)",
]))




Generated realistic hiring funnel data:
  → hiring_tracker_sample.csv  (per candidate outcome)
  → feedback_logs_sample.csv   (interview feedback)
  → funnel_logs_sample.csv     (stage-by-stage, fatigue, rejection, scores)


In [14]:
pwd

'/Users/vj/Learn/aiml/IK/AgenticAI/AgenticAI For Tech Leaders/Capstone-2-HiringInsights'