# Job Search Analytics
This notebook analyzes job application data from Notion to provide insights into job search patterns, 
application success rates, and conversion funnels.


## Configuration and Setup


In [None]:
# Install the packages listed in ./required.txt with pip, using the `python3`
# found on PATH (IPython `!` shell escape, so this line only works in a notebook).
!python3 -m pip install -r ./required.txt


In [None]:
# Import libraries for data retrieval
import os
from dotenv import load_dotenv
from notion_client import AsyncClient
from datetime import datetime
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np


## Data Retrieval Functions


In [None]:
async def get_notion_db():
    """
    Fetch every page of the job-application database from Notion.

    ``load_dotenv()`` is called first, then ``NOTION_TOKEN`` (auth) and
    ``NOTION_DB_ID`` (database) are read from the environment. The database
    is queried batch by batch, following ``next_cursor`` for as long as the
    response reports ``has_more``.

    Returns:
        list: Raw page objects accumulated from all fetched batches. If a
        request fails, the error is printed and whatever was collected up to
        that point is returned.

    Raises:
        ValueError: If either environment variable is missing or empty.
    """
    load_dotenv()
    notion_token = os.getenv("NOTION_TOKEN")
    notion_db_id = os.getenv("NOTION_DB_ID")

    if not (notion_token and notion_db_id):
        raise ValueError("Missing required environment variables: NOTION_TOKEN or NOTION_DB_ID")

    notion = AsyncClient(auth=notion_token)
    pages = []
    cursor = None
    more_pages = True

    while more_pages:
        try:
            batch = await notion.databases.query(
                database_id=notion_db_id,
                start_cursor=cursor,
            )
            pages += batch["results"]

            more_pages = bool(batch.get("has_more"))
            if more_pages:
                cursor = batch["next_cursor"]
        except Exception as e:
            # Best effort: report the failure and stop paginating
            print(f"Error querying Notion database: {e}")
            more_pages = False

    return pages

def extract_property_value(properties, property_name, property_type, default=None):
    """
    Safely extract a property value from Notion properties.

    Args:
        properties (dict): Properties dictionary from Notion
        property_name (str): Name of the property to extract
        property_type (str): Type of the property (title, multi_select, select, date)
        default: Value returned when the property is absent, empty, of an
            unsupported type, or not shaped as expected

    Returns:
        The extracted value or ``default``:
        - title: text content of the first title segment
        - multi_select: name of the first selected option only
        - select: name of the selected option
        - date: the ``start`` string of the date
    """
    try:
        # A property that is present but null/falsy is treated like a missing one
        prop = properties.get(property_name) or {}
        payload = prop.get(property_type)
        if not payload:
            return default

        if property_type == "title":
            return payload[0]["text"]["content"]
        if property_type == "multi_select":
            return payload[0]["name"]
        if property_type == "select":
            return payload["name"]
        if property_type == "date":
            return payload["start"]
        return default
    except (KeyError, IndexError, TypeError, AttributeError):
        # TypeError/AttributeError cover payloads that are not dicts/lists,
        # so malformed input yields the default instead of an exception
        return default

def process_notion_data(notion_db):
    """
    Flatten raw Notion query results into one plain dictionary per page.

    Args:
        notion_db (list): Raw results from Notion database query

    Returns:
        list: One dict per page with the keys "Name", "Job Category",
        "Status", "Applied Date", "End Date" and "Reject Reason"
    """
    # (output key, Notion property name, Notion property type, default)
    field_specs = (
        ("Name", "Name", "title", "Unknown"),
        ("Job Category", "Job Category", "multi_select", None),
        ("Status", "Status", "select", None),
        ("Applied Date", "Applied", "date", None),
        ("End Date", "End Date", "date", None),
        ("Reject Reason", "Reject Reason", "multi_select", None),
    )

    records = []
    for page in notion_db:
        page_props = page.get("properties", {})
        records.append({
            out_key: extract_property_value(page_props, prop_name, prop_type, fallback)
            for out_key, prop_name, prop_type, fallback in field_specs
        })
    return records


## Data Retrieval and Processing


In [None]:
# Get the notion database and process the data
async def fetch_notion_data():
    """
    Query Notion and return the processed application records.

    Returns:
        list: Processed records, or an empty list if retrieval or processing
        raised (the error is printed rather than propagated).
    """
    try:
        raw_pages = await get_notion_db()
        records = process_notion_data(raw_pages)
    except Exception as e:
        print(f"Error retrieving data: {e}")
        records = []
    return records

# Await the coroutine directly at cell level (top-level await, which a plain
# Python script does not allow) and keep the processed records in `data` for
# the cells below. If the call raises, `data` falls back to an empty list so
# the name is always defined.
try:
    data = await fetch_notion_data()
    print(f"Successfully retrieved {len(data)} job applications from Notion")
except Exception as e:
    print(f"Error retrieving data: {e}")
    data = []


# Analytics


In [None]:
# Create DataFrame from processed data
df = pd.DataFrame(data)

# Display basic statistics.
# `data` is an empty list whenever retrieval failed, which yields a frame with
# no columns; describe() raises ValueError on such a frame, so skip it then.
if df.empty:
    print("No job application data available; skipping basic statistics")
else:
    print("Basic statistics of the dataset:")
    print(df.describe())


## Visualization Functions


In [None]:
def plot_job_categories_by_reject_reason(dataframe):
    """
    Create a stacked bar chart showing job categories and their reject reasons.

    Args:
        dataframe (DataFrame): Pandas DataFrame containing job application data;
            needs the columns "Job Category", "Reject Reason" and "Name"

    Returns:
        Figure: Matplotlib figure object, or None when the dataframe is empty,
        lacks a required column, or has no rows with both a category and a
        reject reason
    """
    required_columns = ("Job Category", "Reject Reason", "Name")
    if dataframe.empty or any(col not in dataframe.columns for col in required_columns):
        print("DataFrame is missing required columns for reject reason analysis")
        return None

    # Count applications per (category, reason). Scalar index/columns/values
    # keep the result's columns flat, so legend entries are the reason names.
    pivot = dataframe.pivot_table(
        index="Job Category",
        columns="Reject Reason",
        values="Name",
        aggfunc="count",
    )

    if pivot.empty:
        print("No reject reason data to plot")
        return None

    # Plot the data
    fig, ax = plt.subplots(figsize=(12, 8))
    pivot.plot(kind="bar", stacked=True, ax=ax)

    # Add labels and styling
    ax.set_title('Reject Reason by Job Category', fontsize=14)
    ax.set_xlabel('Job Category', fontsize=12)
    ax.set_ylabel('Count', fontsize=12)
    ax.legend(title='Reject Reason')
    ax.set_yscale('log')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()

    # Display the pivot table
    print("Job Categories by Reject Reason:")
    print(pivot)

    return fig


## Job Categories Analysis


In [None]:
# Build the reject-reason chart from `df`, keep the function's return value in
# `job_categories_fig`, and render the current figure(s).
job_categories_fig = plot_job_categories_by_reject_reason(df)
plt.show()


## Response Time Analysis


In [None]:
def calculate_response_durations(data):
    """
    Calculate the duration in days between application date and response date.

    Entries lacking either date are ignored. Entries whose dates cannot be
    parsed are skipped individually (and counted in a printed notice) instead
    of discarding every other result.

    Args:
        data (list): List of dictionaries containing job application data,
            read through the keys "Applied Date" and "End Date"

    Returns:
        list: List of durations in days (End Date minus Applied Date)
        DataFrame: DataFrame with a single "Duration" column holding the same
        values; an empty DataFrame when no duration could be computed
    """
    def _parse_day(value):
        # Notion date strings are ISO 8601 with an optional time part; the
        # first 10 characters are always the YYYY-MM-DD calendar date.
        return datetime.strptime(value[:10], "%Y-%m-%d")

    durations = []
    candidates = 0
    skipped = 0

    for entry in data:
        applied, ended = entry.get("Applied Date"), entry.get("End Date")
        if not (applied and ended):
            continue
        candidates += 1
        try:
            durations.append((_parse_day(ended) - _parse_day(applied)).days)
        except (TypeError, ValueError):
            # Non-string or malformed date: drop this entry only
            skipped += 1

    if not candidates:
        print("No entries with both Applied Date and End Date found")
        return [], pd.DataFrame()

    if skipped:
        print(f"Skipped {skipped} entries with unparsable dates")

    if not durations:
        return [], pd.DataFrame()

    return durations, pd.DataFrame(durations, columns=["Duration"])

def plot_response_time_distribution(durations):
    """
    Draw a histogram of response times with mean and median markers.

    Args:
        durations (list): List of durations in days

    Returns:
        Figure: Matplotlib figure object, or None when `durations` is empty
    """
    if not durations:
        print("No duration data to plot")
        return None

    figure, axis = plt.subplots(figsize=(10, 6))

    # Bin edges at every whole day from 0 through (longest duration + 1)
    day_edges = range(0, max(durations) + 2)
    axis.hist(durations, bins=day_edges, color='skyblue', edgecolor='black', alpha=0.7)

    # Titles, axis labels and grid
    axis.set_title("Distribution of Response Times", fontsize=14)
    axis.set_xlabel("Days between Application and Response", fontsize=12)
    axis.set_ylabel("Number of Applications", fontsize=12)
    axis.grid(True, linestyle='--', alpha=0.7)

    # Vertical reference lines for the mean and the median
    average = np.mean(durations)
    middle = np.median(durations)
    axis.axvline(average, color='red', linestyle='dashed', linewidth=1,
                 label=f'Mean: {average:.1f} days')
    axis.axvline(middle, color='green', linestyle='dashed', linewidth=1,
                 label=f'Median: {middle:.1f} days')
    axis.legend()

    plt.tight_layout()
    return figure


In [None]:
# Compute per-application response durations from the raw `data` records
# (the list of dicts, not the DataFrame `df`)
durations, durations_df = calculate_response_durations(data)

# Print summary statistics only when at least one duration was computed
if not durations_df.empty:
    print("Response Time Statistics (in days):")
    print(durations_df.describe())

# Plot the distribution; the helper returns None when there is nothing to
# draw, in which case plt.show() is skipped
response_time_fig = plot_response_time_distribution(durations)
if response_time_fig:
    plt.show()

## Application Rate Analysis


In [None]:
def plot_application_rate_by_category(dataframe):
    """
    Plot stacked per-date application counts by job category, with a line
    showing the total per date on a secondary axis.

    Args:
        dataframe (DataFrame): Pandas DataFrame containing job application data;
            needs the columns "Job Category" and "Applied Date"

    Returns:
        Figure: Matplotlib figure object, or None when the input is empty,
        lacks a required column, yields no groups, or plotting raised
    """
    needed = {"Job Category", "Applied Date"}
    if dataframe.empty or not needed.issubset(dataframe.columns):
        print("DataFrame is missing required columns for application rate analysis")
        return None

    try:
        # Rows: job category, columns: application date, cells: number of applications
        counts = (
            dataframe.groupby(["Job Category", "Applied Date"])
            .size()
            .unstack(fill_value=0)
        )

        # Order dates (columns) and categories (rows); zero-fill any gaps
        counts = counts.sort_index(axis=1).sort_index(axis=0).fillna(0)

        if counts.empty:
            print("No application rate data to plot after grouping")
            return None

        per_date = counts.T

        # Stacked bars: one bar per date, one segment per category
        figure, bar_axis = plt.subplots(figsize=(15, 10))
        per_date.plot(kind="bar", stacked=True, ax=bar_axis, colormap='viridis')

        bar_axis.set_title("Application Rate by Job Category Over Time", fontsize=16)
        bar_axis.set_xlabel("Application Date", fontsize=14)
        bar_axis.set_ylabel("Number of Applications", fontsize=14)
        bar_axis.tick_params(axis='x', rotation=45)
        bar_axis.grid(axis="y", linestyle='--', alpha=0.7)

        # Overlay the per-date totals on a twin y-axis
        line_axis = bar_axis.twinx()
        per_date.sum(axis=1).plot(kind='line', color='red', marker='o', ax=line_axis,
                                  label='Total Applications')
        line_axis.set_ylabel('Total Applications', color='red', fontsize=14)
        line_axis.tick_params(axis='y', labelcolor='red')

        # Single legend combining the entries of both axes
        bar_handles, bar_labels = bar_axis.get_legend_handles_labels()
        line_handles, line_labels = line_axis.get_legend_handles_labels()
        bar_axis.legend(bar_handles + line_handles, bar_labels + line_labels,
                        loc='upper left', fontsize=12)

        plt.tight_layout()
        return figure

    except Exception as e:
        print(f"Error plotting application rate: {e}")
        return None


In [None]:
# Plot application rate by job category; the helper returns None when it
# could not produce a figure
application_rate_fig = plot_application_rate_by_category(df)
if application_rate_fig:
    plt.show()

    # Print per-category totals. Note this sits inside the `if`, so the
    # totals are only printed when a figure was produced.
    if not df.empty and "Job Category" in df.columns:
        category_counts = df["Job Category"].value_counts()
        print("\nTotal Applications by Job Category:")
        print(category_counts)

## Application Funnel Analysis


In [None]:
def setup_application_funnel(dataframe):
    """
    Prepare the dataframe for funnel analysis by adding resolved stages and stage indices.

    Rows whose "Status" is "Rejected" are mapped back to the stage they had
    reached, inferred from the "Reject Reason" text: a reason containing
    "screening" maps to "Screening Invited", one containing "technical" to
    "Interview In Progress", anything else (including a missing reason) to
    "Applied". All other rows keep their status as the resolved stage.
    Stages that are not funnel steps get a missing "Stage Index".

    Args:
        dataframe (DataFrame): Pandas DataFrame containing job application data

    Returns:
        DataFrame: Copy of the input with "Resolved Stage" and "Stage Index"
            columns added (the input itself is returned when it is empty)
        list: List of funnel steps, in funnel order
        dict: Mapping of stages to indices
    """
    if dataframe.empty:
        print("DataFrame is empty, cannot perform funnel analysis")
        return dataframe, [], {}

    # The stage index is the position in the funnel, so derive the mapping
    # from the ordered list instead of maintaining the two separately
    funnel_steps = ["Applied", "Screening Invited", "Interview In Progress", "Offer"]
    stage_map = {step: position for position, step in enumerate(funnel_steps)}

    def resolve_stage(row):
        if row["Status"] != "Rejected":
            return row["Status"]

        reason = row.get("Reject Reason")
        # A missing reason may arrive as None or as NaN; NaN is a truthy
        # float, so only real strings are inspected
        reason = reason.lower() if isinstance(reason, str) else ""

        if "screening" in reason:
            return "Screening Invited"
        if "technical" in reason:
            return "Interview In Progress"
        return "Applied"  # rejected early

    # Create a copy to avoid modifying the original dataframe
    df_funnel = dataframe.copy()

    # Add resolved stage and stage index columns
    df_funnel["Resolved Stage"] = df_funnel.apply(resolve_stage, axis=1)
    df_funnel["Stage Index"] = df_funnel["Resolved Stage"].map(stage_map)

    return df_funnel, funnel_steps, stage_map

def build_funnel_counts(dataframe, funnel_steps):
    """
    Build funnel counts for the entire dataframe and by job category.

    An application counts towards a step when its "Stage Index" is greater
    than or equal to that step's position in `funnel_steps`, so every step
    includes the applications that progressed beyond it. Rows with a missing
    "Stage Index" count towards no step.

    Args:
        dataframe (DataFrame): Pandas DataFrame with funnel analysis columns
            ("Stage Index" and "Job Category")
        funnel_steps (list): List of funnel steps

    Returns:
        DataFrame: One row per job category plus a final "Total" row computed
        over all rows, one column per funnel step. An empty DataFrame when the
        input is empty, lacks a required column, or counting raised.
    """
    required_columns = ("Stage Index", "Job Category")
    if dataframe.empty or any(col not in dataframe.columns for col in required_columns):
        print("DataFrame is missing required columns for funnel analysis")
        return pd.DataFrame()

    try:
        stage_index = dataframe["Stage Index"]

        # One boolean column per step: did this application reach the step?
        reached = pd.DataFrame({
            step: stage_index >= position
            for position, step in enumerate(funnel_steps)
        })

        # Summing booleans counts the True values. Grouping the boolean frame
        # by the category column avoids GroupBy.apply and its
        # version-dependent `include_groups` argument.
        funnel_per_category = reached.groupby(dataframe["Job Category"]).sum()

        # Add total row (all rows, including those without a category)
        funnel_per_category.loc["Total"] = reached.sum()

        return funnel_per_category
    except Exception as e:
        print(f"Error building funnel counts: {e}")
        return pd.DataFrame()

def plot_total_funnel(funnel_data, funnel_steps):
    """
    Draw the overall funnel (the "Total" row) as horizontal bars on a log scale.

    Args:
        funnel_data (DataFrame): DataFrame with funnel counts
        funnel_steps (list): List of funnel steps

    Returns:
        Figure: Matplotlib figure object, or None when there is no "Total"
        row or plotting raised
    """
    has_total_row = (not funnel_data.empty) and "Total" in funnel_data.index
    if not has_total_row:
        print("Funnel data is missing the Total row")
        return None

    try:
        figure, axis = plt.subplots(figsize=(10, 6))

        totals = funnel_data.loc["Total"]

        # Labels and counts are reversed together so they stay paired
        rects = axis.barh(funnel_steps[::-1], totals[::-1], color="skyblue",
                          edgecolor='navy', alpha=0.8)

        # Write each bar's count just past its right-hand end
        for rect in rects:
            count = rect.get_width()
            mid_y = rect.get_y() + rect.get_height()/2
            axis.text(count * 1.05, mid_y, f'{int(count)}', va='center')

        axis.set_title("Application Funnel (Total)", fontsize=16)
        axis.set_xlabel("Number of Applications", fontsize=14)
        axis.set_xscale("log")
        axis.grid(axis='x', linestyle='--', alpha=0.7)

        plt.tight_layout()
        return figure
    except Exception as e:
        print(f"Error plotting total funnel: {e}")
        return None

def plot_funnel_by_category(funnel_data):
    """
    Draw grouped bars of funnel counts per stage, one bar series per job category.

    Args:
        funnel_data (DataFrame): DataFrame with funnel counts

    Returns:
        Figure: Matplotlib figure object, or None when there is no category
        row to plot or plotting raised
    """
    if funnel_data.empty:
        print("Funnel data is empty")
        return None

    try:
        # The "Total" row is excluded so only categories are compared
        per_category = funnel_data.drop("Total", errors='ignore')
        if per_category.empty:
            print("No category data available for funnel analysis")
            return None

        figure, axis = plt.subplots(figsize=(12, 8))

        # Transposed: stages on the x-axis, one bar series per category
        per_category.T.plot(kind="bar", ax=axis, colormap='viridis')

        axis.set_title("Application Funnel by Job Category", fontsize=16)
        axis.set_ylabel("Number of Applications", fontsize=14)
        axis.set_xlabel("Application Stage", fontsize=14)
        axis.grid(axis='y', linestyle='--', alpha=0.7)
        axis.set_yscale('log')
        axis.legend(title="Job Category", fontsize=12)

        plt.tight_layout()
        return figure
    except Exception as e:
        print(f"Error plotting funnel by category: {e}")
        return None


In [None]:
# Add the "Resolved Stage" / "Stage Index" columns on a copy of `df` and get
# the ordered funnel steps plus the stage -> index mapping
df_funnel, funnel_steps, stage_map = setup_application_funnel(df)

# Count applications per funnel step, per job category and in total
funnel_per_category = build_funnel_counts(df_funnel, funnel_steps)

# Display total funnel counts (only when the "Total" row exists)
if not funnel_per_category.empty and "Total" in funnel_per_category.index:
    print("Total Application Funnel:")
    print(funnel_per_category.loc["Total"])

# Plot total funnel; the helper returns None when it could not draw
total_funnel_fig = plot_total_funnel(funnel_per_category, funnel_steps)
if total_funnel_fig:
    plt.show()


In [None]:
# Display funnel counts by category, transposed so stages are rows and
# categories (plus "Total") are columns
if not funnel_per_category.empty:
    print("\nApplication Funnel by Job Category:")
    print(funnel_per_category.T)

# Plot funnel by category; the helper returns None when it could not draw
category_funnel_fig = plot_funnel_by_category(funnel_per_category)
if category_funnel_fig:
    plt.show()

## Conversion Rate Analysis


In [None]:
def calculate_conversion_rates(funnel_data, funnel_steps):
    """
    Compute the percentage of applications that reached each later stage,
    relative to the 'Applied' count, overall and per job category.

    Args:
        funnel_data (DataFrame): DataFrame with funnel counts
        funnel_steps (list): List of funnel steps

    Returns:
        dict: "Applied → <stage>" -> percentage (2 decimals) for the "Total"
            row; every rate is 0.0 when the total 'Applied' count is not positive
        dict: category -> dict of the same shape; categories whose 'Applied'
            count is zero or negative are left out
        Both dicts are empty when there is no "Total" row or an error occurred.
    """
    if funnel_data.empty or "Total" not in funnel_data.index:
        print("Funnel data is missing the Total row")
        return {}, {}

    later_stages = funnel_steps[1:]  # everything after "Applied"

    try:
        # Overall rates from the "Total" row
        overall = funnel_data.loc["Total"]
        overall_applied = overall["Applied"]

        overall_rates = {}
        for stage in later_stages:
            reached = overall[stage]
            overall_rates[f"Applied → {stage}"] = (
                round((reached / overall_applied) * 100, 2) if overall_applied > 0 else 0.0
            )

        # Per-category rates from every other row
        rates_by_category = {}
        for category, counts in funnel_data.iterrows():
            if category == "Total":
                continue

            applied = counts["Applied"]
            if applied <= 0:
                continue

            rates_by_category[category] = {
                f"Applied → {stage}": round((counts[stage] / applied) * 100, 2)
                for stage in later_stages
            }

        return overall_rates, rates_by_category

    except Exception as e:
        print(f"Error calculating conversion rates: {e}")
        return {}, {}

def plot_total_conversion_rates(conversion_rates):
    """
    Draw the overall conversion rates as labelled horizontal bars.

    Args:
        conversion_rates (dict): Dictionary of conversion rates

    Returns:
        Figure: Matplotlib figure object, or None when there is nothing to
        plot or plotting raised
    """
    if not conversion_rates:
        print("No conversion rate data to plot")
        return None

    try:
        figure, axis = plt.subplots(figsize=(10, 6))

        labels = list(conversion_rates.keys())
        values = list(conversion_rates.values())

        # Labels and values are reversed together so they stay paired
        rects = axis.barh(labels[::-1], values[::-1], color="orange",
                          edgecolor='darkgoldenrod', alpha=0.8)

        # Write each bar's percentage just past its right-hand end
        for rect in rects:
            rate = rect.get_width()
            mid_y = rect.get_y() + rect.get_height()/2
            axis.text(rate + 0.5, mid_y, f'{rate}%', va='center')

        axis.set_title("Conversion Rates from Applied to Later Stages", fontsize=16)
        axis.set_xlabel("Conversion Rate (%)", fontsize=14)
        axis.set_xlim(0, max(values) * 1.2)  # leave headroom for the value labels
        axis.grid(axis="x", linestyle='--', alpha=0.7)

        plt.tight_layout()
        return figure
    except Exception as e:
        print(f"Error plotting total conversion rates: {e}")
        return None

def plot_conversion_rates_by_category(conversion_rates_per_cat):
    """
    Draw per-category conversion rates as grouped horizontal bars.

    Args:
        conversion_rates_per_cat (dict): Dictionary of conversion rates by category

    Returns:
        Figure: Matplotlib figure object, or None when there is nothing to
        plot or plotting raised
    """
    if not conversion_rates_per_cat:
        print("No category conversion rate data to plot")
        return None

    try:
        # Rows: categories, columns: "Applied → <stage>" rates
        rates_table = pd.DataFrame(conversion_rates_per_cat).T
        if rates_table.empty:
            print("Conversion DataFrame is empty")
            return None

        figure, axis = plt.subplots(figsize=(12, 8))
        rates_table.plot(kind="barh", ax=axis, stacked=False, colormap='tab20c')

        axis.set_title("Conversion Rates from Applied to Later Stages by Job Category", fontsize=16)
        axis.set_xlabel("Conversion Rate (%)", fontsize=14)

        # x-axis spans at least 0-35 and grows with the largest rate
        highest = rates_table.max().max()
        axis.set_xlim(0, max(35, highest * 1.2))

        axis.grid(axis="x", linestyle='--', alpha=0.7)
        axis.legend(title="Application Stage", fontsize=12)

        plt.tight_layout()
        return figure
    except Exception as e:
        print(f"Error plotting conversion rates by category: {e}")
        return None


In [None]:
# Calculate conversion rates relative to "Applied": overall (from the "Total"
# row) and per job category
conversion_rates, conversion_rates_per_cat = calculate_conversion_rates(funnel_per_category, funnel_steps)

# Display total conversion rates (skipped when the dict is empty)
if conversion_rates:
    print("Conversion Rates from Applied to Later Stages:")
    for stage, rate in conversion_rates.items():
        print(f"{stage}: {rate}%")

# Plot total conversion rates; the helper returns None when it could not draw
total_conversion_fig = plot_total_conversion_rates(conversion_rates)
if total_conversion_fig:
    plt.show()


In [None]:
# Display conversion rates by category: one header line per category followed
# by its indented per-stage rates
if conversion_rates_per_cat:
    print("\nConversion Rates from Applied to Later Stages by Job Category:")
    for category, rates in conversion_rates_per_cat.items():
        print(f"{category}:")
        for stage, rate in rates.items():
            print(f"  {stage}: {rate}%")

# Plot conversion rates by category; the helper returns None when it could not draw
category_conversion_fig = plot_conversion_rates_by_category(conversion_rates_per_cat)
if category_conversion_fig:
    plt.show()

## Category-Specific Conversion Analysis


In [None]:
def analyze_category_group_conversion(funnel_data, funnel_steps, category_group, group_name):
    """
    Pool the funnel counts of several job categories and report the group's
    conversion rates from 'Applied' to each later stage.

    Categories from `category_group` that are not present in the funnel
    index are ignored. The rates are printed as well as returned.

    Args:
        funnel_data (DataFrame): DataFrame with funnel counts
        funnel_steps (list): List of funnel steps
        category_group (list): List of job categories to analyze
        group_name (str): Name of the category group for display

    Returns:
        dict: "Applied → <stage>" -> percentage (2 decimals) for the pooled
        group; empty when no data or none of the categories is available,
        or when an error occurred
    """
    if funnel_data.empty:
        print(f"No funnel data available for {group_name} analysis")
        return {}

    try:
        # Keep only the requested categories that actually have a funnel row
        present = [name for name in category_group if name in funnel_data.index]
        if not present:
            print(f"None of the specified categories for {group_name} exist in the data")
            return {}

        # Column-wise sum pools the selected categories into one funnel
        pooled = funnel_data.loc[present].sum()
        pooled_applied = pooled["Applied"]

        group_rates = {}
        for stage in funnel_steps[1:]:  # every stage after "Applied"
            reached = pooled[stage]
            group_rates[f"Applied → {stage}"] = (
                round((reached / pooled_applied) * 100, 2) if pooled_applied > 0 else 0.0
            )

        # Report
        print(f"Conversion Rates for {group_name} (Categories: {', '.join(present)}):")
        print(f"Total applications: {pooled_applied}")
        for label, pct in group_rates.items():
            print(f"{label}: {pct}%")

        return group_rates

    except Exception as e:
        print(f"Error analyzing {group_name} conversion: {e}")
        return {}

def plot_category_group_comparison(conversion_rates_dict):
    """
    Draw side-by-side bars comparing the conversion rates of category groups.

    Args:
        conversion_rates_dict (dict): Dictionary mapping group names to their conversion rates

    Returns:
        Figure: Matplotlib figure object, or None when there is nothing to
        plot or plotting raised
    """
    if not conversion_rates_dict:
        print("No category group data to plot")
        return None

    try:
        # Columns: groups, rows: "Applied → <stage>" rates
        rates_table = pd.DataFrame(dict(conversion_rates_dict.items()))
        if rates_table.empty:
            print("Comparison DataFrame is empty")
            return None

        figure, axis = plt.subplots(figsize=(12, 8))
        rates_table.plot(kind="bar", ax=axis, colormap='Set3')

        axis.set_title("Conversion Rate Comparison Between Category Groups", fontsize=16)
        axis.set_ylabel("Conversion Rate (%)", fontsize=14)
        axis.set_xlabel("Application Stage", fontsize=14)
        axis.grid(axis='y', linestyle='--', alpha=0.7)
        axis.legend(title="Category Group", fontsize=12)

        # Label every bar with its percentage
        for bar_group in axis.containers:
            axis.bar_label(bar_group, fmt='%.1f%%', padding=3)

        plt.tight_layout()
        return figure
    except Exception as e:
        print(f"Error plotting category group comparison: {e}")
        return None


### Technical Role Conversion Analysis


In [None]:
# Define category groups. Names are matched against the funnel's category
# index; names that are not present there are ignored by the analysis.
technical_roles = ["Technical Lead", "Senior Engineer", "Staff Engineer"]
solution_roles = ["Post Sale Engineer", "Sales Engineer", "Software/Solution Architect"]

# Analyze technical roles (prints the rates and returns them as a dict)
technical_conversion = analyze_category_group_conversion(
    funnel_per_category, 
    funnel_steps, 
    technical_roles, 
    "Technical Roles"
)


### Solution Role Conversion Analysis


In [None]:
# Analyze solution roles (prints the rates and returns them as a dict);
# relies on `solution_roles` defined in the technical-roles cell
solution_conversion = analyze_category_group_conversion(
    funnel_per_category, 
    funnel_steps, 
    solution_roles, 
    "Solution Roles"
)


### Role Type Comparison


In [None]:
# Compare technical and solution roles. The chart is only drawn when both
# analyses returned a non-empty dict.
if technical_conversion and solution_conversion:
    role_comparison = {
        "Technical Roles": technical_conversion,
        "Solution Roles": solution_conversion
    }

    # The helper returns None when it could not draw
    comparison_fig = plot_category_group_comparison(role_comparison)
    if comparison_fig:
        plt.show()


## Summary and Conclusions


In [None]:
def print_analysis_summary(df, funnel_per_category):
    """
    Print a summary of the job application analysis.

    Sections are printed only when their input is available: the status
    breakdown needs a "Status" column, the top categories a "Job Category"
    column, the funnel summary a "Total" row, and the response-time section
    at least one row with parsable "Applied Date" and "End Date" values.

    Args:
        df (DataFrame): The main DataFrame with job application data
        funnel_per_category (DataFrame): DataFrame with funnel counts by job category
    """
    rule = "=" * 50
    print(rule)
    print("JOB SEARCH ANALYTICS SUMMARY")
    print(rule)

    # Basic statistics
    total_applications = len(df)
    has_categories = "Job Category" in df.columns
    unique_categories = df["Job Category"].nunique() if has_categories else 0

    print(f"\nTotal Applications: {total_applications}")
    print(f"Unique Job Categories: {unique_categories}")

    # Application status breakdown
    if "Status" in df.columns:
        print("\nApplication Status Breakdown:")
        for status, count in df["Status"].value_counts().items():
            percentage = (count / total_applications) * 100
            print(f"  {status}: {count} ({percentage:.1f}%)")

    # Top job categories
    if has_categories:
        print("\nTop 5 Job Categories:")
        for category, count in df["Job Category"].value_counts().head(5).items():
            percentage = (count / total_applications) * 100
            print(f"  {category}: {count} ({percentage:.1f}%)")

    # Funnel summary
    if not funnel_per_category.empty and "Total" in funnel_per_category.index:
        total_funnel = funnel_per_category.loc["Total"]
        print("\nApplication Funnel Summary:")
        applied = total_funnel["Applied"]
        for stage in total_funnel.index[1:]:  # Skip "Applied"
            count = total_funnel[stage]
            # With no applications the share is reported as 0 rather than
            # dividing by zero
            percentage = (count / applied) * 100 if applied > 0 else 0.0
            print(f"  {stage}: {count} ({percentage:.1f}% of Applied)")

    # Response time summary
    response_times = []
    for _, row in df.iterrows():
        applied_on, ended_on = row.get("Applied Date"), row.get("End Date")
        if not (applied_on and ended_on):
            continue
        try:
            # Notion date strings are ISO 8601 with an optional time part;
            # the first 10 characters are the YYYY-MM-DD calendar date
            start = datetime.strptime(applied_on[:10], "%Y-%m-%d")
            end = datetime.strptime(ended_on[:10], "%Y-%m-%d")
        except (TypeError, ValueError):
            # Non-string (e.g. NaN) or malformed date: leave this row out
            continue
        response_times.append((end - start).days)

    if response_times:
        avg_response = sum(response_times) / len(response_times)

        # True median: the middle value, or the mean of the two middle values
        # when the number of observations is even
        ordered = sorted(response_times)
        middle = len(ordered) // 2
        if len(ordered) % 2:
            median_response = ordered[middle]
        else:
            median_response = (ordered[middle - 1] + ordered[middle]) / 2

        print("\nResponse Time Analysis:")
        print(f"  Average Response Time: {avg_response:.1f} days")
        print(f"  Median Response Time: {median_response} days")

    print("\nKey Insights:")
    print("  1. Review the job categories with the highest conversion rates")
    print("  2. Consider focusing on application types with better response times")
    print("  3. Analyze rejection reasons to improve future applications")

    print(rule)

# Print the text summary from the full DataFrame and the funnel counts built
# in the funnel-analysis cell
print_analysis_summary(df, funnel_per_category)
