# Functions

In [13]:
import json
import re
from pathlib import Path
import textwrap
from IPython.display import display, Markdown

from project_config import (
    JOB_POSTING_URLS_FILE,
    JOB_POSTING_URLS_FILTERED_FILE,
    JOB_POSTING_URLS_TO_EXCLUDE_FILE,
    JOB_DESCRIPTIONS_JSON_FILE,
    JOB_REQUIREMENTS_JSON_FILE,
)


def clean_json_string(json_string: str):
    """
    Remove unwanted control characters from the JSON string.
    """
    # Remove control characters (non-printable characters)
    return re.sub(r"[\x00-\x1F\x7F]", "", json_string)


def load_and_decode_json(json_file):
    """Load a JSON file and decode all Unicode escape sequences."""
    with open(json_file, "r", encoding="utf-8") as f:
        json_string = f.read()
        cleaned_json_string = clean_json_string(json_string)  # Clean the JSON string
        data = json.loads(cleaned_json_string)  # Decode the cleaned JSON

    return data


def format_json_readable(json_obj, indent=2, wrap_width=80):
    """
    Formats JSON with indentation and wraps long text for easier readability.
    """
    formatted_json = json.dumps(
        json_obj, indent=indent, ensure_ascii=False
    )  # Pretty print JSON

    # Wrap long lines within values
    formatted_json = "\n".join(
        [
            textwrap.fill(line, width=wrap_width) if len(line) > wrap_width else line
            for line in formatted_json.split("\n")
        ]
    )

    # Optional: Simulate line breaks after lists or objects (but avoid unnecessary newlines)
    formatted_json = formatted_json.replace(
        "]", "]"
    )  # Avoid adding newlines after closing brackets

    return formatted_json


def display_json_pretty(json_input: dict | str | Path, wrap_width: int = 120):
    """Display JSON from a string or a file in a readable format."""
    if isinstance(json_input, dict):  # If it's already a dict, just format it
        data = json_input
    elif isinstance(json_input, Path) or (
        isinstance(json_input, str) and not json_input.strip().startswith(("{", "["))
    ):
        data = load_and_decode_json(
            str(json_input)
        )  # Convert Path to string if necessary
    else:
        data = json.loads(json_input)  # If it's a JSON string, parse it

    # Format JSON for readability
    formatted_json = format_json_readable(data, wrap_width=wrap_width)

    # Display with Markdown to prevent horizontal scrolling
    display(Markdown(f"```json\n{formatted_json}\n```"))


# # Example usage
# json_data = {"name": "John Doe", "age": 30, "city": "New York"}

# # Format the JSON data for readability
# formatted_json = format_json_readable(json_data)

# # Directly display the formatted JSON string without json.loads()
# display_json_pretty(
#     formatted_json
# )  # No need to call json.loads again, since it's already formatted

# Check Files

## Check Job URLs, Descriptions/Postings, & Requirements

### Job Posting URLs

#### Run Pipeline to Filter URLs File

In [26]:
from pipelines.filter_job_posting_urls_mini_pipeline import (
    run_filtering_job_posting_urls_mini_pipe_line as filter_urls,
)

filter_urls()

2025-03-13 15:56:18,323 - pipelines.filter_job_posting_urls_mini_pipeline - INFO - Loading main job postings from C:\github\job_bot\input_output\input\job_posting_urls.json
2025-03-13 15:56:18,336 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\input\job_posting_urls.json
2025-03-13 15:56:18,337 - pipelines.filter_job_posting_urls_mini_pipeline - INFO - Loading exclusion URLs from C:\github\job_bot\input_output\input\job_posting_urls_to_exclude.json
2025-03-13 15:56:18,350 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\input\job_posting_urls_to_exclude.json
2025-03-13 15:56:18,351 - pipelines.filter_job_posting_urls_mini_pipeline - INFO - Excluding 19 URLs from main job postings.
2025-03-13 15:56:18,352 - pipelines.filter_job_posting_urls_mini_pipeline - INFO - Filtered out 19 job postings; 13 remain.
2025-03-13 15:56:18,354 - utils.generic_utils - INFO - Data successfully saved to C:\github\job_bot\input_output\input\job_p

#### All URLs

In [27]:
json_input = JOB_POSTING_URLS_FILE

data = load_and_decode_json(json_file=json_input)
print(len(data))
companies = sorted(
    [job_data.get("company", "Unknown Company") for job_data in data.values()]
)
print(companies)
# for key, value in data.items():


# display_json_pretty(json_file, wrap_width=100)

32
['Accenture', 'Adobe', 'Advisor360 Degrees', 'Airtable', 'Amazon', 'Amazon', 'Amazon', 'Amazon', 'Amplitude', 'Blend', 'Capital One', 'DEPT', 'Deloitte', 'Deloitte', 'Deloitte', 'DigitalOcean', 'Figma', 'Flextronics', 'Glean', 'Google', 'Liberty Mutual', 'Liberty Mutual', 'Liberty Mutual Insurance', 'Meta', 'Microsoft', 'MongoDB', 'Oracle', 'PwC', 'Snowflake', 'TRACE3', 'ThermoFisher Scientific', 'Veeva']


#### Filtered URLs

In [28]:
json_input = JOB_POSTING_URLS_FILTERED_FILE

data = load_and_decode_json(json_file=json_input)
print(len(data))
companies = sorted(
    [job_data.get("company", "Unknown Company") for job_data in data.values()]
)
display(companies)
# for key, value in data.items():


# display_json_pretty(json_file, wrap_width=100)

13


['Accenture',
 'Advisor360 Degrees',
 'Airtable',
 'Amazon',
 'Blend',
 'Deloitte',
 'Deloitte',
 'Deloitte',
 'DigitalOcean',
 'Glean',
 'Liberty Mutual',
 'Snowflake',
 'Veeva']

### Job Postings/Descriptions

In [29]:
json_input = JOB_DESCRIPTIONS_JSON_FILE

data = load_and_decode_json(json_file=json_input)
print(len(data))
for key in data.keys():
    print(key) if "snowflake" in key else None
# json_file = r"C:\github\job_bot\input_output\preprocessing\jobpostings.json"
# display_json_pretty(JOB_REQUIREMENTS_JSON_FILE, wrap_width=100)

30
https://careers.snowflake.com/us/en/job/SNCOUS5AF10A9C7A01464788ABD17AECBEE52EEXTERNALENUS1CC71A00229E4662B768527743E6164F/Director-Product-Marketing-Analytics?utm_source=Q2P9NP2NNP&utm_medium=phenom-feeds&gh_src=ed5543a62


### Requirements

In [30]:
json_input = JOB_REQUIREMENTS_JSON_FILE

data = load_and_decode_json(json_file=json_input)
print(len(data))
print(data.keys())
# json_file = r"C:\github\job_bot\input_output\preprocessing\jobpostings.json"
# display_json_pretty(JOB_REQUIREMENTS_JSON_FILE, wrap_width=100)

30
dict_keys(['https://www.google.com/about/careers/applications/jobs/results/113657145978692294-ai-market-intelligence-principal/?src=Online/LinkedIn/linkedin_us&utm_source=linkedin&utm_medium=jobposting&utm_campaign=contract&utm_medium=jobboard&utm_source=linkedin', 'https://www.capitalonecareers.com/job/-/-/234/66270465536?p_sid=ep3Sfxb&p_uid=sDBMWC5VxQ&source=rd_linkedin_job_posting_tm&ss=paid&utm_campaign=capone_all_jobs_24&utm_content=pj_board&utm_medium=jobad&utm_source=linkedin+slotted&dclid=CPGV3bef44gDFUEGTwgd4DoHPg', 'https://boards.greenhouse.io/embed/job_app?token=7600823002&gh_src=ab9f35b82', 'https://www.amazon.jobs/en/jobs/2696123/research-manager-strategy-and-insights-gca-marketing?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid', 'https://www.amazon.jobs/en/jobs/2742527/sr-generative-ai-strategist-generative-ai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=so

## Check Iteration 0

### Functions and Imports

In [14]:
import json
from pathlib import Path
import textwrap
from IPython.display import display, Markdown
from project_config import (
    JOB_POSTING_URLS_FILE,
    JOB_DESCRIPTIONS_JSON_FILE,
    JOB_REQUIREMENTS_JSON_FILE,
    ITERATE_0_OPENAI_DIR,
    mapping_file_name,
    REQS_FILES_ITERATE_0_OPENAI_DIR,
    RESPS_FILES_ITERATE_0_OPENAI_DIR,
    SIMILARITY_METRICS_ITERATE_0_OPENAI_DIR,
    ITERATE_0_ANTHROPIC_DIR,
    REQS_FILES_ITERATE_0_ANTHROPIC_DIR,
    RESPS_FILES_ITERATE_0_ANTHROPIC_DIR,
    SIMILARITY_METRICS_ITERATE_0_ANTHROPIC_DIR,
    # URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI,
    # URL_TO_FILE_MAPPING_FILE_ITERATE_0_ANTHROPIC,
)


def load_and_decode_json(json_file):
    """Load a JSON file and decode all Unicode escape sequences."""
    with open(json_file, "r", encoding="utf-8") as f:
        data = json.load(f)  # JSON decoder auto-converts \u2013 and \u2014
    return data


def format_json_readable(json_obj, indent=2, wrap_width=80):
    """
    Formats JSON with indentation and wraps long text for easier readability.
    """
    formatted_json = json.dumps(
        json_obj, indent=indent, ensure_ascii=False
    )  # Pretty print JSON

    # Wrap long lines within values
    formatted_json = "\n".join(
        [
            textwrap.fill(line, width=wrap_width) if len(line) > wrap_width else line
            for line in formatted_json.split("\n")
        ]
    )

    # Simulate line breaks: Insert extra spacing between key sections
    formatted_json = formatted_json.replace("{", "{\n\n")  # Before nested objects
    formatted_json = formatted_json.replace("},", "},\n\n")  # After objects
    formatted_json = formatted_json.replace("]", "]\n\n")  # After lists

    return formatted_json


def display_json_pretty(json_file, wrap_width=100):
    """Loads, decodes, formats, and displays JSON with simulated line breaks in Jupyter Notebook."""

    # Load and decode JSON
    data = load_and_decode_json(json_file)

    # Format JSON for readability
    formatted_json = format_json_readable(data, wrap_width=wrap_width)

    # Display with Markdown to prevent horizontal scrolling
    display(Markdown(f"```json\n{formatted_json}\n```"))

### OpenAI I/O Iterate 0

#### Mapping File (OpenAI pipeline)

##### Check all records from mapping file

In [None]:
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

directory = ITERATE_0_OPENAI_DIR
mapping_file = directory / mapping_file_name

file_mapping_model = load_mappings_model_from_json(mapping_file)

print("Job URLs:")
print(f"Number of URLs: {len(file_mapping_model.root.keys())}")

for index, url in enumerate(file_mapping_model.root.keys(), start=1):
    print(f"{index}. {url}")
    # print()

print("\n")

print("sim_metrics paths:")
for index, jobpaths in enumerate(file_mapping_model.root.values(), start=1):
    print(f"{index}. {Path(jobpaths.sim_metrics).name}")
    # print()

Exception ignored in: <bound method IPythonKernel._clean_thread_parent_frames of <ipykernel.ipkernel.IPythonKernel object at 0x000001F77EC7D810>>
Traceback (most recent call last):
  File "c:\github\job_bot\env\Lib\site-packages\ipykernel\ipkernel.py", line 775, in _clean_thread_parent_frames
    def _clean_thread_parent_frames(

KeyboardInterrupt: 


##### Check for specific records

In [None]:
search_terms = ["blend", "Amazon", "Blend", "Snowflake"]


# Find URLs that contain the search term
matching_urls = [
    url
    for url in file_mapping_model.root.keys()
    if any(term.lower() in str(url).lower() for term in search_terms)
]

print("Matching URLs:")
for index, url in enumerate(matching_urls, start=1):
    print(f"{index}. {url}")
    print()

Matching URLs:
1. https://www.amazon.jobs/en/jobs/2696123/research-manager-strategy-and-insights-gca-marketing?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid

2. https://www.amazon.jobs/en/jobs/2742527/sr-generative-ai-strategist-generative-ai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid

3. https://www.amazon.jobs/en/jobs/2684745/product-manager-artificial-general-intelligence-data-services?cmpid=SPLICX0248M&ss=paid&utm_campaign=cxro&utm_content=job_posting&utm_medium=social_media&utm_source=linkedin.com

4. https://careers.snowflake.com/us/en/job/SNCOUS5AF10A9C7A01464788ABD17AECBEE52EEXTERNALENUS1CC71A00229E4662B768527743E6164F/Director-Product-Marketing-Analytics?utm_source=Q2P9NP2NNP&utm_medium=phenom-feeds&gh_src=ed5543a62

5. https://jobs.smartrecruiters.com/Blend360/744000042638791-director-ai-strategy?trid=2d92f286-613b-4d

#### Sim Metrics (OpenAI Pipeline)

##### Check all records

In [None]:
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

directory = SIMILARITY_METRICS_ITERATE_0_OPENAI_DIR
file_list = get_file_names(directory_path=directory)

print("sim_metrics files in sim_metrics folder")
print(f"Number of files: {len(file_list)}")

for index, file_name in enumerate(file_list, start=1):
    print(f"{index}. {file_name}")
    # print()

sim_metrics files in sim_metrics folder
Number of files: 29
1. Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_sim_metrics_iter0.csv
2. Adobe_Sr__Director__Applied_AI_ML__Discovery__sim_metrics_iter0.csv
3. Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_sim_metrics_iter0.csv
4. Airtable_Product_Manager__AI_sim_metrics_iter0.csv
5. Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_sim_metrics_iter0.csv
6. Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_sim_metrics_iter0.csv
7. Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_sim_metrics_iter0.csv
8. Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_sim_metrics_iter0.csv
9. Amplitude_Marketing_Strategy___Analytics_Manager_sim_metrics_iter0.csv
10. Blend_Director__AI_Strategy_sim_metrics_iter0.csv
11. Capital_One_Director__AI_Platforms_sim_metrics_iter0.csv
12. Deloitte_AI_Data_Specialist_sim_metrics_iter0.csv
13. Deloitte_Global_B

##### Missing records (similarity metrics files)

In [None]:
from pathlib import Path
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json
from project_config import SIMILARITY_METRICS_ITERATE_0_OPENAI_DIR

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

sim_metrics_dir = SIMILARITY_METRICS_ITERATE_0_OPENAI_DIR
file_list_sim_metrics_dir = get_file_names(directory_path=sim_metrics_dir)

file_mapping_model = load_mappings_model_from_json(mapping_file)
file_list_mapping_file = [
    Path(jobpaths.sim_metrics).name for jobpaths in file_mapping_model.root.values()
]

missing_files = set(file_list_mapping_file) - set(
    file_list_sim_metrics_dir
)  # Elements in list1 but not in list2

print(f"Number of missing files in sim metrics folder: {len(missing_files)}")
for idx, f_name in enumerate(missing_files, start=1):
    print(f"{idx}. {f_name}")

2025-03-10 11:27:11,198 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_0\url_to_file_mapping.json
2025-03-10 11:27:11,199 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_0\url_to_file_mapping.json


Number of missing files in sim metrics folder: 0


In [None]:
from pathlib import Path
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings, Requirements
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

sim_metrics_dir = SIMILARITY_METRICS_ITERATE_0_OPENAI_DIR
file_list_sim_metrics_dir = get_file_names(directory_path=sim_metrics_dir)

requirements_dir = REQS_FILES_ITERATE_0_OPENAI_DIR
file_list_reqs_dir = get_file_names(requirements_dir)
print(f"Files in reqs dir: {len(file_list_reqs_dir)}")

responsibilities_dir = RESPS_FILES_ITERATE_0_OPENAI_DIR
file_list_resps_dir = get_file_names(responsibilities_dir)
print(f"Files in resps dir: {len(file_list_resps_dir)}")

file_mapping_model = load_mappings_model_from_json(mapping_file)
file_list_mapping_file = [
    Path(jobpaths.sim_metrics).name for jobpaths in file_mapping_model.root.values()
]
print(f"Files in mapping file: {len(file_list_mapping_file)}")

print()

missing_files = set(file_list_mapping_file) - set(
    file_list_sim_metrics_dir
)  # Elements in list1 but not in list2

print(f"Number of missing files in sim metrics folder: {len(missing_files)}")
for idx, f_name in enumerate(missing_files, start=1):
    print(f"{idx}. {f_name}")

2025-03-10 10:10:16,354 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_0\url_to_file_mapping.json
2025-03-10 10:10:16,355 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_0\url_to_file_mapping.json


Files in reqs dir: 30
Files in resps dir: 30
Files in mapping file: 30

Number of missing files in sim metrics folder: 1
1. Liberty_Mutual_Insurance_Senior_Manager_II__Corporate_Strategy___Research_sim_metrics_iter0.csv


#### Responsibilities

In [32]:
from utils.get_file_names import get_file_names
import pandas as pd

resps_dir = RESPS_FILES_ITERATE_0_OPENAI_DIR
file_glean = r"C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_openai\\iteration_0\\responsibilities\\Glean_Head_of_Competitive_Intelligence_resps_flat_iter0.json"

data = load_and_decode_json(file_glean)
print(len(data.get("responsibilities")))

csv_file = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\similarity_metrics\Glean_Head_of_Competitive_Intelligence_sim_metrics_iter1.csv"
df = pd.read_csv(csv_file)
# print(df)
no_of_resps = set(df.responsibility_key)
print(len(no_of_resps))

print(set(df.responsibility_key))

30
30
{'1.responsibilities.3', '3.responsibilities.0', '2.responsibilities.6', '0.responsibilities.4', '4.responsibilities.0', '1.responsibilities.4', '4.responsibilities.1', '1.responsibilities.7', '0.responsibilities.1', '2.responsibilities.3', '0.responsibilities.2', '3.responsibilities.1', '1.responsibilities.2', '4.responsibilities.5', '2.responsibilities.1', '1.responsibilities.0', '2.responsibilities.4', '1.responsibilities.1', '2.responsibilities.7', '1.responsibilities.5', '5.responsibilities.0', '4.responsibilities.3', '2.responsibilities.5', '4.responsibilities.4', '2.responsibilities.2', '0.responsibilities.0', '2.responsibilities.0', '0.responsibilities.3', '1.responsibilities.6', '4.responsibilities.2'}


#### Requirements

In [None]:
from pydantic import ValidationError
from models.resume_job_description_io_models import Requirements
from project_config import REQS_FILES_ITERATE_0_OPENAI_DIR
from utils.get_file_names import get_file_names

reqs_dir = REQS_FILES_ITERATE_0_OPENAI_DIR


file_list = get_file_names(reqs_dir, True)

for idx, file in enumerate(file_list, start=1):
    try:
        # Load JSON data
        data = load_and_decode_json(file)

        # Validate using Pydantic model
        validated_data = Requirements(**data)

        # If validation passes, print results
        print(f"{idx}. {Path(file).name}")
        print(f"Number of requirements: {len(validated_data.requirements)}")

    except ValidationError as e:
        print(f"Skipping {file}: Validation failed - {e}")
    except json.JSONDecodeError as e:
        print(f"Skipping {file}: Invalid JSON format - {e}")
    except Exception as e:
        print(f"Skipping {file}: Unexpected error - {e}")

1. Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter0.json
Number of requirements: 10
2. Adobe_Sr__Director__Applied_AI_ML__Discovery__reqs_flat_iter0.json
Number of requirements: 7
3. Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_reqs_flat_iter0.json
Number of requirements: 12
4. Airtable_Product_Manager__AI_reqs_flat_iter0.json
Number of requirements: 8
Skipping C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_0\requirements\Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_reqs_flat_iter0.json: Validation failed - 2 validation errors for Requirements
url
  Field required [type=missing, input_value={'0.pie_in_the_sky.0': '1...e and Technical teams.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/missing
requirements
  Field required [type=missing, input_value={'0.pie_in_the_sky.0': '1...e and Technical teams.'}, input_type=dict]
    For fu

In [None]:
from pydantic import ValidationError
from models.resume_job_description_io_models import Requirements
from project_config import REQS_FILES_ITERATE_0_OPENAI_DIR
from utils.get_file_names import get_file_names

reqs_dir = REQS_FILES_ITERATE_0_OPENAI_DIR


file_list = get_file_names(reqs_dir, True)

acn_requirements_file = file_list[0]

print("Job Requirements:")
display_json_pretty(acn_requirements_file)

data = load_and_decode_json(acn_requirements_file)
url = data.get("url")

job_postings = load_and_decode_json(JOB_DESCRIPTIONS_JSON_FILE)
job_posting = job_postings.get(url)

print("job posting:")
display_json_pretty(job_posting)

Job Requirements:


```json
{

  "url": "https://www.accenture.com/us-en/careers/jobdetails?id=R00251798_en&src=LINKEDINJP",
  "requirements": {

    "0.pie_in_the_sky.0": "Shape vision and create opportunities for data & AI led business
reinvention.",
    "0.pie_in_the_sky.1": "Create strategy for AI-first products and develop commercialization
opportunities.",
    "1.down_to_earth.0": "5+ years of experience in business development, client relationship
management, or marketing.",
    "1.down_to_earth.1": "Proficiency in CRM tools such as Salesforce for tracking and analyzing
client interactions.",
    "1.down_to_earth.2": "Ability to build client relationships and credibility as a trusted advisor
on how to infuse Data & AI into the business processes or functions.",
    "2.cultural_fit.0": "Collaborative leadership style with a growth-oriented mindset.",
    "2.cultural_fit.1": "Ability to mentor and develop high-performing teams.",
    "2.cultural_fit.2": "Infuse Responsible AI in vision and roadmap, develop plan for leveraging
ecosystem partners, and define operating model to foster a culture of innovation and
experimentation.",
    "3.other.0": "Experience working in professional services, Big Four firms, or consulting
environments.",
    "3.other.1": "Ability to travel up to 80%; travel as needed based on client expectations."
  }
}
```

job posting:


```json
{

  "https://www.accenture.com/us-en/careers/jobdetails?id=R00251798_en&src=LINKEDINJP": {

    "status": "success",
    "message": "Job site data processed successfully.",
    "data": {

      "url": "https://www.accenture.com/us-en/careers/jobdetails?id=R00251798_en&src=LINKEDINJP",
      "job_title": "Enterprise AI Value Strategy Senior Manager",
      "company": "Accenture",
      "location": "Multiple Locations",
      "salary_info": null,
      "posted_date": null,
      "content": {

        "Job Description": "Accenture is a leading global professional services company that helps
the world's leading businesses, governments and other organizations build their digital core,
optimize their operations, accelerate revenue growth, and enhance citizen services—creating tangible
value at speed and scale. We are a talent- and innovation-led company with approximately 774,000
people serving clients in more than 120 countries. Technology is at the core of change today, and we
are one of the world's leaders in helping drive that change, with strong ecosystem relationships. We
combine our strength in technology and leadership in cloud, data and AI with unmatched industry
experience, functional expertise, and global delivery capability. We are uniquely able to deliver
tangible outcomes because of our broad range of services, solutions and assets across Consulting,
Strategy, Technology, Operations, Industry X and Song. These capabilities, together with our culture
of shared success and commitment to creating 360° value, enable us to help our clients reinvent and
build trusted, lasting relationships. We measure our success by the 360° value we create for our
clients, each other, our shareholders, partners, and communities. Visit us at accenture.com.\n\nIn
Consulting we work with C-suite executives, leaders and boards of the world's leading organizations,
helping them reinvent every part of their enterprise to drive greater growth, enhance
competitiveness, implement operational improvements, reduce cost, deliver sustainable 360°
stakeholder value, and set a new performance frontier for themselves and the industry in which they
operate. Our deep industry and functional expertise are supported by proprietary assets and
solutions that help organizations transform faster and become more resilient.\n\nOur Data and AI
Strategy practitioners work to create and execute an organizations business strategy for data and
AI, including defining a compelling industry vision, creating value models, describing
business/technology roadmap, and creating operating model and platform and ecosystem architecture,
in support of data-led transformation. Use scaled agile disciplines to transform around iterative
approach sequencing use cases focused on critical data elements aligned to data product and platform
feature development.\n\nAs a Data & AI Strategy Senior Manager, you will:\n\n- Shape vision and
create opportunities for data & AI led business reinvention.\n- Build client relationships and
credibility as a trusted advisor on how to infuse Data & AI into the business processes or
functions\n- Define and structure an organization's data and AI strategy to build and optimize data
assets, AI technologies for growth & competitive advantages.\n- Assess maturity of an organization's
data and AI foundation. Define building blocks required to convert to an AI-first organization,
including technology and AI platform blueprint, data strategy and future-ready operating model.\n-
Create the business case, investment profile and roadmap to deliver on the strategy.\n- Infuse
Responsible AI in vision and roadmap, develop plan for leveraging ecosystem partners, and define
operating model to foster a culture of innovation and experimentation.\n- Identify people,
processes, and technologies to develop and operationalize AI solutions.\n- Create strategy for AI-
first products and develop commercialization opportunities.\n- Originate new opportunities and see
through sales cycle.\n\nAbility to travel up to 80%; travel as needed based on client
expectations.",
        "Qualification": "Locations",
        "Additional Information": "About Accenture"
      }
    }
  }
}
```

### Anthropic I/O Iterate 0

#### Mapping File

##### Check all records from mapping file

In [None]:
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

directory = ITERATE_0_ANTHROPIC_DIR
mapping_file = directory / mapping_file_name

file_mapping_model = load_mappings_model_from_json(mapping_file)

print("Job URLs:")
print(f"Number of URLs: {len(file_mapping_model.root.keys())}")

for index, url in enumerate(file_mapping_model.root.keys(), start=1):
    print(f"{index}. {url}")
    # print()

print("\n")

print("sim_metrics paths:")
for index, jobpaths in enumerate(file_mapping_model.root.values(), start=1):
    print(f"{index}. {Path(jobpaths.sim_metrics).name}")
    # print()

2025-03-09 15:40:39,378 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_0\url_to_file_mapping.json
2025-03-09 15:40:39,379 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_0\url_to_file_mapping.json


Job URLs:
Number of URLs: 30
1. https://www.google.com/about/careers/applications/jobs/results/113657145978692294-ai-market-intelligence-principal/?src=Online/LinkedIn/linkedin_us&utm_source=linkedin&utm_medium=jobposting&utm_campaign=contract&utm_medium=jobboard&utm_source=linkedin
2. https://www.capitalonecareers.com/job/-/-/234/66270465536?p_sid=ep3Sfxb&p_uid=sDBMWC5VxQ&source=rd_linkedin_job_posting_tm&ss=paid&utm_campaign=capone_all_jobs_24&utm_content=pj_board&utm_medium=jobad&utm_source=linkedin+slotted&dclid=CPGV3bef44gDFUEGTwgd4DoHPg
3. https://boards.greenhouse.io/embed/job_app?token=7600823002&gh_src=ab9f35b82
4. https://www.amazon.jobs/en/jobs/2696123/research-manager-strategy-and-insights-gca-marketing?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid
5. https://www.amazon.jobs/en/jobs/2742527/sr-generative-ai-strategist-generative-ai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=c

##### Check for specific records

In [None]:
search_terms = ["blend", "Amazon", "Blend", "Snowflake"]


# Find URLs that contain the search term
matching_urls = [
    url
    for url in file_mapping_model.root.keys()
    if any(term.lower() in str(url).lower() for term in search_terms)
]

print("Matching URLs:")
for index, url in enumerate(matching_urls, start=1):
    print(f"{index}. {url}")
    print()

Matching URLs:
1. https://www.amazon.jobs/en/jobs/2696123/research-manager-strategy-and-insights-gca-marketing?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid

2. https://www.amazon.jobs/en/jobs/2742527/sr-generative-ai-strategist-generative-ai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid

3. https://www.amazon.jobs/en/jobs/2684745/product-manager-artificial-general-intelligence-data-services?cmpid=SPLICX0248M&ss=paid&utm_campaign=cxro&utm_content=job_posting&utm_medium=social_media&utm_source=linkedin.com

4. https://jobs.smartrecruiters.com/Blend360/744000042638791-director-ai-strategy?trid=2d92f286-613b-4daf-9dfa-6340ffbecf73

5. https://www.amazon.jobs/en/jobs/2905092/senior-manger-partner-strategy-genai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting

#### Sim Metrics

##### Check all records

In [None]:
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

directory = SIMILARITY_METRICS_ITERATE_0_ANTHROPIC_DIR
file_list = get_file_names(directory_path=directory)

print("sim_metrics files in sim_metrics folder")
print(f"Number of files: {len(file_list)}")

for index, file_name in enumerate(file_list, start=1):
    print(f"{index}. {file_name}")
    # print()

sim_metrics files in sim_metrics folder
Number of files: 27
1. Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_sim_metrics_iter0.csv
2. Adobe_Sr__Director__Applied_AI_ML__Discovery__sim_metrics_iter0.csv
3. Airtable_Product_Manager__AI_sim_metrics_iter0.csv
4. Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_sim_metrics_iter0.csv
5. Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_sim_metrics_iter0.csv
6. Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_sim_metrics_iter0.csv
7. Amplitude_Marketing_Strategy___Analytics_Manager_sim_metrics_iter0.csv
8. Blend_Director__AI_Strategy_sim_metrics_iter0.csv
9. Capital_One_Director__AI_Platforms_sim_metrics_iter0.csv
10. Deloitte_AI_Data_Specialist_sim_metrics_iter0.csv
11. Deloitte_Global_Business_Services__GBS__Strategy_Manager_sim_metrics_iter0.csv
12. Deloitte_Market_Research_Sr_Manager_sim_metrics_iter0.csv
13. DEPT__Director_of_Applied_AI_Strategy__Media_sim_metrics_iter0.csv
1

##### Missing records (similarity metrics files)

In [None]:
from pathlib import Path
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

sim_metrics_dir = SIMILARITY_METRICS_ITERATE_0_ANTHROPIC_DIR
file_list_sim_metrics_dir = get_file_names(directory_path=sim_metrics_dir)

directory = ITERATE_0_ANTHROPIC_DIR
mapping_file = directory / mapping_file_name
file_mapping_model = load_mappings_model_from_json(mapping_file)
file_list_mapping_file = [
    Path(jobpaths.sim_metrics).name for jobpaths in file_mapping_model.root.values()
]

missing_files = set(file_list_mapping_file) - set(
    file_list_sim_metrics_dir
)  # Elements in list1 but not in list2

print(f"Number of missing files in sim metrics folder: {len(missing_files)}")
for idx, f_name in enumerate(missing_files, start=1):
    print(f"{idx}. {f_name}")

2025-03-09 15:41:24,273 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_0\url_to_file_mapping.json
2025-03-09 15:41:24,275 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_0\url_to_file_mapping.json


Number of missing files in sim metrics folder: 3
1. Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_sim_metrics_iter0.csv
2. Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_sim_metrics_iter0.csv
3. Snowflake_Director__Product_Marketing_-_Analytics_sim_metrics_iter0.csv


In [None]:
from pathlib import Path
from utils.get_file_names import get_file_names
from models.resume_job_description_io_models import JobFileMappings, Requirements
from evaluation_optimization.create_mapping_file import load_mappings_model_from_json

# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI

sim_metrics_dir = SIMILARITY_METRICS_ITERATE_0_ANTHROPIC_DIR
file_list_sim_metrics_dir = get_file_names(directory_path=sim_metrics_dir)

requirements_dir = REQS_FILES_ITERATE_0_ANTHROPIC_DIR
file_list_reqs_dir = get_file_names(requirements_dir)
print(f"Files in reqs dir: {len(file_list_reqs_dir)}")

responsibilities_dir = RESPS_FILES_ITERATE_0_ANTHROPIC_DIR
file_list_resps_dir = get_file_names(responsibilities_dir)
print(f"Files in resps dir: {len(file_list_resps_dir)}")

directory = ITERATE_0_ANTHROPIC_DIR
mapping_file = directory / mapping_file_name
file_mapping_model = load_mappings_model_from_json(mapping_file)
file_list_mapping_file = [
    Path(jobpaths.sim_metrics).name for jobpaths in file_mapping_model.root.values()
]
print(f"Files in mapping file: {len(file_list_mapping_file)}")

print()

missing_files = set(file_list_mapping_file) - set(
    file_list_sim_metrics_dir
)  # Elements in list1 but not in list2

print(f"Number of missing files in sim metrics folder: {len(missing_files)}")
for idx, f_name in enumerate(missing_files, start=1):
    print(f"{idx}. {f_name}")

2025-03-09 15:43:34,592 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_0\url_to_file_mapping.json
2025-03-09 15:43:34,592 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_0\url_to_file_mapping.json


Files in reqs dir: 30
Files in resps dir: 30
Files in mapping file: 30

Number of missing files in sim metrics folder: 3
1. Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_sim_metrics_iter0.csv
2. Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_sim_metrics_iter0.csv
3. Snowflake_Director__Product_Marketing_-_Analytics_sim_metrics_iter0.csv


#### Requirements

In [None]:
from models.resume_job_description_io_models import Requirements
from project_config import REQS_FILES_ITERATE_0_ANTHROPIC_DIR

reqs_dir = REQS_FILES_ITERATE_0_ANTHROPIC_DIR


file_list = get_file_names(reqs_dir, True)

for idx, file in enumerate(file_list, start=1):
    data = load_and_decode_json(file)
    validated_data = Requirements(**data)

    print(f"{idx}. {Path(file).name}")
    print(f"Number of requirements: {len(validated_data.requirements)}")

1. Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter0.json
Number of requirements: 10
2. Adobe_Sr__Director__Applied_AI_ML__Discovery__reqs_flat_iter0.json
Number of requirements: 6
3. Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_reqs_flat_iter0.json
Number of requirements: 12
4. Airtable_Product_Manager__AI_reqs_flat_iter0.json
Number of requirements: 5
5. Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_reqs_flat_iter0.json
Number of requirements: 8
6. Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_reqs_flat_iter0.json
Number of requirements: 8
7. Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_reqs_flat_iter0.json
Number of requirements: 12
8. Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_reqs_flat_iter0.json
Number of requirements: 12
9. Amplitude_Marketing_Strategy___Analytics_Manager_reqs_flat_iter0.json
Number of requirements: 13
10. Blend_Director__

## Check Iteration 1

### Imports

In [4]:
import json
import textwrap
from IPython.display import display, Markdown
from project_config import (
    JOB_POSTING_URLS_FILE,
    JOB_DESCRIPTIONS_JSON_FILE,
    JOB_REQUIREMENTS_JSON_FILE,
    ITERATE_1_ANTHROPIC_DIR,
    mapping_file_name,
    REQS_FILES_ITERATE_1_ANTHROPIC_DIR,
    RESPS_FILES_ITERATE_1_ANTHROPIC_DIR,
    SIMILARITY_METRICS_ITERATE_1_ANTHROPIC_DIR,
    ITERATE_1_OPENAI_DIR,
    REQS_FILES_ITERATE_1_OPENAI_DIR,
    RESPS_FILES_ITERATE_1_OPENAI_DIR,
    SIMILARITY_METRICS_ITERATE_1_OPENAI_DIR,
)

### Anthropic Iterate 1

#### Mapping File

#### Responsibilities

In [None]:
from utils.get_file_names import get_file_names

directory = RESPS_FILES_ITERATE_1_ANTHROPIC_DIR

file_names = get_file_names(directory_path=directory)

print(f"Responsibilities file names: \n" + "\n".join(name for name in file_names))

Responsibilities file names: 
Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json
Adobe_Sr__Director__Applied_AI_ML__Discovery__resps_nested_iter1.json
Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_resps_nested_iter1.json
Airtable_Product_Manager__AI_resps_nested_iter1.json
Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_resps_nested_iter1.json
Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_resps_nested_iter1.json
Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_resps_nested_iter1.json
Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_resps_nested_iter1.json
Amplitude_Marketing_Strategy___Analytics_Manager_resps_nested_iter1.json
Blend_Director__AI_Strategy_resps_nested_iter1.json
Capital_One_Director__AI_Platforms_resps_nested_iter1.json
Deloitte_AI_Data_Specialist_resps_nested_iter1.json
Deloitte_Global_Business_Services__GBS__Strategy_Manager_resps_nes

In [None]:
from models.resume_job_description_io_models import NestedResponsibilities

# Load and validate the JSON data
# file_name = "Blend_Director__AI_Strategy_resps_nested_iter1.json"
file_name = (
    "Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json"
)

for file in file_names:
    # file_name = "Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_resps_nested_iter1.json"
    file_path = RESPS_FILES_ITERATE_1_ANTHROPIC_DIR / file

    data = load_and_decode_json(file_path)
    validated_data = NestedResponsibilities(**data)

    # Compute the number of matched requirements per responsibility
    num_requirements_per_responsibility = {
        resp_key: len(resp.optimized_by_requirements)
        for resp_key, resp in validated_data.responsibilities.items()
    }

    # Display some insights
    most_matched_resp = max(
        num_requirements_per_responsibility,
        key=lambda k: num_requirements_per_responsibility[k],
    )

    least_matched_resp = min(
        num_requirements_per_responsibility,
        key=lambda k: num_requirements_per_responsibility[k],
    )

    print(f"File: {file}")
    print(f"Total Responsibilities: {len(num_requirements_per_responsibility)}")
    print(
        f"Most Matched Responsibility: {most_matched_resp} -> Matches: {num_requirements_per_responsibility[most_matched_resp]}"
    )
    print(
        f"Least Matched Responsibility: {least_matched_resp} -> Matches: {num_requirements_per_responsibility[least_matched_resp]}"
    )
    print()

# Find responsibilities with zero matches
no_match_resps = [
    resp_key
    for resp_key, count in num_requirements_per_responsibility.items()
    if count == 0
]
# print(f"Responsibilities with no matched requirements: {len(no_match_resps)}")

# matches_list = validated_data.responsibilities["2.responsibilities.7"]
# match_list = matches_list.model_dump()
# match_list
# # Distribution of matches
# import matplotlib.pyplot as plt

# plt.hist(num_requirements_per_responsibility.values(), bins=10, edgecolor="black")
# plt.xlabel("Number of Matched Requirements")
# plt.ylabel("Number of Responsibilities")
# plt.title("Distribution of Requirement Matches per Responsibility")
# plt.show()

File: Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 10
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 10

File: Adobe_Sr__Director__Applied_AI_ML__Discovery__resps_nested_iter1.json
Total Responsibilities: 26
Most Matched Responsibility: 3.responsibilities.0 -> Matches: 6
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 1

File: Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 12
Least Matched Responsibility: 3.responsibilities.0 -> Matches: 7

File: Airtable_Product_Manager__AI_resps_nested_iter1.json
Total Responsibilities: 26
Most Matched Responsibility: 3.responsibilities.3 -> Matches: 5
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 1

File: Amazon_Product_Manager__Artificial_General_Intelligen

#### Requirements

In [None]:
from utils.get_file_names import get_file_names

directory = REQS_FILES_ITERATE_1_ANTHROPIC_DIR

file_names = get_file_names(directory_path=directory)
print(f"Files in requirements dir: \n", ".\n".join(names for names in file_names))

Files in requirements dir: 
 Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter1.json.
Adobe_Sr__Director__Applied_AI_ML__Discovery__reqs_flat_iter1.json.
Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_reqs_flat_iter1.json.
Airtable_Product_Manager__AI_reqs_flat_iter1.json.
Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_reqs_flat_iter1.json.
Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_reqs_flat_iter1.json.
Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_reqs_flat_iter1.json.
Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_reqs_flat_iter1.json.
Amplitude_Marketing_Strategy___Analytics_Manager_reqs_flat_iter1.json.
Blend_Director__AI_Strategy_reqs_flat_iter1.json.
Capital_One_Director__AI_Platforms_reqs_flat_iter1.json.
Deloitte_AI_Data_Specialist_reqs_flat_iter1.json.
Deloitte_Global_Business_Services__GBS__Strategy_Manager_reqs_flat_iter1.json.
Deloitte_Mar

In [None]:
from models.resume_job_description_io_models import Requirements

# Load and validate the JSON data

# file_name = "Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter1.json"
# file_name = "Blend_Director__AI_Strategy_reqs_flat_iter1.json"
# file_name = 'Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_reqs_flat_iter1.json'

file_list = get_file_names(REQS_FILES_ITERATE_1_ANTHROPIC_DIR, True)
print(file_list)

for idx, file in enumerate(file_list, start=1):
    data = load_and_decode_json(file)
    validated_data = Requirements(**data)

    print(f"{idx}. {Path(file).name}")
    print(f"Number of requirements: {len(validated_data.requirements)}")

['C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\requirements\\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter1.json', 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\requirements\\Adobe_Sr__Director__Applied_AI_ML__Discovery__reqs_flat_iter1.json', 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\requirements\\Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_reqs_flat_iter1.json', 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\requirements\\Airtable_Product_Manager__AI_reqs_flat_iter1.json', 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\requirements\\Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_reqs_flat_iter1

### OpenAI Iterate 1

#### Mapping File

In [11]:
from utils.get_file_names import get_file_names
from pathlib import Path

from models.resume_job_description_io_models import JobFileMappings

from evaluation_optimization.create_mapping_file import load_mappings_model_from_json


# from project_config import URL_TO_FILE_MAPPING_FILE_ITERATE_0_OPENAI


directory = ITERATE_1_OPENAI_DIR
mapping_file = directory / mapping_file_name
file_mapping_model = load_mappings_model_from_json(mapping_file)


print("Job URLs:")
print(f"Number of URLs: {len(file_mapping_model.root.keys())}")


for index, url in enumerate(file_mapping_model.root.keys(), start=1):
    print(f"{index}. {url}")

print("\n")

print(
    *(
        url
        for index, url in enumerate(file_mapping_model.root.keys(), start=1)
        if "glean" in str(url)
    ),
    sep="\n",
)


# print("sim_metrics paths:")

# for index, jobpaths in enumerate(file_mapping_model.root.values(), start=1):

#     print(f"{index}. {Path(jobpaths.sim_metrics).name}")
# print()

2025-03-18 14:15:07,624 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\url_to_file_mapping.json
2025-03-18 14:15:07,625 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\url_to_file_mapping.json


Job URLs:
Number of URLs: 30
1. https://www.google.com/about/careers/applications/jobs/results/113657145978692294-ai-market-intelligence-principal/?src=Online/LinkedIn/linkedin_us&utm_source=linkedin&utm_medium=jobposting&utm_campaign=contract&utm_medium=jobboard&utm_source=linkedin
2. https://www.capitalonecareers.com/job/-/-/234/66270465536?p_sid=ep3Sfxb&p_uid=sDBMWC5VxQ&source=rd_linkedin_job_posting_tm&ss=paid&utm_campaign=capone_all_jobs_24&utm_content=pj_board&utm_medium=jobad&utm_source=linkedin+slotted&dclid=CPGV3bef44gDFUEGTwgd4DoHPg
3. https://boards.greenhouse.io/embed/job_app?token=7600823002&gh_src=ab9f35b82
4. https://www.amazon.jobs/en/jobs/2696123/research-manager-strategy-and-insights-gca-marketing?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid
5. https://www.amazon.jobs/en/jobs/2742527/sr-generative-ai-strategist-generative-ai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=c

2025-03-18 14:10:10,658 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\url_to_file_mapping.json
2025-03-18 14:10:10,660 - evaluation_optimization.create_mapping_file - INFO - Loaded and validated mapping file from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\url_to_file_mapping.json


Job URLs:
Number of URLs: 30
1. https://www.google.com/about/careers/applications/jobs/results/113657145978692294-ai-market-intelligence-principal/?src=Online/LinkedIn/linkedin_us&utm_source=linkedin&utm_medium=jobposting&utm_campaign=contract&utm_medium=jobboard&utm_source=linkedin
2. https://www.capitalonecareers.com/job/-/-/234/66270465536?p_sid=ep3Sfxb&p_uid=sDBMWC5VxQ&source=rd_linkedin_job_posting_tm&ss=paid&utm_campaign=capone_all_jobs_24&utm_content=pj_board&utm_medium=jobad&utm_source=linkedin+slotted&dclid=CPGV3bef44gDFUEGTwgd4DoHPg
3. https://boards.greenhouse.io/embed/job_app?token=7600823002&gh_src=ab9f35b82
4. https://www.amazon.jobs/en/jobs/2696123/research-manager-strategy-and-insights-gca-marketing?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=cxro&utm_medium=social_media&utm_content=job_posting&ss=paid
5. https://www.amazon.jobs/en/jobs/2742527/sr-generative-ai-strategist-generative-ai-innovation-center?cmpid=SPLICX0248M&utm_source=linkedin.com&utm_campaign=c

#### Responsibilities

In [11]:
from utils.get_file_names import get_file_names
from project_config import RESPS_FILES_ITERATE_1_OPENAI_DIR

directory = RESPS_FILES_ITERATE_1_OPENAI_DIR

file_names = get_file_names(directory_path=directory)

print(f"Responsibilities file names: \n" + "\n".join(name for name in file_names))

Responsibilities file names: 
Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json
Adobe_Sr__Director__Applied_AI_ML__Discovery__resps_nested_iter1.json
Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_resps_nested_iter1.json
Airtable_Product_Manager__AI_resps_nested_iter1.json
Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_resps_iter1.json
Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_resps_iter1.json
Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_resps_iter1.json
Amazon_Web_Services__Inc__Senior_Manger__Partner_Strategy__GenAI_Innovation_Center_resps_nested_iter1.json
Amplitude_Marketing_Strategy___Analytics_Manager_resps_iter1.json
Blend_Director__AI_Strategy_resps_nested_iter1.json
Capital_One_Director__AI_Platforms_resps_iter1.json
Deloitte_AI_Data_Specialist_resps_nested_iter1.json
Deloitte_Global_Business_Services__GBS__Strategy_Manager_resps_nested_iter1.json
Deloitte_Market_Rese

In [12]:
from models.resume_job_description_io_models import NestedResponsibilities
from pydantic import ValidationError

# Load and validate the JSON data
# file_name = "Blend_Director__AI_Strategy_resps_nested_iter1.json"
file_name = (
    "Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json"
)

for file in file_names:
    # file_name = "Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_resps_nested_iter1.json"
    file_path = RESPS_FILES_ITERATE_1_OPENAI_DIR / file

    try:
        data = load_and_decode_json(file_path)
        validated_data = NestedResponsibilities(**data)

    except ValidationError as e:
        print(e)
        print(e.json)

    # Compute the number of matched requirements per responsibility
    num_requirements_per_responsibility = {
        resp_key: len(resp.optimized_by_requirements)
        for resp_key, resp in validated_data.responsibilities.items()
    }

    # Display some insights
    most_matched_resp = max(
        num_requirements_per_responsibility,
        key=lambda k: num_requirements_per_responsibility[k],
    )

    least_matched_resp = min(
        num_requirements_per_responsibility,
        key=lambda k: num_requirements_per_responsibility[k],
    )

    print(f"File: {file}")
    print(f"Total Responsibilities: {len(num_requirements_per_responsibility)}")
    print(
        f"Most Matched Responsibility: {most_matched_resp} -> Matches: {num_requirements_per_responsibility[most_matched_resp]}"
    )
    print(
        f"Least Matched Responsibility: {least_matched_resp} -> Matches: {num_requirements_per_responsibility[least_matched_resp]}"
    )
    print()

File: Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 10
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 10

File: Adobe_Sr__Director__Applied_AI_ML__Discovery__resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 7
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 7

File: Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 1
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 1

File: Airtable_Product_Manager__AI_resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 8
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 8

1 validation error for NestedResponsibilities
url
  Field re

In [None]:
from utils.generic_utils import read_from_json_file

file = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\responsibilities\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json"
data = read_from_json_file(file)

# Compute the number of matched requirements per responsibility
num_requirements_per_responsibility = {
    resp_key: len(resp.optimized_by_requirements)
    for resp_key, resp in validated_data.responsibilities.items()
}

# Display some insights
most_matched_resp = max(
    num_requirements_per_responsibility,
    key=lambda k: num_requirements_per_responsibility[k],
)

least_matched_resp = min(
    num_requirements_per_responsibility,
    key=lambda k: num_requirements_per_responsibility[k],
)

print(f"File: {file}")
print(f"Total Responsibilities: {len(num_requirements_per_responsibility)}")
print(
    f"Most Matched Responsibility: {most_matched_resp} -> Matches: {num_requirements_per_responsibility[most_matched_resp]}"
)
print(
    f"Least Matched Responsibility: {least_matched_resp} -> Matches: {num_requirements_per_responsibility[least_matched_resp]}"
)
print()

2025-03-11 19:36:41,964 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\responsibilities\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json


File: C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\responsibilities\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_resps_nested_iter1.json
Total Responsibilities: 30
Most Matched Responsibility: 0.responsibilities.0 -> Matches: 8
Least Matched Responsibility: 0.responsibilities.0 -> Matches: 8



In [None]:
from utils.generic_utils import read_from_json_file

file = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_1\requirements\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter1.json"
data = read_from_json_file(file)

data

2025-03-11 19:41:09,229 - utils.generic_utils - INFO - Loaded data from C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_1\requirements\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_reqs_flat_iter1.json


{'url': 'https://www.accenture.com/us-en/careers/jobdetails?id=R00251798_en&src=LINKEDINJP',
 'requirements': {'0.pie_in_the_sky.0': 'Shape vision and create opportunities for data & AI led business reinvention.',
  '0.pie_in_the_sky.1': 'Create strategy for AI-first products and develop commercialization opportunities.',
  '1.down_to_earth.0': '5+ years of experience in business development, client relationship management, or marketing.',
  '1.down_to_earth.1': 'Proficiency in CRM tools such as Salesforce for tracking and analyzing client interactions.',
  '1.down_to_earth.2': 'Ability to build client relationships and credibility as a trusted advisor on how to infuse Data & AI into the business processes or functions.',
  '2.cultural_fit.0': 'Collaborative leadership style with a growth-oriented mindset.',
  '2.cultural_fit.1': 'Ability to mentor and develop high-performing teams.',
  '2.cultural_fit.2': 'Infuse Responsible AI in vision and roadmap, develop plan for leveraging ecosys

#### Requirements

#### Sim Metrics

In [6]:
from utils.get_file_names import get_file_names

sim_dir = SIMILARITY_METRICS_ITERATE_1_OPENAI_DIR

get_file_names(sim_dir)

['Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_sim_metrics_iter1.csv',
 'Airtable_Product_Manager__AI_sim_metrics_iter1 - Copy.csv',
 'Airtable_Product_Manager__AI_sim_metrics_iter1.csv',
 'Amazon_Product_Manager__Artificial_General_Intelligence_-_Data_Services_sim_metrics_iter1.csv',
 'Amazon_Research_Manager_-_Strategy_and_Insights_GCA_Marketing_sim_metrics_iter1.csv',
 'Amazon_Sr__Generative_AI_Strategist__Generative_AI_Innovation_Center_sim_metrics_iter1.csv',
 'Amplitude_Marketing_Strategy___Analytics_Manager_sim_metrics_iter1.csv',
 'Glean_Head_of_Competitive_Intelligence_sim_metrics_iter1.csv',
 'Google_AI_Market_Intelligence_Principal_sim_metrics_iter1.csv',
 'Liberty_Mutual_Insurance_Senior_Manager_I_-_Corporate_Strategy___Research_sim_metrics_iter1.csv',
 'Meta_Product_Strategy_Lead_sim_metrics_iter1.csv',
 'Microsoft_Head_of_Partner_Intelligence_and_Strategy_sim_metrics_iter1.csv']

In [9]:
import pandas as pd


csv_file = sim_dir / r"Glean_Head_of_Competitive_Intelligence_sim_metrics_iter1.csv"

df = pd.read_csv(csv_file)

set(df.requirement_key)
len(df)

330

## Get File List

In [1]:
from utils.get_file_names import get_file_names
from project_config import (
    ITERATE_1_ANTHROPIC_DIR,
    SIMILARITY_METRICS_ITERATE_1_ANTHROPIC_DIR,
)

files_dir = SIMILARITY_METRICS_ITERATE_1_ANTHROPIC_DIR

file_list = get_file_names(files_dir, True)
file_list

['C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\similarity_metrics\\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_sim_metrics_iter1.csv',
 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\similarity_metrics\\Adobe_Sr__Director__Applied_AI_ML__Discovery__sim_metrics_iter1.csv',
 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\similarity_metrics\\Advisor360__Senior_Product_Manager_-_AI_Analytics___Insights_sim_metrics_iter1.csv',
 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\similarity_metrics\\Airtable_Product_Manager__AI_sim_metrics_iter1.csv',
 'C:\\github\\job_bot\\input_output\\evaluation_optimization\\evaluation_optimization_by_anthropic\\iteration_1\\similarity_metrics\\Amazon_Product_Manager__Artificial_General_Intell

## Cross Tab Heatmap

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import textwrap

# Load similarity metrics CSV
file_path = file_list[1]  # Replace with your actual file path
df = pd.read_csv(file_path)

# Pivot the data to match heatmap format
pivot_df = df.pivot(
    index="responsibility", columns="requirement", values="composite_score"
)

# Create the heatmap
fig, ax = plt.subplots(figsize=(20, 12))
cmap = sns.color_palette("coolwarm", as_cmap=True)
sns.heatmap(
    pivot_df,
    annot=False,
    fmt=".2f",
    cmap=cmap,
    linewidths=1,
    linecolor="black",
    cbar=True,
    ax=ax,
)


# Function to wrap text inside heatmap cells
def wrap_text(text, width=20):
    return "\n".join(textwrap.wrap(str(text), width))


# Wrap y-axis labels
wrapped_y_labels = [textwrap.fill(label, width=20) for label in pivot_df.index]
ax.set_yticklabels(wrapped_y_labels, rotation=0)

# Overlay text inside each cell (display composite score + wrapped requirement)
for i, res in enumerate(pivot_df.index):
    for j, req in enumerate(pivot_df.columns):
        match = df[(df["responsibility"] == res) & (df["requirement"] == req)]
        if not match.empty:
            score = match.iloc[0]["composite_score"]
            req_text = wrap_text(match.iloc[0]["requirement"], width=20)
            display_text = f"{score:.2f}\n{req_text}"
            ax.text(
                j + 0.5,
                i + 0.5,
                display_text,
                ha="center",
                va="center",
                fontsize=8,
                color="black",
            )

# Formatting adjustments
ax.set_title("Responsibility vs Requirement Matching Grid (Text Inside Cells)")
ax.set_xlabel("Requirements")
ax.set_ylabel("Responsibilities")

plt.xticks(rotation=45, ha="right")

# Increase left margin
plt.subplots_adjust(left=0.5)
# box = ax.get_position()
# ax.set_position([box.x0 + 0.2, box.y0, box.width, box.height])

plt.show()

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import textwrap
import numpy as np

# Sample Responsibilities and Requirements
responsibilities = [
    "Led strategic initiatives for IT transformation",
    "Managed global vendor relationships",
    "Optimized business intelligence reporting",
    "Developed AI-driven analytics models",
    "Implemented cloud security protocols",
]

requirements = [
    "Experience in strategic IT leadership",
    "Vendor management expertise",
    "Business intelligence reporting experience",
    "AI and machine learning proficiency",
    "Cloud security best practices",
]

# Generate random similarity scores between 0.5 and 1.0
np.random.seed(42)
data = []
for res in responsibilities:
    for req in requirements:
        data.append(
            {
                "responsibility": res,
                "requirement": req,
                "composite_score": round(np.random.uniform(0.5, 1.0), 2),
            }
        )

# Convert to DataFrame
df_dummy = pd.DataFrame(data)

# Pivot table for heatmap
pivot_df = df_dummy.pivot(
    index="responsibility", columns="requirement", values="composite_score"
)

# Create the heatmap
fig, ax = plt.subplots(figsize=(12, 8))
cmap = sns.color_palette("coolwarm", as_cmap=True)  # Define color scheme

# Generate heatmap
sns.heatmap(
    pivot_df,
    annot=False,
    fmt=".2f",
    cmap=cmap,
    linewidths=1,
    linecolor="black",
    cbar=True,
    ax=ax,
)


# Function to wrap text inside heatmap cells
def wrap_text(text, width=20):
    return "\n".join(textwrap.wrap(str(text), width))


# Overlay text inside each cell (score + requirement)
for i, res in enumerate(pivot_df.index):
    for j, req in enumerate(pivot_df.columns):
        match = df_dummy[
            (df_dummy["responsibility"] == res) & (df_dummy["requirement"] == req)
        ]
        if not match.empty:
            score = match.iloc[0]["composite_score"]
            req_text = wrap_text(
                match.iloc[0]["requirement"], width=20
            )  # Wrap text for better display
            display_text = f"{score:.2f}\n{req_text}"  # Display similarity score + wrapped requirement text

            ax.text(
                j + 0.5,
                i + 0.5,
                display_text,
                ha="center",
                va="center",
                fontsize=8,
                color="black",
            )

# Formatting adjustments
ax.set_title("Dummy Responsibility vs Requirement Heatmap (Text Inside Cells)")
ax.set_xlabel("Requirements")
ax.set_ylabel("Responsibilities")
plt.xticks(rotation=45, ha="right")  # Rotate x-axis labels for better readability
plt.yticks(rotation=0)

# Show the plot
plt.show()

In [None]:
import altair as alt
import pandas as pd
import textwrap

# Load similarity metrics CSV
file_path = file_list[1]  # Replace with your actual file path
df = pd.read_csv(file_path)

# Create a wrapped version of the requirement text (width=20)
df["wrapped_requirement"] = df["requirement"].apply(
    lambda x: "\n".join(textwrap.wrap(str(x), width=20))
)
df["score_text"] = df["composite_score"].apply(lambda x: f"{x:.2f}")
df["label"] = df["score_text"] + "\n" + df["wrapped_requirement"]

# Build the heatmap chart
heatmap = (
    alt.Chart(df)
    .mark_rect()
    .encode(
        x=alt.X("requirement:N", title="Requirements", axis=alt.Axis(labelAngle=45)),
        y=alt.Y("responsibility:N", title="Responsibilities"),
        color=alt.Color(
            "composite_score:Q",
            scale=alt.Scale(scheme="redblue"),
            title="Composite Score",
        ),
    )
)

# Build the text overlay chart
# The key here is using the "detail" encoding so that each row is rendered individually.
text_overlay = (
    alt.Chart(df)
    .mark_text(
        fontSize=8,
        color="black",
        align="left",  # Set left alignment (change to 'center' if preferred)
        baseline="middle",
    )
    .encode(
        x=alt.X("requirement:N"),
        y=alt.Y("responsibility:N"),
        text=alt.Text("label:N"),
        detail="label:N",  # Force each label to be treated as a distinct detail
    )
)

# Combine the heatmap and text overlay
chart = (
    (heatmap + text_overlay)
    .properties(
        width=600,
        height=400,
        title="Responsibility vs Requirement Matching Grid (Text Inside Cells)",
    )
    .configure_view(strokeWidth=0)
)

chart.display()

# Make Cross Tab in Excel Instead

In [None]:
import pandas as pd
import argparse


def create_pivot_table(sim_metrics_csv, output_csv):
    """
    Reads the similarity metrics CSV and creates a pivot table:
      - Index: responsibility_key
      - Columns: requirement_key
      - Values: responsibility
    Then saves it as a new CSV file.
    """
    # Load CSV file
    df = pd.read_csv(sim_metrics_csv)
    # display(df.head(5))

    # Pivot table with responsibility_key as index, requirement_key as columns, and responsibility as values
    # Multi-index for columns: (requirement_key, requirement)
    pivot_table = df.pivot(
        index="responsibility_key",
        columns=["requirement_key", "requirement"],  # Multi-index for columns
        values=["responsibility", "composite_score"],  # Multi-values in pivot
    )

    # Fill missing values with empty string
    pivot_table = pivot_table.fillna("")

    # Save to CSV
    pivot_table.to_csv(output_csv)

    print(f"Pivot table saved to {output_csv}")

    display(pivot_table.head(10))


def main():
    # Define input and output file paths
    input_csv = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\similarity_metrics\Microsoft_Head_of_Partner_Intelligence_and_Strategy_sim_metrics_iter1.csv"
    output_csv = (
        r"C:\github\job_bot\data\matching_examples\resp_vs_reqs_pivot_output_1.csv"
    )

    # Call the function
    create_pivot_table(input_csv, output_csv)


if __name__ == "__main__":
    main()

### With Color Fromatting

In [None]:
import pandas as pd
from openpyxl import load_workbook
from openpyxl.styles import PatternFill
from openpyxl.formatting.rule import ColorScaleRule


def create_pivot_table(sim_metrics_csv, output_excel):
    """
    Reads the CSV and creates a pivot table:
      - Index: responsibility_key
      - Columns: (requirement_key, requirement)
      - Values: responsibility, composite_score
    Then saves it as an Excel file and applies conditional formatting.
    """
    # Load CSV file
    df = pd.read_csv(sim_metrics_csv)

    # Format responsibility text based on composite_score
    def format_responsibility(row):
        if pd.isna(row["composite_score"]):  # Handle NaN values
            return row["responsibility"]
        elif row["composite_score"] >= 0.75:
            return f"⭐ {row['responsibility']}"  # Highlight important ones
        elif row["composite_score"] < 0.3:
            return f"❌ {row['responsibility']}"  # Mark low ones
        return row["responsibility"]

    df["formatted_responsibility"] = df.apply(format_responsibility, axis=1)

    # Create pivot table
    pivot_table = df.pivot_table(
        index="responsibility_key",
        columns=["requirement_key", "requirement"],
        values=["formatted_responsibility", "composite_score"],
        aggfunc="first",
    )

    pivot_table = pivot_table.fillna("")
    pivot_table.to_excel(output_excel)

    # Apply Conditional Formatting
    apply_conditional_formatting(output_excel)
    print(f"Pivot table saved and formatted at: {output_excel}")


def apply_conditional_formatting(excel_file):
    """Finds composite_score columns in the pivot and applies a color scale formatting."""
    wb = load_workbook(excel_file)
    ws = wb.active

    # Define a Color Scale Rule (Red - Yellow - Green)
    color_rule = ColorScaleRule(
        start_type="num",
        start_value=0,
        start_color="FF6347",  # Red
        mid_type="num",
        mid_value=0.5,
        mid_color="FFFF00",  # Yellow
        end_type="num",
        end_value=1,
        end_color="00FF00",  # Green
    )

    # Detect composite_score columns explicitly
    for col in range(2, ws.max_column + 1):  # Columns start at 2
        header = ws.cell(row=1, column=col).value  # Get column header
        if header and "composite_score" in str(header):  # Ensure it's a valid column
            col_letter = ws.cell(row=1, column=col).column_letter
            ws.conditional_formatting.add(
                f"{col_letter}2:{col_letter}{ws.max_row}", color_rule
            )

    wb.save(excel_file)
    print("✅ Conditional formatting applied successfully!")


def main():
    input_csv = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\similarity_metrics\Microsoft_Head_of_Partner_Intelligence_and_Strategy_sim_metrics_iter1.csv"
    output_excel = (
        r"C:\github\job_bot\data\matching_examples\resp_vs_reqs_pivot_output_1.xlsx"
    )

    create_pivot_table(input_csv, output_excel)


if __name__ == "__main__":
    main()

#### With Xlwings

In [None]:
import pandas as pd
import xlwings as xw


def create_pivot_table(sim_metrics_csv, output_excel):
    """
    Reads the CSV and creates a pivot table:
      - Index: responsibility_key
      - Columns: (requirement_key, requirement)
      - Values: responsibility, composite_score
    Then saves it as an Excel file and applies conditional formatting using xlwings.
    """
    # Load CSV file
    df = pd.read_csv(sim_metrics_csv)

    # Format responsibility text based on composite_score
    def format_responsibility(row):
        if pd.isna(row["composite_score"]):  # Handle NaN values
            return row["responsibility"]
        elif row["composite_score"] >= 0.75:
            return f"{row['responsibility']}"  # Highlight important ones
        elif row["composite_score"] < 0.3:
            return f"{row['responsibility']}"  # Mark low ones
        return row["responsibility"]

    df["formatted_responsibility"] = df.apply(format_responsibility, axis=1)

    # Create pivot table
    pivot_table = df.pivot_table(
        index="responsibility_key",
        columns=["requirement_key", "requirement"],
        values=["formatted_responsibility", "composite_score"],
        aggfunc="first",
    )

    pivot_table = pivot_table.fillna("")
    pivot_table.to_excel(output_excel)

    # Apply Conditional Formatting with xlwings
    apply_xlwings_formatting(output_excel)
    print(f"Pivot table saved and formatted at: {output_excel}")


def apply_xlwings_formatting(excel_file):
    """Applies conditional formatting to value cells (not headers) based on their composite_score."""
    app = xw.App(visible=True)  # Keep Excel open for debugging
    wb = xw.Book(excel_file)
    ws = wb.sheets[0]

    # Detect last row and last column
    last_row = ws.range("A1").expand("down").last_cell.row
    last_col = ws.range("A1").expand("right").last_cell.column

    # Iterate through all data cells (excluding headers)
    for row in range(2, last_row + 1):  # Start from row 2 to avoid header
        for col in range(2, last_col + 1):  # Start from col 2 to avoid row labels
            cell = ws.cells(row, col)
            try:
                value = float(cell.value)  # Convert value to float
                if value >= 0.75:
                    cell.api.Interior.Color = xw.utils.rgb_to_int(
                        (0, 255, 0)
                    )  # Green for high scores
                elif value < 0.3:
                    cell.api.Interior.Color = xw.utils.rgb_to_int(
                        (255, 0, 0)
                    )  # Red for low scores
                else:
                    cell.api.Interior.Color = xw.utils.rgb_to_int(
                        (255, 255, 0)
                    )  # Yellow for mid-range scores
            except (ValueError, TypeError):
                pass  # Ignore non-numeric values

    wb.save()
    wb.close()
    app.quit()
    print("✅ Conditional formatting applied to value cells!")


def main():
    input_csv = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_openai\iteration_1\similarity_metrics\Microsoft_Head_of_Partner_Intelligence_and_Strategy_sim_metrics_iter1.csv"
    output_excel = (
        r"C:\github\job_bot\data\matching_examples\resp_vs_reqs_pivot_output_1.xlsx"
    )

    create_pivot_table(input_csv, output_excel)


if __name__ == "__main__":
    main()

In [None]:
input_csv = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_1\responsibilities\older_files\PwC_Strategy__Manager_-_Digital_Value_Transformation_Contact_Center_resps_nested_iter1.json"

with open(input_csv, "r", encoding="utf-8") as f:
    for _ in range(30):  # Print first 30 lines
        print(f.readline().strip())

In [None]:
#!/usr/bin/env python3
import pandas as pd
import argparse
import os
import xlsxwriter


def create_two_row_header_excel(sim_metrics_csv, output_file):
    """
    Reads the similarity metrics CSV and creates an Excel file with:
      - Row 1: "Resp Key / Req Key" + requirement keys
      - Row 2: "Requirements" + requirement texts
      - Rows 3+: One row per responsibility key, showing matched responsibility texts
    """
    # 1) Load the similarity metrics CSV
    df = pd.read_csv(sim_metrics_csv)

    # 2) Extract unique requirements and map to their texts
    req_map = df.groupby("requirement_key")["requirement"].first().to_dict()
    req_keys = sorted(req_map.keys())  # Ordered list of requirement keys
    req_texts = [req_map[k] for k in req_keys]  # Corresponding requirement texts

    # 3) Extract unique responsibilities
    resp_keys = sorted(df["responsibility_key"].unique())

    # 4) Create a dataframe to ensure all `requirement_keys` appear
    full_pivot = pd.DataFrame(index=resp_keys, columns=req_keys).fillna("")

    # 5) Pivot the table to get optimized_text per (responsibility, requirement) pair
    pivot = df.pivot(
        index="responsibility_key", columns="requirement_key", values="responsibility"
    )

    # 6) Merge the pivoted data into `full_pivot` to retain all columns
    full_pivot.update(pivot)

    # 7) Reset index so responsibility_key becomes a column
    full_pivot = full_pivot.reset_index()

    # 8) Prepare the first two header rows (Multi-layer Headers)
    header1 = ["Resp Key / Req Key"] + req_keys  # First row (Keys)
    header2 = ["Requirements"] + req_texts  # Second row (Descriptions)

    # 9) Write to Excel using xlsxwriter (Multi-layer Headers)
    workbook = xlsxwriter.Workbook(output_file)
    worksheet = workbook.add_worksheet("CrossTab")

    # Apply formatting
    bold_format = workbook.add_format(
        {"bold": True, "bg_color": "#002b36", "font_color": "white"}
    )
    wrap_format = workbook.add_format({"text_wrap": True, "align": "top"})

    # Merge header rows for multi-layer effect
    worksheet.write_row(0, 0, header1, bold_format)  # Row 1: Requirement Keys
    worksheet.write_row(1, 0, header2, wrap_format)  # Row 2: Requirement Texts

    # Write responsibilities data (row 3+)
    for row_idx, row in enumerate(full_pivot.itertuples(index=False), start=2):
        worksheet.write_row(row_idx, 0, row, wrap_format)

    # Adjust column widths for readability
    worksheet.set_column(0, 0, 25)  # Responsibility Key column
    worksheet.set_column(1, len(req_keys), 50)  # Requirement columns

    workbook.close()
    print(f"Excel file created: {output_file}")


def main():
    parser = argparse.ArgumentParser(
        description="Create a 2-row-header Excel from similarity metrics CSV."
    )
    parser.add_argument(
        "--sim_metrics_csv",
        required=True,
        help="Path to the similarity metrics CSV file",
    )
    parser.add_argument(
        "--output",
        required=True,
        help="Path to the output Excel file (e.g., output.xlsx)",
    )
    args = parser.parse_args()

    # Create the output directory if it doesn't exist
    output_dir = os.path.dirname(args.output)
    if output_dir and not os.path.exists(output_dir):
        os.makedirs(output_dir)

    create_two_row_header_excel(args.sim_metrics_csv, args.output)


if __name__ == "__main__":
    # Example file paths (adjust these as needed)
    input_csv = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_1\similarity_metrics\Thermo_Fisher_Scientific_Market___Competitive_Intelligence_Manager_sim_metrics_iter1.csv"
    output_excel = (
        r"C:\github\job_bot\data\matching_examples\resp_vs_reqs_crosstab_output_1.xlsx"
    )

    # Directly call the function with desired column names:
    create_two_row_header_excel(
        sim_metrics_csv=input_csv,
        output_file=output_excel,
    )

In [None]:
from pathlib import Path
import pandas as pd


def create_cross_tab(sim_metric_csv_file: Path, output_excel_file: Path):
    # Load the CSV file
    df = pd.read_csv(sim_metric_csv_file)

    # Create a pivot table (cross-tab) based on responsibility_key and requirement_key
    cross_tab = pd.pivot_table(
        df,
        values="responsibility",
        index="responsibility_key",
        columns="requirement_key",
        aggfunc=lambda x: " ".join(x),
    )

    # Extract unique requirements and their corresponding keys
    requirements = (
        df[["requirement_key", "requirement"]]
        .drop_duplicates()
        .set_index("requirement_key")["requirement"]
    )

    # Create a DataFrame for the requirements row with the same columns as cross_tab
    requirements_row = pd.DataFrame([requirements], columns=cross_tab.columns)

    # Combine the requirements row with the cross-tab table
    cross_tab_with_requirements = pd.concat([requirements_row, cross_tab], axis=0)

    # Reset the index to make the table cleaner
    cross_tab_with_requirements.reset_index(drop=True, inplace=True)

    # Save the cross-tab table with requirements to an Excel file
    cross_tab_with_requirements.to_excel(output_excel_file, index=False)

    print(f"Cross-tab table with requirements saved to {output_excel_file}")


# Input and output file paths
input_csv = r"C:\github\job_bot\input_output\evaluation_optimization\evaluation_optimization_by_anthropic\iteration_1\similarity_metrics\Thermo_Fisher_Scientific_Market___Competitive_Intelligence_Manager_sim_metrics_iter1.csv"
output_excel = (
    r"C:\github\job_bot\data\matching_examples\resp_vs_reqs_crosstab_output_1.xlsx"
)

# Create the cross-tab table
create_cross_tab(Path(input_csv), Path(output_excel))

In [None]:
responsibilities="{'0.responsibilities.0': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Led strategic initiatives to optimize the service partner network for a prominent international IT company in the Asia Pacific market, resulting in enhanced local execution outcomes. Possess extensive experience in analytical roles.'), '1.down_to_earth.1': OptimizedText(optimized_text="Led the optimization of a major global IT vendor's service partner ecosystem in the Asia Pacific region, resulting in improved local implementation outcomes. Leveraged extensive client-facing experience to drive these strategic enhancements."), '1.down_to_earth.2': OptimizedText(optimized_text='Led strategic consulting and analytics initiatives for a leading global IT vendor, driving enhancements to their partner ecosystem in the Asia Pacific region and delivering improved local implementation outcomes.'), '1.down_to_earth.3': OptimizedText(optimized_text="Led the optimization of a major global IT vendor's service partner ecosystem in the Asia Pacific region, driving improved local implementation results."), '2.other.0': OptimizedText(optimized_text='Led strategic initiatives that optimized the service partner network of a leading global IT vendor in the Asia Pacific region, resulting in enhanced local implementation and improved client outcomes.')}), '0.responsibilities.1': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Led the strategic growth of a leading international services provider by identifying and scaling new engineering service opportunities in key emerging markets.'), '1.down_to_earth.1': OptimizedText(optimized_text="Led the evaluation and scaling of new engineering service opportunities in vital emerging markets to support a leading international services provider's growth strategy."), '1.down_to_earth.2': OptimizedText(optimized_text='Led strategic analysis to identify and capitalize on new engineering service opportunities in key emerging markets, driving growth for a leading international services provider.'), '1.down_to_earth.3': OptimizedText(optimized_text='Led the evaluation and scaling of new engineering service opportunities in vital emerging markets to support the growth strategy of a U.S.-based international services provider.'), '2.other.0': OptimizedText(optimized_text='Led the expansion strategy for a leading international services provider by identifying and scaling new engineering service opportunities in key emerging markets.')}), '0.responsibilities.2': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Authored impactful industry reports analyzing engineering services merger and acquisition trends, providing strategic insights into deal sizes, capability gaps, and emerging opportunities to inform decisions on IT and operational technology convergence.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led the co-authorship of an industry-recognized report on M&A trends in the engineering services sector, providing strategic insights into deal sizes, capability gaps, and emerging opportunities to guide decision-making on IT and operational technology convergence.'), '1.down_to_earth.2': OptimizedText(optimized_text='Authored insightful industry reports analyzing mergers and acquisitions in the engineering services sector. Provided comprehensive insights into deal sizes, capability gaps, and emerging opportunities, informing strategic decisions on IT and operational technology convergence.'), '1.down_to_earth.3': OptimizedText(optimized_text='Led the development of an industry-recognized report on mergers and acquisitions in the engineering services sector, delivering in-depth analysis of deal dynamics, capability gaps, and emerging opportunities at the intersection of IT and operational technology. Leveraged these insights to drive strategic planning and execution.'), '2.other.0': OptimizedText(optimized_text='Authored a comprehensive industry report on mergers and acquisitions in the engineering services sector, providing in-depth analysis of deal sizes, capability gaps, and emerging opportunities. The report informed strategic decisions regarding the convergence of information technology and operational technology.')}), '0.responsibilities.3': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Led efforts to enhance data quality and consistency through thorough financial analysis, standardized methodologies, and collaborative vendor engagements.'), '1.down_to_earth.1': OptimizedText(optimized_text='Drove the enhancement of data quality and consistency by integrating thorough financial analysis, standardizing methodologies, and conducting in-depth vendor engagements.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led consultative analytics engagements, leveraging financial analysis, standardized methodologies, and vendor collaborations to enhance data quality and consistency.'), '1.down_to_earth.3': OptimizedText(optimized_text='Transformed data quality and consistency by integrating financial analysis, standardizing methodologies, and engaging vendors.'), '2.other.0': OptimizedText(optimized_text='Drove impactful improvements in data quality and consistency by integrating thorough financial analysis, standardizing methodologies, and conducting in-depth vendor engagements.')}), '0.responsibilities.4': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Led the optimization of resource allocation through centralization of tasks, transitioning a significant portion of work to an offshore team in India, which resulted in increased efficiency and cost savings.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led offshore teams, optimized resource utilization, and enhanced operational efficiency. Extensive client-facing experience.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led the centralization of over 40% of tasks to an offshore team in India, optimizing resource allocation and driving significant improvements in team productivity and efficiency.'), '1.down_to_earth.3': OptimizedText(optimized_text='Centralized over 40% of tasks to an offshore team, optimizing resource allocation for enhanced efficiency.'), '2.other.0': OptimizedText(optimized_text='Centralized over 40% of tasks to an offshore team, optimizing resource allocation and supporting product development through client services feedback.')}), '0.responsibilities.5': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Automated and streamlined internal processes using Python, driving over 40% improvements in report preparation and data analysis.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led the development of custom Python tools that automated and optimized internal workflows, resulting in a 40% decrease in report generation and data analysis time. Leveraged extensive experience collaborating with clients to deliver tailored solutions.'), '1.down_to_earth.2': OptimizedText(optimized_text='Developed custom Python tools that streamlined and accelerated internal processes, delivering over 40% reduction in report preparation and data analysis time. Led consultative analytics engagements with clients.'), '1.down_to_earth.3': OptimizedText(optimized_text='Led the development of custom Python tools that streamlined and accelerated internal processes, driving significant improvements in efficiency across report preparation and data analysis.'), '2.other.0': OptimizedText(optimized_text='Leveraged advanced Python programming skills to create custom tools that streamlined internal operations, driving over 40% improvements in report preparation and data analysis efficiency.')}), '0.responsibilities.6': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Analytical leader with a proven track record of pioneering groundbreaking industry research and publications. Collaborated extensively with engineering services teams to develop market forecasts, analyze the impact of COVID-19, and identify emerging trends in mergers and acquisitions within the engineering services sector.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led the engineering services research team in pioneering the engineering services tracker, authored impactful publications on market forecasts, the impact of COVID-19 on services, and trends in mergers and acquisitions within the engineering services industry. Demonstrated substantial client-facing experience.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led the development of the engineering services tracker and authored influential publications on market forecasts, the impact of COVID-19 on services, and trends in M&A within the engineering services industry.'), '1.down_to_earth.3': OptimizedText(optimized_text='Pioneered industry-leading engineering services tracker and authored impactful publications on market forecasts, COVID-19 impact, and engineering services M&A trends. Demonstrated ability to effectively manage complex projects and deliver valuable insights to stakeholders.'), '2.other.0': OptimizedText(optimized_text='Led engineering services research team to pioneer engineering services tracker, authored influential publications on market forecasts, the impact of COVID-19 on services, and industry trends. Leveraged these insights to drive product development through client services feedback.')}), '0.responsibilities.7': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Seasoned data analyst who led global teams (US, Canada, Latin America, Europe, MEA, APAC) to maintain data integrity, achieve objectives, and share expertise. Adept at identifying and implementing innovative analytical tools and techniques.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led collaborative efforts with global analyst teams to ensure data quality, meet deadlines, share knowledge, and implement best practices and new tools.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led consultative analytics engagements with global analyst teams. Ensured data quality, met deadlines, and shared knowledge, best practices, and methodology to procure new tools.'), '1.down_to_earth.3': OptimizedText(optimized_text='Collaborated with global analyst teams to ensure data quality, meet deadlines, share knowledge, implement best practices, and procure new tools.'), '2.other.0': OptimizedText(optimized_text='Led international analyst teams to maintain data integrity, achieve project milestones, exchange expertise and best practices, and acquire innovative tools in support of product development initiatives.')}), '1.responsibilities.0': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Seasoned leader with a proven record in full P&L oversight, including budgeting, HR, vendor relations, partnerships, research, and business development. Drove significant growth, expanding program bookings by over 50%. Extensive experience in analytical roles, with a strong foundation in data-driven decision-making and strategic planning.'), '1.down_to_earth.1': OptimizedText(optimized_text='Skilled leader with a proven track record in full P&L management, overseeing budgeting, HR, vendor relations, partnerships, research, and business development. Drove significant growth, expanding program bookings by over 50%. Excelled in client-facing roles, delivering exceptional service and driving impactful business results.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led full P&L management, including budgeting, HR, vendor relationships, partnerships, research, and business development. Drove significant expansion, growing program bookings by over 50%.'), '1.down_to_earth.3': OptimizedText(optimized_text='Adept leader with a proven record of full P&L management, overseeing diverse responsibilities including budgeting, human resources, vendor relationships, strategic partnerships, research, and business development. Drove significant growth, expanding program bookings by over 50%, showcasing exceptional project management skills.'), '2.other.0': OptimizedText(optimized_text='Led full profit and loss responsibilities, excelling at budgeting, human resources, vendor relationships, partnerships, research, and business development. Drove significant growth, increasing program bookings by over 50%.')}), '1.responsibilities.1': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Directed and expanded diverse global research teams, leading a global team of over 15 professionals across the US, India, and Mexico.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led a global research team, leveraging diverse perspectives to drive innovation across multiple locations.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led and grew a diverse global research team spanning multiple locations.'), '1.down_to_earth.3': OptimizedText(optimized_text='Led and managed diverse, global research teams to drive successful project outcomes.'), '2.other.0': OptimizedText(optimized_text='Led and expanded a diverse, global research team across strategic locations, enabling informed product development through client feedback.')}), '1.responsibilities.2': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Managed cross-functional teams to deliver innovative software solutions.'), '1.down_to_earth.1': OptimizedText(optimized_text='Managed cross-functional teams to deliver innovative software solutions for clients.'), '1.down_to_earth.2': OptimizedText(optimized_text='Spearheaded cross-functional teams to ideate, build, and deploy innovative software solutions that addressed client needs. Led an external software development team to build and implement new tools.'), '1.down_to_earth.3': OptimizedText(optimized_text='Spearheaded the development and implementation of innovative software tools and solutions by leading an external software development team.'), '2.other.0': OptimizedText(optimized_text='Collaborated with an external software development team to build and implement new tools that enhanced product development efforts.')}), '1.responsibilities.3': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Experienced leader who championed cutting-edge technology initiatives, including machine learning, natural language processing, chatbots, ontologies, web scraping, APIs, and user experience design. Demonstrated strong analytical skills with a data-driven approach honed over 8+ years.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led impactful and innovative technology projects leveraging cutting-edge tools like machine learning, natural language processing, chatbots, ontologies, web scraping, APIs, and user experience design. Collaborated extensively with stakeholders to deliver tailored solutions that exceeded expectations.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led innovative technology initiatives that leveraged cutting-edge tools and techniques, including machine learning, natural language processing, chatbots, ontologies, web scraping, APIs, and user experience design.'), '1.down_to_earth.3': OptimizedText(optimized_text='Led the successful implementation of cutting-edge technology initiatives, including machine learning, natural language processing, chatbots, ontology development, web scraping, API integration, and user experience design. Extensive experience in managing complex technology projects and delivering innovative solutions that drive business growth.'), '2.other.0': OptimizedText(optimized_text='Spearheaded innovative product development and enhanced user experience by leveraging cutting-edge technologies, including machine learning, natural language processing, chatbots, ontologies, web scraping, and APIs.')}), '1.responsibilities.4': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Developed and led a team to create Python-based automated tools that streamlined report preparation, driving 40% time savings.'), '1.down_to_earth.1': OptimizedText(optimized_text='Drove development of automated Python tools, reducing report preparation time by 40%.'), '1.down_to_earth.2': OptimizedText(optimized_text='Developed and implemented automated Python tools, driving a 40% reduction in report preparation time.'), '1.down_to_earth.3': OptimizedText(optimized_text='Adept project manager who led teams in developing custom Python-based tools, enhancing reporting efficiency by 40%.'), '2.other.0': OptimizedText(optimized_text='Developed Python-based automated solutions that streamlined report generation and enhanced overall operational efficiency.')}), '1.responsibilities.5': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Pioneered cutting-edge technologies, leading the implementation of machine learning, chatbots, APIs, and ontology development. Excels in analytical roles, delivering impactful solutions that drive business success.'), '1.down_to_earth.1': OptimizedText(optimized_text='Pioneered cutting-edge technology projects, including deploying machine learning, chatbots, APIs, and ontology development. Delivered client-focused solutions with a proven track record.'), '1.down_to_earth.2': OptimizedText(optimized_text='Pioneering technology executive with a track record of leading transformative initiatives, including the implementation of cutting-edge solutions such as machine learning, chatbots, APIs, and ontology development. Excels at driving strategic analytics engagements and delivering impactful consultative services to clients.'), '1.down_to_earth.3': OptimizedText(optimized_text='Innovative technology leader who pioneered cutting-edge solutions including machine learning, chatbots, APIs, and ontology development. Skilled at driving high-impact projects and delivering measurable results.'), '2.other.0': OptimizedText(optimized_text='Led the implementation of machine learning, launch of a chatbot, development of APIs, and construction of an ontology to drive innovation with emerging technologies. Collaborated closely with clients to provide valuable feedback that informed and supported ongoing product development efforts.')}), '1.responsibilities.6': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Leveraged analytical expertise to advise services firms on deal pursuit and sales orchestration strategies, driving business development and sales execution.'), '1.down_to_earth.1': OptimizedText(optimized_text='Guided professional services firms in developing their deal pursuit and sales strategy.'), '1.down_to_earth.2': OptimizedText(optimized_text='Advised services firms on deal pursuit and sales orchestration strategies, providing strategic guidance and expertise.'), '1.down_to_earth.3': OptimizedText(optimized_text='Proven leader who advised services firms on developing and executing effective deal pursuit and sales strategies.'), '2.other.0': OptimizedText(optimized_text='Drove deal pursuit and sales orchestration strategies for professional services firms, leveraging client insights to inform product development.')}), '1.responsibilities.7': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Advised software vendors on partnership strategy, leveraging extensive analytical expertise to provide strategic advisory services.'), '1.down_to_earth.1': OptimizedText(optimized_text='Advised software vendors on strategic partnerships and delivered client-facing services.'), '1.down_to_earth.2': OptimizedText(optimized_text='Guided software vendors on partnership opportunities and drove consultative analytics engagements with clients.'), '1.down_to_earth.3': OptimizedText(optimized_text='Guided software vendors on partnership strategy, leveraging extensive experience to deliver strategic guidance and drive successful initiatives.'), '2.other.0': OptimizedText(optimized_text='Guided software vendors on strategic services partnerships, driving successful client engagements.')}), '1.responsibilities.8': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Prolific content creator who authored reports, blogs, presentations, and custom research. Leveraged industry insights to drive strategic decision-making, analyzing go-to-market strategies, deal signing, renewal analysis, buyer studies, and technology trends (cloud, AI, ML, digital, etc.). Demonstrated extensive experience in an analytical role.'), '1.down_to_earth.1': OptimizedText(optimized_text='Prolific author of reports, blogs, presentations, and custom research. Adept at analyzing go-to-market strategies, deal signing, renewal trends, buyer behavior, and the adoption of emerging technologies such as cloud, AI, ML, and digital solutions. Proven track record of providing valuable industry insights and trend analysis to clients.'), '1.down_to_earth.2': OptimizedText(optimized_text='Seasoned professional who authors reports, blogs, presentations, and custom research. Adept at analyzing go-to-market strategies, deal signing, renewal trends, buyer behavior, and technology adoptions (cloud, AI, ML, digital, etc.), as well as identifying industry trends. Skilled in delivering consultative analytics engagements to clients.'), '1.down_to_earth.3': OptimizedText(optimized_text='Authored impactful reports, blogs, presentations, and custom research on go-to-market strategy, deal analysis, buyer studies, and industry trends. Leveraged emerging technologies like cloud, AI, and ML to deliver actionable insights.'), '2.other.0': OptimizedText(optimized_text='Accomplished professional who has authored impactful reports, blogs, presentations, and custom research projects. Expertise spans developing go-to-market strategies, conducting deal and renewal analyses, executing buyer studies, and analyzing technology adoption trends (e.g., cloud, AI, ML, digital). Regularly provided valuable insights and feedback to drive product development efforts.')}), '2.responsibilities.0': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Led quarterly webinars analyzing industry trends in outsourcing and managed services.'), '1.down_to_earth.1': OptimizedText(optimized_text='Conducted quarterly webinars on outsourcing and managed services trends.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led quarterly webinar series to present industry insights and best practices to clients.'), '1.down_to_earth.3': OptimizedText(optimized_text='Seasoned professional who has delivered quarterly webinars showcasing industry insights and best practices on outsourcing and managed services signing trends.'), '2.other.0': OptimizedText(optimized_text='Conducted quarterly webinars to share industry insights and client feedback, driving product development initiatives.')}), '2.responsibilities.1': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Authored insightful pursuit strategy reports and industry trend research.'), '1.down_to_earth.1': OptimizedText(optimized_text='Authored compelling pursuit strategy reports and conducted in-depth industry trend research, leveraging a strong background in client-facing roles.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led consultative analytics engagements and produced industry-leading research reports.'), '1.down_to_earth.3': OptimizedText(optimized_text='Authored strategic planning and industry analysis reports, demonstrating a proven track record in project management.'), '2.other.0': OptimizedText(optimized_text='Drove product development by leveraging client feedback and industry research.')}), '3.responsibilities.0': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Designed and architected complex database systems, integrating diverse data sources and enhancing data quality through deduplication initiatives. Demonstrated a proven track record in analytical roles over multiple years.'), '1.down_to_earth.1': OptimizedText(optimized_text='Designed and architected a complex company database, integrating external and internal data sources to significantly reduce data duplication. Led client-facing initiatives throughout my career, demonstrating extensive experience in this area.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led the technical design and architecture of a large-scale enterprise database, integrating multiple data sources to enhance data integrity and streamline operations. Demonstrated extensive expertise in complex data architecture and integration, collaborating with stakeholders to deliver impactful solutions that reduced data duplication by 50%.'), '1.down_to_earth.3': OptimizedText(optimized_text='Designed and architected sophisticated database solutions, integrating diverse data sources to enhance integrity and optimize management processes. Reduced data duplication by 50% in a complex company database with 100K+ unique records, seamlessly integrating DnB API and internal databases.'), '2.other.0': OptimizedText(optimized_text='Led the design and implementation of a comprehensive database system, consolidating diverse data sources to enhance data quality and streamline operations.')}), '3.responsibilities.1': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Drove implementation of process automation solutions that boosted productivity across multiple industries.'), '1.down_to_earth.1': OptimizedText(optimized_text='Results-driven professional with a proven track record of leading successful client-facing projects, including managing the implementation of two Appian solutions that enhanced team productivity by 20 to 30%.'), '1.down_to_earth.2': OptimizedText(optimized_text='Driven leader who managed multiple Appian implementations that delivered substantial productivity gains for client teams.'), '1.down_to_earth.3': OptimizedText(optimized_text='Accomplished project manager who led two successful Appian implementations that drove 20-30% improvements in team productivity.'), '2.other.0': OptimizedText(optimized_text='Led two Appian implementations that drove 20-30% improvements in team productivity.')}), '3.responsibilities.2': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Managed daily content operations, including leading a team of offshore and nearshore content team, as well as other sales and research related activities. Seasoned professional with a proven track record in managing content operations, leading cross-functional teams, and supporting sales and research initiatives.'), '1.down_to_earth.1': OptimizedText(optimized_text='Directed daily content operations, leading a team of offshore and nearshore content specialists, and supporting sales and research initiatives. Demonstrated extensive client-facing expertise.'), '1.down_to_earth.2': OptimizedText(optimized_text='Managed daily content operations, leading a team of offshore and nearshore content professionals, and overseeing a range of sales and research-related initiatives.'), '1.down_to_earth.3': OptimizedText(optimized_text='Managed daily content operations, including leading a team of offshore and nearshore content professionals as well as sales and research-related initiatives. Skilled at project management and delivering high-quality results.'), '2.other.0': OptimizedText(optimized_text='Led a talented content team to drive daily operations, collaborating with sales and research to support strategic initiatives.')}), '3.responsibilities.3': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Analyzed and modeled financials for 20-30 IT vendors and over 1,500 services contracts, delivering critical insights to support vendor and contract management.'), '1.down_to_earth.1': OptimizedText(optimized_text="Analyzed and modeled financials for 20 to 30 IT vendors' diverse portfolios. Reviewed and negotiated over 1,500 service contracts with extensive client-facing experience."), '1.down_to_earth.2': OptimizedText(optimized_text='Analyzed and modeled financials for 20 to 30 IT vendors and over 1,500 service contracts to support consultative engagements.'), '1.down_to_earth.3': OptimizedText(optimized_text='Analyzed and modeled financial data for 20 to 30 IT vendors and over 1,500 services contracts, leveraging insights to drive successful contract management.'), '2.other.0': OptimizedText(optimized_text='Analyzed and modeled financial data for 20 to 30 IT vendors and over 1,500 service contracts to support product development efforts by gathering and incorporating client services feedback.')}), '3.responsibilities.4': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Experienced data integration project manager who led the successful delivery of mission-critical platform initiatives.'), '1.down_to_earth.1': OptimizedText(optimized_text='Led three major data integration projects critical to the successful launch of a new platform. Adept at delivering high-impact client-facing solutions.'), '1.down_to_earth.2': OptimizedText(optimized_text='Led multiple data integration projects critical to the successful launch of a new enterprise platform, demonstrating strong experience managing client-facing analytics engagements.'), '1.down_to_earth.3': OptimizedText(optimized_text='Led the successful delivery of three mission-critical data integration projects instrumental in launching the new platform.'), '2.other.0': OptimizedText(optimized_text='Led the successful implementation of three mission-critical data integration projects that were instrumental in launching a new platform.')}), '3.responsibilities.5': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Established research leader who advanced to Research Manager role, drawing on over 10 years of analytical expertise.'), '1.down_to_earth.1': OptimizedText(optimized_text='Results-driven Research Manager with a proven track record of client engagement. Led teams and implemented strategies to deliver insights and drive improvements.'), '1.down_to_earth.2': OptimizedText(optimized_text='Managed analytical initiatives and partnered with clients to deliver impactful solutions. Promoted to Research Manager in 2007.'), '1.down_to_earth.3': OptimizedText(optimized_text='Promoted research professional with a decade of career advancement, culminating in a Research Manager role.'), '2.other.0': OptimizedText(optimized_text='Managed research initiatives and oversaw product development, driving continuous improvements based on client feedback. Transitioned from Senior Research Analyst to Research Manager.')}), '4.responsibilities.0': ResponsibilityMatch(optimized_by_requirements={'1.down_to_earth.0': OptimizedText(optimized_text='Drove strategic product decisions through extensive market research and analysis.'), '1.down_to_earth.1': OptimizedText(optimized_text='Researched market dynamics to drive strategic product development and strengthen client relationships.'), '1.down_to_earth.2': OptimizedText(optimized_text='Conducted market research and data analysis to inform strategic product decisions and improve client engagements.'), '1.down_to_earth.3': OptimizedText(optimized_text='Leveraged market research insights to develop and execute effective product strategies.'), '2.other.0': OptimizedText(optimized_text='Guided product strategy and development efforts by leveraging market research insights.')})}"
print(responsibilities)    


# View Cross Tabs

## Full Code Sample

In [None]:
# * Full code that works (Grok + Meta)

import xlwings as xw
import pandas as pd

file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\openai_processed\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_crosstab.xlsx"
sht_name = "Crosstab"

# Connect to the workbook
wb = xw.Book(file_path)
sheet = wb.sheets[sht_name]

# Define the range of the table
table_range = sheet.range("A1").expand("table")  # Adjust the range as needed

# Initialize an empty list to store the underlined cells
underlined_cells = []

# Iterate over each row in the table range
for i in range(1, table_range.rows.count + 1):
    underlined_row = []
    for j in range(1, table_range.columns.count + 1):
        cell = table_range.api.Cells(i, j)
        if j <= 2:  # * Keep the first 2 column as is
            underlined_row.append(cell.Value)
        elif cell.Font.Underline != -4142:  # -4142 represents no underline
            underlined_row.append(cell.Value)
        else:
            underlined_row.append(None)  # or "empty"
    underlined_cells.append(underlined_row)


# Convert the list to a DataFrame and set the first column as the index
df = pd.DataFrame(underlined_cells).set_index(0)

# Delete all the None cells
max_length = df.apply(lambda row: row.dropna().shape[0], axis=1).max()
new_df = pd.DataFrame(index=df.index)

# Delete all the None cells

"""
What It Does to a Row:
Starts with a row that might have missing values (like [1, NaN, 3]).
"Squeezes" it by throwing out the NaNs, leaving only the real values (like [1, 3]).
Picks the i-th value from this squeezed list (e.g., if i = 0, it picks 1; if i = 1, it picks 3).
If i is too big (e.g., i = 2 but there are only 2 values), it returns None (or np.nan).
"""


def get_ith_non_nan(row, i):
    non_nan = row.dropna()
    return non_nan.iloc[i] if i < len(non_nan) else np.nan


for i in range(max_length):
    new_df[i] = df.apply(get_ith_non_nan, args=(i,), axis=1)


# Delete the first 3 rows (by slicing the DataFrame)
new_df = new_df.iloc[2:]

# Rename index and columns separately
new_df.index.name = "responsibility_key"  # Rename the index
new_df.columns = [
    "original_responsibility",
    "edited_responsibility_1",
    "edited_responsibility_2",
]  # Rename the 3 columns

# Print the resulting DataFrame
pd.set_option("display.max_columns", None)
display(new_df.head(5))

Unnamed: 0_level_0,original_responsibility,edited_responsibility_1,edited_responsibility_2
responsibility_key,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0.responsibilities.0,Conversational AI & NLP: Engineered the core d...,"Engineered and executed AI-first strategies, f...",
0.responsibilities.1,Thought Generation & Processing: Designed pipe...,Thought Generation & Processing: Developed adv...,
0.responsibilities.2,Automated Evaluation & Adaptation: Developed a...,Developed and deployed an AI tool to monitor a...,Developed an AI-based system to assess user in...
0.responsibilities.3,State & Topic Management: Built a stateful tra...,Built a stateful AI system to optimize convers...,Developed and executed a stateful tracking sys...
0.responsibilities.4,Asynchronous AI Integration: Optimized API per...,Asynchronous AI Integration: Led enhancements ...,


## Functions

In [8]:
import xlwings as xw
import pandas as pd
import numpy as np
from docx import Document


def load_excel_sheet(file_path, sheet_name):
    """Load an Excel sheet using xlwings."""
    wb = xw.Book(file_path)
    sheet = wb.sheets[sheet_name]
    return wb, sheet


def get_underlined_cells(sheet):
    """Extract values from underlined cells while keeping the first two columns as is."""
    table_range = sheet.range("A1").expand("table")
    underlined_cells = []

    for i in range(1, table_range.rows.count + 1):
        underlined_row = []
        for j in range(1, table_range.columns.count + 1):
            cell = table_range.api.Cells(i, j)
            if j <= 2:  # Keep the first 2 columns as is
                underlined_row.append(cell.Value)
            elif cell.Font.Underline != -4142:  # -4142 represents no underline
                underlined_row.append(cell.Value)
            else:
                underlined_row.append(None)  # or "empty"
        underlined_cells.append(underlined_row)

    return pd.DataFrame(underlined_cells).set_index(0)


def clean_and_remove_rows(df):
    """Remove None values and reorganize the dataframe."""
    max_length = df.apply(lambda row: row.dropna().shape[0], axis=1).max()
    new_df = pd.DataFrame(index=df.index)

    for i in range(max_length):
        new_df[i] = df.apply(lambda row: get_ith_non_nan(row, i), axis=1)

    return new_df.iloc[2:]  # Remove the first two rows


def get_ith_non_nan(row, i):
    """Retrieve the ith non-NaN value from a row."""
    non_nan = row.dropna()
    return non_nan.iloc[i] if i < len(non_nan) else np.nan


def rename_dataframe_columns(df):
    """Rename index and columns dynamically based on the number of columns."""
    df.index.name = "responsibility_key"
    column_names = ["original_responsibility"] + [
        f"edited_responsibility_{i}" for i in range(1, df.shape[1])
    ]
    df.columns = column_names[: df.shape[1]]
    return df


def json_to_docx(json_data: dict, output_file: Path | str):
    doc = Document()

    if isinstance(json_data, str):
        data = json.loads(json_data)
    else:
        data = json_data

    for main_key, sub_dict in data.items():
        for key, value in sub_dict.items():
            doc.add_paragraph(f"{key}:")
            doc.add_paragraph(value)
            doc.add_paragraph("")  # Add blank line

    if isinstance(output_file, Path):
        doc.save(str(output_file))
    else:
        doc.save(output_file)


# def main():
#     file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\openai_processed\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_crosstab.xlsx"
#     sheet_name = "Crosstab"

#     wb, sheet = load_excel_sheet(file_path, sheet_name)

#     try:
#         df = get_underlined_cells(sheet)
#         cleaned_df = clean_and_remove_rows(df)
#         final_df = rename_dataframe_columns(cleaned_df)

#         pd.set_option("display.max_columns", None)
#         display(final_df.head(5))

#     finally:
#         wb.close()


# if __name__ == "__main__":
#     main()

## Check Different Resume Files Per Posting

### Accenture Job Posting

#### Dataframe format

In [None]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\openai_processed\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_crosstab.xlsx"
sheet_name = "Crosstab"


wb, sheet = load_excel_sheet(file_path, sheet_name)
df = get_underlined_cells(sheet)
cleaned_df = clean_and_remove_rows(df)
final_df = rename_dataframe_columns(cleaned_df)


pd.set_option("display.max_columns", None)

display(final_df)

wb.close()

Unnamed: 0_level_0,original_responsibility,edited_responsibility_1
responsibility_key,Unnamed: 1_level_1,Unnamed: 2_level_1
0.responsibilities.0,Conversational AI & NLP: Engineered the core d...,"Engineered and executed AI-first strategies, f..."
0.responsibilities.1,Thought Generation & Processing: Designed pipe...,Thought Generation & Processing: Developed adv...
0.responsibilities.2,Automated Evaluation & Adaptation: Developed a...,Developed an AI-based system to assess user in...
0.responsibilities.3,State & Topic Management: Built a stateful tra...,Built a stateful AI system to optimize convers...
0.responsibilities.4,Asynchronous AI Integration: Optimized API per...,Asynchronous AI Integration: Led enhancements ...
1.responsibilities.0,Provided strategic insights to a major global ...,Optimized the service partner ecosystem for a ...
1.responsibilities.1,Assisted a U.S.-based international services p...,Assisted a U.S.-based international services p...
1.responsibilities.2,Co-authored an industry-recognized report on M...,Co-authored a seminal report on M&A in the eng...
1.responsibilities.3,Enhanced data quality and consistency by integ...,Enhanced data quality and consistency through ...
1.responsibilities.4,Achieved over 40% centralization of tasks to a...,Optimized resource allocation and enhanced tea...


#### JSON format: Original

In [None]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\openai_processed\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_crosstab.xlsx"
sheet_name = "Crosstab"


wb, sheet = load_excel_sheet(file_path, sheet_name)
df = get_underlined_cells(sheet)
cleaned_df = clean_and_remove_rows(df)
final_df = rename_dataframe_columns(cleaned_df)

final_df.drop("edited_responsibility_1", axis=1, inplace=True)

# print(final_df)
json_string = final_df.to_json()
json_data = json.loads(json_string)  # Convert string to dictionary

print("JSON String:")

# Format JSON for readability
formatted_json = format_json_readable(json_data, wrap_width=120)

# Display with Markdown to prevent horizontal scrolling
display(Markdown(f"```json\n{formatted_json}\n```"))

wb.close()

JSON String:


```json
{
  "original_responsibility": {
    "0.responsibilities.0": "Conversational AI & NLP: Engineered the core dialogue system to generate dynamic questions,
evaluate responses, and maintain conversation flow.",
    "0.responsibilities.1": "Thought Generation & Processing: Designed pipelines for hierarchical idea expansion,
clustering, and ranking AI-generated thoughts.",
    "0.responsibilities.2": "Automated Evaluation & Adaptation: Developed an AI-based evaluator to assess user
responses, generate adaptive follow-ups, and track discussion depth.",
    "0.responsibilities.3": "State & Topic Management: Built a stateful tracking system to guide conversations, handle
topic transitions, and prevent redundancy.",
    "0.responsibilities.4": "Asynchronous AI Integration: Optimized API performance for OpenAI GPT-4 & Anthropic Claude
using AsyncIO, improving efficiency and scalability.",
    "1.responsibilities.0": "Provided strategic insights to a major global IT vendor, optimizing their service partner
ecosystem in Asia Pacific for improved local implementation outcomes.",
    "1.responsibilities.1": "Assisted a U.S.-based international services provider in its growth strategy by precisely
evaluating and scaling new engineering service opportunities in vital emerging markets.",
    "1.responsibilities.2": "Co-authored an industry-recognized report on M&A in the engineering services sector,
offering deep dives into deal sizes, capability gaps, and emerging opportunities, influencing strategic decisions in IT
and operational technology convergence.",
    "1.responsibilities.3": "Enhanced data quality and consistency by integrating thorough financial analysis,
standardizing methodologies, and conducting in-depth vendor engagements.",
    "1.responsibilities.4": "Achieved over 40% centralization of tasks to an offshore team in India, optimizing resource
allocation.",
    "1.responsibilities.5": "Developed Python tools to automate and accelerate internal processes, cutting report
preparation and data analysis time by over 40%.",
    "1.responsibilities.6": "Collaborated with the engineering services research team to pioneer the engineering
services tracker, authored influential publications on market forecasts, the impact of COVID-19 on services, and trends
in M&A within the engineering services industry.",
    "1.responsibilities.7": "Collaborated with analyst teams across the globe (US, Canada, Latin America, Europe, MEA,
APAC) to ensure data quality, meeting deadlines, sharing knowledge/best practices/methodology, and procure new tools.",
    "2.responsibilities.0": "Full P&L management including budgeting, HR, vendors, partnerships, research, and business
development: expanded the programs' bookings by more than 50%.",
    "2.responsibilities.1": "Expanded and managed a global research team of more than 15 (US, India, and Mexico).",
    "2.responsibilities.2": "Led an external software development team to build and implement new tools.",
    "2.responsibilities.3": "Championed new technology projects using ML, NLP, chatbot, ontology, web-scraping, API, UX
(User Experience). First in IDC to implement ML (machine learning). First to launch a chatbot. First to implement API
(application programming interface). First to build an ontology.",
    "2.responsibilities.4": "Led a team to develop automated tools in Python, reducing report preparation time by 40%.",
    "2.responsibilities.5": "Advised services firms on deal pursuit and sales orchestration strategies.",
    "2.responsibilities.6": "Advised software vendors on services partnership strategy.",
    "2.responsibilities.7": "Authored reports, blogs, presentations, & custom researches in go-to-market strategy, deal
signing analysis, renewal analysis, buyer studies, technology adoptions (cloud, AI, ML, digital, etc.), and industry
trend analysis.",
    "3.responsibilities.0": "Delivered quarterly webinars on outsourcing/managed services signing trends.",
    "3.responsibilities.1": "Authored 7 to 10 pursuit strategy reports and industry trend research documents.",
    "4.responsibilities.0": "Designed and architected a complex company database with 100K+ unique records and
integrating DnB API and internal databases, which reduced data duplication by 50%.",
    "4.responsibilities.1": "Oversaw two Appian implementations that increased team productivity by 20 to 30%.",
    "4.responsibilities.2": "Managed daily content operations, including leading a team of offshore and nearshore
content team, as well as other sales and research related activities.",
    "4.responsibilities.3": "Analyzed and Modeled 20 to 30 IT vendors' financials and over 1,500 services contracts.",
    "4.responsibilities.4": "Managed three major data integration projects critical to the launch of the new platform.",
    "4.responsibilities.5": "Promoted from Senior Research Analyst to Research Manager in February 2007.",
    "5.responsibilities.0": "Researched market dynamics in the Web Services Security market to support product
strategy."
  }
}
```

#### JSON format: edited by LLM

In [4]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\openai_processed\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_crosstab.xlsx"
sheet_name = "Crosstab"

wb, sheet = load_excel_sheet(file_path, sheet_name)
df = get_underlined_cells(sheet)
cleaned_df = clean_and_remove_rows(df)
final_df = rename_dataframe_columns(cleaned_df)

final_df.drop("original_responsibility", axis=1, inplace=True)
# print(final_df)

json_string = final_df.to_json()
json_data = json.loads(json_string)  # Convert string to dictionary

print("JSON String:")
# Format JSON for readability
formatted_json = format_json_readable(json_data, wrap_width=120)

# Display with Markdown to prevent horizontal scrolling
display(Markdown(f"```json\n{formatted_json}\n```"))

wb.close()

JSON String:


```json
{
  "edited_responsibility_1": {
    "0.responsibilities.0": "Engineered and executed AI-first strategies, focusing on the development and engineering of
conversational AI systems to enhance user engagement and generate commercialization opportunities.",
    "0.responsibilities.1": "Thought Generation & Processing: Developed advanced algorithms for structuring and
prioritizing AI-generated ideas, demonstrating expertise in environments akin to consulting, professional services, or
Big Four firms.",
    "0.responsibilities.2": "Developed an AI-based system to assess user interactions and adaptively generate follow-up
content, ensuring alignment with responsible AI practices as outlined in the strategic vision. This system also tracks
engagement levels to support an innovative operating model, fostering a culture of innovation and experimentation.",
    "0.responsibilities.3": "Built a stateful AI system to optimize conversation flows and strategically manage
dialogues, supporting the creation of strategies for AI-first products and exploring commercialization opportunities.",
    "0.responsibilities.4": "Asynchronous AI Integration: Led enhancements in API performance for leading AI models such
as OpenAI GPT-4 and Anthropic Claude, focusing on boosting efficiency and scalability. This strategic initiative was
crucial in reshaping the vision and opening pathways for data and AI-driven business transformation.",
    "1.responsibilities.0": "Optimized the service partner ecosystem for a leading global IT vendor in the Asia Pacific
region by providing strategic insights, including Data & AI, that enhanced client relationships and established
credibility as a trusted advisor, leading to improved implementation outcomes.",
    "1.responsibilities.1": "Assisted a U.S.-based international services provider by shaping strategic visions and
creating scalable opportunities, particularly in new data and AI-driven engineering services in key emerging markets.",
    "1.responsibilities.2": "Co-authored a seminal report on M&A in the engineering services sector, analyzing deal
dimensions and capability gaps. This work influenced strategic visions and fostered new opportunities, particularly by
driving convergence between IT and operational technology to reinvent businesses through data and AI.",
    "1.responsibilities.3": "Enhanced data quality and consistency through comprehensive financial analysis and the
standardization of methodologies, collaborating extensively with vendors to shape vision and unlock opportunities for
data & AI-led business reinvention.",
    "1.responsibilities.4": "Optimized resource allocation and enhanced team performance by centralizing over 40% of
tasks to an offshore team in India, effectively mentoring and developing high-performing teams.",
    "1.responsibilities.5": null,
    "1.responsibilities.6": "Led the engineering services research team in pioneering the engineering services tracker,
authored key publications on market forecasts, the impact of COVID-19 on the industry, and M&A trends, significantly
shaping strategic insights and creating opportunities for data & AI-led business reinvention.",
    "1.responsibilities.7": "Collaborated with analyst teams around the world, including the US, Canada, Latin America,
Europe, MEA, and APAC, in a global professional services environment. Enhanced data quality, ensured timely project
completions, and integrated new tools and best practices typical of Big Four and consulting firm settings.",
    "2.responsibilities.0": "Managed full P&L responsibilities encompassing budgeting, human resources, vendor
relations, and strategic partnerships, coupled with a focus on research and business development, which collectively led
to a substantial increase in program bookings.",
    "2.responsibilities.1": "Expanded and managed a multinational research team across the US, India, and Mexico,
focusing on spearheading the strategic development of AI-first products and their commercialization strategies.",
    "2.responsibilities.2": "Led and mentored high-performing external software development teams to innovate and
successfully implement new tools.",
    "2.responsibilities.3": "Championed AI-first product strategies, pioneering and commercializing cutting-edge
technologies including machine learning, natural language processing, chatbots, ontologies, and APIs. Spearheaded the
initial rollout of user-centric AI solutions that enhanced web interfaces and user experiences, opening significant
avenues for commercialization.",
    "2.responsibilities.4": "Led a team in developing Python-based automation tools, reducing report preparation time by
40%. Boasts extensive experience in business development, client relationship management, and marketing, with over five
years dedicated to these areas.",
    "2.responsibilities.5": "Advised service firms on integrating Responsible AI into their strategic vision and
roadmap, developed approaches for leveraging ecosystem partners in deal pursuits, and defined sales orchestration
processes to enhance a culture of innovation and experimentation.",
    "2.responsibilities.6": "Advised on strategic plans for AI-first software products, focusing on developing
partnership strategies and exploiting commercialization opportunities.",
    "2.responsibilities.7": "Authored detailed reports, blogs, presentations, and specialized research in go-to-market
strategies, deal signing, renewal processes, and buyer behavior, focusing on the in-depth analysis of technology
adoption trends such as cloud computing, artificial intelligence, machine learning, and digital transformations. This
expertise has played a crucial role in strengthening client relationships and establishing a reputation as a trusted
advisor, especially in guiding the integration of Data & AI into business processes and industry trends.",
    "3.responsibilities.0": "Delivered quarterly webinars on outsourcing and managed services, emphasizing strategic
planning and the utilization of partnerships to promote a culture of innovation and experimentation, in alignment with
Responsible AI and ecosystem collaboration trends.",
    "3.responsibilities.1": "Authored multiple strategy reports and conducted industry trend analyses, significantly
enhancing client relationships and establishing credibility as a trusted advisor on incorporating Data & AI into
business processes.",
    "4.responsibilities.0": "Designed and architected a sophisticated company database encompassing over 100,000 unique
records, integrating external APIs like Salesforce and internal systems, effectively reducing data duplication by 50%
and enhancing data integrity, demonstrating adeptness in CRM tools for efficient tracking and analysis of client
interactions.",
    "4.responsibilities.1": "Oversaw multiple Appian implementations at a Big Four firm, enhancing team productivity
significantly and demonstrating extensive consulting experience.",
    "4.responsibilities.2": "Managed daily content operations and led a diverse team, significantly enhancing
performance in sales and research, and demonstrating a strong ability to develop high-performing teams.",
    "4.responsibilities.3": "Analyzed and modeled the financials of numerous IT vendors, managed extensive portfolios of
service contracts, and leveraged this expertise to build strong client relationships, serving as a trusted advisor on
integrating Data & AI into business processes.",
    "4.responsibilities.4": "Led several major data integration projects, essential for the successful launch of a new
platform, and successfully mentored and developed high-performing teams.",
    "4.responsibilities.5": null,
    "5.responsibilities.0": "Explored market trends in the Web Services Security sector to bolster product strategy
through collaborative leadership and a growth-oriented approach."
  }
}
```

#### Final trim & edit by Grok

In [5]:
file_path = r"C:\github\job_bot\data\Accenture_Enterprise_AI_Value_Strategy_Senior_Manager_grok_final_edit.json"

display_json_pretty(file_path)

```json
{
  "revised_responsibility_1": {
    "0.responsibilities.0": "Engineered conversational AI systems to boost engagement and drive commercialization
opportunities.",
    "0.responsibilities.1": "Developed advanced algorithms to structure and prioritize AI-generated ideas for strategic
applications.",
    "0.responsibilities.2": "Built AI system to evaluate user interactions, adapt content, and measure engagement
depth.",
    "0.responsibilities.3": "Created stateful AI framework to optimize conversation flows and guide strategic
dialogues.",
    "0.responsibilities.4": "Enhanced API performance for OpenAI GPT-4 and Anthropic Claude with AsyncIO for
scalability.",
    "1.responsibilities.0": "Optimized IT vendor’s Asia Pacific ecosystem with data and AI-driven strategic insights.",
    "1.responsibilities.1": "Shaped scalable data and AI engineering service strategies for a U.S. provider in emerging
markets.",
    "1.responsibilities.2": "Co-authored M&A report on engineering services, driving IT-OT convergence strategies.",
    "1.responsibilities.3": "Improved data quality with financial analysis and vendor collaboration for AI-led
outcomes.",
    "1.responsibilities.4": "Centralized 40% of tasks to offshore India team, enhancing resource efficiency.",
    "1.responsibilities.5": null,
    "1.responsibilities.6": "Led engineering services tracker, authored influential market forecasts and M&A trend
analyses.",
    "1.responsibilities.7": "Collaborated globally to ensure data quality, meet deadlines, and deploy innovative
tools.",
    "2.responsibilities.0": "Managed P&L across budgeting and partnerships, growing bookings by over 50%.",
    "2.responsibilities.1": "Expanded and led global research team in US, India, and Mexico for AI-first initiatives.",
    "2.responsibilities.2": "Directed external software team to develop and deploy innovative tools.",
    "2.responsibilities.3": "Pioneered ML, NLP, and chatbot technologies, launching AI-first solutions.",
    "2.responsibilities.4": "Led Python tool development, cutting report preparation time by 40%.",
    "2.responsibilities.5": "Advised firms on Responsible AI integration and sales orchestration strategies.",
    "2.responsibilities.6": "Guided software vendors on AI-first partnership and commercialization strategies.",
    "2.responsibilities.7": "Authored go-to-market reports on AI, cloud, and digital trends for strategic impact.",
    "3.responsibilities.0": "Delivered webinars on outsourcing trends, emphasizing innovation and partnerships.",
    "3.responsibilities.1": "Authored strategy reports and trend analyses to shape data and AI insights.",
    "4.responsibilities.0": "Architected database with 100K+ records, cutting duplication by 50% via API integration.",
    "4.responsibilities.1": "Oversaw Appian implementations, increasing team productivity by 20-30%.",
    "4.responsibilities.2": "Managed content operations, leading offshore teams for sales and research success.",
    "4.responsibilities.3": "Analyzed financials of IT vendors and 1,500+ contracts for strategic insights.",
    "4.responsibilities.4": "Led critical data integration projects for new platform deployment.",
    "4.responsibilities.5": null,
    "5.responsibilities.0": "Researched Web Services Security trends to inform product strategy."
  }
}
```

### Blend Job Posting

#### Dataframe format

In [10]:
company = "Blend"

file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\reviewed_matchings\Blend_Director__AI_Strategy_crosstab_reviewed.xlsx"
sheet_name = "Crosstab"

wb, sheet = load_excel_sheet(file_path, sheet_name)
df = get_underlined_cells(sheet)


cleaned_df = clean_and_remove_rows(df)
final_df = rename_dataframe_columns(cleaned_df)

print(company)

pd.set_option("display.max_columns", None)
display(final_df)

wb.close()

Blend


Unnamed: 0_level_0,original_responsibility,edited_responsibility_1
responsibility_key,Unnamed: 1_level_1,Unnamed: 2_level_1
0.responsibilities.0,Conversational AI & NLP: Engineered the core d...,Engineered the core dialogue system to generat...
0.responsibilities.1,Thought Generation & Processing: Designed pipe...,Led the design and deployment of AI-powered so...
0.responsibilities.2,Automated Evaluation & Adaptation: Developed a...,Led the development of AI-powered systems that...
0.responsibilities.3,State & Topic Management: Built a stateful tra...,Led the design and implementation of a statefu...
0.responsibilities.4,Asynchronous AI Integration: Optimized API per...,Led the optimization of high-performance AI sy...
1.responsibilities.0,Provided strategic insights to a major global ...,Led strategic initiatives to optimize the serv...
1.responsibilities.1,Assisted a U.S.-based international services p...,
1.responsibilities.2,Co-authored an industry-recognized report on M...,
1.responsibilities.3,Enhanced data quality and consistency by integ...,Led comprehensive financial analysis and proce...
1.responsibilities.4,Achieved over 40% centralization of tasks to a...,"Led centralized task management initiatives, a..."


#### JSON format: Original

In [27]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\reviewed_matchings\Blend_Director__AI_Strategy_crosstab_reviewed.xlsx"
sheet_name = "Crosstab"

wb, sheet = load_excel_sheet(file_path, sheet_name)
df = get_underlined_cells(sheet)
cleaned_df = clean_and_remove_rows(df)
final_df = rename_dataframe_columns(cleaned_df)

final_df.drop("edited_responsibility_1", axis=1, inplace=True)
# print(final_df)

json_string = final_df.to_json()
json_data = json.loads(json_string)  # Convert string to dictionary

print("JSON: original")
display_json_pretty(json_data)

wb.close()

JSON: original


```json
{
  "original_responsibility": {
    "0.responsibilities.0": "Conversational AI & NLP: Engineered the core dialogue system to generate dynamic questions,
evaluate responses, and maintain conversation flow.",
    "0.responsibilities.1": "Thought Generation & Processing: Designed pipelines for hierarchical idea expansion,
clustering, and ranking AI-generated thoughts.",
    "0.responsibilities.2": "Automated Evaluation & Adaptation: Developed an AI-based evaluator to assess user
responses, generate adaptive follow-ups, and track discussion depth.",
    "0.responsibilities.3": "State & Topic Management: Built a stateful tracking system to guide conversations, handle
topic transitions, and prevent redundancy.",
    "0.responsibilities.4": "Asynchronous AI Integration: Optimized API performance for OpenAI GPT-4 & Anthropic Claude
using AsyncIO, improving efficiency and scalability.",
    "1.responsibilities.0": "Provided strategic insights to a major global IT vendor, optimizing their service partner
ecosystem in Asia Pacific for improved local implementation outcomes.",
    "1.responsibilities.1": "Assisted a U.S.-based international services provider in its growth strategy by precisely
evaluating and scaling new engineering service opportunities in vital emerging markets.",
    "1.responsibilities.2": "Co-authored an industry-recognized report on M&A in the engineering services sector,
offering deep dives into deal sizes, capability gaps, and emerging opportunities, influencing strategic decisions in IT
and operational technology convergence.",
    "1.responsibilities.3": "Enhanced data quality and consistency by integrating thorough financial analysis,
standardizing methodologies, and conducting in-depth vendor engagements.",
    "1.responsibilities.4": "Achieved over 40% centralization of tasks to an offshore team in India, optimizing resource
allocation.",
    "1.responsibilities.5": "Developed Python tools to automate and accelerate internal processes, cutting report
preparation and data analysis time by over 40%.",
    "1.responsibilities.6": "Collaborated with the engineering services research team to pioneer the engineering
services tracker, authored influential publications on market forecasts, the impact of COVID-19 on services, and trends
in M&A within the engineering services industry.",
    "1.responsibilities.7": "Collaborated with analyst teams across the globe (US, Canada, Latin America, Europe, MEA,
APAC) to ensure data quality, meeting deadlines, sharing knowledge/best practices/methodology, and procure new tools.",
    "2.responsibilities.0": "Full P&L management including budgeting, HR, vendors, partnerships, research, and business
development: expanded the programs' bookings by more than 50%.",
    "2.responsibilities.1": "Expanded and managed a global research team of more than 15 (US, India, and Mexico).",
    "2.responsibilities.2": "Led an external software development team to build and implement new tools.",
    "2.responsibilities.3": "Championed new technology projects using ML, NLP, chatbot, ontology, web-scraping, API, UX
(User Experience). First in IDC to implement ML (machine learning). First to launch a chatbot. First to implement API
(application programming interface). First to build an ontology.",
    "2.responsibilities.4": "Led a team to develop automated tools in Python, reducing report preparation time by 40%.",
    "2.responsibilities.5": "Advised services firms on deal pursuit and sales orchestration strategies.",
    "2.responsibilities.6": "Advised software vendors on services partnership strategy.",
    "2.responsibilities.7": "Authored reports, blogs, presentations, & custom researches in go-to-market strategy, deal
signing analysis, renewal analysis, buyer studies, technology adoptions (cloud, AI, ML, digital, etc.), and industry
trend analysis.",
    "3.responsibilities.0": "Delivered quarterly webinars on outsourcing/managed services signing trends.",
    "3.responsibilities.1": "Authored 7 to 10 pursuit strategy reports and industry trend research documents.",
    "4.responsibilities.0": "Designed and architected a complex company database with 100K+ unique records and
integrating DnB API and internal databases, which reduced data duplication by 50%.",
    "4.responsibilities.1": "Oversaw two Appian implementations that increased team productivity by 20 to 30%.",
    "4.responsibilities.2": "Managed daily content operations, including leading a team of offshore and nearshore
content team, as well as other sales and research related activities.",
    "4.responsibilities.3": "Analyzed and Modeled 20 to 30 IT vendors' financials and over 1,500 services contracts.",
    "4.responsibilities.4": "Managed three major data integration projects critical to the launch of the new platform.",
    "4.responsibilities.5": "Promoted from Senior Research Analyst to Research Manager in February 2007.",
    "5.responsibilities.0": "Researched market dynamics in the Web Services Security market to support product
strategy."
  }
}
```

#### JSON format: edited by LLM

In [32]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching\reviewed_matchings\Blend_Director__AI_Strategy_crosstab_reviewed.xlsx"
sheet_name = "Crosstab"

wb, sheet = load_excel_sheet(file_path, sheet_name)

df = get_underlined_cells(sheet)
cleaned_df = clean_and_remove_rows(df)

final_df = rename_dataframe_columns(cleaned_df)
final_df.drop("original_responsibility", axis=1, inplace=True)


# print(final_df)
json_string = final_df.to_json()
# json_data = json.loads(json_string)  # Convert string to dictionary

print("JSON String:")

# # Format JSON for readability
# formatted_json = format_json_readable(json_data, wrap_width=120)

# # Display with Markdown to prevent horizontal scrolling
# display(Markdown(f"```json\n{formatted_json}\n```"))


display_json_pretty(json_string)
wb.close()

JSON String:


```json
{
  "edited_responsibility_1": {
    "0.responsibilities.0": "Engineered the core dialogue system to generate dynamic questions, evaluate responses, and
maintain conversation flow, driving substantial business value and revenue growth through strategic AI initiatives.",
    "0.responsibilities.1": "Led the design and deployment of AI-powered solutions to drive ideation, thought
generation, and business innovation. Developed hierarchical idea expansion, clustering, and ranking pipelines to unlock
new insights and opportunities.",
    "0.responsibilities.2": "Led the development of AI-powered systems that analyze user feedback, generate dynamic
responses, and monitor conversation depth. Collaborative leadership approach focused on continuous improvement and team
development.",
    "0.responsibilities.3": "Led the design and implementation of a stateful tracking system to facilitate seamless
topic management and enhance conversational efficiency. Leveraged analytical and problem-solving skills to guide
discussions, enable smooth transitions, and ensure overall conversational effectiveness.",
    "0.responsibilities.4": "Led the optimization of high-performance AI systems, leveraging cutting-edge technologies
like AsyncIO to enhance the scalability and efficiency of large language models such as OpenAI GPT-4 and Anthropic
Claude. Brings a collaborative leadership approach and a growth-oriented mindset to drive continuous improvement and
innovation.",
    "1.responsibilities.0": "Led strategic initiatives to optimize the service partner ecosystem of a leading global IT
vendor in the Asia Pacific region, driving improved local implementation outcomes and demonstrating strong analytical
and problem-solving capabilities.",
    "1.responsibilities.1": null,
    "1.responsibilities.2": null,
    "1.responsibilities.3": "Led comprehensive financial analysis and process standardization initiatives, driving data
integrity and uniformity through extensive vendor collaborations. Demonstrated a collaborative leadership style focused
on continuous improvement and growth.",
    "1.responsibilities.4": "Led centralized task management initiatives, achieving over 40% offshoring to leverage
global talent and drive operational efficiency. Cultivated a collaborative, growth-oriented team environment.",
    "1.responsibilities.5": null,
    "1.responsibilities.6": "Pioneered the engineering services tracker as part of the research team. Authored
influential publications on market forecasts, the impact of COVID-19 on services, and trends in M&A within the
engineering services industry. Demonstrated strong communication and collaboration skills, adept at working effectively
with cross-functional teams.",
    "1.responsibilities.7": "Led global analyst teams to ensure data integrity, meet critical deadlines, share
expertise, and procure innovative tools. Drove successful implementation of AI solutions in a business environment.",
    "2.responsibilities.0": "Seasoned professional who led full P&L management, driving significant growth by expanding
program bookings over 50%. Demonstrated expertise in budgeting, HR, vendor relations, partnerships, research, and
business development. Possesses a deep understanding of AI technologies and their business applications.",
    "2.responsibilities.1": "Led and grew a multicultural research team with a collaborative, growth-focused approach.",
    "2.responsibilities.2": "Led external software development teams to conceptualize and implement cutting-edge
solutions, leveraging strong communication and collaboration abilities to drive cross-functional alignment.",
    "2.responsibilities.3": "Innovative technology leader who championed transformative solutions leveraging cutting-
edge tools like machine learning, natural language processing, chatbots, ontologies, web scraping, and APIs. Drove
substantial business value and revenue growth as the first in the organization to implement these advanced technologies,
including launching a chatbot and building an ontology.",
    "2.responsibilities.4": "Led a team in developing automated Python tools, driving a 40% reduction in report
preparation time. Demonstrated a collaborative approach focused on continuous improvement and a growth-oriented
mindset.",
    "2.responsibilities.5": "Advised services firms on deal pursuit and sales orchestration strategies, leveraging
excellent communication and interpersonal skills to collaborate effectively with cross-functional teams.",
    "2.responsibilities.6": "Leveraged strong communication and interpersonal skills to develop strategic software
partnerships by collaborating effectively across diverse teams.",
    "2.responsibilities.7": "Led the creation of impactful content, including reports, blogs, presentations, and custom
research, to drive strategic initiatives across areas such as go-to-market strategy, deal analysis, customer insights,
and technology trends. Demonstrated a collaborative leadership approach and a growth-oriented mindset.",
    "3.responsibilities.0": null,
    "3.responsibilities.1": "Authored strategic reports and industry analyses, demonstrating strong communication and
collaboration skills.",
    "4.responsibilities.0": "Designed and implemented a comprehensive enterprise-level database system, integrating
multiple internal and external data sources to enhance data integrity and reduce duplication by a significant margin.
Demonstrated strong communication and technical expertise in delivering this complex project, working effectively with
cross-functional teams.",
    "4.responsibilities.1": "Led successful Appian implementations that enhanced team productivity.",
    "4.responsibilities.2": "Led cross-functional content teams and oversaw daily content operations, including sales
and research-focused initiatives. Demonstrated proficiency in analytical problem-solving and data-driven decision-
making.",
    "4.responsibilities.3": "Analyzed financials of 20-30 IT service providers and managed a portfolio of over 1,500
contracts, leveraging advanced analytical and financial modeling skills to provide in-depth insights. Demonstrated a
collaborative leadership approach and a growth-oriented mindset in driving strategic decision-making.",
    "4.responsibilities.4": "Led the successful delivery of three mission-critical data integration projects that
enabled the launch of a new enterprise-level platform. Drove large-scale AI strategy and implementation initiatives at
the enterprise level.",
    "4.responsibilities.5": null,
    "5.responsibilities.0": null
  }
}
```

#### Final trim & edit by GPT

In [19]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching_trimmed\Blend_Director__AI_Strategy_crosstab_trimmed.json"

data = load_and_decode_json(file_path)

word_count = 0


# Iterate through the values of the JSON object
def count_words(data):
    word_count = 0
    for v in data.values():
        if isinstance(v, dict):
            word_count += count_words(v)
        elif isinstance(v, str):
            word_count += len(v.split())
    return word_count


word_count = count_words(data)


print("Trimmed and Condensed by GPT:")
print(f"Word count: {word_count}")


# word_count = sum(len(v.split()) for v in data.values())


display_json_pretty(file_path)

Trimmed and Condensed by GPT:
Word count: 273


```json
{
  "edited_responsibility_1": {
    "0.responsibilities.0": "Developed AI-driven dialogue systems optimizing user engagement and revenue growth.",
    "0.responsibilities.1": "Designed AI solutions for ideation and business innovation, leveraging hierarchical
clustering and ranking algorithms.",
    "0.responsibilities.2": "Built AI-powered feedback analysis systems to enhance response generation and conversation
tracking.",
    "0.responsibilities.3": "Implemented stateful tracking for seamless topic management, improving discussion flow and
engagement.",
    "0.responsibilities.4": "Optimized large-scale AI systems using AsyncIO for enhanced scalability and efficiency.",
    "1.responsibilities.0": "Led strategic initiatives optimizing IT vendor ecosystems in APAC, improving implementation
outcomes.",
    "1.responsibilities.3": "Standardized financial analysis and vendor collaborations, improving data integrity and
decision-making.",
    "1.responsibilities.4": "Achieved 40% operational offshoring to enhance global talent utilization and efficiency.",
    "1.responsibilities.6": "Developed an engineering services tracker, publishing key reports on market trends and M&A
activity.",
    "1.responsibilities.7": "Led global analyst teams to maintain data integrity, meet deadlines, and integrate AI-
driven business insights.",
    "2.responsibilities.0": "Managed full P&L, driving 50% program growth and leading strategy in budgeting, HR, and
vendor relations.",
    "2.responsibilities.1": "Grew a multicultural research team, fostering a collaborative, high-performance
environment.",
    "2.responsibilities.2": "Directed external development teams to design and implement cutting-edge AI solutions.",
    "2.responsibilities.3": "Pioneered ML, NLP, chatbots, and ontologies, significantly advancing AI adoption within the
organization.",
    "2.responsibilities.4": "Led automation initiatives in Python, reducing report preparation time by 40%.",
    "2.responsibilities.5": "Advised services firms on strategic deal pursuit and sales orchestration.",
    "2.responsibilities.6": "Developed strategic software partnerships through cross-functional collaboration.",
    "2.responsibilities.7": "Created high-impact content including reports, blogs, and presentations to drive business
insights.",
    "3.responsibilities.1": "Authored strategic industry reports, delivering high-impact insights to executive
stakeholders.",
    "4.responsibilities.0": "Designed an enterprise-level database, integrating multiple data sources to enhance data
integrity.",
    "4.responsibilities.1": "Led Appian implementations improving team productivity.",
    "4.responsibilities.2": "Managed cross-functional content teams overseeing research and sales-focused content
strategies.",
    "4.responsibilities.3": "Analyzed financials of 20+ IT service providers, managing a portfolio of 1,500+
contracts.",
    "4.responsibilities.4": "Delivered three critical data integration projects, enabling enterprise-level AI adoption."
  }
}
```

In [15]:
file_path = r"C:\github\job_bot\input_output\human_review\resps_reqs_matching_trimmed\Blend_Director__AI_Strategy_crosstab_trimmed.json"

data = load_and_decode_json(file_path)

word_count = sum(len(v.split()) for v in data.values() if isinstance(v, str))

print("Trimmed and Condensed by GPT:")
print(f"Word count: {word_count}")

Trimmed and Condensed by GPT:
Word count: 0


# Random Stuff

In [16]:
import pandas as pd

f_path = r"C:\Users\xzhan\My Drive\Job Search\Job Search 2025.xlsx"

df = pd.read_excel(f_path)
for co, job in zip(df.Company, df["Job Title"]):
    text = "_".join([co, job])
    text = text.replace(" ", "_")
    text = text.replace(",", "")
    print(text)

MongoDB_Director_Competitive_Intelligence
Adobe_Sr._Director_Applied_AI/ML_(Discovery)
Flextronics_Sr._Manager_AI_Strategy
TRACE3_Senior_Consultant_AI_Strategy_(Remote)
PwC_Strategy&_Senior_Manager_-_Digital_Value_Transformation_Contact_Center
Glean_Head_of_Competitive_Intelligence
Airtable_Product_Manager_AI
Veeva_Director_-_Crossix_Analytics_Services
ThermoFisher_Scientific_Market_&_Competitive_Intelligence_Manager
DigitialOcean_Director_Product_Management_(AI/ML)
Figma_Researcher_Strategic_Growth
DEPT_Director_of_Applied_AI_Strategy_Media
Deloitte_Market_Research_Sr_Manager_Boston
Deloitte_AI_Data_Specialist_Boston
Deloitte_Global_Business_Services_(GBS)_Strategy_Manager_Boston
Amazon_Senior_Manger_Partner_Strategy_GenAI_Innovation_Center
Blend_Director_AI_Strategy
Snowflake_Director_Product_Marketing_-_Analytics
Advisor360_Degrees_Sr._Product_Manager_–_AI_Analytics_&Insights
Oracle_Senior_AI_Product_Marketing_Manager
Liberty_Mutual_Insurance_Senior_Manager_II_Corporate_Strategy_&_R

TypeError: sequence item 0: expected str instance, float found

In [26]:
# Cell 1: Set style
from IPython.display import display, HTML

display(
    HTML(
        """
<style>
.monaco-workbench .notebook-cell .output pre {
    font-size: 24px !important;
}
</style>
"""
    )
)

# Cell 2: Test
display("This text should be bigger")
display({"key": "value"})

'This text should be bigger'

{'key': 'value'}