# Azure OpenAI GPT-4o CV Reviewer

In [7]:
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups used to configure the Azure OpenAI client can find them.
load_dotenv()

True

In [8]:
import os
from collections import defaultdict

# Define the path to the "data" folder
data_folder = "data"


def find_png_images(root_folder):
    """Return the paths of all PNG files under ``root_folder``, sorted.

    ``os.walk`` yields directory entries in arbitrary (filesystem) order, so the
    collected paths are sorted to give a deterministic, file-name-based order.
    Note this is a plain lexicographic sort: ``page10.png`` sorts before
    ``page2.png`` unless the numbers are zero-padded.

    The extension check is case-insensitive so files named ``*.PNG`` are
    not silently skipped.

    Args:
        root_folder: Directory to search recursively. A missing directory
            simply produces an empty list (``os.walk`` ignores the error).

    Returns:
        A sorted list of file paths (each joined onto ``root_folder``).
    """
    found = []
    # Traverse through each subfolder inside the root folder
    for root, _dirs, files in os.walk(root_folder):
        for file in files:
            if file.lower().endswith(".png"):
                found.append(os.path.join(root, file))
    return sorted(found)


def group_images_by_folder(image_paths):
    """Map each containing folder to the list of image paths inside it.

    The relative order of ``image_paths`` is preserved within each folder.
    """
    grouped = defaultdict(list)
    for image_path in image_paths:
        grouped[os.path.dirname(image_path)].append(image_path)
    return grouped


cv_images = find_png_images(data_folder)

# Group cv_images by folder
cv_images_by_folder = group_images_by_folder(cv_images)

In [9]:
# System prompt used as the "system" message of each review request (see create_messages).
# It instructs the model to act as a career guide, extract the listed resume details,
# raise alerts for missing or incorrect information, and give a "Resume Rating"
# with a justification, all formatted as Markdown.
system_prompt = """
As a dedicated career guide, your responsibility is to meticulously examine student resumes and provide feedback in Markdown format. Here are the detailed instructions:

1. Identify and enumerate contact details, list actual value of the email address, mobile number, and LinkedIn Profile URL, in the initial section.
2. List out all URLs present in the resume.
3. List out all technologies mentioned.
4. List out all skills highlighted.
5. List out all certifications acquired.
6. List out all educational qualifications along with the duration.
7. List out all professional experiences along with the duration.
8. The resume **should** contain an email and phone number for communication. Issue an alert if these details are missing.
9. The profile section **should** contain the student's name, course name, institution, and GitHub URL. Issue an alert if any of these elements are missing.
10. Students are anticipated to be enrolled in the **Higher Diploma in Cloud and Data Centre Administration** course in Hong Kong. Issue an alert if this information is missing or incorrect.
11. Be vigilant for any illogical content (excluding irrelevant/non-IT work experience) or spelling mistakes. Issue an alert and underline the errors if any are detected.
12. The summary section should be devoid of any pronouns.
13. Ensure the consistency of tenses throughout the resume.
14. Propose a suitable job title for the student based on the resume content.
15. Assign a "Resume Rating" on a scale of 1 to 10, where 10 signifies an outstanding resume.
16. If there are any alerts or missing information, the "Resume Rating" **should not** exceed 5.
17. If the phone number or email address is missing, the "Resume Rating" **should** be 0.
18. Assume the role of an IT interviewer and justify the "Resume Rating", correlating it with the likelihood of securing a job.
19. Suggest the kind of job the student is likely to land, such as a Cloud Engineer, Data Centre Technician, or Network Engineer, based on the resume content.
"""

In [10]:
import base64

# Read an image file from disk and return its bytes as base64 text
def encode_image(image_path):
    """Return the contents of ``image_path`` as a base64-encoded ``str``.

    The file is opened in binary mode, its full contents are base64-encoded,
    and the resulting bytes are decoded as UTF-8 text.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

# Function to create messages for the AI model
def create_messages(base64_images, system_message=None):
    """Build the chat message list for one resume review request.

    Args:
        base64_images: Iterable of base64-encoded PNG images (``str``), one
            entry per resume image to attach.
        system_message: Text for the system message. When omitted, the
            notebook-level ``system_prompt`` is used, so existing
            ``create_messages(images)`` calls behave as before.

    Returns:
        A two-element list: the system message, followed by a user message
        whose content is the instruction text and then one ``image_url`` part
        per image, each embedded as a ``data:image/png;base64,...`` URL.
    """
    if system_message is None:
        # Fall back to the global prompt defined earlier in the notebook.
        system_message = system_prompt

    image_parts = [
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img}"}}
        for img in base64_images
    ]
    instruction = {
        "type": "text",
        # Typo fixed: the original text sent "ratiing" to the model.
        "text": "Describe the images as an alternative text, provide feedback, warning if any and rating on the resume.",
    }
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": [instruction, *image_parts]},
    ]

In [13]:
from tqdm import tqdm
import os
from langchain_openai import AzureChatOpenAI

# Azure OpenAI chat client. The API version and deployment name are read from
# environment variables; temperature=0 asks for the least random sampling.
llm = AzureChatOpenAI(
    openai_api_version=os.getenv("AZURE_OPENAI_GPT4O_API_VERSION"),
    azure_deployment=os.getenv("AZURE_OPENAI_GPT4O_DEPLOYMENT_NAME"),
    temperature=0,
)

# Sort the cv_images_by_folder dictionary by folder
sorted_cv_images_by_folder = dict(sorted(cv_images_by_folder.items(), key=lambda x: x[0]))

for folder, images in tqdm(sorted_cv_images_by_folder.items(), desc="Processing folders"):
    save_path = os.path.join(folder, 'chatgpt_result.md')
    # An existing result file acts as a cache: folders already reviewed are
    # skipped, which makes re-running this cell cheap and resumable.
    if os.path.exists(save_path):
        continue
    # Directory listing order is arbitrary, so sort the image paths to send
    # them to the model in a stable, file-name-based order.
    encode_images = [encode_image(image) for image in sorted(images)]
    messages = create_messages(encode_images)
    ai_message = llm.invoke(messages)
    # Save ai_message.content to a file. The encoding is given explicitly
    # because the platform default may not be able to represent every
    # character in the model's Markdown output.
    with open(save_path, 'w', encoding='utf-8') as file:
        file.write(ai_message.content)

Processing folders:   0%|          | 0/82 [00:00<?, ?it/s]

Processing folders: 100%|██████████| 82/82 [00:15<00:00,  5.28it/s]
