In [1]:
import os

from dotenv import load_dotenv
# Populate the process environment before any client is configured
# (presumably from a local .env file, per python-dotenv - confirm the file
# location). The AZURE_OPENAI_* values read via os.getenv in the next cell
# are expected to come from here.
load_dotenv()

True

In [2]:
from langchain_openai import AzureChatOpenAI
# Chat model client shared by the rest of the notebook. The API version and
# deployment name are taken from environment variables rather than hard-coded;
# temperature is pinned to 0.
llm = AzureChatOpenAI(
    temperature=0,
    azure_deployment=os.getenv("AZURE_OPENAI_GPT4O_DEPLOYMENT_NAME"),
    openai_api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

In [4]:
import base64

# System message used by create_messages() for every request. It lists the
# review rules the model must apply to a resume (required contact/profile
# fields, expected course, wording checks, a 1-10 rating capped at 5 when any
# warning is raised, and job suggestions) and asks for Markdown output.
system_prompt = """
As a committed career mentor, your task is to thoroughly scrutinize student resumes and offer feedback in Markdown format. Here are the specific guidelines:

1. The resume **must** incorporate an email and mobile number for communication purposes. Issue a warning if these details are absent.
2. The profile section **must** include the student's name, course name, school, and GitHub URL. If any of these components are absent, issue a warning.
3. Students are expected to be enrolled in the **Higher Diploma in Cloud and Data Centre Administration** course in Hong Kong. Issue a warning if this information is absent or incorrect.
4. Stay alert for any nonsensical content (excluding Irrelevant/non-IT working Experience) or spelling errors. Issue a warning and highlight the mistakes if any are found.
5. The summary section should not contain any pronouns.
6. Ensure the use of tenses is consistent throughout the resume.
7. Based on the resume content, propose an appropriate job title for the student.
8. Assign a "Resume Rating" on a scale from 1 to 10, where 10 represents an exceptional resume.
9. If there are any warnings or missing information, the "Resume Rating" **must not** surpass 5.
10. Adopt the perspective of an IT interviewer and provide a rationale for the "Resume Rating", linking it to the probability of landing a job.
11. Based on the resume content, suggest the type of job the student is likely to secure, such as a Cloud Engineer, Data Centre Technician, or Network Engineer.

"""

def encode_image(image_path):
    """Return the contents of the file at ``image_path`` as base64 text.

    The file is opened in binary mode, read in full, base64-encoded, and the
    resulting bytes are decoded to a ``str`` using UTF-8.
    """
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    encoded_bytes = base64.b64encode(raw_bytes)
    return encoded_bytes.decode("utf-8")

# Function to create messages for the AI model
def create_messages(base64_images):
    """Build the chat messages for one resume review request.

    Args:
        base64_images: Iterable of base64-encoded image strings. Each one is
            embedded as a ``data:image/png;base64,...`` URL, so the images
            are declared to the model as PNG.

    Returns:
        A two-element list: a system message carrying the module-level
        ``system_prompt``, and a user message whose content is one text
        instruction followed by one ``image_url`` part per input image
        (only the text part when ``base64_images`` is empty).
    """
    image_parts = [
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{img}"}}
        for img in base64_images
    ]
    # The instruction previously misspelled "rating" as "ratiing".
    instruction = {
        "type": "text",
        "text": "Describe the images as an alternative text, provide feedback, warning if any and rating on the resume.",
    }
    return [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": [instruction, *image_parts]},
    ]

In [5]:
import os
from collections import defaultdict

# Define the path to the "data" folder
data_folder = "data"


def find_png_images(root_folder):
    """Return the paths of all PNG files under ``root_folder``.

    ``os.walk`` reports directories and files in arbitrary filesystem order,
    so both are sorted by name here. That keeps the images of one folder in a
    stable order from run to run. The order is plain string order, so
    ``page10.png`` sorts before ``page2.png``.

    The extension check is case-insensitive so files named ``*.PNG`` are not
    silently skipped.

    Args:
        root_folder: Directory to search recursively. A missing directory
            yields an empty list (``os.walk`` ignores the error by default).

    Returns:
        List of file paths, each built with ``os.path.join(root, name)``.
    """
    found = []
    for root, dirs, files in os.walk(root_folder):
        # Sorting dirs in place makes os.walk descend in sorted order.
        dirs.sort()
        for file in sorted(files):
            if file.lower().endswith(".png"):
                found.append(os.path.join(root, file))
    return found


cv_images = find_png_images(data_folder)

# Group cv_images by folder; each folder's list keeps the sorted order above.
cv_images_by_folder = defaultdict(list)
for image_path in cv_images:
    folder = os.path.dirname(image_path)
    cv_images_by_folder[folder].append(image_path)

In [6]:
from tqdm import tqdm
import os

# Sort the cv_images_by_folder dictionary by folder so folders are processed
# in a stable order.
sorted_cv_images_by_folder = dict(sorted(cv_images_by_folder.items(), key=lambda x: x[0]))

for folder, images in tqdm(sorted_cv_images_by_folder.items(), desc="Processing folders"):
    save_path = os.path.join(folder, 'chatgpt_result.md')
    # A saved result means this folder was already reviewed; skipping it lets
    # an interrupted run resume without repeating model calls.
    if os.path.exists(save_path):
        continue
    encode_images = [encode_image(image) for image in images]
    messages = create_messages(encode_images)
    ai_message = llm.invoke(messages)
    # Save ai_message.content to a file.
    # The encoding is explicit because the default is locale-dependent: with a
    # non-UTF-8 locale, non-ASCII characters in the reply raise
    # UnicodeEncodeError after the file has already been created, and the
    # leftover empty file would then be skipped by the exists() check above.
    with open(save_path, 'w', encoding='utf-8') as file:
        file.write(ai_message.content)



Processing folders:   0%|          | 0/82 [00:00<?, ?it/s]

Processing folders: 100%|██████████| 82/82 [21:44<00:00, 15.91s/it]
