In [None]:
# The following packages are needed for this project 
# --------------------------------------------------------------------------------------------------------------
# openai:       For using OpenAI’s language models to enable natural language processing 
#               and text generation.
# --------------------------------------------------------------------------------------------------------------
# pdfplumber:   For extracting text, tables, and data from PDF files for analysis and 
#               processing.
# --------------------------------------------------------------------------------------------------------------
# ipython:      Using Markdown from this package for visualizing and formatting text more 
#               easily, especially useful in creating structured and styled documents and 
#               web content.
# --------------------------------------------------------------------------------------------------------------
!pip install openai pdfplumber ipython


In [4]:
# Importing the previously presented packages
import os

import pdfplumber
from IPython.display import display, Markdown, clear_output
from openai import OpenAI

In [5]:
# Reads and returns the text content of a PDF file using pdfplumber.
def read_pdf(file_path, page_separator="\n"):
    """Read a PDF with pdfplumber and return the text of all its pages.

    Args:
        file_path: Path of the PDF file; passed straight to ``pdfplumber.open``.
        page_separator: String placed between the text of consecutive pages,
            so the last word of one page is not fused with the first word of
            the next one. Defaults to a newline.

    Returns:
        str: The text of every page, in page order, joined by
        ``page_separator``. A page that yields no text contributes an empty
        string; a PDF without pages yields ``""``.
    """
    with pdfplumber.open(file_path) as pdf:
        # extract_text() can come back empty or None for a page without
        # extractable text (e.g. a scanned image); normalise that to "" so
        # the join below never sees a non-string.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return page_separator.join(page_texts)

In [7]:
# Prefer the OPENAI_API_KEY environment variable so the real key never has to be
# written into (and saved with) the notebook. The placeholder is used only when
# that variable is not set.
personal_openai_key = os.environ.get('OPENAI_API_KEY', 'YOUR OPENAI KEY')

In [8]:
# Build the OpenAI client from the API key defined above.
client = OpenAI(api_key=personal_openai_key)

# Location of the PDF to analyse; point this at your own document.
document_path = 'documents/CV_1.pdf'

# Load the text content of that PDF via read_pdf.
document_text = read_pdf(document_path)

In [9]:
# Renders the loaded text as Markdown.
# To see the raw text instead, use the commented print() call below.
#print(document_text)
display(Markdown(document_text))

A S H L E Y S T O N E
Senior Software Engineer
CONTACT INFORMATION: PROFESSIONAL SUMMARY:
+1 (555) 123-4567 Highly skilled Senior Software Engineer with over 8 years of experience in full-stack
ashley.stone@example.com development. Proficient in various programming languages and frameworks, with a
linkedin.com/in/ashleystone strong background in system architecture design, database management, and team
leadership. Adept at collaborating with cross-functional teams to deliver high-quality
SKILLS: software solutions.
Programming Languages: Java, WORK EXPERIENCE:
JavaScript, Python, SQL
Web Development Frameworks: SENIOR SOFTWARE ENGINEER
AngularJS, Node.js, Spring Boot Tech Innovations Inc., San Francisco, CA
August 2018 - Present
Database Technologies: MySQL,
MongoDB, PostgreSQL Lead the development team in designing and implementing scalable and efficient
Cloud Platforms: AWS, Microsoft software solutions for complex business requirements.
Spearhead the migration of legacy systems to modern, cloud-based architectures,
Version Control: Git, SVN resulting in improved performance and reduced maintenance costs.
Agile Methodologies: Scrum, Collaborate with product managers and stakeholders to define project scopes,
Kanban Strong problem-solving prioritize tasks, and ensure timely delivery of features.
and analytical skills Mentor junior engineers, conducting code reviews, providing technical guidance,
Excellent communication and and fostering a culture of continuous learning and improvement.
leadership abilities Implemented CI/CD pipelines to automate the software deployment process,
increasing deployment frequency and reducing manual errors.
EDUCATION:
SOFTWARE ENGINEER
Master of Science in Computer CodeCrafters Co., Seattle, WA
Science June 2015 - July 2018
University of Technology,
Innovations, and Science, 2013 Developed web applications using AngularJS and Node.js, contributing to a 30%
increase in user engagement and customer satisfaction.
LANGUAGES: Designed and optimized SQL databases for efficient data storage and retrieval,
reducing query times by 40%.
English (Native proficiency) Implemented RESTful APIs for seamless integration between frontend and
Spanish (B2) backend systems, improving overall system performance and reliability.
Collaborated with cross-functional teams to gather requirements, plan project
REFERENCES: timelines, and deliver high-quality software solutions within budget constraints.
Conducted unit testing and debugging to ensure the stability and reliability of
Available upon request. software applications.

In [23]:
def ask_openai(document_text, question, openai_key, model="gpt-4o-mini", stream=True):
    """Ask an OpenAI chat model a question about a document and display the answer.

    The request is sent through the module-level ``client``. The answer is
    rendered as Markdown in the notebook (updated live while streaming) and
    also returned.

    Args:
        document_text: Text of the document the question refers to.
        question: The question to ask about the document.
        openai_key: Not used by this function; requests go through the
            module-level ``client``. Kept so existing calls keep working.
        model: Name of the chat model to use, e.g. ``"gpt-4o-mini"``,
            ``"gpt-4o"``, ``"gpt-4-turbo"`` or ``"gpt-3.5-turbo-0125"``.
        stream: When true, the answer is streamed and the display is
            refreshed with every received piece; otherwise the complete
            answer is displayed once.

    Returns:
        The answer text. When streaming, this is the concatenation of all
        received pieces (``""`` if none arrived).
    """
    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an expert in extracting informations from CV."},
            {
                "role": "user",
                # The line break and the indentation before "Return" are part
                # of the prompt text that has always been sent; keep them.
                "content": (
                    f"Document: {document_text}\n\nQuestion: {question}. \n"
                    "      Return the answer in form of Markdown for formatting"
                ),
            },
        ],
        n=1,  # Number of completions to generate
        stop=None,  # Sequence where the API will stop generating further tokens
        max_tokens=2000,  # Maximum number of tokens to be generated by the model
        temperature=0.7,  # Sampling temperature
        stream=stream,  # Enable or disable streaming mode
    )

    if not stream:
        # The complete answer is available at once.
        answer = response.choices[0].message.content
        display(Markdown(answer))
        return answer

    answer = ""
    # A display handle lets the same output area be refreshed in place.
    display_handle = display(Markdown(""), display_id=True)
    for chunk in response:
        # A chunk may carry no choices at all; indexing it would raise IndexError.
        if not chunk.choices:
            continue
        piece = chunk.choices[0].delta.content
        if piece is None:
            continue
        # Grow the answer incrementally instead of re-joining every piece.
        answer += piece
        display_handle.update(Markdown(answer))
    return answer

In [24]:
# The question to ask about the document: a bullet list of the CV fields to extract.
question = """
Extract the following relevant information from the CV.
- Candidate's skills
- Experience 
- Languages 
- Programming Languages
- Certifications
- Educational background
"""
# Sends the document text and the question to the model. ask_openai displays the
# answer and returns it, so it is also kept in final_response.
final_response = ask_openai(document_text, question, personal_openai_key)

In [27]:
# The question to ask about the document: the same kind of extraction, but this
# time the model is asked to return the result as JSON for later processing.
# NOTE(review): ask_openai appends a "return the answer as Markdown" instruction
# to every prompt, so the JSON can come back wrapped in a ```json code fence;
# presumably that fence has to be stripped before parsing - confirm.
question = """
Extract the following relevant information from the CV and convert it into JSON-format to process it later
- Candidate's skills
- Experience 
- Languages 
- Everything about coding
- Certifications
- Educational background
"""
# Sends the document text and the question to the model. ask_openai displays the
# answer and returns it, so it is also kept in final_response.
final_response = ask_openai(document_text, question, personal_openai_key)

```json
{
  "candidate": {
    "skills": {
      "programming_languages": ["Java", "JavaScript", "Python", "SQL"],
      "web_development_frameworks": ["AngularJS", "Node.js", "Spring Boot"],
      "database_technologies": ["MySQL", "MongoDB", "PostgreSQL"],
      "cloud_platforms": ["AWS", "Microsoft"],
      "version_control": ["Git", "SVN"],
      "agile_methodologies": ["Scrum", "Kanban"],
      "problem_solving": true,
      "analytical_skills": true,
      "communication_skills": true,
      "leadership_abilities": true
    },
    "experience": [
      {
        "position": "Senior Software Engineer",
        "company": "Tech Innovations Inc.",
        "location": "San Francisco, CA",
        "duration": "August 2018 - Present",
        "responsibilities": [
          "Lead the development team in designing and implementing scalable and efficient software solutions for complex business requirements.",
          "Spearhead the migration of legacy systems to modern, cloud-based architectures, resulting in improved performance and reduced maintenance costs.",
          "Collaborate with product managers and stakeholders to define project scopes, prioritize tasks, and ensure timely delivery of features.",
          "Mentor junior engineers, conducting code reviews, providing technical guidance, and fostering a culture of continuous learning and improvement.",
          "Implemented CI/CD pipelines to automate the software deployment process, increasing deployment frequency and reducing manual errors."
        ]
      },
      {
        "position": "Software Engineer",
        "company": "CodeCrafters Co.",
        "location": "Seattle, WA",
        "duration": "June 2015 - July 2018",
        "responsibilities": [
          "Developed web applications using AngularJS and Node.js, contributing to a 30% increase in user engagement and customer satisfaction.",
          "Designed and optimized SQL databases for efficient data storage and retrieval, reducing query times by 40%.",
          "Implemented RESTful APIs for seamless integration between frontend and backend systems, improving overall system performance and reliability.",
          "Collaborated with cross-functional teams to gather requirements, plan project timelines, and deliver high-quality software solutions within budget constraints.",
          "Conducted unit testing and debugging to ensure the stability and reliability of software applications."
        ]
      }
    ],
    "languages": {
      "English": "Native proficiency",
      "Spanish": "B2"
    },
    "coding": {
      "proficiencies": [
        "Java",
        "JavaScript",
        "Python",
        "SQL",
        "AngularJS",
        "Node.js",
        "Spring Boot"
      ]
    },
    "certifications": [],
    "education": {
      "degree": "Master of Science in Computer Science",
      "institution": "University of Technology, Innovations, and Science",
      "duration": "2013 - 2015"
    }
  }
}
```