In [6]:
import json
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
import openai
import os

In [7]:
openai.api_key = "OPENAI_API_KEY"

In [21]:
# Load the NER model and tokenizer
model_name = "savasy/bert-base-turkish-ner-cased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

Some weights of the model checkpoint at savasy/bert-base-turkish-ner-cased were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [22]:
# Create the NER pipeline
ner_pipeline = pipeline("ner", model=model, tokenizer=tokenizer)

In [23]:
# Read the JSON file line by line
with open('path/to/your/json', 'r', encoding='utf-8') as f:
    job_listings = []
    for line in f:
        try:
            # Load each line as a separate JSON object
            job = json.loads(line.strip())
            job_listings.append(job)
        except json.JSONDecodeError as e:
            print(f"Line is not in JSON format, error: {e}")

In [24]:
# Function to extract features using NER
def extract_features_with_ner(job_description):
    job_description = job_description.replace('\n', ' ')  # Remove new lines
    job_description = ' '.join(job_description.split())  # Remove extra spaces
    job_description = job_description.strip()  # Remove leading and trailing spaces

    ner_results = ner_pipeline(job_description)

    # Variables for extracted features
    tasks = []
    about_company = []
    technical_skills = []
    soft_skills = []
    employment_type = ""
    location_type = ""
    languages = ""
    experience = ""
    salary_range = ""
    degree = ""
    requirements = ""

    for entity in ner_results:
        if entity['entity'] == 'B-TASK':
            tasks.append(entity['word'])
        elif entity['entity'] == 'B-ABOUT_COMPANY':
            about_company.append(entity['word'])
        elif entity['entity'] == 'B-TECHNICAL_SKILL':
            technical_skills.append(entity['word'])
        elif entity['entity'] == 'B-SOFT_SKILL':
            soft_skills.append(entity['word'])
        elif entity['entity'] == 'B-EMPLOYMENT_TYPE':
            employment_type = entity['word']
        elif entity['entity'] == 'B-LOCATION_TYPE':
            location_type = entity['word']
        elif entity['entity'] == 'B-LANGUAGE':
            languages = entity['word']
        elif entity['entity'] == 'B-EXPERIENCE':
            experience = entity['word']
        elif entity['entity'] == 'B-SALARY_RANGE':
            salary_range = entity['word']
        elif entity['entity'] == 'B-DEGREE':
            degree = entity['word']
        elif entity['entity'] == 'B-REQUIREMENT':
            requirements = entity['word']

    return {
        "tasks": tasks,
        "about_company": about_company,
        "technical_skills": technical_skills,
        "soft_skills": soft_skills,
        "employment_type": employment_type,
        "location_type": location_type,
        "languages": languages,
        "experience": experience,
        "salary_range": salary_range,
        "degree": degree,
        "requirements": requirements,
    }

In [25]:
# Function to extract features using ChatGPT API
def extract_features_with_chatgpt(job_description):
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "Extract important features from the following job description."},
                {"role": "user", "content": job_description}
            ],
            temperature=0.7,
            max_tokens=100
        )
        return response.choices[0]['message']['content']  # Return the output
    except Exception as e:
        print(f"Error with ChatGPT API: {e}")
        return {}

In [26]:
# Process the job listings
for job in job_listings:
    try:
        # Get the job ID and job description
        job_id = job.get("_id", None)  # Retrieve '_id', return None if not found
        job_source = job.get("_source", {})
        job_description = job_source.get("job_description", {}).get("description", "")

        if job_id is None:
            print(f"Job listing does not contain '_id' field: {job}")
            continue

        if not job_description:
            print(f"'description' field not found for Job ID: {job_id}.")
            continue

        # Features extracted with NER
        ner_features = extract_features_with_ner(job_description)

        # Features extracted with ChatGPT API
        gpt_features = extract_features_with_chatgpt(job_description)

        # Combine features
        job['_source']['ner_features'] = ner_features
        job['_source']['gpt_features'] = gpt_features

        # Print the updated job listing
        print(json.dumps(job, ensure_ascii=False, indent=4))

    except Exception as e:
        print(f"Error occurred while processing Job ID: {job_id}: {e}")

# Write results to a JSON file
with open('path/to/your/updated_ner_and_gpt.json', 'w', encoding='utf-8') as outfile:
    json.dump(job_listings, outfile, ensure_ascii=False, indent=4)

İş ilanında '_id' alanı bulunamadı: {'took': 6, 'timed_out': False, '_shards': {'total': 1, 'successful': 1, 'skipped': 0, 'failed': 0}, 'hits': {'total': {'value': 340, 'relation': 'eq'}, 'max_score': 9.548363, 'hits': [{'_index': 'jobs', '_id': 'WZsqcJEBsNvx8ebwoSwm', '_score': 9.548363, '_source': {'id': '4001571132', 'title': 'Yazılım Mühendisi', 'company': 'ETE Technology AS', 'location': 'Gaziemir', 'date': '2024-08-16', 'job_url': 'https://www.linkedin.com/jobs/view/4001571132/', 'job_description': {'description': "SOFTWARE ENGINEER\nWe are looking for a Software Engineer to join our growing engineering team.\n\xa0\nGeneral Qualifications\n- Bachelor's or Master’s degree in computer science, mathematics or another related field\n- Good spoken and written English\n- Teamwork and result oriented\n\xa0\nRequired Skills\n- Experience and good knowledge in C/C++ software programming languages\n- Good understanding of Object Oriented Software concepts\n- Design and implementation of d