#                                              Indeed.com Job Posts Analysis 

## Business Problem

The job market is rapidly evolving, driven by technological advancements, shifting industry demands, and changing workforce expectations.Job seekers face challenges in understanding which roles, skills, and locations offer the best career opportunities. Without reliable insights, both employers and professionals risk making uninformed decisions that lead to talent gaps, inefficient hiring, and missed career growth opportunities.


## Problem Statement
There is a lack of structured, data-driven insights into current job market trends. Companies are uncertain about which roles and skills to prioritize, how to benchmark salaries across industries and locations, and how their employer brand (ratings, reputation) impacts recruitment. Job seekers, on the other hand, lack clarity on the most promising career paths, salary expectations, and skill requirements. By analyzing job postings data from platforms like Indeed.com, actionable insights can be generated to bridge this gap—helping companies refine recruitment strategies and enabling job seekers to make informed career decisions.


## Project Objectives

#### 1.Analyze Job Demand

* Identify the most in-demand job roles and industries.
* Determine which companies are hiring for multiple roles.

#### 2.Salary Analysis

* Analyze salary trends by job role and location.
* Identify salary ranges and patterns for entry-level vs experienced positions.

#### 3.Company Insights

* Evaluate which companies are hiring the most.
* Analyze company ratings vs offered salaries.
#### 4.Skill Analysis

* Identify most sought-after skills for each role.
* Understand skill gaps and demand for emerging technologies (e.g., AI, Cloud, BI).

#### 5.Location-Based Insights

* Determine which cities or regions have highest job availability.
* Compare salary differences across locations.

#### 6.Provide Recommendations

* Help job seekers target roles and skills in demand.
* Help companies benchmark salaries and improve recruitment strategies.

In [51]:
keywords = [
    "data scientist",
    "data analyst",
    "power bi developer",
    "sql developer",
    "business analyst",
    "machine learning engineer",
    "deep learning engineer",
    "ai engineer",
    "python developer",
    "java developer",
    "big data engineer",
    "cloud engineer",
    "aws developer",
    "azure developer",
    "tableau developer",
    "etl developer",
    "data engineer",
    "analytics consultant",
    "business intelligence developer",
    "r programmer",
    "statistical analyst",
    "DevOps Engineer",
    "Cybersecurity Analyst",
    "Full Stack Developer",
    "Database Administrator",
    "Software Engineer"
]

In [71]:

import time
import re
import pandas as pd
import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import random

# -------------------- SETUP --------------------
options = uc.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
driver = uc.Chrome(options=options)
wait = WebDriverWait(driver, 10)

# -------Keywords--------
keywords = [
    "data scientist",
    "data analyst",
    "power bi developer",
    "sql developer",
    "business analyst",
    "machine learning engineer",
    "deep learning engineer",
    "ai engineer",
    "python developer",
    "java developer",
    "big data engineer",
    "cloud engineer",
    "aws developer",
    "azure developer",
    "tableau developer",
    "etl developer",
    "data engineer",
    "analytics consultant",
    "business intelligence developer",
    "r programmer",
    "statistical analyst",
    "DevOps Engineer",
    "Cybersecurity Analyst",
    "Full Stack Developer",
    "Database Administrator",
    "Software Engineer"
]

all_data = []

# -------------------- HELPER FUNCTIONS --------------------
def extract_salary(salary_text):
    salary_min, salary_max = None, None
    if not salary_text:
        return salary_min, salary_max
    salary_text = salary_text.lower().replace(",", "")
    year_range = re.search(r"₹(\d+)\s*-\s*₹(\d+)\s*a year", salary_text)
    year_single = re.search(r"₹(\d+)\s*a year", salary_text)
    month_range = re.search(r"₹(\d+)\s*-\s*₹(\d+)\s*a month", salary_text)
    month_single = re.search(r"₹(\d+)\s*a month", salary_text)
    if year_range:
        salary_min = int(year_range.group(1))
        salary_max = int(year_range.group(2))
    elif year_single:
        salary_min = salary_max = int(year_single.group(1))
    elif month_range:
        salary_min = int(month_range.group(1)) * 12
        salary_max = int(month_range.group(2)) * 12
    elif month_single:
        salary_min = salary_max = int(month_single.group(1)) * 12
    return salary_min, salary_max

def extract_experience(description_text):
    exp_min, exp_max = None, None
    exp_pattern = r"(\d+)-?(\d+)?\s+years?"
    match = re.search(exp_pattern, description_text, re.IGNORECASE)
    if match:
        exp_min = int(match.group(1))
        exp_max = int(match.group(2)) if match.group(2) else exp_min
    return exp_min, exp_max

def extract_skills(description_text):
    skills = []
    skill_keywords = [
    # Programming Languages
    "Python", "R", "SQL", "Java", "Scala", "C++", "JavaScript",
    
    # Python Libraries & Frameworks
    "Pandas", "NumPy", "Scikit-learn", "TensorFlow", "PyTorch", "Keras",
    "Matplotlib", "Seaborn", "OpenCV", "HuggingFace Transformers",
    "Flask", "Django", "FastAPI", "MLflow", "PyTest", "UnitTest",
    
    # R Libraries
    "tidyverse", "dplyr", "ggplot2", "caret", "randomForest", "Shiny",
    
    # Java Frameworks
    "Spring", "Hibernate", "JDBC", "JPA", "JUnit", "Mockito",
    
    # Data Visualization Tools
    "Power BI", "Tableau", "Excel", "QlikView",
    
    # BI & Analytics
    "DAX", "Power Query (M Language)", "Calculated Fields", "Parameters",
    "Dashboard Design", "Storytelling with Data",
    
    # Databases
    "MySQL", "PostgreSQL", "Oracle", "MongoDB", "Cassandra", "Cosmos DB",
    
    # SQL Concepts
    "DDL", "DML", "DCL", "TCL", "Stored Procedures", "Functions",
    "Triggers", "Query Optimization", "Indexing", "Database Design",
    "Normalization",
    
    # Big Data Tools
    "Hadoop", "HDFS", "MapReduce", "Hive", "Pig", "Apache Spark",
    "Kafka", "Flink", "Storm",
    
    # ETL Tools
    "Informatica", "Talend", "SSIS", "Pentaho",
    
    # Workflow Orchestration
    "Airflow", "Luigi",
    
    # Cloud Platforms & Services
    "AWS", "Azure", "GCP", "AWS Sagemaker", "Azure ML", "GCP AI Platform",
    "AWS EC2", "AWS S3", "AWS RDS", "AWS Lambda", "AWS DynamoDB",
    "Azure VMs", "Azure Blob Storage", "Azure SQL Database",
    "Azure Functions", "Azure Synapse", "Azure DevOps",
    "Azure Cosmos DB", "Azure AD",
    
    # Cloud Infrastructure
    "Terraform", "CloudFormation", "ARM Templates",
    
    # Containers & Deployment
    "Docker", "Kubernetes", "CI/CD Pipelines",
    
    # Monitoring & Logging
    "CloudWatch", "Azure Monitor",
    
    # Statistical & Analytical Skills
    "Regression Analysis", "ANOVA", "Hypothesis Testing",
    "Probability Theory", "Experimental Design",
    
    # Machine Learning Concepts
    "Feature Engineering", "Model Selection", "Hyperparameter Tuning",
    "MLOps", "Model Deployment", "Responsible AI",
    
    # Deep Learning Concepts
    "CNN", "RNN", "LSTM", "Transformers", "Computer Vision",
    "Natural Language Processing (NLP)", "Reinforcement Learning",
    "GPU Computing (CUDA)",
    
    # Business & Analytical Skills
    "Requirements Gathering", "Stakeholder Management",
    "Process Mapping", "Workflow Analysis", "Business Acumen",
    "Client Management", "Domain Knowledge (Finance, Healthcare, Retail)",
    
    # General Tools & Practices
    "Version Control (Git)", "Automation & Scripting",
    "API Development & Integration", "Performance Optimization",
    "Security & Governance", "Row-Level Security (RLS)",
    "Error Handling & Logging", "Scheduling & Automation",
    "Communication & Presentation Skills"
]
    for kw in skill_keywords:
        if kw.lower() in description_text.lower():
            skills.append(kw)
    return skills

# -------------------- SCRAPING --------------------
for keyword in keywords5:
    TARGET_PER_KEYWORD = 200
    page = 0
    collected = 0
    while collected < TARGET_PER_KEYWORD:
        print(f"[{keyword}] Page {page+1} | Collected: {collected}")
        url = f"https://in.indeed.com/jobs?q={keyword.replace(' ', '+')}&l=India&start={page*10}"
        driver.get(url)
        time.sleep(4)

        cards = driver.find_elements(By.CSS_SELECTOR, "div.job_seen_beacon")
        if not cards:
            break

        for card in cards:
            
            if collected >= TARGET_PER_KEYWORD:
                break
            try:
                driver.execute_script("arguments[0].scrollIntoView(true);", card)
                wait.until(EC.element_to_be_clickable(card)).click()
                time.sleep(2)
                
                job_desc = wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, "div#jobDescriptionText")))
                description_text = job_desc.text
            
                print("job_dec scraped")
                title = card.find_element(By.CSS_SELECTOR, "h2.jobTitle span").text
                print("title scraped")
                try:
                    job_type_elem = driver.find_element(By.CSS_SELECTOR, "span.js-match-insights-provider-18uwqyc.e1wnkr790")
                    time.sleep(5)
                    job_type = job_type_elem.text.strip()
                except:
                    job_type = None
                print("job type scraped")
                try:
                    company = driver.find_element(By.CSS_SELECTOR, "a.css-1h4l2d7.e19afand0").text
                except:
                    company = "N/A"
                print("com name scraped")
                
                try:
                    location = driver.find_element(By.CSS_SELECTOR, "div[data-testid='inlineHeader-companyLocation']").text
                except:
                    try:
                        location = driver.find_element(By.CSS_SELECTOR, "div.companyLocation").text
                    except:
                        location = "N/A"

                print("location scraped")
                try:
                    salaries = driver.find_elements(By.XPATH, "/html/body/main/div/div/div[2]/div/div[5]/div/div[2]/div/div/div/div[2]/div[2]/div[1]/div/div[2]/div/div")
                    salary_text = salaries[0].text.strip()  
                except:
                    salary_text = None
                
                salary_min, salary_max = extract_salary(salary_text)
                
                exp_min, exp_max = extract_experience(description_text)
                print("salary scraped")
                
                skills = extract_skills(description_text)
                print("skills scraped")   
                try:
                    date_posted = card.find_element(By.CSS_SELECTOR, "span.date").text
                except:
                    date_posted = None
                print("date posted scraped")
                try:
                    company_rating = card.find_element(By.CSS_SELECTOR, "span.ratingsContent").text
                except:
                    company_rating = None
                print("company ratings scraped")
                try:
                    job_url = card.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
                except:
                    job_url = None
                print("job url scraped")
                
                all_data.append({
                    "Keyword": keyword,
                    "Job_Title": title,
                    "Job_type": job_type,
                    "Company": company,
                    "Location": location,
                    "Salary_Min_Yearly": salary_min,
                    "Salary_Max_Yearly": salary_max,
                    "Experience_Min": exp_min,
                    "Experience_Max": exp_max,
                    "Skills": ", ".join(skills),
                    "Company_Rating": company_rating,
                    "Job_URL": job_url
                })

                collected += 1
                print(f"   ➡ Scraped: {title}")

            except Exception as e:
                print(f"Error: {e}")
                continue

        page += 1

driver.quit()

# -------------------- SAVE TO DATAFRAME --------------------
df = pd.DataFrame(all_data)
print(f"\nTotal jobs scraped: {df.shape[0]}")
print(df.head())
df.to_csv('jobs_indeed.csv', index=False)

[data engineer] Page 1 | Collected: 0
job_dec scraped
title scraped
job type scraped
com name scraped
location scraped
salary scraped
skills scraped
date posted scraped
company ratings scraped
job url scraped
   ➡ Scraped: Data Engineer
job_dec scraped
title scraped
job type scraped
com name scraped
location scraped
salary scraped
skills scraped
date posted scraped
company ratings scraped
job url scraped
   ➡ Scraped: Intern | Data Engineer | Mumbai | Information Technology
job_dec scraped
title scraped
job type scraped
com name scraped
location scraped
salary scraped
skills scraped
date posted scraped
company ratings scraped
job url scraped
   ➡ Scraped: Data Engineer
job_dec scraped
title scraped
job type scraped
com name scraped
location scraped
salary scraped
skills scraped
date posted scraped
company ratings scraped
job url scraped
   ➡ Scraped: Data Engineer
Error: Message: 

job_dec scraped
title scraped
job type scraped
com name scraped
location scraped
salary scraped
skills sc

In [26]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import undetected_chromedriver as uc
import pandas as pd
import time
import re

# -------------------- SETUP --------------------
options = uc.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")
driver = uc.Chrome(options=options)
wait = WebDriverWait(driver, 10)

# -------------------- PARAMETERS --------------------
keywords = "data scientist" # "data analyst" , "power bi developer", "sql developer"]
TARGET_PER_KEYWORD = 15 # number of jobs to scrape per keyword
all_data = []

# -------------------- HELPER FUNCTIONS --------------------
def extract_salary(salary_text):
    """Extract min/max salary per year"""
    salary_min, salary_max = None, None
    if not salary_text:
        return salary_min, salary_max

    year_pattern = r"₹([\d,]+)\s*-\s*₹([\d,]+)\s*a year"
    month_pattern = r"₹([\d,]+)\s*-\s*₹([\d,]+)\s*a month"
    
    year_match = re.search(year_pattern, salary_text)
    month_match = re.search(month_pattern, salary_text)

    if year_match:
        salary_min = int(year_match.group(1).replace(",", ""))
        salary_max = int(year_match.group(2).replace(",", ""))
    elif month_match:
        salary_min = int(month_match.group(1).replace(",", "")) * 12
        salary_max = int(month_match.group(2).replace(",", "")) * 12
    
    return salary_min, salary_max

def extract_experience(description_text):
    """Extract min/max experience from job description"""
    exp_min, exp_max = None, None
    exp_pattern = r"(\d+)-?(\d+)?\s+years?"
    match = re.search(exp_pattern, description_text, re.IGNORECASE)
    if match:
        exp_min = int(match.group(1))
        if match.group(2):
            exp_max = int(match.group(2))
        else:
            exp_max = exp_min
    return exp_min, exp_max
    def extract_salary(salary_text):
        """
        Extract salary and convert monthly to yearly
        Returns: salary_min_yearly, salary_max_yearly
        """
    salary_min, salary_max = None, None

    if not salary_text:
        return salary_min, salary_max

    salary_text = salary_text.lower().replace(",", "")

    # YEARLY
    year_range = re.search(r"₹(\d+)\s*-\s*₹(\d+)\s*a year", salary_text)
    year_single = re.search(r"₹(\d+)\s*a year", salary_text)

    # MONTHLY
    month_range = re.search(r"₹(\d+)\s*-\s*₹(\d+)\s*a month", salary_text)
    month_single = re.search(r"₹(\d+)\s*a month", salary_text)

    if year_range:
        salary_min = int(year_range.group(1))
        salary_max = int(year_range.group(2))

    elif year_single:
        salary_min = salary_max = int(year_single.group(1))

    elif month_range:
        salary_min = int(month_range.group(1)) * 12
        salary_max = int(month_range.group(2)) * 12

    elif month_single:
        salary_min = salary_max = int(month_single.group(1)) * 12

    return salary_min, salary_max


def extract_skills(description_text):
    """Simple keyword-based skill extraction"""
    skills = []
    skill_keywords = ["Python","SQL", "Excel", "Tableau", "Power BI", "R", "SAS", "Machine Learning"]
    for kw in skill_keywords:
        if kw.lower() in description_text.lower():
            skills.append(kw)
    return skills

# -------------------- SCRAPING --------------------
for keyword in keywords:
    page = 0
    collected = 0

    while collected < TARGET_PER_KEYWORD:
        print(f"[{keyword}] Page {page+1} | Collected: {collected_per_keyword}")
        url = f"https://in.indeed.com/jobs?q={keyword.replace(' ', '+')}&l=India&start={page*10}"
        driver.get(url)
        time.sleep(4)
        cards = driver.find_elements(By.CSS_SELECTOR, "div.job_seen_beacon")
        if not cards:
            break

        for card in cards:
            if collected >= TARGET_PER_KEYWORD:
                break
            try:
                driver.execute_script("arguments[0].scrollIntoView(true);", card)
                wait.until(EC.element_to_be_clickable(card)).click()
                time.sleep(2)

                # Job details panel
                job_desc = wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "div#jobDescriptionText"))
                )
                description_text = job_desc.text

                # Extract basic info
                title = card.find_element(By.CSS_SELECTOR, "h2.jobTitle span").text
                company = wait.until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "span.companyName"))).text
                location = card.find_element(By.CSS_SELECTOR, "div.companyLocation").text

                # Salary (if exists)
                try:
                    salary_text = card.find_element(By.CSS_SELECTOR, "div.metadata.salary-snippet-container").text
                except:
                    salary_text = None
                salary_min, salary_max = extract_salary(salary_text)

                # Experience
                exp_min, exp_max = extract_experience(description_text)

                # Skills
                skills = extract_skills(description_text)

                # Date posted
                try:
                    date_posted = card.find_element(By.CSS_SELECTOR, "span.date").text
                except:
                    date_posted = None

                # Company rating
                try:
                    company_rating = card.find_element(By.CSS_SELECTOR, "span.ratingsContent").text
                except:
                    company_rating = None

                # Job URL
                try:
                    job_url = card.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
                except:
                    job_url = None

                # Append to data
                all_data.append({
                    "Keyword": keyword,
                    "Job_Title": title,
                    "Company": company,
                    "Location": location,
                    "Salary_Min_Yearly": salary_min,
                    "Salary_Max_Yearly": salary_max,
                    "Experience_Min": exp_min,
                    "Experience_Max": exp_max,
                    "Skills": ", ".join(skills),
                    "Date_Posted": date_posted,
                    "Company_Rating": company_rating,
                    "Job_URL": job_url
                })

                collected += 1
                print(f"[{keyword}] Collected {collected} | {title}")

            except Exception as e:
                print(f"Error: {e}")
                continue

        page += 1

driver.quit()

# -------------------- SAVE TO DATAFRAME --------------------
df = pd.DataFrame(all_data)
print(df.shape)
df.head()

[d] Page 1 | Collected: 0
Error: Message: 
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x667d13
	0x667d54
	0x48b290
	0x4d56ea
	0x4d598b
	0x517912
	0x4f8004
	0x515111
	0x4f7d56
	0x4c94d9
	0x4ca294
	0x8dbb64
	0x8d7215
	0x8f3fad
	0x681ef8
	0x689b0d
	0x670738
	0x670902
	0x65a1da
	0x755d5d49
	0x7770d5db
	0x7770d561

Error: Message: stale element reference: stale element not found
  (Session info: chrome=144.0.7559.60); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#staleelementreferenceexception
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x667d13
	0x667d54
	0x48b290
	0x49d1a2
	0x49c26c
	0x492819
	0x490c32
	0x49419f
	0x494238
	0x4d5160
	0x4d598b
	0x4cae51
	0x4f8004
	0x4cad44
	0x4f8184
	0x515111
	0x4f7d56
	0x4c94d9
	0x4ca294
	0x8dbb64
	0x8d7215
	0x8f3fad
	0x681ef8
	0x689b0d
	0x670738
	0x670902
	0x65a1da
	0x755d5d49
	0x7770d5db
	0x7770d561

Error: Message: stale element referenc

InvalidSessionIdException: Message: invalid session id; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#invalidsessionidexception
Stacktrace:
Symbols not available. Dumping unresolved backtrace:
	0x667d13
	0x667d54
	0x48b0ce
	0x4c8765
	0x4f7e76
	0x4f3be1
	0x4f30e2
	0x45e00b
	0x45e5ae
	0x45ea8d
	0x8dbb64
	0x8d7215
	0x8f3fad
	0x681ef8
	0x689b0d
	0x45db89
	0x45d1d0
	0xa4534f
	0x755d5d49
	0x7770d5db
	0x7770d561


In [53]:
df

Unnamed: 0,Keyword,Job_Title,Company,Location,Salary_Min_Yearly,Salary_Max_Yearly,Experience_Min,Experience_Max,Skills,Date_Posted,Company_Rating,Job_URL
0,data scientist,"Data Scientist / Senior Data Scientist, India ...",,"Mumbai, Maharashtra",,,,,"Python, SQL, R, Machine Learning",,,https://in.indeed.com/pagead/clk?mo=r&ad=-6NYl...
1,data scientist,"AI Engineer / Senior AI Engineer, India - BCG X",,"Mumbai, Maharashtra",,,,,"Python, R",,,https://in.indeed.com/pagead/clk?mo=r&ad=-6NYl...
2,data scientist,"AI Engineer / Senior AI Engineer, India - BCG X",,"Gurugram, Haryana",,,,,"Python, R",,,https://in.indeed.com/pagead/clk?mo=r&ad=-6NYl...
3,data scientist,Global AI/ML Engineer Manager,,"Gurugram, Haryana",,,,,"Python, Excel, R",,,https://in.indeed.com/pagead/clk?mo=r&ad=-6NYl...


In [14]:
from selenium import webdriver
from selenium.webdriver.common.by import By
import undetected_chromedriver as uc
import time
import re
import pandas as pd

options = uc.ChromeOptions()
options.add_argument("--start-maximized")
options.add_argument("--disable-blink-features=AutomationControlled")

driver = uc.Chrome(options=options)

all_data = []
TARGET = 15

keywords = ["data scientist", "data analyst", "power bi developer", "sql developer"]

for keyword in keywords:
    page = 0
    collected_per_keyword = 0

    while collected_per_keyword < TARGET and page:
        print(f"[{keyword}] Page {page+1} | Collected: {collected_per_keyword}")

        url = f"https://in.indeed.com/jobs?q={keyword.replace(' ', '+')}&l=India&start={page*10}"
        driver.get(url)
        time.sleep(6)

        cards = driver.find_elements(By.CSS_SELECTOR, "div.job_seen_beacon")
        print("Cards found:", len(cards))


        if len(cards) == 0:
            page += 1
            continue
    
        for card in cards:
            if len(all_data) >= TARGET:
                break
                    
            text = card.text.strip()
            if not text:
                continue
    
            lines = text.split("\n")
            if len(lines) < 3:
                continue

            # -------- SALARY --------
            salary_min, salary_max = None, None #salary = card.find_element(By.XPATH,"//*[@id='salaryInfoAndJobType']/span").text.strip()
            try:
                salary_text = card.find_element(By.XPATH,"//*[@id='salaryInfoAndJobType']/span").text

                year_pattern = r"₹([\d,]+)\s*-\s*₹([\d,]+)\s*a year"
                month_pattern = r"₹([\d,]+)\s*-\s*₹([\d,]+)\s*a month"

                year_match = re.search(year_pattern, salary_text)
                month_match = re.search(month_pattern, salary_text)

                if year_match:
                    salary_min = int(year_match.group(1).replace(",", ""))
                    salary_max = int(year_match.group(2).replace(",", ""))

                elif month_match:
                    salary_min = int(month_match.group(1).replace(",", "")) * 12
                    salary_max = int(month_match.group(2).replace(",", "")) * 12
            except:
                pass

            # --------SKILLS----
            try:
                skills= card.find_element(By.CLASS, "div.js-match-insights-provider-u74ql7").text
            except:
                skills = None

            # -------- JOB URL --------
            try:
                job_url = card.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
            except:
                job_url = None

            all_data.append({
                "Keyword": keyword,
                "Job_Title": lines[0],
                "Company": lines[1],
                "Location": lines[2],
                "Salary_Min_Yearly": salary_min,
                "Salary_Max_Yearly": salary_max,
                "Skill_required": skills,
                "Job_URL": job_url
            })

            collected_per_keyword += 1
            print("Added:", title)

        page += 1

driver.quit()

df = pd.DataFrame(all_data)
print(df.shape)
df.head()


(0, 0)
