In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

# ==== Country domain mapping ====
indeed_domains = {
    "Argentina": "ar", "Australia": "au", "Austria": "at", "Bahrain": "bh", "Belgium": "be",
    "Brazil": "br", "Canada": "ca", "Chile": "cl", "China": "cn", "Colombia": "co",
    "Costa Rica": "cr", "Czech Republic": "cz", "Denmark": "dk", "Ecuador": "ec", "Egypt": "eg",
    "Finland": "fi", "France": "fr", "Germany": "de", "Greece": "gr", "Hong Kong": "hk",
    "Hungary": "hu", "India": "in", "Indonesia": "id", "Ireland": "ie", "Israel": "il",
    "Italy": "it", "Japan": "jp", "Kuwait": "kw", "Luxembourg": "lu", "Malaysia": "my",
    "Mexico": "mx", "Morocco": "ma", "Netherlands": "nl", "New Zealand": "nz", "Nigeria": "ng",
    "Norway": "no", "Oman": "om", "Pakistan": "pk", "Panama": "pa", "Peru": "pe",
    "Philippines": "ph", "Poland": "pl", "Portugal": "pt", "Qatar": "qa", "Romania": "ro",
    "Saudi Arabia": "sa", "Singapore": "sg", "South Africa": "za", "South Korea": "kr",
    "Spain": "es", "Sweden": "se", "Switzerland": "ch", "Taiwan": "tw", "Thailand": "th",
    "Turkey": "tr", "Ukraine": "ua", "United Arab Emirates": "ae", "United Kingdom": "uk",
    "United States": "", "Uruguay": "uy", "Venezuela": "ve", "Vietnam": "vn"
}

# ==== Function to build URL ====
def build_indeed_url(job_title, location, country="India", start=0):
    country_code = indeed_domains.get(country, "")
    base_url = f"https://{country_code}.indeed.com/jobs" if country_code else "https://www.indeed.com/jobs"
    job_title_encoded = job_title.replace(" ", "+")
    location_encoded = location.replace(" ", "+")
    return f"{base_url}?q={job_title_encoded}&l={location_encoded}&from=searchOnDesktopSerp&start={start}"

# ==== User inputs ====
job_title = "Machine Learning Engineer"
location = "India"
country = "India"

# ==== Setup Chrome driver ====
chrome_options = Options()
chrome_options.add_argument("--start-maximized")
chrome_options.add_argument("--disable-notifications")
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=chrome_options)

jobs = []
max_pages = 3  # Limit to first 3 pages

for page in range(max_pages):
    start = page * 10
    search_url = build_indeed_url(job_title, location, country, start)
    print(f"\nDEBUG: Processing page {page+1} | URL: {search_url}")

    driver.get(search_url)
    time.sleep(3)

    # ==== Wait for job cards ====
    try:
        job_cards = WebDriverWait(driver, 10).until(
            EC.presence_of_all_elements_located((By.CSS_SELECTOR, "div.job_seen_beacon"))
        )
        print(f"DEBUG: Found {len(job_cards)} job cards on page {page+1}")
    except:
        print(f"No job cards found on page {page+1}, stopping.")
        break

    for idx, card in enumerate(job_cards, start=1):
        print(f"\nDEBUG: Processing job card #{idx} on page {page+1}")
        
        # Title
        try:
            title_elem = card.find_element(By.CSS_SELECTOR, "h2 > a")
            title = title_elem.text
            job_link = title_elem.get_attribute("href")
            print("Title:", title)
        except:
            title = "N/A"
            job_link = None
        
        # Company
        try:
            company_elem = card.find_element(By.CSS_SELECTOR, "span[data-testid='company-name']")
            company = company_elem.text
            print("Company:", company)
        except:
            company = "N/A"
        
        # Location
        try:
            location_elem = card.find_element(By.CSS_SELECTOR, "div[data-testid='text-location']")
            location_text = location_elem.text
            print("Location:", location_text)
        except:
            location_text = "N/A"
        
            # ==== Get Job Type from right panel ====
        try:
            # Wait for the job details section
            job_details_section = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "jobDetailsSection"))
            )
            # Locate the Job type group
            job_type_group = job_details_section.find_element(By.XPATH, ".//div[@aria-label='Job type']")
            # Get all <li> items
            job_type_lis = job_type_group.find_elements(By.XPATH, ".//ul/li")
            # Extract text from the nested <span> inside each <li>
            job_type_list = []
            for li in job_type_lis:
                try:
                    span = li.find_element(By.XPATH, ".//span")
                    job_type_list.append(span.text)
                except:
                    continue
            job_type = ", ".join(job_type_list) if job_type_list else "N/A"
        except Exception as e:
            print("Error fetching job type from right panel:", e)
            job_type = "N/A"



        # ==== Click to load job description in right pane ====
        try:
            driver.execute_script("arguments[0].scrollIntoView();", title_elem)
            title_elem.click()
            job_desc_elem = WebDriverWait(driver, 5).until(
                EC.presence_of_element_located((By.ID, "jobDescriptionText"))
            )
            job_description = job_desc_elem.text
            print("Job Description length:", len(job_description))
        except Exception as e:
            print("Error fetching description:", e)
            job_description = "N/A"

        # ==== Detect Apply Type and Apply Link ====
        try:
            apply_button = driver.find_element(By.CSS_SELECTOR, "span.indeed-apply-status-not-applied button")
            apply_type = "Apply Now"
            apply_link = apply_button.get_attribute("onclick") or job_link
        except:
            apply_type = "Apply on Company Site"
            apply_link = job_link

        jobs.append({
            "Title": title,
            "Company": company,
            "Location": location_text,
            "Job Type": job_type,
            "Description": job_description,
            "Apply Type": apply_type,
            "Apply Link": apply_link
        })

# ==== Save to CSV ====
df = pd.DataFrame(jobs)
# Drop rows with empty essential columns
df.replace("N/A", pd.NA, inplace=True)
df.dropna(subset=["Title", "Company", "Location", "Job Type", "Description"], inplace=True)
df.to_csv("indeed_jobs.csv", index=False)
print("\n✅ Scraping complete. Saved to indeed_jobs.csv")

driver.quit()



DEBUG: Processing page 1 | URL: https://in.indeed.com/jobs?q=Machine+Learning+Engineer&l=India&from=searchOnDesktopSerp&start=0
DEBUG: Found 16 job cards on page 1

DEBUG: Processing job card #1 on page 1
Title: DevOps Engineer - Technical Lead
Company: Aristocrat
Location: Hybrid work in Noida, Uttar Pradesh
Job Description length: 3833

DEBUG: Processing job card #2 on page 1
Title: Machine Learning Engineer
Company: National Credit Recovery Inc
Location: Remote in India
Job Description length: 2654

DEBUG: Processing job card #3 on page 1
Title: AI/ Machine Learning Engineer
Company: Harbour And Hills
Location: Remote
Error fetching job type from right panel: Message: no such element: Unable to locate element: {"method":"xpath","selector":".//div[@aria-label='Job type']"}
  (Session info: chrome=141.0.7390.122); For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#nosuchelementexception
Stacktrace:
#0 0x62bc61deafea 