In [1]:
import csv
import json
import os
import random
import time
from datetime import datetime

from selenium import webdriver
from selenium.common.exceptions import (
    ElementClickInterceptedException,
    NoSuchElementException,
    StaleElementReferenceException,
    TimeoutException,
)
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

In [2]:
### 1. Function: CSV saver
def save_to_csv(jobs, filename=None):
    """Save the scraped jobs to a CSV file.

    Args:
        jobs (list[dict]): Job records. Records may carry different keys; the
            header is the union of all keys and missing values are left blank.
        filename (str, optional): Output path. When omitted, a timestamped
            ``glassdoor_healthcare_jobs_<YYYYmmdd_HHMMSS>.csv`` is written to
            the current working directory.

    Returns:
        bool: True when the file was written; False when ``jobs`` is empty or
        writing failed (the error is printed, not raised).
    """
    if not jobs:
        print("No jobs to save")
        return False

    if filename is None:
        # Create a filename with timestamp
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        filename = f'glassdoor_healthcare_jobs_{timestamp}.csv'

    try:
        # Union of keys in first-seen order. A dict preserves insertion order,
        # whereas a set would shuffle the columns differently on every run.
        fieldnames = list(dict.fromkeys(key for job in jobs for key in job.keys()))

        with open(filename, 'w', newline='', encoding='utf-8') as file:
            writer = csv.DictWriter(file, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(jobs)

        print(f"Successfully saved {len(jobs)} jobs to {filename}")
        return True
    except Exception as e:
        print(f"Error saving to CSV: {e}")
        return False

### 2. Function: Enhanced Scraper

def enhanced_scraper(max_jobs=None, max_show_more_attempts=60, get_job_details=True):
    """Scrape job listings from a fixed Glassdoor search page.

    Opens Chrome, loads the search URL, then repeatedly collects the visible
    job cards and clicks 'Show More Jobs' until a limit is hit or the button
    can no longer be found.

    Args:
        max_jobs (int, optional): Maximum number of jobs to return. Default is
            None (no limit); 0 is also treated as "no limit".
        max_show_more_attempts (int, optional): Maximum number of times to click
            'Show More Jobs'. Default is 60.
        get_job_details (bool, optional): Whether to open each job and extract
            detailed information. Default is True.

    Returns:
        list[dict]: The collected job records. If a critical error interrupts
        the run, the jobs collected up to that point are returned (an empty
        list when nothing was collected).
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--disable-notifications")
    options.add_argument("--disable-popup-blocking")

    # Add user agent - use a more recent one
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
    options.add_argument(f"user-agent={user_agent}")

    # Add additional options to make the browser less detectable
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_experimental_option("excludeSwitches", ["enable-automation"])
    options.add_experimental_option("useAutomationExtension", False)

    # Bound before the try block so the except/finally clauses can always
    # refer to them, even when Chrome itself fails to start.
    driver = None
    jobs_data = []

    def _limited(results):
        # Batches are appended whole, so the list can overshoot max_jobs.
        return results[:max_jobs] if max_jobs else results

    try:
        driver = webdriver.Chrome(options=options)

        # NOTE(review): this runs against the initial blank document, before
        # driver.get(); the override probably does not survive navigation.
        # Confirm whether it has any effect on the loaded page.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")

        # Load page
        url = "https://www.glassdoor.sg/Job/singapore-analytics-internship-jobs-SRCH_IL.0,9_IC3235921_KO10,30.htm"
        driver.get(url)
        print(f"Loaded initial page: {url}")

        # Wait longer for initial load
        time.sleep(15)

        processed_jobs = set()  # Identifiers of jobs already handled
        show_more_attempts = 0

        # Main loop: harvest visible cards, then ask the page for more
        while True:
            print(f"Jobs found so far: {len(jobs_data)}")

            # Check if we've reached the maximum job limit
            if max_jobs and len(jobs_data) >= max_jobs:
                print(f"Reached configured maximum of {max_jobs} jobs")
                break

            # Check if we've exceeded maximum 'Show More Jobs' clicks
            if show_more_attempts >= max_show_more_attempts:
                print(f"Reached maximum 'Show More Jobs' attempts ({max_show_more_attempts})")
                break

            # Close any modals that might appear
            close_modals(driver)

            # Find all job listings
            job_listings = find_job_listings(driver)
            if not job_listings:
                print("Could not find any job listings")
                break

            print(f"Found {len(job_listings)} total job listings")

            # Process new jobs (ones we haven't seen before)
            if get_job_details:
                # Detailed mode: Click on each new job to get full details
                new_jobs_processed = process_detailed_jobs(driver, job_listings, jobs_data, processed_jobs)
            else:
                # Simple mode: Just extract card information without clicking
                new_jobs_processed = process_basic_jobs(driver, job_listings, jobs_data, processed_jobs)

            if new_jobs_processed == 0:
                print("No new jobs found on this page")
            else:
                print(f"Processed {new_jobs_processed} new jobs")

            # Try to load more jobs
            more_jobs_clicked = click_show_more_jobs(driver)

            if more_jobs_clicked:
                show_more_attempts += 1
                print(f"Clicked 'Show More Jobs' (attempt {show_more_attempts}/{max_show_more_attempts})")
                time.sleep(5)  # Wait for new jobs to load
            else:
                print("Could not find or click 'Show More Jobs' button")
                # No need for pagination attempts since we're focused on "Show More Jobs"
                break

        results = _limited(jobs_data)
        print(f"Completed scraping. Collected {len(results)} jobs.")
        return results

    except Exception as e:
        print(f"Critical error: {e}")
        # Keep whatever was scraped before the failure instead of discarding it.
        results = _limited(jobs_data)
        if results:
            print(f"Returning {len(results)} jobs collected before the error")
        return results

    finally:
        # Only quit a browser that was actually started.
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                print(f"Error closing browser: {e}")



### 3. Function: Job Listing

def find_job_listings(driver):
    """Locate the job cards on the current page.

    CSS selectors are tried first, in order; XPath expressions are only used
    when none of the CSS selectors produced elements. Each attempt waits up
    to 5 seconds.

    Returns:
        list: The elements matched by the first successful locator, or an
        empty list when nothing matched.
    """
    css_candidates = (
        "li[data-test='jobListing']",
        ".react-job-listing",
        ".jobCard",
        "div[data-job-id]",
        "li[class*='jobListItem']",
        "li[data-brandviews*='jsearch-job-listing']",
    )
    xpath_candidates = (
        "//li[contains(@class, 'job') and contains(@class, 'list')]",
        "//li[contains(@data-brandviews, 'job-listing')]",
        "//div[contains(@class, 'job') and contains(@class, 'card')]",
        "//li[.//a[contains(@href, '/job/') or contains(@href, '/Job/')]]",
    )

    def first_hit(strategy, locators):
        # Return (locator, elements) for the first locator that yields elements.
        for locator in locators:
            try:
                elements = WebDriverWait(driver, 5).until(
                    EC.presence_of_all_elements_located((strategy, locator))
                )
            except Exception:
                continue
            if elements and len(elements) > 0:
                return locator, elements
        return None, []

    selector, listings = first_hit(By.CSS_SELECTOR, css_candidates)
    if listings:
        print(f"Found job listings using selector: {selector}")
        return listings

    # Last resort: broader XPath patterns
    xpath, listings = first_hit(By.XPATH, xpath_candidates)
    if listings:
        print(f"Found job listings using XPath: {xpath}")
    return listings
### 4. Function: New Job Processor
def process_basic_jobs(driver, job_listings, jobs_data, processed_jobs):
    """Collect card-level data for jobs that have not been seen before.

    Args:
        driver: Selenium WebDriver used to scroll each card into view.
        job_listings: Iterable of job card elements from the current page.
        jobs_data (list): Result list; accepted job records are appended to it.
        processed_jobs (set): Identifiers of jobs already recorded; updated
            in place for each accepted job.

    Returns:
        int: Number of jobs appended to ``jobs_data`` by this call.
    """
    new_jobs_processed = 0

    for job in job_listings:
        # A failure on one card (e.g. the element going stale after the list
        # re-renders) is reported and skipped, matching process_detailed_jobs,
        # rather than propagating and aborting the whole scrape.
        try:
            # Try to get a unique identifier for this job
            job_id = get_job_identifier(job)

            # Skip if we've already processed this job
            if job_id in processed_jobs:
                continue

            # Process this job
            print(f"Processing new job: {new_jobs_processed + 1}")

            # Scroll to ensure the job is visible
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", job)
            time.sleep(0.5)

            # Extract job details
            job_data = extract_job_data(job)

            # Keep the record when at least one of title/company was resolved
            if job_data and (job_data.get("title") != "Unknown Title" or job_data.get("company") != "Unknown Company"):
                # Add to our results and mark as processed
                jobs_data.append(job_data)
                processed_jobs.add(job_id)
                print(f"  Added job: {job_data.get('title', 'Unknown')} at {job_data.get('company', 'Unknown')}")
                new_jobs_processed += 1

            # Small delay between jobs
            time.sleep(random.uniform(0.3, 0.7))
        except Exception as e:
            print(f"  Error processing job: {e}")
            continue

    return new_jobs_processed
### 5. Function: Process jobs and click each one to get detailed information
def process_detailed_jobs(driver, job_listings, jobs_data, processed_jobs):
    """Process unseen jobs, opening each one to collect detailed information.

    For every card not yet in ``processed_jobs`` the card-level data is read,
    the listing is clicked, the detail pane/page is scraped and the browser is
    returned to the listing view. The function returns early after every fifth
    new job so the caller can re-query the (possibly stale) listing elements.

    Args:
        driver: Selenium WebDriver.
        job_listings: Iterable of job card elements from the current page.
        jobs_data (list): Result list; job records are appended to it.
        processed_jobs (set): Identifiers of jobs already recorded; updated in
            place.

    Returns:
        int: Number of jobs appended to ``jobs_data`` by this call.
    """
    listing_locator = (By.CSS_SELECTOR, "li[data-test='jobListing']")
    new_jobs_processed = 0

    for job in job_listings:
        try:
            # Try to get a unique identifier for this job
            job_id = get_job_identifier(job)

            # Skip if we've already processed this job
            if job_id in processed_jobs:
                continue

            # Process this job
            print(f"Processing new job: {new_jobs_processed + 1}")

            # First extract basic data from the card
            job_data = extract_job_data(job)

            # Discard cards where neither title nor company could be resolved
            if not job_data or (job_data.get("title") == "Unknown Title" and job_data.get("company") == "Unknown Company"):
                continue

            # Open the job details; on failure the card-level data is still kept
            click_success = click_job_listing(driver, job)

            if click_success:
                # NOTE(review): a timeout in the final "listings visible" wait
                # also lands in the handler below, which calls driver.back() a
                # second time - confirm that is acceptable for this site.
                try:
                    # Wait for job details to load
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located((By.CSS_SELECTOR, "div[data-test='jobDescriptionContent'], .jobDescriptionContent"))
                    )

                    # Handle the "Show more" button in job description if present
                    click_show_more_in_description(driver)

                    # Extract detailed job information and merge it into the record
                    detailed_info = extract_detailed_job_info(driver)
                    job_data.update(detailed_info)

                    print(f"  Added detailed info for: {job_data.get('title')}")

                    # Go back to job list: prefer the in-page back button,
                    # fall back to browser history when it is not available.
                    try:
                        back_button = driver.find_element(By.CSS_SELECTOR, "button[data-test='back-to-SRP'], .backButton")
                        back_button.click()
                        time.sleep(2)
                    except Exception:
                        driver.back()
                        time.sleep(3)

                    # Wait for job listings to be visible again
                    WebDriverWait(driver, 10).until(
                        EC.presence_of_element_located(listing_locator)
                    )
                except Exception as e:
                    print(f"  Error extracting detailed info: {e}")
                    driver.back()
                    time.sleep(3)

                    # Wait for listings to be visible again
                    try:
                        WebDriverWait(driver, 10).until(
                            EC.presence_of_element_located(listing_locator)
                        )
                    except Exception:
                        # If we can't get back to listings, reload the current page
                        current_url = driver.current_url
                        driver.get(current_url)
                        time.sleep(5)

            # Add job data to results and mark as processed
            jobs_data.append(job_data)
            processed_jobs.add(job_id)
            new_jobs_processed += 1

            # Small delay between jobs
            time.sleep(random.uniform(1.0, 2.0))

            # Hand control back every 5 jobs so the caller fetches fresh
            # listing elements and we avoid working on stale ones.
            if new_jobs_processed % 5 == 0:
                return new_jobs_processed

        except StaleElementReferenceException:
            # Requires StaleElementReferenceException to be imported from
            # selenium.common.exceptions; without it this clause itself raises
            # NameError whenever any exception reaches it.
            print("  Job element became stale, continuing to next job")
            continue
        except Exception as e:
            print(f"  Error processing job: {e}")
            continue

    return new_jobs_processed
### 6. Function: Job listing detail opener
def click_job_listing(driver, job_element):
    """Click on a job listing to open its details.

    Tries a series of link selectors inside the card; the first one that is
    found and displayed is scrolled into view and clicked via JavaScript. When
    none works, the card element itself is clicked.

    Args:
        driver: Selenium WebDriver.
        job_element: The job card element.

    Returns:
        bool: True when a click was issued, False when every attempt
        (including the fallback click on the card) raised.
    """
    clickable_selectors = [
        "a[data-test='job-link']",
        "a.jobLink",
        "a[href*='/job/']",
        "h2 a",
        "h3 a",
        ".jobTitle a"
    ]

    def _scroll_and_click(target):
        # Centre the target, then click through JavaScript and give the page
        # time to react.
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", target)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", target)
        time.sleep(3)

    try:
        for selector in clickable_selectors:
            try:
                clickable = job_element.find_element(By.CSS_SELECTOR, selector)
                if clickable and clickable.is_displayed():
                    _scroll_and_click(clickable)
                    return True
            except Exception:
                # Selector absent or click failed: try the next one.
                # (Exception, not a bare except, so Ctrl-C / kernel interrupt
                # still stops the run.)
                continue

        # If all selectors failed, try clicking the job element itself
        _scroll_and_click(job_element)
        return True

    except Exception as e:
        print(f"  Error clicking job listing: {e}")
        return False
### 7. Function: Click on 'Show more' button in job description if present
def click_show_more_in_description(driver):
    """Expand a truncated job description when an expander button exists.

    Looks for buttons whose text contains 'Show more' (but not 'jobs'); if
    there are none, falls back to a few CSS selectors. The first button that
    is both displayed and enabled is clicked via JavaScript.

    Returns:
        bool: True when a button was clicked, False when none qualified or an
        error occurred (the error is printed).
    """
    try:
        # Text-based lookup first; CSS alternatives only when it finds nothing
        candidates = driver.find_elements(
            By.XPATH,
            "//button[contains(., 'Show more') and not(contains(., 'jobs'))]"
        ) or driver.find_elements(
            By.CSS_SELECTOR,
            "button.showMore, button[data-test='show-more-content'], button.css-n2dhtq"
        )

        for candidate in candidates:
            if not (candidate.is_displayed() and candidate.is_enabled()):
                continue

            print("  Found 'Show more' button in job description")

            # Bring the button into view before clicking
            driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", candidate)
            time.sleep(1)

            # JavaScript click
            driver.execute_script("arguments[0].click();", candidate)
            print("  Expanded job description")
            time.sleep(2)
            return True

        return False

    except Exception as e:
        print(f"  Error clicking 'Show more' in description: {e}")
        return False

### 8. Function: Extract detailed job information from the job details page
def _text_after_label(driver, labels, default="Not specified"):
    """Return the text of the first following sibling of an element whose text contains one of ``labels``."""
    predicate = " or ".join(f"contains(text(), '{label}')" for label in labels)
    siblings = driver.find_elements(By.XPATH, f"//*[{predicate}]/following-sibling::*")
    if siblings:
        return siblings[0].text.strip() or default
    return default


def _parse_requirements(description):
    """Join the lines found under a 'Requirements:' / 'Qualifications:' heading into one string."""
    section_starts = ("requirements:", "qualifications:")
    section_ends = ("responsibilities:", "about the role:", "about the job:", "what you'll do:")

    collected = []
    in_section = False
    for line in description.split('\n'):
        stripped = line.strip()
        lowered = stripped.lower()

        header = next((h for h in section_starts if lowered.startswith(h)), None)
        if header is not None:
            in_section = True
            # Drop the heading by length so any capitalisation is removed,
            # and keep only text that shares the heading's line.
            remainder = stripped[len(header):].strip()
            if remainder:
                collected.append(remainder)
        elif in_section and lowered.startswith(section_ends):
            in_section = False
        elif in_section and stripped:
            collected.append(stripped)

    return " ".join(collected)


def extract_detailed_job_info(driver):
    """Extract detailed job information from the job details page.

    Args:
        driver: Selenium WebDriver currently showing a job's details.

    Returns:
        dict: Always contains ``description``, ``job_type``, ``experience``,
        ``education``, ``industry`` and ``benefits`` (placeholder strings are
        used when a value is not found), plus ``requirements`` when such a
        section is detected in the description. On an unexpected error the
        dict is ``{"description": "Error extracting details"}``.
    """
    detailed_info = {}

    try:
        # Job Description: first selector that resolves wins
        description_selectors = [
            "div[data-test='jobDescriptionContent']",
            ".jobDescriptionContent",
            "#JobDescriptionContainer",
            "[data-test='job-description']"
        ]

        for selector in description_selectors:
            try:
                description_elem = driver.find_element(By.CSS_SELECTOR, selector)
                detailed_info["description"] = description_elem.text
                break
            except Exception:
                continue

        detailed_info.setdefault("description", "No description available")

        # Job Type. The default is assigned up front so the key exists even
        # when nothing is found (previously it was only set on an exception).
        detailed_info["job_type"] = "Not specified"
        try:
            labelled = _text_after_label(driver, ("Job Type", "Employment Type"), default="")
            if labelled:
                detailed_info["job_type"] = labelled
            else:
                # Alternative approach: scan known containers for a job-type word
                employment_info = driver.find_elements(By.CSS_SELECTOR, ".css-1cz2bp4, .css-1pldt9b, .css-1vg6q84")
                for elem in employment_info:
                    text = elem.text.strip()
                    if any(kind in text for kind in ("Full-time", "Part-time", "Contract", "Permanent")):
                        detailed_info["job_type"] = text
                        break
        except Exception:
            detailed_info["job_type"] = "Not specified"

        # Experience / Education / Industry share the same label-then-sibling layout
        labelled_fields = (
            ("experience", "Experience"),
            ("education", "Education"),
            ("industry", "Industry"),
        )
        for key, label in labelled_fields:
            try:
                detailed_info[key] = _text_after_label(driver, (label,))
            except Exception:
                detailed_info[key] = "Not specified"

        # Benefits
        try:
            benefits_container = driver.find_elements(By.CSS_SELECTOR,
                ".jobDetails-benefits, .css-1jnzwkp, [data-test='benefits']")

            if benefits_container:
                benefit_elements = benefits_container[0].find_elements(By.CSS_SELECTOR, "li, span")
                benefits = [elem.text.strip() for elem in benefit_elements]
                detailed_info["benefits"] = ", ".join(b for b in benefits if b)
            else:
                detailed_info["benefits"] = "Not listed"
        except Exception:
            detailed_info["benefits"] = "Not listed"

        # Requirements section parsed out of the free-text description
        requirements = _parse_requirements(detailed_info["description"])
        if requirements:
            detailed_info["requirements"] = requirements

        return detailed_info

    except Exception as e:
        print(f"  Error extracting detailed job info: {e}")
        return {"description": "Error extracting details"}
### 9. Function: Create a unique identifier for a job element
def get_job_identifier(job_element):
    """Derive an identifier used to recognise a job card that was already handled.

    Resolution order:
      1. the first non-empty attribute among ``id``, ``data-job-id``, ``data-id``;
      2. ``"<title>|<company>"`` when both can be read from the card;
      3. the hash of the first 200 characters of the element's outer HTML.

    If anything raises along the way, a ``random-<5 digits>`` string is
    returned instead.
    """
    try:
        # Attribute-based identifiers, most specific first
        for attribute in ("id", "data-job-id", "data-id"):
            value = job_element.get_attribute(attribute)
            if value:
                return value

        # Fall back to a title/company composite
        title = extract_with_multiple_selectors(job_element, ["a[data-test='job-link']", ".job-title", "h2"])
        company = extract_with_multiple_selectors(job_element, ["div[data-test='employer-name']", ".employer-name"])
        if title and company:
            return f"{title}|{company}"

        # Last resort: fingerprint the start of the element's markup
        markup = job_element.get_attribute("outerHTML")
        return hash(markup[:200])

    except Exception as e:
        # Nothing usable: hand out a throwaway identifier
        return f"random-{random.randint(10000, 99999)}"
### 10. Function: Extract all relevant data from a job listing
def extract_job_data(job):
    """Read the card-level fields of a single job listing.

    Each field is looked up with its own list of candidate selectors and
    receives a placeholder when nothing matches. When neither title nor
    company could be resolved, the card's raw text lines are used instead
    (line 1 -> title, line 2 -> company, line 3 -> location).

    Returns:
        dict | None: Record with ``scrape_date``, ``title``, ``company``,
        ``location``, ``salary``, ``posted_date`` and ``rating``; None when
        extraction raised (the error is printed).
    """
    job_data = {
        "scrape_date": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    }

    # (field name, candidate selectors, placeholder) in output order
    field_specs = (
        ("title", [
            "a[data-test='job-link']",
            ".job-title",
            ".jobTitle",
            "a[href*='/job/']",
            "[data-test='job-title']",
            "h2",
            "h3",
        ], "Unknown Title"),
        ("company", [
            "div[data-test='employer-name']",
            ".employer-name",
            ".companyName",
            "[data-test='company-name']",
            ".css-1vg6q84",
            ".e1n63ojh0",
        ], "Unknown Company"),
        ("location", [
            "div[data-test='location']",
            ".location",
            ".css-56kyx5",
            "[data-test='loc']",
        ], "Unknown Location"),
        ("salary", [
            "div[data-test='detailSalary']",
            ".salary",
            ".salaryEstimate",
            "span[data-test='salary']",
            ".css-16u98uf",
            ".e1wijj240",
        ], "Not Provided"),
        ("posted_date", [
            "div[data-test='job-age']",
            ".job-age",
            ".listingAge",
            "div[data-test='listing-age']",
            ".css-1u0budq",
            ".e1uvbk020",
        ], "Unknown"),
        ("rating", [
            "span[data-test='detailRating']",
            ".rating",
            ".ratingNumber",
            ".css-152xdkl",
            ".e1pr2f4f2",
        ], "Not Rated"),
    )

    try:
        # Raw card text, captured first for the line-based fallback below
        all_text = job.text

        for field, selectors, placeholder in field_specs:
            job_data[field] = extract_with_multiple_selectors(job, selectors) or placeholder

        unresolved = (
            job_data["title"] == "Unknown Title"
            and job_data["company"] == "Unknown Company"
        )
        if unresolved and all_text:
            stripped = all_text.strip()
            if stripped:
                lines = stripped.split('\n')
                if len(lines) >= 2:
                    job_data["title"], job_data["company"] = lines[0], lines[1]
                    if len(lines) > 2:
                        job_data["location"] = lines[2]

        return job_data

    except Exception as e:
        print(f"  Error extracting job data: {e}")
        return None
### 11. Function: Try multiple selectors to extract text from an element
def extract_with_multiple_selectors(element, selectors):
    """Return the first non-empty text found under ``element``.

    The given CSS selectors are tried in order. If none yields text, a set of
    generic XPath patterns is tried; which patterns apply is guessed from
    keywords appearing in the selector strings (title/job, company/employer,
    location/loc, salary, date/age).

    Args:
        element: Selenium element (or driver) to search within.
        selectors (list[str]): CSS selectors, most specific first.

    Returns:
        str | None: Stripped text of the first match, or None when nothing
        produced text.
    """
    def _first_text(strategy, locators):
        # First locator that resolves to an element with non-empty text.
        for locator in locators:
            try:
                text = element.find_element(strategy, locator).text.strip()
            except Exception:
                # Not found / stale: move on. (Exception rather than a bare
                # except, so a kernel interrupt is not swallowed.)
                continue
            if text:
                return text
        return None

    # Try CSS selectors
    text = _first_text(By.CSS_SELECTOR, selectors)
    if text:
        return text

    # If CSS selectors failed, try XPath as fallback
    xpath_title = [".//h2", ".//h3", ".//a[contains(@href, '/job/')]"]
    xpath_company = [".//div[contains(@class, 'company')]", ".//div[contains(@class, 'employer')]"]
    xpath_location = [".//div[contains(@class, 'location')]", ".//*[contains(@class, 'loc')]"]
    xpath_salary = [".//span[contains(text(), '$')]", ".//div[contains(text(), 'SGD')]"]
    # NOTE(review): these match any div whose text contains the letter 'd' or
    # 'h', which looks far broader than "posting age" - confirm intent.
    xpath_date = [".//div[contains(text(), 'd')]", ".//div[contains(text(), 'h')]"]

    # Determine which XPath patterns to try based on selector names
    selector_text = " ".join(selectors).lower()
    wants_date = "date" in selector_text or "age" in selector_text
    # Date selectors such as 'job-age' contain "job". Without this guard the
    # title patterns would run first and a job title could be returned as the
    # posting date.
    wants_title = "title" in selector_text or ("job" in selector_text and not wants_date)

    xpath_to_try = []
    if wants_title:
        xpath_to_try.extend(xpath_title)
    if "company" in selector_text or "employer" in selector_text:
        xpath_to_try.extend(xpath_company)
    if "location" in selector_text or "loc" in selector_text:
        xpath_to_try.extend(xpath_location)
    if "salary" in selector_text:
        xpath_to_try.extend(xpath_salary)
    if wants_date:
        xpath_to_try.extend(xpath_date)

    # Try the relevant XPaths
    return _first_text(By.XPATH, xpath_to_try)

### 12. Function: Try to click the 'Show More Jobs' button
def click_show_more_jobs(driver):
    """Click the button that loads additional job listings, if one is available.

    CSS selectors are tried first; a button qualifies when its text contains
    'show more jobs' or 'load more' and it is displayed and enabled. If no CSS
    selector leads to a click, an XPath lookup on the button's span text is
    used.

    Returns:
        bool: True when a button was clicked, otherwise False.
    """
    def press(button):
        # Centre the button, pause briefly, then click it through JavaScript.
        driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
        time.sleep(1)
        driver.execute_script("arguments[0].click();", button)

    try:
        show_more_selectors = (
            "button[data-test='load-more']",
            "button.button_ButtonMlD2g",
            "button[aria-live='polite']",
            "button[data-test='show-more-jobs']",
        )

        for selector in show_more_selectors:
            try:
                for candidate in driver.find_elements(By.CSS_SELECTOR, selector):
                    label = candidate.text.lower()
                    # Only buttons that actually talk about loading more jobs
                    if "show more jobs" not in label and "load more" not in label:
                        continue
                    if not candidate.is_displayed() or not candidate.is_enabled():
                        continue

                    print(f"Found 'Show More Jobs' button using selector: {selector}")
                    press(candidate)
                    return True
            except Exception:
                continue

        # CSS lookups came up empty: try matching on the button's span text
        try:
            for candidate in driver.find_elements(
                By.XPATH, "//button[.//span[contains(text(), 'Show more jobs')]]"
            ):
                if candidate.is_displayed() and candidate.is_enabled():
                    print("Found 'Show More Jobs' button using XPath")
                    press(candidate)
                    return True
        except Exception:
            pass

        return False

    except Exception as e:
        print(f"Error looking for 'Show More Jobs' button: {e}")
        return False

### 13. Function: Attempts to close any modal dialogs that might be present
def close_modals(driver):
    """Best-effort attempt to dismiss a modal dialog covering the page.

    Strategy, in order:
      1. Look for an overlay element using a few known CSS selectors. If none
         is present, nothing is touched.
      2. Click the first close-button candidate that accepts a JavaScript
         click.
      3. If no button could be clicked, send the ESC key, then hide the
         overlay elements with JavaScript and restore body scrolling.

    Args:
        driver: Selenium WebDriver instance controlling the page.

    Returns:
        bool: False when no overlay element is found or an unexpected error
        occurs. True once a close button was clicked or the fallback ran.
        True does not verify that the modal actually disappeared.
    """
    try:
        # Look for common overlay elements
        overlay_selectors = [".ModalOverlay", ".modal_main", "[role='dialog']"]
        overlay_found = False

        for selector in overlay_selectors:
            try:
                # find_elements returns an empty list instead of raising when
                # nothing matches, so presence is a plain truthiness check.
                if driver.find_elements(By.CSS_SELECTOR, selector):
                    overlay_found = True
                    break
            except Exception:
                # Catch Exception (not a bare except) so KeyboardInterrupt and
                # SystemExit still stop the scraper.
                continue

        if not overlay_found:
            return False

        # Try various close button selectors
        close_selectors = [
            "button[aria-label='Close']",
            ".modal-close-btn",
            ".close",
            "button.dismiss",
            ".modal_closeIcon",
            ".CloseButton",
            "button.e1jbctw80",
            "[alt='Close']"
        ]

        for selector in close_selectors:
            try:
                close_buttons = driver.find_elements(By.CSS_SELECTOR, selector)
            except Exception:
                continue

            for button in close_buttons:
                try:
                    driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", button)
                    # JavaScript click works even when another element
                    # overlaps the button.
                    driver.execute_script("arguments[0].click();", button)
                    time.sleep(1)
                    return True
                except Exception:
                    # This candidate could not be clicked; try the next one.
                    continue

        # No button worked: try the ESC key. Kept in its own try so that a
        # failure here does not prevent the JavaScript last resort below.
        try:
            actions = webdriver.ActionChains(driver)
            actions.send_keys(webdriver.Keys.ESCAPE).perform()
            time.sleep(1)
        except Exception as e:
            print(f"ESC key fallback failed: {e}")

        # JavaScript approach as last resort
        script = """
        // Hide all possible modal/overlay elements
        var overlays = document.querySelectorAll('.ModalOverlay, .modal, .popover, .dialog, [role="dialog"]');
        for (var i = 0; i < overlays.length; i++) {
            overlays[i].style.display = 'none';
        }
        // Re-enable scrolling on body
        document.body.style.overflow = 'auto';
        """
        driver.execute_script(script)
        time.sleep(1)

        return True

    except Exception as e:
        print(f"Error closing modal: {e}")
        return False


### WEBSCRAPER ACTIVATION
if __name__ == "__main__":
    # Script entry point: run the scraper, save the results to a timestamped
    # CSV, and print a short summary (or troubleshooting hints if nothing
    # was scraped).
    print("Starting Enhanced Glassdoor jobs scraper...")

    # Run configuration passed straight through to enhanced_scraper.
    max_jobs = None  # None scrapes every job; an integer caps the count
    max_show_more_attempts = 60  # upper bound on 'Show More Jobs' clicks
    get_job_details = False  # True also extracts detailed job information

    jobs = enhanced_scraper(max_jobs, max_show_more_attempts, get_job_details)

    if not jobs:
        print("No jobs were found. Please check the error messages above.")

        # Troubleshooting hints, emitted as one newline-joined message.
        print("\n".join([
            "\nTroubleshooting steps:",
            "1. Check if Glassdoor is detecting automation - try these solutions:",
            "   - Add more random delays between actions",
            "   - Use a browser profile with existing cookies",
            "   - Try a headful browser mode to manually bypass CAPTCHA",
            "2. Try running with different options:",
            "   - Set get_job_details=False to only scrape basic information",
            "   - Use a VPN to access from a different location",
            "   - Try a different browser (Firefox via geckodriver)",
            "   - Consider using Puppeteer or Playwright instead of Selenium",
        ]))
    else:
        # Persist the results under a timestamped file name.
        csv_file = "glassdoor__jobs_detailed_" + datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"
        save_to_csv(jobs, filename=csv_file)

        print("\nSummary of scraped data:")
        print(f"Total jobs found: {len(jobs)}")

        # Tally postings per company; jobs without a company go to 'Unknown'.
        companies = {}
        for job in jobs:
            company = job.get('company', 'Unknown')
            companies.setdefault(company, 0)
            companies[company] += 1

        print("\nTop companies:")
        # Stable descending sort: ties keep first-seen order.
        sorted_companies = list(companies.items())
        sorted_companies.sort(key=lambda pair: pair[1], reverse=True)
        for company, count in sorted_companies[:5]:
            print(f"  {company}: {count} jobs")

        # Tally postings per job type; a missing type is 'Not specified'.
        job_types = {}
        for job in jobs:
            job_type = job.get('job_type', 'Not specified')
            job_types.setdefault(job_type, 0)
            job_types[job_type] += 1

        # A single bucket means no job-type information worth showing.
        if len(job_types) >= 2:
            print("\nJob types:")
            sorted_job_types = list(job_types.items())
            sorted_job_types.sort(key=lambda pair: pair[1], reverse=True)
            for job_type, count in sorted_job_types:
                print(f"  {job_type}: {count} jobs")

Starting Enhanced Glassdoor jobs scraper...
Loaded initial page: https://www.glassdoor.sg/Job/singapore-analytics-internship-jobs-SRCH_IL.0,9_IC3235921_KO10,30.htm
Jobs found so far: 0
Found job listings using selector: li[data-test='jobListing']
Found 30 total job listings
Processing new job: 1
  Added job: Social Media Manager Intern at The Plexxie Global Company Pte. Ltd.
Processing new job: 2
  Added job: Senior Executive Assistant at PERSOL
3.5
Processing new job: 3
  Added job: SATS Internship at SATS
3.3
Processing new job: 4
  Added job: Aon Internship Programme 2026, Singapore - Innovation & Analytics at Aon Corporation
3.8
Processing new job: 5
  Added job: Summer Internship 2026 - External Fund Management at Income Insurance Limited
3.5
Processing new job: 6
  Added job: Data Analyst (Internship) at Infosys Singapore & Australia
3.6
Processing new job: 7
  Added job: Data Analyst (Internship) at ShopBack
3.5
Processing new job: 8
  Added job: Internship: Global Wholesale Ban