In [4]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime, timedelta

In [11]:
def _parse_salary(salary_raw):
    """Convert a scraped salary string to a numeric amount, or None.

    Forms are checked in this order:
      * contains "сая"         -> remaining number multiplied by 1,000,000
      * contains "тохиролцоно" -> None (no numeric amount given)
      * contains "₮"           -> number with thousands commas removed
      * digits only            -> that number
    Any other text, or text that float() cannot parse (ranges, decimal
    commas, stray words), yields None instead of raising.
    """
    try:
        if "сая" in salary_raw:
            return float(salary_raw.replace("сая", "").replace("₮", "").strip()) * 1_000_000
        if "тохиролцоно" in salary_raw:
            return None
        if "₮" in salary_raw:
            return float(salary_raw.replace("₮", "").strip().replace(",", ""))
        if salary_raw.isdigit():
            return float(salary_raw)
    except ValueError:
        # One malformed price must not abort the whole scrape.
        return None
    return None


def scrape_page(url, min_salary=999, timeout=30):
    """Scrape one listing page and return its job ads as a list of dicts.

    Args:
        url: Address of the listing page to download.
        min_salary: Ads whose parsed salary is missing or below this value
            are dropped. Defaults to 999, the original hard-coded cutoff.
        timeout: Seconds passed to ``requests.get`` so a stalled connection
            cannot hang the notebook forever.

    Returns:
        A list of dicts with the keys 'Job Title', 'Salary', 'Company',
        'Date', 'Location' and 'Category'. The list is empty when the page
        has no job blocks or every ad was filtered out.

    Errors raised by ``requests.get`` are not caught here and propagate to
    the caller.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    job_announcements = soup.find_all('div', class_='list-announcement-block _job')
    if not job_announcements:
        return []

    # Sample the clock once so "today" and "yesterday" stay consistent
    # even if the scrape runs across midnight.
    now = datetime.today()
    today_date = now.strftime('%Y-%m-%d')
    yesterday_date = (now - timedelta(days=1)).strftime('%Y-%m-%d')

    job_data = []
    for job in job_announcements:
        try:
            job_title = job.find('a', class_='announcement-block__title').get_text(strip=True)
            price_tag = job.find('div', class_='announcement-block__price')
            salary_raw = price_tag.get_text(strip=True) if price_tag else 'Not Specified'
            company_raw = job.find('span', class_='announcement-block__company-name').get_text(strip=True)
            date_raw = job.find('div', class_='announcement-block__date').get_text(strip=True)
            category_raw = job.find('div', class_='announcement-block__breadcrumbs').get_text(' » ', strip=True)
        except AttributeError:
            # A required element is missing (find() returned None): skip this ad.
            continue

        # Drop ads with no usable salary or one below the cutoff.
        salary_numeric = _parse_salary(salary_raw)
        if salary_numeric is None or salary_numeric < min_salary:
            continue

        if "Өнөөдөр" in date_raw:
            date = today_date
        elif "Өчигдөр" in date_raw:
            date = yesterday_date
        else:
            # NOTE(review): for two-part text "<date>, <place>" index 1 is the
            # same piece used for Location below; index 0 may be intended.
            # Behaviour kept as-is -- confirm against the real page markup.
            date = date_raw.split(',')[1].strip() if ',' in date_raw else 'Not Specified'

        location = date_raw.split(',')[-1].strip() if ',' in date_raw else 'Not Specified'

        category = category_raw.split(' » » »')[0].strip() if ' » » »' in category_raw else category_raw

        # Keep the name only when it carries a company marker; otherwise
        # use the "хувь хүн" placeholder.
        company = company_raw if "ХХК" in company_raw or "LLC" in company_raw else "хувь хүн"

        job_data.append({
            'Job Title': job_title,
            'Salary': f"{int(salary_numeric):,} ₮",
            'Company': company,
            'Date': date,
            'Location': location,
            'Category': category
        })

    return job_data

In [12]:
def pages(base_url, num_pages):
    """Scrape listing pages 1..num_pages and return all ads in one list.

    Each page address is built as ``{base_url}?page={n}`` and passed to
    ``scrape_page``; a progress line is printed before every request.
    The per-page results are concatenated in page order.
    """
    collected = []
    page_number = 1
    while page_number <= num_pages:
        print(f"Page {page_number}...")
        page_url = f"{base_url}?page={page_number}"
        collected += scrape_page(page_url)
        page_number += 1
    return collected

In [13]:
# Scrape the job listings, save them to CSV and preview the first rows.
base_url = "https://www.unegui.mn/azhild-avna/"
num_pages = 100
# Naming the columns explicitly fixes their order and guarantees they exist
# even when nothing was scraped (otherwise df['Salary'] raises KeyError).
job_columns = ['Job Title', 'Salary', 'Company', 'Date', 'Location', 'Category']
all_jobs = pages(base_url, num_pages)
df = pd.DataFrame(all_jobs, columns=job_columns)
df = df[df['Salary'].notna()]
# utf-8-sig writes a BOM at the start of the file.
df.to_csv('job_listings1.csv', index=False, encoding='utf-8-sig')
print("DONE.")
df.head()

Page 1...
Page 2...
Page 3...
Page 4...
Page 5...
Page 6...
Page 7...
Page 8...
Page 9...
Page 10...
Page 11...
Page 12...
Page 13...
Page 14...
Page 15...
Page 16...
Page 17...
Page 18...
Page 19...
Page 20...
Page 21...
Page 22...
Page 23...
Page 24...
Page 25...
Page 26...
Page 27...
Page 28...
Page 29...
Page 30...
Page 31...
Page 32...
Page 33...
Page 34...
Page 35...
Page 36...
Page 37...
Page 38...
Page 39...
Page 40...
Page 41...
Page 42...
Page 43...
Page 44...
Page 45...
Page 46...
Page 47...
Page 48...
Page 49...
Page 50...
Page 51...
Page 52...
Page 53...
Page 54...
Page 55...
Page 56...
Page 57...
Page 58...
Page 59...
Page 60...
Page 61...
Page 62...
Page 63...
Page 64...
Page 65...
Page 66...
Page 67...
Page 68...
Page 69...
Page 70...
Page 71...
Page 72...
Page 73...
Page 74...
Page 75...
Page 76...
Page 77...
Page 78...
Page 79...
Page 80...
Page 81...
Page 82...
Page 83...
Page 84...
Page 85...
Page 86...
Page 87...
Page 88...
Page 89...
Page 90...
Page 91...
Page 92.

Unnamed: 0,Job Title,Salary,Company,Date,Location,Category
0,24/48 ажиллах ресепшин,"112,500 ₮",хувь хүн,2024-11-19,Алтай хотхон,"Аялал жуулчлал, зочид буудал"
1,Ресторанд үйлчлэгч,"1,500,000 ₮",хувь хүн,2024-11-19,Элчин сайдын гудамж,"Pесторан, кафе, паб"
2,Агент,"3,000,000 ₮",хувь хүн,2024-11-19,19-р хороолол,"Борлуулалт, худалдаа"
3,Барилгын туслах ажилтан,"100,000 ₮",хувь хүн,2024-11-19,Хороо 7,"Барилга, дэд бүтэц"
4,Зочид буудалд үйлчлэгч,"1,500,000 ₮",хувь хүн,2024-11-19,Хороо 4,"Аялал жуулчлал, зочид буудал"
