In [5]:
import os
from typing import Dict, List, Optional

import requests
from bs4 import BeautifulSoup


class LinkedinCrawler:
    """Log in to LinkedIn with a ``requests`` session and scrape job cards.

    Constructing an instance performs network I/O: it fetches ``HOMEPAGE_URL``
    to read the ``loginCsrfParam`` form token and then posts the credentials
    to ``LOGIN_URL``. The login response is kept on ``_login`` but is not
    validated here, so a failed login does not raise.
    """

    HOMEPAGE_URL = "https://www.linkedin.com"
    LOGIN_URL = "https://www.linkedin.com/uas/login-submit"
    # Seconds to wait for each HTTP request; requests never times out by default.
    REQUEST_TIMEOUT = 30

    def __init__(
        self,
        login_email: str,
        login_password: str,
        jobs_search_url: Optional[str] = None,
    ):
        """Create the HTTP session and submit the login form.

        Args:
            login_email: Account e-mail, sent as ``session_key``.
            login_password: Account password, sent as ``session_password``.
            jobs_search_url: Default search URL used by
                ``get_jobs_list_from_search`` when it is called without one.
                Optional so the URL can instead be supplied per call.

        Raises:
            RuntimeError: If the homepage has no ``loginCsrfParam`` input, so
                the login form cannot be submitted.
        """
        self.login_email = login_email
        self.login_password = login_password
        self.jobs_search_url = jobs_search_url
        self._client = requests.Session()
        self._html = self._client.get(
            self.HOMEPAGE_URL, timeout=self.REQUEST_TIMEOUT
        ).content
        self._soup = BeautifulSoup(self._html, "html.parser")
        # find() returns None when the input is absent; fail with a clear
        # message instead of an AttributeError on None.
        csrf_input = self._soup.find("input", {"name": "loginCsrfParam"})
        if csrf_input is None:
            raise RuntimeError(
                "Could not find the 'loginCsrfParam' input on the LinkedIn homepage"
            )
        self._csrf = csrf_input.get("value")
        self._login_information = {
            "session_key": self.login_email,
            "session_password": self.login_password,
            "loginCsrfParam": self._csrf,
            "trk": "guest_homepage-basic_sign-in-submit",
        }
        self._login = self._client.post(
            self.LOGIN_URL,
            data=self._login_information,
            timeout=self.REQUEST_TIMEOUT,
        )

    def get_jobs_list_from_search(self, jobs_search_url: Optional[str] = None) -> List[Dict]:
        """Fetch a jobs search page and return the job cards found on it.

        Args:
            jobs_search_url: URL to fetch. When omitted, the URL given to the
                constructor is used.

        Returns:
            One ``{"title": ..., "link": ...}`` dict per ``div.base-card``
            element; an empty list when the page contains none.

        Raises:
            ValueError: If no URL was given here or to the constructor.
        """
        search_url = jobs_search_url or self.jobs_search_url
        if not search_url:
            raise ValueError(
                "No jobs search URL: pass one to get_jobs_list_from_search() "
                "or to LinkedinCrawler()"
            )
        response = self._client.get(search_url, timeout=self.REQUEST_TIMEOUT)
        soup = BeautifulSoup(response.content, "html.parser")
        return self._extract_jobs(soup)

    @staticmethod
    def _extract_jobs(soup) -> List[Dict]:
        """Turn every ``div.base-card`` in a parsed page into a title/link dict."""
        jobs_list = []
        for card in soup.find_all("div", class_="base-card"):
            title_element = card.find("h3", class_="base-search-card__title")
            link_element = card.find("a", class_="base-card__full-link")
            # Placeholders keep both keys present even for incomplete cards.
            title = title_element.text.strip() if title_element else "No Job Title"
            link = link_element.get("href") if link_element else "No Job Link"
            jobs_list.append({"title": title, "link": link})
        return jobs_list





In [30]:
if __name__ == "__main__":
    # Kept as a local import, as in the original cell: only this driver uses it.
    from dotenv import load_dotenv

    # Credentials come from environment variables (loaded from a .env file by
    # python-dotenv) instead of being hard-coded in the notebook.
    load_dotenv()

    linkedin_email = os.environ["linkedin_email"]
    linkedin_password = os.environ["linkedin_password"]
    # NOTE(review): f_TPR=r86400 presumably limits results to the last 24 hours
    # (86400 seconds), matching the "daily" name -- confirm against LinkedIn.
    jobs_search_daily = "https://www.linkedin.com/jobs/search/?currentJobId=3908335123&f_TPR=r86400&f_WT=2&geoId=92000000&keywords=(%22Data%20Engineer%22%20OR%20%22Senior%20Data%20Engineer%22%20OR%20%22AWS%20Data%20Engineer%22)&location=Worldwide&origin=JOB_SEARCH_PAGE_JOB_FILTER&refresh=true"

    # The search URL is a constructor argument of LinkedinCrawler and
    # get_jobs_list_from_search() reads it from the instance, so pass it here;
    # the previous two-argument construction raised TypeError on a fresh kernel.
    linkedin_crawler = LinkedinCrawler(linkedin_email, linkedin_password, jobs_search_daily)
    jobs = linkedin_crawler.get_jobs_list_from_search()
    print(f"{len(jobs)}")
    print(jobs)

0
[]


In [29]:
# Re-check how many job dicts the crawler returned (the recorded run shows 0).
len(jobs)

0

In [8]:
# Raw response: fetch the search page again through the crawler's own session
# (the private `_client` attribute) so the unparsed HTML can be inspected.
response = linkedin_crawler._client.get(jobs_search_daily)

In [20]:
# Parse the raw response and collect every <div> whose class is
# 'scaffold-layout__list-detail-inner'.
# NOTE(review): despite its name, `number_of_jobs` holds the list of matching
# elements returned by find_all(), not a count.
soup = BeautifulSoup(response.content, 'html.parser')
number_of_jobs = soup.find_all('div', class_='scaffold-layout__list-detail-inner')

In [19]:
# Show the matched elements; the recorded run printed an empty list.
print(number_of_jobs)

[]


In [23]:
# Export the raw response body to a file for offline inspection.
# response.content is bytes, so write it unchanged in binary mode: str() on
# bytes yields the "b'...'" repr with escape sequences, not the actual HTML.
with open('jobs.html', 'wb') as file:
    file.write(response.content)