In [1]:
pip install --q Selenium

Note: you may need to restart the kernel to use updated packages.


In [2]:
pip install --q python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [3]:
# Loading credentials
from dotenv import load_dotenv
import os

# Load environment variables from the .env file
load_dotenv('credentials.env')

# Get the values of the environment variables
username = os.getenv("user_name")
password = os.getenv("password")

# os.getenv() returns None for a missing variable; stop here with a clear
# message instead of failing later, mid-login, inside the browser session.
if not username or not password:
    raise RuntimeError(
        "LinkedIn credentials not found: define 'user_name' and 'password' "
        "in credentials.env or in the process environment."
    )

In [4]:
# Importing required Libraries
import csv
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from selenium.common.exceptions import NoSuchElementException, TimeoutException
# from undetected_chromedriver import Chrome, ChromeOptions

In [5]:
# Function to scrape the profiles
def scrape_linkedin_profiles(search_query: str, username: str, password: str) -> None:
    """Log in to LinkedIn, run a people search and save each result's profile to CSV.

    The function drives a Chrome session: it signs in, optionally asks for an
    e-mail verification PIN on stdin, searches for ``search_query`` and opens
    every result on the first results page in a new tab to read its name,
    headline, location and connection count.

    Args:
        search_query: Text typed into LinkedIn's search bar.
        username: Value entered into the login form's ``username`` field.
        password: Value entered into the login form's ``password`` field.

    Returns:
        None. Rows are written to ``profile_data.csv`` in the current working
        directory (overwritten on every run) and a summary is printed.

    Notes:
        Any exception raised while driving the browser is caught and printed;
        the summary lines are printed regardless, with however many profiles
        were saved before the failure.
    """
    start_time = time.time()       # To check the time throughout the whole process.

    # Bound before the try block so the cleanup and the summary below never
    # touch an unassigned name when start-up or login fails early.
    driver = None
    count = 0

    try:
        # Set up the Chrome driver
        driver = webdriver.Chrome()

        # Log in to LinkedIn
        driver.get('https://www.linkedin.com/login')

        # Find the username and password fields and enter the login credentials
        username_field = driver.find_element(By.ID, 'username')
        username_field.send_keys(username)

        password_field = driver.find_element(By.ID, 'password')
        password_field.send_keys(password)
        password_field.submit()

        # Handle OTP if required
        try:
            WebDriverWait(driver, 3).until(EC.visibility_of_element_located((By.ID, 'input__email_verification_pin')))
            otp = input("Please enter the OTP: ")
            otp_field = driver.find_element(By.ID, 'input__email_verification_pin')
            otp_field.send_keys(otp)
            otp_field.submit()
        except TimeoutException:
            pass  # Continue without entering OTP if it's not required

        # NOTE: implicitly_wait() does not pause here; it sets the timeout that
        # later find_element() calls (e.g. the search bar lookup) poll for.
        driver.implicitly_wait(3)

        # Navigate to the search page
        driver.get('https://www.linkedin.com/search/results/people/')

        # Enter the search query and submit the search
        search_bar = driver.find_element(By.CSS_SELECTOR, '.search-global-typeahead__input')
        search_bar.send_keys(search_query)
        search_bar.send_keys(Keys.ENTER)

        # Wait for the search results to load
        WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.CSS_SELECTOR, '.reusable-search__result-container')))

        # Get the list of search results
        search_results = driver.find_elements(By.CSS_SELECTOR, '.reusable-search__result-container')

        # Remember the results tab so every iteration can return to it by handle.
        results_window = driver.current_window_handle

        # The context manager closes the CSV file even if scraping aborts midway.
        with open('profile_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow(['Name', 'Headline', 'Location', 'Profile URL', 'Connections'])

            # Scrape data from each profile page
            for result in search_results:
                try:
                    name_element = WebDriverWait(result, 4).until(EC.presence_of_element_located((By.CSS_SELECTOR, 'a.app-aware-link')))

                    # Extract profile URL
                    profile_url = name_element.get_attribute('href')

                    # Ctrl+click the link to open the profile in a new tab, then
                    # wait until that tab really exists before switching to it.
                    # NOTE(review): Ctrl+click is the new-tab gesture on
                    # Windows/Linux; confirm behaviour on other platforms.
                    expected_windows = len(driver.window_handles) + 1
                    ActionChains(driver).key_down(Keys.CONTROL).click(name_element).key_up(Keys.CONTROL).perform()
                    WebDriverWait(driver, 5).until(EC.number_of_windows_to_be(expected_windows))
                    driver.switch_to.window(driver.window_handles[-1])

                    # Wait for the profile page to load
                    WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, 'application-outlet')))

                    # Scrape profile information
                    soup = BeautifulSoup(driver.page_source, 'html.parser')

                    profile_name_element = soup.find('h1', class_='text-heading-xlarge inline t-24 v-align-middle break-words')
                    if profile_name_element is None:
                        # No name heading on the page: used to skip
                        # 'LinkedIn Member' (private) profiles.
                        print("Skipping private profile...")
                        continue
                    profile_name = profile_name_element.get_text().strip()

                    headline_element = soup.find('div', class_='text-body-medium break-words')
                    headline = headline_element.get_text().strip() if headline_element else 'None'

                    location_element = soup.find('span', class_='text-body-small inline t-black--light break-words')
                    location = location_element.get_text().strip() if location_element else 'None'

                    connections_element = soup.find('span', class_='t-bold')
                    connections = connections_element.get_text().strip() if connections_element else 'None'

                    # Save profile information to CSV
                    csv_writer.writerow([profile_name, headline, location, profile_url, connections])
                    count += 1

                except NoSuchElementException as e:
                    print("Element not found:", e)

                except TimeoutException as e:
                    print("Timeout:", e)

                finally:
                    # Close the profile tab only if one was actually opened, so a
                    # failure before the click never closes the results tab itself.
                    if driver.current_window_handle != results_window:
                        driver.close()
                    driver.switch_to.window(results_window)

    except Exception as e:
        print("An error occurred:", e)

    finally:
        # Close the webdriver (skipped when Chrome never started)
        if driver is not None:
            driver.quit()

    # Calculate the time taken
    end_time = time.time()
    time_taken = end_time - start_time

    print(f"{count} profiles scrapped successfully!")
    print(f"Time taken: {time_taken:.2f} seconds")

In [6]:
# Example run: prompt for a search term, then scrape the matching profiles
# using the credentials loaded earlier in the notebook.
search_query = input('Enter your search query: ')
scrape_linkedin_profiles(
    search_query=search_query,
    username=username,
    password=password,
)

Enter your search query: richie
Skipping private profile...
9 profiles scrapped successfully!
Time taken: 215.57 seconds
