# Selenium Basic

## Downloading and Setting Up ChromeDriver
### General Steps
- First, you need to download ChromeDriver and set its path in the system environment variables so that programs like Selenium can use it.
- Download link for ChromeDriver: https://googlechromelabs.github.io/chrome-for-testing/
- Download the stable version of ChromeDriver.
#### For Windows
- Download and unzip ChromeDriver.
- Follow these steps:
  1. Open the **System Properties** dialog (search for "Edit the system environment variables" in the Start menu).
  2. Click on **Environment Variables**.
  3. Double-click on **Path**.
  4. Click **New**.
  5. Enter the ChromeDriver folder path (e.g., `C:\path\to\chromedriver`) and save.

#### For macOS
- Download and unzip ChromeDriver.
- Follow these steps:
  1. Open the terminal.
  2. Run the following command to add the path to the environment variable:
     ```bash
     export PATH=$PATH:/path/to/chromedriver
     echo 'export PATH=$PATH:/path/to/chromedriver' >> ~/.zshrc
     source ~/.zshrc
     ```

#### For Linux
- Download and unzip ChromeDriver.
- Follow these steps:
  1. Open the terminal.
  2. Run the following command to add the path to the environment variable:
     ```bash
     export PATH=$PATH:/path/to/chromedriver
     echo 'export PATH=$PATH:/path/to/chromedriver' >> ~/.bashrc
     source ~/.bashrc
     ```

### Important Notes
- After setting the path, restart the terminal or system to apply changes.
- To verify, run `chromedriver --version` in the terminal (or Command Prompt on Windows) to check the version.

## Import essential libraries

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains

## Creating Driver and Opening Browser

In [2]:
# Build the Chrome launch configuration that the driver is created with.
options = Options()

# for testing: 'detach' is set so the browser window can outlive the script
# (NOTE(review): confirm the exact semantics in the ChromeDriver capability docs).
options.add_experimental_option('detach', True)

# for scraping
# options.add_argument("--headless=new")

# arguments -- registered in a fixed order; '--start-maximized' belongs to
# the testing setup above, the rest are general-purpose flags.
launch_flags = (
    '--start-maximized',
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--disable-gpu",
)
for launch_flag in launch_flags:
    options.add_argument(launch_flag)
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("--disable-client-side-phishing-detection")
# options.add_argument("--enable-automation")
# options.add_argument("--disable-extensions")
# options.add_argument("--disable-infobars")
# options.add_argument("--disable-popup-blocking")
# options.add_argument("--disable-features=TranslateUI,BlinkGenPropertyTrees")
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_argument("--disable-background-networking")
# options.add_argument("--disable-component-update")
# options.add_argument("--disable-default-apps")
# options.add_argument("--disable-sync")
# options.add_argument("--metrics-recording-only")
# options.add_argument("--mute-audio")
# options.add_argument("--no-first-run")
# options.add_argument("--safebrowsing-disable-auto-update")

# Remove the "enable-automation" switch from Chrome's launch switches and turn
# off the automation extension.
# NOTE(review): presumably meant to make the session look less like an
# automated browser -- confirm the effect on the Chrome version in use.
options.add_experimental_option("excludeSwitches", ["enable-automation"])
options.add_experimental_option("useAutomationExtension", False)

# Default content-setting values for the Chrome profile. Only "images" is
# active; the commented-out keys are kept as a menu of other settings.
# NOTE(review): the value 2 presumably means "block" (1 = allow) -- confirm
# against Chrome's content-settings documentation.
prefs = {
    "profile.default_content_setting_values": {
        "images": 2,
        # "cookies": 2,
        # "plugins": 2,
        # "popups": 2,
        # "geolocation": 2,
        # "notifications": 2,
        # "auto_select_certificate": 2,
        # "fullscreen": 2,
        # "mouselock": 2,
        # "mixed_script": 2,
        # "media_stream": 2,
        # "media_stream_mic": 2,
        # "media_stream_camera": 2,
        # "protocol_handlers": 2,
        # "ppapi_broker": 2,
        # "automatic_downloads": 2,
        # "midi_sysex": 2,
        # "push_messaging": 2,
        # "ssl_cert_decisions": 2,
        # "metro_switch_to_desktop": 2,
        # "protected_media_identifier": 2,
        # "app_banner": 2,
        # "site_engagement": 2,
        # "durable_storage": 2
    }
}
# NOTE: `prefs` is only defined above; it has no effect on the browser until
# the following line is uncommented.
# options.add_experimental_option("prefs", prefs)

# Start a Chrome session using the options configured so far.
driver = webdriver.Chrome(options=options)

In [3]:
# Navigate the browser to the IMDb home page.
url = "https://www.imdb.com/"
driver.get(url)

## CSS Selectors and XPath

### Select elements using tag name

#### First element

In [7]:
# [class='...'] compares the whole class attribute, so it only matches elements
# whose class string is exactly this value; find_element returns the first one.
poster_card_css = "div[class='ipc-poster-card ipc-poster-card--baseAlt ipc-poster-card--media-radius ipc-poster-card--dynamic-width fan-picks-title ipc-sub-grid-item ipc-sub-grid-item--span-2']"
item = driver.find_element(By.CSS_SELECTOR, poster_card_css)
print(item.text)

7.9
F1: The Movie
Watchlist
Trailer


In [8]:
# XPath equivalent of the CSS lookup: @class='...' is an exact comparison
# against the full class attribute.
poster_card_xpath = "//div[@class='ipc-poster-card ipc-poster-card--baseAlt ipc-poster-card--media-radius ipc-poster-card--dynamic-width fan-picks-title ipc-sub-grid-item ipc-sub-grid-item--span-2']"
item = driver.find_element(By.XPATH, poster_card_xpath)
print(item.text)

7.9
F1: The Movie
Watchlist
Trailer


#### All elements

In [10]:
# find_elements (plural) collects every match into a list instead of
# stopping at the first one.
fan_pick_css = "div.fan-picks-title"
items = driver.find_elements(By.CSS_SELECTOR, fan_pick_css)
len(items)

30

### Select elements using class name

In [11]:
# By.CLASS_NAME takes one bare class name (no leading dot).
fan_pick_class = "fan-picks-title"
item = driver.find_element(By.CLASS_NAME, fan_pick_class)
print(item.text)

7.9
F1: The Movie
Watchlist
Trailer


In [None]:
# Locate the header link by its full class attribute (the class list includes
# "imdb-header__signin-text") and click it.
header_link_xpath = '//a[@class="ipc-btn ipc-btn--single-padding ipc-btn--center-align-content ipc-btn--default-height ipc-btn--core-baseAlt ipc-btn--theme-baseAlt ipc-btn--button-radius imdb-header__signin-text"]'
menu = driver.find_element(By.XPATH, header_link_xpath)
menu.click()

### Select element using ID

In [None]:
# Select the <label> by its id attribute and click it.
# CSS alternative: driver.find_element(By.CSS_SELECTOR, "label#imdbHeader-navDrawerOpen")
nav_drawer_xpath = "//label[@id='imdbHeader-navDrawerOpen']"
item = driver.find_element(By.XPATH, nav_drawer_xpath)
item.click()

### Select element using attributes

In [14]:
# Any attribute can be used for matching, here aria-label.
# XPath alternative: driver.find_element(By.XPATH, '//a[@aria-label="View title page for 2. F1: The Movie"]')
title_link_css = 'a[aria-label="View title page for 2. F1: The Movie"]'
item = driver.find_element(By.CSS_SELECTOR, title_link_css)
item.get_attribute('href')


'https://www.imdb.com/title/tt16311594/?ref_=hm_tpten_i_2'

### Select element using the text inside of it

In [17]:
# Matching on an element's text is done with XPath's text(); CSS selectors
# have no equivalent for this.
title_text_xpath = "//span[text()='1. Jurassic World: Rebirth']"
item = driver.find_element(By.XPATH, title_text_xpath)
item.tag_name

'span'

### Select nested elements

In [18]:
# The trailing "/a" selects an <a> that is a direct child of the matched <div>.
poster_link_xpath = "//div[@class='ipc-poster ipc-poster--baseAlt ipc-poster--media-radius ipc-poster--dynamic-width ipc-poster-card__poster ipc-sub-grid-item ipc-sub-grid-item--span-2']/a"
tag_a = driver.find_element(By.XPATH, poster_link_xpath)
tag_a.get_attribute('href')

'https://www.imdb.com/title/tt31036941/?ref_=hm_tpten_i_1'

In [19]:
# In CSS, a space between selectors selects any <a> nested inside the matched
# <div> (a descendant at any depth, not only a direct child).
poster_link_css = "div[class='ipc-poster ipc-poster--baseAlt ipc-poster--media-radius ipc-poster--dynamic-width ipc-poster-card__poster ipc-sub-grid-item ipc-sub-grid-item--span-2'] a"
tag_a = driver.find_element(By.CSS_SELECTOR, poster_link_css)
tag_a.get_attribute('href')

'https://www.imdb.com/title/tt31036941/?ref_=hm_tpten_i_1'

### After selecting the element:

#### Get the text inside of the element

In [21]:
# Collect every title <span> nested in a title link, then print the visible
# text of the first ten matches.
title_span_css = 'a[class="ipc-poster-card__title ipc-poster-card__title--clamp-2 ipc-poster-card__title--clickable"] span'
item = driver.find_elements(By.CSS_SELECTOR, title_span_css)
first_ten = item[:10]
for it in first_ten:
    print(it.text)

1. Jurassic World: Rebirth
2. F1: The Movie
3. Squid Game
4. Sinners
5. Heads of State
6. The Bear
7. The Old Guard 2
8. The Sandman
9. Ironheart
10. 28 Years Later


#### Get values of the attributes of the element

In [24]:
# get_attribute reads the value of a named attribute from the element,
# here the class of the first matching <img>.
image_css = 'img[class="ipc-image"]'
item = driver.find_element(By.CSS_SELECTOR, image_css)
img_class = item.get_attribute('class')
img_class

'ipc-image'

## Just XPath

### Navigation (siblings - parents)

#### Find parent of an element

In [25]:
# Searching from an element with "./.." steps up one level to its parent.
title_span_xpath = '//span[text()="F1: The Movie"]'
parent_xpath = "./.."
item = driver.find_element(By.XPATH, title_span_xpath)
parent = item.find_element(By.XPATH, parent_xpath)
parent.tag_name

'a'

#### Find sibling of an element

In [44]:
# NOTE(review): print_tb is not used in this cell; it looks like an
# accidental auto-import -- confirm before removing.
from traceback import print_tb

# Start at the title <span> and step up to its parent.
title_span_xpath = '//span[text()="F1: The Movie"]'
item = driver.find_element(By.XPATH, title_span_xpath)
parent = item.find_element(By.XPATH, "./..")

# following-sibling::div[1] is the nearest <div> sibling after the parent.
next_div_xpath = "./following-sibling::div[1]"
following_sibling = parent.find_element(By.XPATH, next_div_xpath)
print('tag_name:', following_sibling.tag_name, "\ntext in tag:", following_sibling.text)

# From that <div>, preceding-sibling::a looks back at the <a> siblings before it.
previous_link_xpath = "./preceding-sibling::a"
preceding_sibling = following_sibling.find_element(By.XPATH, previous_link_xpath)
print('tag_name:', preceding_sibling.tag_name, "\ntext in tag:", preceding_sibling.text)

tag_name: div 
text in tag: Watchlist
Trailer
tag_name: a 
text in tag: F1: The Movie


## Action Chains

In [None]:
from selenium.webdriver.common.action_chains import ActionChains
import time
from random import uniform

# Move the pointer to the first text input, click it, then type a word one
# character at a time with a random pause of up to half a second between keys.
element = driver.find_element(By.CSS_SELECTOR, 'input[type="text"]')
action = ActionChains(driver)

# Each step is queued on the chain and then executed by perform().
action.move_to_element(element)
action.perform()
action.click(element)
action.perform()

word = 'parent'
for char in word:
    action.send_keys(char)
    action.perform()
    time.sleep(uniform(0, 0.5))

## Scroll

In [None]:
import time  # local import so this cell also runs without the Action Chains cell

# Script that reports the current total height of the page.
# Fix: the original read 'document.body,scorllHeight' (comma instead of dot,
# and a misspelled property), which fails on an undefined identifier
# instead of returning the page height.
page_height_js = 'return document.body.scrollHeight'

# Keep jumping to the bottom until the page height stops growing, i.e. until
# no further content is appended after a scroll.
old_height = 0
new_height = driver.execute_script(page_height_js)

while new_height != old_height:
    driver.execute_script('window.scroll(0, document.body.scrollHeight);')
    time.sleep(2)  # pause so newly requested content can load before re-measuring
    old_height = new_height
    new_height = driver.execute_script(page_height_js)


## Wait

In [None]:
# Poll for up to 10 seconds until the link is visible; until() returns the
# element once the condition holds.
gladiator_locator = (By.CSS_SELECTOR, 'a[aria-label="View title page for Gladiator"]')
wait = WebDriverWait(driver, 10)
link_is_visible = EC.visibility_of_element_located(gladiator_locator)
img = wait.until(link_is_visible)
img.get_attribute('href')

## Download Images

In [None]:
import requests

# Find the image by its alt text and read the URL it is loaded from.
item = driver.find_element(By.CSS_SELECTOR, 'img[alt="28 Weeks Later"]')
img = item.get_attribute('src')
if not img:
    # Without a src there is nothing to download; fail with a clear message
    # instead of passing None to requests.
    raise ValueError('The selected <img> element has no src attribute.')

# A timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status stops an HTTP error page from being saved as the image.
response = requests.get(img, timeout=30)
response.raise_for_status()

# Write the raw bytes; binary mode leaves the image data untouched.
with open('images.jpg', 'wb') as imagefile:
    imagefile.write(response.content)